forked from KolibriOS/kolibrios
3a9b8fb8f9
git-svn-id: svn://kolibrios.org@359 a494cfbc-eb01-0410-851d-a64ba20cac60
398 lines
12 KiB
NASM
398 lines
12 KiB
NASM
;*****************************************************************************
|
|
;*
|
|
;* Open Watcom Project
|
|
;*
|
|
;* Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
|
|
;*
|
|
;* ========================================================================
|
|
;*
|
|
;* This file contains Original Code and/or Modifications of Original
|
|
;* Code as defined in and that are subject to the Sybase Open Watcom
|
|
;* Public License version 1.0 (the 'License'). You may not use this file
|
|
;* except in compliance with the License. BY USING THIS FILE YOU AGREE TO
|
|
;* ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
|
|
;* provided with the Original Code and Modifications, and is also
|
|
;* available at www.sybase.com/developer/opensource.
|
|
;*
|
|
;* The Original Code and all software distributed under the License are
|
|
;* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
|
|
;* EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
|
|
;* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
|
|
;* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
|
|
;* NON-INFRINGEMENT. Please see the License for the specific language
|
|
;* governing rights and limitations under the License.
|
|
;*
|
|
;* ========================================================================
|
|
;*
|
|
;* Description: Software replacement for the x87 FPATAN instruction
|
|
;* (__fpatan_chk); its divisions are routed through __fdiv_fpr.
|
|
;*
|
|
;*****************************************************************************
|
|
|
|
|
|
; static char sccs_id[] = "@(#)fpatan32.asm 1.7 12/21/94 08:33:45";
|
|
;
|
|
; This code is being published by Intel to users of the Pentium(tm)
|
|
; processor. Recipients are authorized to copy, modify, compile, use and
|
|
; distribute the code.
|
|
;
|
|
; Intel makes no warranty of any kind with regard to this code, including
|
|
; but not limited to, implied warranties or merchantability and fitness for
|
|
; a particular purpose. Intel assumes no responsibility for any errors that
|
|
; may appear in this code.
|
|
;
|
|
; No patent licenses are granted, express or implied.
|
|
;
|
|
;
|
|
; Shared macro definitions. The `defpe` macro used for the entry point
; further down is presumably defined in this include (not visible here).
include mdef.inc
|
|
|
|
; Enable the 80386 instruction set and the 80387 coprocessor instruction set.
.386
|
|
.387
|
|
|
|
|
|
; The segments are declared empty here, before any of them is filled in.
; _TEXT is re-opened later for the code; CONST and CONST2 stay empty in this
; file and are only named in the DGROUP definition.
_TEXT SEGMENT PARA PUBLIC USE32 'CODE'
|
|
_TEXT ENDS
|
|
|
|
CONST SEGMENT DWORD PUBLIC USE32 'DATA'
|
|
CONST ENDS
|
|
|
|
CONST2 SEGMENT DWORD PUBLIC USE32 'DATA'
|
|
CONST2 ENDS
|
|
|
|
; DATA32: stack-frame offsets and read-only constants used by __fpatan_chk.
DATA32 SEGMENT DWORD PUBLIC USE32 'DATA'
|
|
|
|
|
|
; Offsets (relative to ESP after `sub esp, STACK_SIZE`) of the routine's
; local scratch area:
;   Y        - 10-byte extended-precision copy of the Y operand
;   X        - 10-byte extended-precision copy of the X operand
;   PREV_CW  - caller's x87 control word (stored by fnstcw)
;   PATCH_CW - modified control word loaded while the routine computes
;   SPILL    - 32-bit integer slot used to move k between the FPU and ECX
Y EQU 0
|
|
X EQU 12
|
|
PREV_CW EQU 24
|
|
PATCH_CW EQU 28
|
|
SPILL EQU 32
|
|
; Total size in bytes of the scratch area.
STACK_SIZE EQU 36
|
|
|
|
|
|
; 1.0 as a 64-bit double (low dword first); loaded with `fld qword ptr`.
pos_1 DD 00000000H
|
|
DD 3ff00000H
|
|
|
|
; -1.0 as a 64-bit double. Not referenced by the code visible in this file.
neg_1 DD 00000000H
|
|
DD 0bff00000H
|
|
|
|
|
|
; Jump table for the final quadrant fix-up, indexed by EDX where
; EDX = sflag*4 + Sx*2 + Sy (sflag = operands were swapped, Sx / Sy = sign
; bits of X / Y).
dispatch_table DD offset label0
|
|
DD offset label1
|
|
DD offset label2
|
|
DD offset label3
|
|
DD offset label4
|
|
DD offset label5
|
|
DD offset label6
|
|
DD offset label7
|
|
;end dispatch table
|
|
|
|
; pi as an 80-bit extended value (10 bytes): 64-bit significand
; 0C90FDAA22168C235H followed by the sign/exponent word 4000H.
pi DB 35H
|
|
DB 0c2H
|
|
DD 0daa22168H
|
|
DD 4000c90fH
|
|
|
|
; pi/2 as an 80-bit extended value: same significand, exponent word 3FFFH.
pi_by_2 DB 35H
|
|
DB 0c2H
|
|
DD 0daa22168H
|
|
DD 3fffc90fH
|
|
|
|
; 16.0 as a 32-bit float.
flt_sixteen DD 41800000H
|
|
|
|
; 1.0/16.0 as a 32-bit float.
one_by_sixteen DD 3d800000H
|
|
|
|
|
|
; B1..B6: polynomial coefficients, each stored as a 10-byte extended value
; (DW + DD + DD) and loaded with `fld tbyte ptr`. The code evaluates
;   poly = s + s*(B1*t + B2*t^2 + B3*t^3 + B4*t^4 + B5*t^5 + B6*t^6), t = s*s.
; Decoding the bit patterns gives values close to -1/3, 1/5, -1/7, 1/9,
; -1/11 and 1/13 respectively (the higher ones are not exact fractions).
B1 DW 0AAA8H
|
|
DD 0AAAAAAAAH
|
|
DD 0BFFDAAAAH
|
|
|
|
B2 DW 2D6EH
|
|
DD 0CCCCCCCCH
|
|
DD 3FFCCCCCH
|
|
|
|
B3 DW 4892H
|
|
DD 249241F9H
|
|
DD 0BFFC9249H
|
|
|
|
B4 DW 0C592H
|
|
DD 3897CDECH
|
|
DD 3FFBE38EH
|
|
|
|
B5 DW 5DDDH
|
|
DD 0C17BC162H
|
|
DD 0BFFBBA2DH
|
|
|
|
B6 DW 4854H
|
|
DD 77C7C78EH
|
|
DD 3FFB9C80H
|
|
|
|
|
|
; Table of arctan(k/16) for k = 0..16, one 16-byte row per k so the code can
; index it with k*16. Each row holds an 80-bit extended value (significand
; low dword, significand high dword, sign/exponent in the low word of the
; third dword); the remaining bytes are zero padding. The last row is pi/4.
atan_k_by_16 dd 000000000H, 000000000H, 000000000H, 000000000H
|
|
dd 067EF4E37H, 0FFAADDB9H, 000003FFAH, 000000000H
|
|
dd 0617B6E33H, 0FEADD4D5H, 000003FFBH, 000000000H
|
|
dd 072D81135H, 0BDCBDA5EH, 000003FFCH, 000000000H
|
|
dd 06406EB15H, 0FADBAFC9H, 000003FFCH, 000000000H
|
|
dd 03F5E5E6AH, 09B13B9B8H, 000003FFDH, 000000000H
|
|
dd 026F78474H, 0B7B0CA0FH, 000003FFDH, 000000000H
|
|
dd 0611FE5B6H, 0D327761EH, 000003FFDH, 000000000H
|
|
dd 00DDA7B45H, 0ED63382BH, 000003FFDH, 000000000H
|
|
dd 0D9867E2AH, 0832BF4A6H, 000003FFEH, 000000000H
|
|
dd 0F7F59F9BH, 08F005D5EH, 000003FFEH, 000000000H
|
|
dd 071BDDA20H, 09A2F80E6H, 000003FFEH, 000000000H
|
|
dd 034F70924H, 0A4BC7D19H, 000003FFEH, 000000000H
|
|
dd 0B4D8C080H, 0AEAC4C38H, 000003FFEH, 000000000H
|
|
dd 0C2319E74H, 0B8053E2BH, 000003FFEH, 000000000H
|
|
dd 0AC526641H, 0C0CE85B8H, 000003FFEH, 000000000H
|
|
dd 02168C235H, 0C90FDAA2H, 000003FFEH, 000000000H
|
|
|
|
DATA32 ENDS
|
|
|
|
; BSS32 is declared but left empty in this file; it is named in DGROUP below.
BSS32 SEGMENT DWORD PUBLIC USE32 'BSS'
|
|
BSS32 ENDS
|
|
|
|
|
|
; External near routine that performs the divisions for __fpatan_chk.
; It is called with EAX = 0Fh; its implementation and exact register
; contract are not visible in this file.
EXTRN __fdiv_fpr:NEAR
|
|
|
|
; Group all data segments so they are addressed through one segment register
; (see the ASSUME DS:DGROUP, ES:DGROUP in the code segment).
DGROUP GROUP CONST,CONST2,DATA32,BSS32
|
|
|
|
|
|
_TEXT SEGMENT PARA PUBLIC USE32 'CODE'
|
|
ASSUME CS:_TEXT,DS:DGROUP,ES:DGROUP, SS:nothing
|
|
public __fpatan_chk
|
|
|
|
;-----------------------------------------------------------------------------
; __fpatan_chk - software stand-in for the x87 FPATAN instruction.
;
; In:    st(0) = X, st(1) = Y (both are popped and saved to the stack frame).
; Out:   st(0) = arctangent of Y/X placed in the quadrant given by the signs
;        of X and Y; the two inputs are replaced by the single result.
; Saves: EAX, ECX, EDX (pushed on entry, popped on every exit path).
;        The caller's x87 control word is restored before returning.
;
; Method (normal, finite, non-zero operands):
;   1. u = |X|, v = |Y|; if v >= u the two are swapped and sflag = 1, so the
;      quotient z is always in [0, 1].
;   2. k = z*16 rounded to an integer, g = k/16,
;      s = (z - g) / (1 + g*z).
;   3. w = atan_k_by_16[k] + s + s*(B1*t + ... + B6*t^6), with t = s*s.
;   4. The result is fixed up through dispatch_table using sflag and the
;      signs of X and Y.
; Operands with a clear integer bit, a zero exponent, or an all-ones exponent
; are handed to the hardware FPATAN instruction instead (hw_fpatan).
;
; Both divisions are done by calling __fdiv_fpr with EAX = 0Fh rather than by
; a divide instruction in this routine.
;
; NOTE(review): at `fld tbyte ptr B3` the routine appears to have 8 values
; live on the x87 stack, i.e. the whole register stack - confirm that callers
; never have anything besides X and Y on the FPU stack.
; `defpe` is a macro from outside this file; presumably it opens the
; procedure that the ENDP at the end closes.
;-----------------------------------------------------------------------------
defpe __fpatan_chk
|
|
push eax
|
|
push ecx
|
|
push edx
|
|
sub esp, STACK_SIZE
|
|
fstp tbyte ptr [esp+X] ; save X
|
|
fstp tbyte ptr [esp+Y] ; save Y
|
|
|
|
; Screen out operands the software path does not handle. Doubling the high
; significand dword shifts the explicit integer bit (bit 63) into CF.
mov ecx, [esp+Y+4]
|
|
add ecx, ecx
|
|
jnc hw_fpatan ; unnormals (explicit 1 missing)
|
|
mov eax, [esp+X+4]
|
|
add eax, eax
|
|
jnc hw_fpatan ; unnormals (explicit 1 missing)
|
|
mov ecx, [esp+Y+8] ; load sign/exponent word of Y
|
|
mov eax, [esp+X+8] ; load sign/exponent word of X
|
|
and ecx, 7fffh ; Ey = exponent Y
|
|
jz hw_fpatan ; Ey = 0
|
|
and eax, 7fffh ; Ex = exponent X
|
|
jz hw_fpatan ; Ex = 0
|
|
cmp ecx, 7fffh ; check if Ey = 0x7fffh
|
|
je hw_fpatan
|
|
cmp eax, 7fffh ; check if Ex = 0x7fffh
|
|
je hw_fpatan
|
|
|
|
; Both operands are normal: continue with magnitudes only, the signs are
; picked up again later from the saved copies.
fld tbyte ptr [esp+X] ; reload X
|
|
fabs ; |X| = u
|
|
fld tbyte ptr [esp+Y] ; reload Y
|
|
fabs ; |Y| = v
|
|
|
|
; The following six lines turn off exceptions and set the
|
|
; precision control to 80 bits. The former is necessary to
|
|
; force any traps to be taken at the divide instead of the scaling
|
|
; code. The latter is necessary in order to get full precision for
|
|
; codes with incoming 32 and 64 bit precision settings. If
|
|
; it can be guaranteed that before reaching this point, the underflow
|
|
; exception is masked and the precision control is at 80 bits, these
|
|
; six lines can be omitted.
|
|
;
|
|
fnstcw [PREV_CW+esp] ; save caller's control word
|
|
mov edx, [PREV_CW+esp]
|
|
or edx, 033fh ; mask exceptions, pc=80
|
|
; Clear the rounding-control bits (round to nearest) and drop the upper 16
; bits, which were read from stack memory that fnstcw did not write.
and edx, 0f3ffh
|
|
mov [PATCH_CW+esp], edx
|
|
fldcw [PATCH_CW+esp] ; mask exceptions & pc=80
|
|
|
|
|
|
xor edx, edx ; initialize sflag = 0
|
|
fcom ; compare |Y| (st0) with |X| (st1)
|
|
; Copy the FPU condition codes into EFLAGS; EAX is preserved around it.
push eax
|
|
fstsw ax
|
|
sahf
|
|
pop eax
|
|
jb order_X_Y_ok ; |Y| < |X|: keep the order
|
|
fxch ; otherwise swap so the quotient is <= 1
|
|
inc edx ; sflag = 1
|
|
order_X_Y_ok:
|
|
push eax
|
|
mov eax, 0fh
|
|
call __fdiv_fpr ; v/u = z
|
|
pop eax
|
|
fld dword ptr flt_sixteen ; 16.0
|
|
fmul st, st(1) ; z*16.0
|
|
; Top of stack looks like k, z
|
|
fistp dword ptr [SPILL+esp] ; store k as int
|
|
mov ecx, [SPILL+esp]
|
|
shl ecx, 4 ; ecx = k*16, byte offset into atan_k_by_16
|
|
fild dword ptr[SPILL+esp]
|
|
fmul dword ptr one_by_sixteen; g = k * (1.0/16.0)
|
|
; Top of stack looks like g, z
|
|
fld st(1) ; duplicate z
|
|
fsub st, st(1) ; z-g = r
|
|
fxch
|
|
; Top of stack looks like g, r, z
|
|
fmulp st(2), st ; g*z
|
|
; Top of stack looks like r, g*z
|
|
fld qword ptr pos_1 ; load 1.0
|
|
faddp st(2), st ; 1+g*z
|
|
; Top of stack looks like r, 1+g*z
|
|
push eax
|
|
mov eax, 0fh
|
|
call __fdiv_fpr ; r/(1+g*z) = s
|
|
pop eax
|
|
fld st(0) ; duplicate s
|
|
fmul st,st(1) ; t = s*s
|
|
; Top of stack looks like t, s
|
|
|
|
; Polynomial evaluation. In the stack pictures below t2 = t*t and t4 = t2*t2;
; "even" collects the B2/B4/B6 terms and "odd" the B3/B5 terms. Integer
; instructions that pick up the signs of X and Y are interleaved with the
; FPU work; the x87 arithmetic instructions used here leave EFLAGS alone, so
; CF survives from each `add eax, eax` to the following `adc edx, edx`.
fld st(0)
|
|
fmul st, st(1)
|
|
; Top of stack looks like t2, t, s
|
|
fld st(0)
|
|
fmul st, st(1)
|
|
fld tbyte ptr B6
|
|
fld tbyte ptr B5
|
|
; Top of stack looks like B5, B6, t4, t2, t, s
|
|
fxch
|
|
fmul st, st(2)
|
|
fld tbyte ptr B4
|
|
fxch st(2)
|
|
fmul st, st(3)
|
|
; Top of stack looks like B5t4, B6t4, B4, t4, t2, t, s
|
|
fld tbyte ptr B3
|
|
fxch st(2)
|
|
fmul st, st(5)
|
|
; Top of stack looks like B6t6, B5t4, B3, B4, t4, t2, t, s
|
|
fxch st(3)
|
|
fmulp st(4), st
|
|
fld tbyte ptr B2
|
|
; Top of stack looks like B2, B5t4, B3, B6t6, B4t4, t2, t, s
|
|
fxch st(3)
|
|
faddp st(4), st
|
|
mov eax, [esp+X+8] ; reload sign/exponent word of X
|
|
fld tbyte ptr B1
|
|
fxch
|
|
shl eax, 16 ; move the sign bit of X up to bit 31
|
|
; Top of stack looks like B5t4, B1, B3, B2, even, t2, t, s
|
|
fmul st, st(6)
|
|
fxch st(2)
|
|
add eax, eax ; CF = sign of X
|
|
fmul st, st(5)
|
|
; Top of stack looks like B3t2, B1, B5t5, B2, even, t2, t, s
|
|
fxch st(3)
|
|
adc edx, edx ; |sflag|Sx|
|
|
fmulp st(5), st
|
|
fxch st(2)
|
|
mov eax, [Y+8+esp] ; reload sign/exponent word of Y
|
|
fmul st, st(5)
|
|
; Top of stack looks like B3t3, B5t5, B1, even, B2t2, t, s
|
|
fxch st(2)
|
|
shl eax, 16 ; move the sign bit of Y up to bit 31
|
|
fmulp st(5), st
|
|
; Top of stack looks like B5t5, B3t3, even, B2t2, B1t, s
|
|
fxch st(2)
|
|
faddp st(3), st
|
|
add eax, eax ; CF = sign of Y
|
|
faddp st(1), st
|
|
adc edx, edx ; |sflag|Sx|Sy|
|
|
; Top of stack looks like odd, even, B1t, s
|
|
faddp st(2), st
|
|
faddp st(1), st
|
|
fmul st,st(1) ; s*(odd+even)
|
|
faddp st(1), st ; poly
|
|
|
|
fld tbyte ptr atan_k_by_16[ecx] ; arctan[k;16]
|
|
faddp st(1), st ; w = poly + arctan(g)
|
|
|
|
; Select the quadrant fix-up: EDX = sflag*4 + Sx*2 + Sy.
jmp dword ptr dispatch_table[edx*4]
|
|
|
|
; Each labelN below applies its fix-up to w, restores the caller's control
; word, releases the frame and returns with the result in st(0).
; label0: no swap, X >= 0, Y >= 0 -> w
label0:
|
|
fldcw [esp+PREV_CW]
|
|
add esp, STACK_SIZE
|
|
pop edx
|
|
pop ecx
|
|
pop eax
|
|
ret
|
|
; label1: no swap, X >= 0, Y < 0 -> -w
label1:
|
|
fchs
|
|
fldcw [esp+PREV_CW]
|
|
add esp, STACK_SIZE
|
|
pop edx
|
|
pop ecx
|
|
pop eax
|
|
ret
|
|
; label2: no swap, X < 0, Y >= 0 -> pi - w
label2:
|
|
fld tbyte ptr pi
|
|
fsubrp st(1), st ; pi - w
|
|
fldcw [esp+PREV_CW]
|
|
add esp, STACK_SIZE
|
|
pop edx
|
|
pop ecx
|
|
pop eax
|
|
ret
|
|
; label3: no swap, X < 0, Y < 0 -> -(pi - w)
label3:
|
|
fld tbyte ptr pi
|
|
fsubrp st(1), st ; pi - w
|
|
fchs ; - (pi - w)
|
|
fldcw [esp+PREV_CW]
|
|
add esp, STACK_SIZE
|
|
pop edx
|
|
pop ecx
|
|
pop eax
|
|
ret
|
|
; label4: swapped, X >= 0, Y >= 0 -> pi/2 - w
label4:
|
|
fld tbyte ptr pi_by_2
|
|
fsubrp st(1), st ; pi/2 - w
|
|
fldcw [esp+PREV_CW]
|
|
add esp, STACK_SIZE
|
|
pop edx
|
|
pop ecx
|
|
pop eax
|
|
ret
|
|
; label5: swapped, X >= 0, Y < 0 -> -(pi/2 - w)
label5:
|
|
fld tbyte ptr pi_by_2
|
|
fsubrp st(1), st ; pi/2 - w
|
|
fchs ; - (pi/2 - w)
|
|
fldcw [esp+PREV_CW]
|
|
add esp, STACK_SIZE
|
|
pop edx
|
|
pop ecx
|
|
pop eax
|
|
ret
|
|
; label6: swapped, X < 0, Y >= 0 -> pi/2 + w
label6:
|
|
fld tbyte ptr pi_by_2
|
|
faddp st(1), st ; pi/2 + w
|
|
fldcw [esp+PREV_CW]
|
|
add esp, STACK_SIZE
|
|
pop edx
|
|
pop ecx
|
|
pop eax
|
|
ret
|
|
; label7: swapped, X < 0, Y < 0 -> -(pi/2 + w)
label7:
|
|
fld tbyte ptr pi_by_2
|
|
faddp st(1), st ; pi/2 + w
|
|
fchs ; -(pi/2+w)
|
|
fldcw [esp+PREV_CW]
|
|
add esp, STACK_SIZE
|
|
pop edx
|
|
pop ecx
|
|
pop eax
|
|
ret
|
|
|
|
|
|
; Fallback for operands rejected by the screening at the top: push the saved
; operands back in their original order and let the hardware instruction
; handle them. Every jump here happens before the control word is changed,
; so there is nothing to restore on this path.
hw_fpatan:
|
|
fld tbyte ptr [esp+Y] ; reload Y
|
|
fld tbyte ptr [esp+X] ; reload X
|
|
fpatan
|
|
add esp, STACK_SIZE
|
|
pop edx
|
|
pop ecx
|
|
pop eax
|
|
ret
|
|
__fpatan_chk ENDP
|
|
|
|
_TEXT ENDS
|
|
END
|