;***************************************************************************** ;* ;* Open Watcom Project ;* ;* Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved. ;* ;* ======================================================================== ;* ;* This file contains Original Code and/or Modifications of Original ;* Code as defined in and that are subject to the Sybase Open Watcom ;* Public License version 1.0 (the 'License'). You may not use this file ;* except in compliance with the License. BY USING THIS FILE YOU AGREE TO ;* ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is ;* provided with the Original Code and Modifications, and is also ;* available at www.sybase.com/developer/opensource. ;* ;* The Original Code and all software distributed under the License are ;* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER ;* EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM ;* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF ;* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR ;* NON-INFRINGEMENT. Please see the License for the specific language ;* governing rights and limitations under the License. ;* ;* ======================================================================== ;* ;* Description: WHEN YOU FIGURE OUT WHAT THIS FILE DOES, PLEASE ;* DESCRIBE IT HERE! ;* ;***************************************************************************** ; static char sccs_id[] = "@(#)fprem32.asm 1.5 12/22/94 12:48:07"; ; ; This code is being published by Intel to users of the Pentium(tm) ; processor. Recipients are authorized to copy, modify, compile, use and ; distribute the code. ; ; Intel makes no warranty of any kind with regard to this code, including ; but not limited to, implied warranties or merchantability and fitness for ; a particular purpose. Intel assumes no responsibility for any errors that ; may appear in this code. 
;
; No patent licenses are granted, express or implied.
;
;
include mdef.inc

.386
.387

;
;  PRELIMINARY VERSION of the software patch for the floating
;  point remainder.
;

; Debug aid: when DEBUG is defined, snapshot the FPU status and control
; words into the public variables fpsw/fpcw.  Expands to nothing otherwise.
CHECKSW MACRO
ifdef   DEBUG
        fnstsw  [fpsw]
        fnstcw  [fpcw]
endif
ENDM

_DATA SEGMENT DWORD USE32 PUBLIC 'DATA'

;
;  Stack variables for remainder routines.
;
;  The plain names are offsets from ESP inside the public wrappers (after
;  "sub esp, STACK_SIZE").  The MAIN_* names address the same slots from
;  inside the common workers, where ESP is lower by the return address
;  plus three pushed registers (MAIN_FUDGE).
;

FLT_SIZE        EQU     12              ; room reserved for one tbyte value
DENOM           EQU     0
DENOM_SAVE      EQU     DENOM + FLT_SIZE
NUMER           EQU     DENOM_SAVE + FLT_SIZE
PREV_CW         EQU     NUMER + FLT_SIZE
PATCH_CW        EQU     PREV_CW + 4
FPREM_SW        EQU     PATCH_CW + 4
STACK_SIZE      EQU     FPREM_SW + 4
RET_SIZE        EQU     4
PUSH_SIZE       EQU     4

MAIN_FUDGE      EQU     RET_SIZE + PUSH_SIZE + PUSH_SIZE + PUSH_SIZE

MAIN_DENOM      EQU     DENOM + MAIN_FUDGE
MAIN_DENOM_SAVE EQU     DENOM_SAVE + MAIN_FUDGE
MAIN_NUMER      EQU     NUMER + MAIN_FUDGE
MAIN_PREV_CW    EQU     PREV_CW + MAIN_FUDGE
MAIN_PATCH_CW   EQU     PATCH_CW + MAIN_FUDGE
MAIN_FPREM_SW   EQU     FPREM_SW + MAIN_FUDGE

ONESMASK        EQU     700h            ; bits that must all be one in the
                                        ; dword read at DENOM+6

; Non-zero entries mark the 4-bit mantissa patterns (1,4,7,a,d) that send
; the operation to the software path.
fprem_risc_table DB     0, 1, 0, 0, 4, 0, 0, 7, 0, 0, 10, 0, 0, 13, 0, 0

; Little-endian IEEE doubles.
fprem_scale     DB      0, 0, 0, 0, 0, 0, 0eeh, 03fh    ; 0.9375, not referenced in this file
one_shl_64      DB      0, 0, 0, 0, 0, 0, 0f0h, 043h    ; 2**64
one_shr_64      DB      0, 0, 0, 0, 0, 0, 0f0h, 03bh    ; 2**-64
one             DB      0, 0, 0, 0, 0, 0, 0f0h, 03fh    ; 1.0, not referenced in this file
half            DB      0, 0, 0, 0, 0, 0, 0e0h, 03fh    ; 0.5
; 10-byte extended value used only as a dividend to force C2 to be set.
big_number      DB      0, 0, 0, 0, 0, 0, 0ffh, 0ffh, 0feh, 07fh

ifdef   DEBUG
        public  fpcw
        public  fpsw
fpcw    dw      0
fpsw    dw      0
endif

; Layout of the 28-byte image written by fnstenv / read by fldenv.
FPU_STATE       STRUC
        CONTROL_WORD    DW      ?
        reserved_1      DW      ?
        STATUS_WORD     DD      ?
        TAG_WORD        DW      ?
        reserved_3      DW      ?
        IP_OFFSET       DD      ?
        CS_SLCT         DW      ?
        OPCODE          DW      ?
        DATA_OFFSET     DD      ?
        OPERAND_SLCT    DW      ?
        reserved_4      DW      ?
FPU_STATE       ENDS

ENV_SIZE        EQU     28

_DATA ENDS

_TEXT SEGMENT DWORD USE32 PUBLIC 'CODE'
_TEXT ENDS

DGROUP GROUP _DATA

_TEXT SEGMENT DWORD USE32 PUBLIC 'CODE'

        assume cs:_TEXT, ds:DGROUP, es:DGROUP, ss:nothing

;
; fprem_common
;
; Shared worker behind __fprem_chk.
;
; On entry (slots addressed through the MAIN_* offsets):
;   NUMER       numerator, 10-byte extended
;   DENOM       denominator, 10-byte extended; the caller guarantees a
;               non-zero exponent field (denormals are pre-scaled)
;   DENOM_SAVE  original denominator, valid only when EDX bit 1 is set
;   EDX bit 0   operands were scaled by 2**64, result must be de-scaled
;   EDX bit 1   original denominator is already stored in DENOM_SAVE
; On exit:
;   ST(0) = result, ST(1) = denominator (reloaded from DENOM_SAVE when
;   EDX is non-zero or the large-argument path ran).
;   EAX, EBX and ECX are preserved.
;
fprem_common    PROC    NEAR

        push    eax
        push    ebx
        push    ecx
        mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
        xor     eax, ONESMASK           ; invert bits that have to be one
        test    eax, ONESMASK           ; check bits that have to be one
        jnz     remainder_hardware_ok
        shr     eax, 11
        and     eax, 0fh
        cmp     byte ptr fprem_risc_table[eax], 0       ; check for (1,4,7,a,d)
        jz      remainder_hardware_ok

; The denominator has the bit pattern. Weed out the funny cases like NaNs
; before applying the software version. Our caller guarantees that the
; denominator is not a denormal. Here we check for:
;       denominator     inf, NaN, unnormal
;       numerator       inf, NaN, unnormal, denormal

        mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
        and     eax, 07fff0000h         ; mask the exponent only
        cmp     eax, 07fff0000h         ; check for INF or NaN
        je      remainder_hardware_ok
        mov     eax, [MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa
        and     eax, 07fff0000h         ; mask the exponent only
        jz      remainder_hardware_ok   ; jif numerator denormal
        cmp     eax, 07fff0000h         ; check for INF or NaN
        je      remainder_hardware_ok

; Unnormal test: doubling the high mantissa dword shifts the explicit
; (integer) bit into the carry flag, so the branch has to test carry.
; Testing the zero flag here would always branch for the denominator,
; because the ONESMASK bits checked above are part of this dword, and the
; software path below would never run.
        mov     eax, [esp + MAIN_NUMER + 4]     ; high mantissa bits - numerator
        add     eax, eax                        ; set carry if explicit bit set
        jnc     remainder_hardware_ok           ; jmp if numerator is unnormal
        mov     eax, [esp + MAIN_DENOM + 4]     ; high mantissa bits - denominator
        add     eax, eax                        ; set carry if explicit bit set
        jnc     remainder_hardware_ok           ; jmp if denominator is unnormal

rem_patch:
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
        and     eax, 07fffh             ; clear sy
        add     eax, 63                 ; evaluate ey + 63
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
        and     ebx, 07fffh             ; clear sx
        sub     ebx, eax                ; evaluate the exponent difference (ex - ey)
        ja      rem_large               ; if ex > ey + 63, case of large arguments
rem_patch_loop:
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
        and     eax, 07fffh             ; clear sy
        add     eax, 10                 ; evaluate ey + 10
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
        and     ebx, 07fffh             ; clear sx
        sub     ebx, eax                ; evaluate the exponent difference (ex - ey)
        js      remainder_hardware_ok   ; safe if ey + 10 > ex
        fld     tbyte ptr [MAIN_NUMER+esp]      ; load the numerator
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
        and     ebx, 07fffh             ; clear sx
        mov     ecx, ebx
        sub     ebx, eax
        and     ebx, 07h
        or      ebx, 04h
        sub     ecx, ebx                ; shifted exponent = ex - (((ex-ey) & 7) | 4)
        mov     ebx, eax
        and     ebx, 08000h             ; keep sy
        or      ecx, ebx                ; merge the sign of y
        mov     dword ptr [MAIN_DENOM+8+esp], ecx
        fld     tbyte ptr [MAIN_DENOM+esp]      ; load the shifted denominator
        mov     dword ptr [MAIN_DENOM+8+esp], eax       ; restore the initial denominator
        fxch
        fprem                           ; this rem is safe
        fstp    tbyte ptr [MAIN_NUMER+esp]      ; update the numerator
        fstp    st(0)                   ; pop the stack
        jmp     rem_patch_loop

rem_large:
        test    edx, 02h                ; is denominator already saved
        jnz     already_saved
        fld     tbyte ptr[esp + MAIN_DENOM]
        fstp    tbyte ptr[esp + MAIN_DENOM_SAVE]        ; save denominator
already_saved:
        ; Save user's precision control and institute 80.  The fp ops in
        ; rem_large_loop must not round to user's precision (if it is less
        ; than 80) because the hardware would not have done so.  We are
        ; aping the hardware here, which is all extended.

        fnstcw  [esp+MAIN_PREV_CW]      ; save caller's control word
        mov     eax, dword ptr[esp + MAIN_PREV_CW]
        or      eax, 033fh              ; mask exceptions, pc=80
        mov     [esp + MAIN_PATCH_CW], eax
        fldcw   [esp + MAIN_PATCH_CW]

        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
        and     eax, 07fffh             ; clear sy
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
        and     ebx, 07fffh             ; clear sx
        sub     ebx, eax                ; evaluate the exponent difference
        and     ebx, 03fh
        or      ebx, 020h
        add     ebx, 1
        mov     ecx, ebx                ; loop count = (((ex-ey) & 3fh) | 20h) + 1
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
        and     ebx, 07fffh             ; clear sx
        and     eax, 08000h             ; keep sy
        or      ebx, eax                ; merge the sign of y
        mov     dword ptr[MAIN_DENOM+8+esp], ebx        ; make ey equal to ex (scaled denominator)
        fld     tbyte ptr [MAIN_DENOM+esp]      ; load the scaled denominator
        fabs
        fld     tbyte ptr [MAIN_NUMER+esp]      ; load the numerator
        fabs
rem_large_loop:
        fcom
        fstsw   ax
        and     eax, 00100h             ; C0 set: ST < ST(1), skip the subtract
        jnz     rem_no_sub
        fsub    st, st(1)
rem_no_sub:
        fxch
        fmul    qword ptr half          ; halve the scaled denominator
        fxch
        sub     ecx, 1                  ; decrement the loop counter
        jnz     rem_large_loop
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
        fstp    tbyte ptr[esp + MAIN_NUMER]     ; save result
        fstp    st                      ; toss modified denom
        fld     tbyte ptr[esp + MAIN_DENOM_SAVE]
        fld     tbyte ptr[big_number]   ; force C2 to be set
        fprem
        fstp    st
        fld     tbyte ptr[esp + MAIN_NUMER]     ; restore saved result

        fldcw   [esp + MAIN_PREV_CW]    ; restore caller's control word
        and     ebx, 08000h             ; keep sx
        jz      rem_done
        fchs
        jmp     rem_done

remainder_hardware_ok:
        fld     tbyte ptr [MAIN_DENOM+esp]      ; load the denominator
        fld     tbyte ptr [MAIN_NUMER+esp]      ; load the numerator
        fprem                           ; and finally do a remainder

; prem_main_routine end
rem_done:
        test    edx, 03h
        jz      rem_exit
        fnstsw  [esp + MAIN_FPREM_SW]   ; save Q0 Q1 and Q2
        test    edx, 01h
        jz      do_not_de_scale
; De-scale the result. Go to pc=80 to prevent from fmul
; from user precision (fprem does not round the result).
        fnstcw  [esp + MAIN_PREV_CW]    ; save callers control word
        mov     eax, [esp + MAIN_PREV_CW]
        or      eax, 0300h              ; pc = 80
        mov     [esp + MAIN_PATCH_CW], eax
        fldcw   [esp + MAIN_PATCH_CW]
        fmul    qword ptr one_shr_64
        fldcw   [esp + MAIN_PREV_CW]    ; restore callers CW
do_not_de_scale:
        mov     eax, [esp + MAIN_FPREM_SW]
        fxch
        fstp    st                      ; drop the working denominator
        fld     tbyte ptr[esp + MAIN_DENOM_SAVE]        ; put back the original
        fxch
        and     eax, 04300h             ; restore saved Q0, Q1, Q2
        sub     esp, ENV_SIZE
        fnstenv [esp]
        and     [esp].STATUS_WORD, 0bcffh
        or      [esp].STATUS_WORD, eax
        fldenv  [esp]
        add     esp, ENV_SIZE
rem_exit:
        pop     ecx
        pop     ebx
        pop     eax
        CHECKSW                         ; debug only: save status
        ret
fprem_common    ENDP

comment ~****************************************************************
;
;       float frem_chk (float numer, float denom)
;
        public  frem_chk
frem_chk        PROC    NEAR
        push    edx
        sub     esp, STACK_SIZE
        fld     dword ptr [STACK_SIZE+8+esp]
        fstp    tbyte ptr [NUMER+esp]
        fld     dword ptr [STACK_SIZE+12+esp]
        fstp    tbyte ptr [DENOM+esp]
        mov     edx, 0                  ; dx = 1 if denormal extended divisor
        call    fprem_common
        fxch
        fstp    st
        add     esp, STACK_SIZE
        pop     edx
        ret
frem_chk        ENDP
; end frem_chk

;
;       double drem_chk (double numer, double denom)
;
        public  drem_chk
drem_chk        PROC    NEAR
        push    edx
        sub     esp, STACK_SIZE
        fld     qword ptr [STACK_SIZE+8+esp]
        fstp    tbyte ptr [NUMER+esp]
        fld     qword ptr [STACK_SIZE+16+esp]
        fstp    tbyte ptr [DENOM+esp]
        mov     edx, 0                  ; dx = 1 if denormal extended divisor
        call    fprem_common
        fxch
        fstp    st
        add     esp, STACK_SIZE
        pop     edx
        ret
drem_chk        ENDP
; end drem_chk

;
;       long double lrem_chk(long double number,long double denom)
;
        public  lrem_chk
lrem_chk        PROC    NEAR
        fld     tbyte ptr [20+esp]
        fld     tbyte ptr [4+esp]
        call    fprem_chk
        fxch
        fstp    st
        ret
lrem_chk        ENDP
**********************************************************************~

;
; FPREM: ST = remainder(ST, ST(1))
;
; Compiler version of the FPREM must preserve the arguments in the floating
; point stack.
;
; __fprem_chk
;
; On entry ST(0) is the numerator and ST(1) the denominator.  Both are
; popped into the NUMER/DENOM stack slots and the routine returns with
; ST(0) = result and ST(1) = denominator.  EDX is preserved; EAX is
; overwritten.  A denominator with a zero exponent field is either handed
; straight to the hardware (zero mantissa) or scaled first (denormal).
;
        public  __fprem_chk
        defpe   __fprem_chk

        push    edx
        sub     esp, STACK_SIZE
        fstp    tbyte ptr [NUMER+esp]
        fstp    tbyte ptr [DENOM+esp]
        xor     edx, edx                ; no flags: not scaled, denominator not saved
; prem_main_routine begin
        mov     eax,[DENOM+6+esp]       ; exponent and high 16 bits of mantissa
        test    eax,07fff0000h          ; check for denormal
        jz      denormal
        call    fprem_common
        add     esp, STACK_SIZE
        pop     edx
        ret

denormal:
        fld     tbyte ptr [DENOM+esp]   ; load the denominator
        fld     tbyte ptr [NUMER+esp]   ; load the numerator
        mov     eax, [DENOM+esp]        ; test for whole mantissa == 0
        or      eax, [DENOM+4+esp]      ; test for whole mantissa == 0
        jz      remainder_hardware_ok_l ; denominator is zero
        fxch
        fstp    tbyte ptr[esp + DENOM_SAVE]     ; save org denominator
        fld     tbyte ptr[esp + DENOM]
        fxch
        or      edx, 02h                ; DENOM_SAVE now holds the original
;
; For this we need pc=80.  Also, mask exceptions so we don't take any
; denormal operand exceptions.  It is guaranteed that the descaling
; later on will take underflow, which is what the hardware would have done
; on a normal fprem.
;
        fnstcw  [PREV_CW+esp]           ; save caller's control word
        mov     eax, [PREV_CW+esp]
        or      eax, 0033fh             ; mask exceptions, pc=80
        mov     [PATCH_CW+esp], eax
        fldcw   [PATCH_CW+esp]          ; mask exceptions & pc=80

; The denominator is a denormal.  For most numerators, scale both numerator
; and denominator to get rid of denormals.  Then execute the common code
; with the flag set to indicate that the result must be de-scaled.
; For large numerators this won't work because the scaling would cause
; overflow.  In this case we know the numerator is large, the denominator
; is small (denormal), so the exponent difference is also large.  This means
; the rem_large code will be used and this code depends on the difference
; in exponents modulo 64.  Adding 64 to the denominators exponent
; doesn't change the modulo 64 difference.  So we can scale the denominator
; by 64, making it not denormal, and this won't effect the result.
;
; To start with, figure out if numerator is large

        mov     eax, [esp + NUMER + 8]  ; load numerator exponent
        and     eax, 7fffh              ; isolate numerator exponent
        cmp     eax, 7fbeh              ; compare Nexp to Maxexp-64
        ja      big_numer_rem_de        ; jif big numerator

; So the numerator is not large scale both numerator and denominator

        or      edx, 1                  ; edx = 1, if denormal extended divisor
        fmul    qword ptr one_shl_64    ; make numerator not denormal
        fstp    tbyte ptr[esp + NUMER]
        fmul    qword ptr one_shl_64    ; make denominator not denormal
        fstp    tbyte ptr[esp + DENOM]
        jmp     scaling_done

; The numerator is large.  Scale only the denominator, which will not
; change the result which we know will be partial.  Set the scale flag
; to false.
big_numer_rem_de:
        ; We must do this with pc=80 to avoid rounding to single/double.
        ; In this case we do not mask exceptions so that we will take
        ; denormal operand, as would the hardware.
        ;
        ; PREV_CW already holds the caller's control word (stored above).
        ; It must not be re-read from the FPU here: the FPU currently has
        ; the masked patch word loaded, and storing that into PREV_CW would
        ; make the "restore" at scaling_done reload the patch word and
        ; return to the caller with exceptions masked and pc=80.
        mov     eax, [PREV_CW+esp]      ; caller's control word
        or      eax, 00300h             ; pc=80
        mov     [PATCH_CW+esp], eax
        fldcw   [PATCH_CW+esp]          ; pc=80, caller's exception masks

        fstp    st                      ; Toss numerator
        fmul    qword ptr one_shl_64    ; make denominator not denormal
        fstp    tbyte ptr[esp + DENOM]

; Restore the control word which was fiddled to scale at 80-bit precision.
; Then call the common code.
scaling_done:
        fldcw   [esp + PREV_CW]         ; restore callers control word
        call    fprem_common
        add     esp, STACK_SIZE
        pop     edx
        ret

remainder_hardware_ok_l:
        fprem                           ; and finally do a remainder

        CHECKSW

        add     esp, STACK_SIZE
        pop     edx
        ret
__fprem_chk     ENDP
; end fprem_chk

;
; FPREM1 code begins here
;

;
; fprem1_common
;
; Shared worker behind __fprem1_chk; same structure as the FPREM worker
; but finishing with fprem1.
;
; On entry (slots addressed through the MAIN_* offsets):
;   NUMER       numerator, 10-byte extended
;   DENOM       denominator, 10-byte extended; the caller guarantees a
;               non-zero exponent field (denormals are pre-scaled)
;   DENOM_SAVE  original denominator, valid only when EDX bit 1 is set
;   EDX bit 0   operands were scaled by 2**64, result must be de-scaled
;   EDX bit 1   original denominator is already stored in DENOM_SAVE
; On exit:
;   ST(0) = result, ST(1) = denominator (reloaded from DENOM_SAVE when
;   EDX is non-zero or the large-argument path ran).
;   EAX, EBX and ECX are preserved.
;
fprem1_common   PROC    NEAR

        push    eax
        push    ebx
        push    ecx
        mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
        xor     eax, ONESMASK           ; invert bits that have to be one
        test    eax, ONESMASK           ; check bits that have to be one
        jnz     remainder1_hardware_ok
        shr     eax, 11
        and     eax, 0fh
        cmp     byte ptr fprem_risc_table[eax], 0       ; check for (1,4,7,a,d)
        jz      remainder1_hardware_ok

; The denominator has the bit pattern. Weed out the funny cases like NaNs
; before applying the software version. Our caller guarantees that the
; denominator is not a denormal. Here we check for:
;       denominator     inf, NaN, unnormal
;       numerator       inf, NaN, unnormal, denormal

        mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
        and     eax, 07fff0000h         ; mask the exponent only
        cmp     eax, 07fff0000h         ; check for INF or NaN
        je      remainder1_hardware_ok
        mov     eax, [MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa
        and     eax, 07fff0000h         ; mask the exponent only
        jz      remainder1_hardware_ok  ; jif numerator denormal
        cmp     eax, 07fff0000h         ; check for INF or NaN
        je      remainder1_hardware_ok

; Unnormal test: doubling the high mantissa dword shifts the explicit
; (integer) bit into the carry flag, so the branch has to test carry.
; Testing the zero flag here would always branch for the denominator,
; because the ONESMASK bits checked above are part of this dword, and the
; software path below would never run.
        mov     eax, [esp + MAIN_NUMER + 4]     ; high mantissa bits - numerator
        add     eax, eax                        ; set carry if explicit bit set
        jnc     remainder1_hardware_ok          ; jmp if numerator is unnormal
        mov     eax, [esp + MAIN_DENOM + 4]     ; high mantissa bits - denominator
        add     eax, eax                        ; set carry if explicit bit set
        jnc     remainder1_hardware_ok          ; jmp if denominator is unnormal

rem1_patch:
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
        and     eax, 07fffh             ; clear sy
        add     eax, 63                 ; evaluate ey + 63
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
        and     ebx, 07fffh             ; clear sx
        sub     ebx, eax                ; evaluate the exponent difference (ex - ey)
        ja      rem1_large              ; if ex > ey + 63, case of large arguments
rem1_patch_loop:
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
        and     eax, 07fffh             ; clear sy
        add     eax, 10                 ; evaluate ey + 10
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
        and     ebx, 07fffh             ; clear sx
        sub     ebx, eax                ; evaluate the exponent difference (ex - ey)
        js      remainder1_hardware_ok  ; safe if ey + 10 > ex
        fld     tbyte ptr [MAIN_NUMER+esp]      ; load the numerator
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
        and     ebx, 07fffh             ; clear sx
        mov     ecx, ebx
        sub     ebx, eax
        and     ebx, 07h
        or      ebx, 04h
        sub     ecx, ebx                ; shifted exponent = ex - (((ex-ey) & 7) | 4)
        mov     ebx, eax
        and     ebx, 08000h             ; keep sy
        or      ecx, ebx                ; merge the sign of y
        mov     dword ptr [MAIN_DENOM+8+esp], ecx
        fld     tbyte ptr [MAIN_DENOM+esp]      ; load the shifted denominator
        mov     dword ptr [MAIN_DENOM+8+esp], eax       ; restore the initial denominator
        fxch
        fprem                           ; this rem is safe
        fstp    tbyte ptr [MAIN_NUMER+esp]      ; update the numerator
        fstp    st(0)                   ; pop the stack
        jmp     rem1_patch_loop

rem1_large:
        ; The "already saved" flag lives in EDX bit 1.  EBX holds the
        ; exponent difference at this point and must not be tested here.
        test    edx, 02h                ; is denominator already saved
        jnz     already_saved1
        fld     tbyte ptr[esp + MAIN_DENOM]
        fstp    tbyte ptr[esp + MAIN_DENOM_SAVE]        ; save denominator
already_saved1:
        ; Save user's precision control and institute 80.  The fp ops in
        ; rem1_large_loop must not round to user's precision (if it is less
        ; than 80) because the hardware would not have done so.  We are
        ; aping the hardware here, which is all extended.

        fnstcw  [esp+MAIN_PREV_CW]      ; save caller's control word
        mov     eax, dword ptr[esp + MAIN_PREV_CW]
        or      eax, 033fh              ; mask exceptions, pc=80
        mov     [esp + MAIN_PATCH_CW], eax
        fldcw   [esp + MAIN_PATCH_CW]

        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
        and     eax, 07fffh             ; clear sy
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
        and     ebx, 07fffh             ; clear sx
        sub     ebx, eax                ; evaluate the exponent difference
        and     ebx, 03fh
        or      ebx, 020h
        add     ebx, 1
        mov     ecx, ebx                ; loop count = (((ex-ey) & 3fh) | 20h) + 1
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
        and     ebx, 07fffh             ; clear sx
        and     eax, 08000h             ; keep sy
        or      ebx, eax                ; merge the sign of y
        mov     dword ptr[MAIN_DENOM+8+esp], ebx        ; make ey equal to ex (scaled denominator)
        fld     tbyte ptr [MAIN_DENOM+esp]      ; load the scaled denominator
        fabs
        fld     tbyte ptr [MAIN_NUMER+esp]      ; load the numerator
        fabs
rem1_large_loop:
        fcom
        fstsw   ax
        and     eax, 00100h             ; C0 set: ST < ST(1), skip the subtract
        jnz     rem1_no_sub
        fsub    st, st(1)
rem1_no_sub:
        fxch
        fmul    qword ptr half          ; halve the scaled denominator
        fxch
        sub     ecx, 1                  ; decrement the loop counter
        jnz     rem1_large_loop
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
        fstp    tbyte ptr[esp + MAIN_NUMER]     ; save result
        fstp    st                      ; toss modified denom
        fld     tbyte ptr[esp + MAIN_DENOM_SAVE]
        fld     tbyte ptr[big_number]   ; force C2 to be set
        fprem1
        fstp    st
        fld     tbyte ptr[esp + MAIN_NUMER]     ; restore saved result

        fldcw   [esp + MAIN_PREV_CW]    ; restore caller's control word
        and     ebx, 08000h             ; keep sx
        jz      rem1_done
        fchs
        jmp     rem1_done

remainder1_hardware_ok:
        fld     tbyte ptr [MAIN_DENOM+esp]      ; load the denominator
        fld     tbyte ptr [MAIN_NUMER+esp]      ; load the numerator
        fprem1                          ; and finally do a remainder

; prem1_main_routine end
rem1_done:
        test    edx, 03h
        jz      rem1_exit
        fnstsw  [esp + MAIN_FPREM_SW]   ; save Q0 Q1 and Q2
        test    edx, 01h
        jz      do_not_de_scale1
; De-scale the result. Go to pc=80 to prevent from fmul
; from user precision (fprem does not round the result).
        fnstcw  [esp + MAIN_PREV_CW]    ; save callers control word
        mov     eax, [esp + MAIN_PREV_CW]
        or      eax, 0300h              ; pc = 80
        mov     [esp + MAIN_PATCH_CW], eax
        fldcw   [esp + MAIN_PATCH_CW]
        fmul    qword ptr one_shr_64
        fldcw   [esp + MAIN_PREV_CW]    ; restore callers CW
do_not_de_scale1:
        mov     eax, [esp + MAIN_FPREM_SW]
        fxch
        fstp    st                      ; drop the working denominator
        fld     tbyte ptr[esp + MAIN_DENOM_SAVE]        ; put back the original
        fxch
        and     eax, 04300h             ; restore saved Q0, Q1, Q2
        sub     esp, ENV_SIZE
        fnstenv [esp]
        and     [esp].STATUS_WORD, 0bcffh
        or      [esp].STATUS_WORD, eax
        fldenv  [esp]
        add     esp, ENV_SIZE
rem1_exit:
        pop     ecx
        pop     ebx
        pop     eax
        CHECKSW                         ; debug only: save status
        ret
fprem1_common   ENDP

comment ~***************************************************************
;
;       float frem1_chk (float numer, float denom)
;
        public  frem1_chk
frem1_chk       PROC    NEAR
        push    edx
        sub     esp, STACK_SIZE
        fld     dword ptr [STACK_SIZE+8+esp]
        fstp    tbyte ptr [NUMER+esp]
        fld     dword ptr [STACK_SIZE+12+esp]
        fstp    tbyte ptr [DENOM+esp]
        mov     edx, 0                  ; dx = 1 if denormal extended divisor
        call    fprem1_common
        fxch
        fstp    st
        add     esp, STACK_SIZE
        pop     edx
        ret
frem1_chk       ENDP
; end frem1_chk

;
;       double drem1_chk (double numer, double denom)
;
        public  drem1_chk
drem1_chk       PROC    NEAR
        push    edx
        sub     esp, STACK_SIZE
        fld     qword ptr [STACK_SIZE+8+esp]
        fstp    tbyte ptr [NUMER+esp]
        fld     qword ptr [STACK_SIZE+16+esp]
        fstp    tbyte ptr [DENOM+esp]
        mov     edx, 0                  ; dx = 1 if denormal extended divisor
        call    fprem1_common
        fxch
        fstp    st
        add     esp, STACK_SIZE
        pop     edx
        ret
drem1_chk       ENDP
; end drem1_chk

;
;       long double lrem1_chk(long double number,long double denom)
;
        public  lrem1_chk
lrem1_chk       PROC    NEAR
        fld     tbyte ptr [20+esp]
        fld     tbyte ptr [4+esp]
        call    fprem1_chk
        fxch
        fstp    st
        ret
lrem1_chk       ENDP
********************************************************************~

;
; FPREM1: ST = remainder(ST, ST(1)) - IEEE version of rounding
;
; Compiler version of the FPREM must preserve the arguments in the floating
; point stack.
;
; On entry ST(0) is the numerator and ST(1) the denominator.  Both are
; popped into the NUMER/DENOM stack slots and the routine returns with
; ST(0) = result and ST(1) = denominator.  EDX is preserved; EAX is
; overwritten.
;
        public  __fprem1_chk
        defpe   __fprem1_chk

        push    edx
        sub     esp, STACK_SIZE
        fstp    tbyte ptr [NUMER+esp]
        fstp    tbyte ptr [DENOM+esp]
        mov     edx, 0                  ; no flags: not scaled, denominator not saved
; prem1_main_routine begin
        mov     eax,[DENOM+6+esp]       ; exponent and high 16 bits of mantissa
        test    eax,07fff0000h          ; check for denormal
        jz      denormal1
        call    fprem1_common
        add     esp, STACK_SIZE
        pop     edx
        ret

denormal1:
        fld     tbyte ptr [DENOM+esp]   ; load the denominator
        fld     tbyte ptr [NUMER+esp]   ; load the numerator
        mov     eax, [DENOM+esp]        ; test for whole mantissa == 0
        or      eax, [DENOM+4+esp]      ; test for whole mantissa == 0
        jz      remainder1_hardware_ok_l        ; denominator is zero
        fxch
        fstp    tbyte ptr[esp + DENOM_SAVE]     ; save org denominator
        fld     tbyte ptr[esp + DENOM]
        fxch
        or      edx, 02h                ; DENOM_SAVE now holds the original
;
; For this we need pc=80.  Also, mask exceptions so we don't take any
; denormal operand exceptions.  It is guaranteed that the descaling
; later on will take underflow, which is what the hardware would have done
; on a normal fprem.
;
        fnstcw  [PREV_CW+esp]           ; save caller's control word
        mov     eax, [PREV_CW+esp]
        or      eax, 0033fh             ; mask exceptions, pc=80
        mov     [PATCH_CW+esp], eax
        fldcw   [PATCH_CW+esp]          ; mask exceptions & pc=80

; The denominator is a denormal.  For most numerators, scale both numerator
; and denominator to get rid of denormals.  Then execute the common code
; with the flag set to indicate that the result must be de-scaled.
; For large numerators this won't work because the scaling would cause
; overflow.  In this case we know the numerator is large, the denominator
; is small (denormal), so the exponent difference is also large.  This means
; the rem1_large code will be used and this code depends on the difference
; in exponents modulo 64.  Adding 64 to the denominators exponent
; doesn't change the modulo 64 difference.  So we can scale the denominator
; by 64, making it not denormal, and this won't effect the result.
;
; To start with, figure out if numerator is large

        mov     eax, [esp + NUMER + 8]  ; load numerator exponent
        and     eax, 7fffh              ; isolate numerator exponent
        cmp     eax, 7fbeh              ; compare Nexp to Maxexp-64
        ja      big_numer_rem1_de       ; jif big numerator

; So the numerator is not large scale both numerator and denominator

        or      edx, 1                  ; edx = 1, if denormal extended divisor
        fmul    qword ptr one_shl_64    ; make numerator not denormal
        fstp    tbyte ptr[esp + NUMER]
        fmul    qword ptr one_shl_64    ; make denominator not denormal
        fstp    tbyte ptr[esp + DENOM]
        jmp     scaling_done1

; The numerator is large.  Scale only the denominator, which will not
; change the result which we know will be partial.  Set the scale flag
; to false.
big_numer_rem1_de:
        ; We must do this with pc=80 to avoid rounding to single/double.
        ; In this case we do not mask exceptions so that we will take
        ; denormal operand, as would the hardware.
        ;
        ; PREV_CW already holds the caller's control word (stored above).
        ; It must not be re-read from the FPU here: the FPU currently has
        ; the masked patch word loaded, and storing that into PREV_CW would
        ; make the "restore" at scaling_done1 reload the patch word and
        ; return to the caller with exceptions masked and pc=80.
        mov     eax, [PREV_CW+esp]      ; caller's control word
        or      eax, 00300h             ; pc=80
        mov     [PATCH_CW+esp], eax
        fldcw   [PATCH_CW+esp]          ; pc=80, caller's exception masks

        fstp    st                      ; Toss numerator
        fmul    qword ptr one_shl_64    ; make denominator not denormal
        fstp    tbyte ptr[esp + DENOM]

; Restore the control word which was fiddled to scale at 80-bit precision.
; Then call the common code.
scaling_done1:
        fldcw   [esp + PREV_CW]         ; restore callers control word
        call    fprem1_common
        add     esp, STACK_SIZE
        pop     edx
        ret

remainder1_hardware_ok_l:
        fprem1                          ; and finally do a remainder (FPREM1 flavour)

        CHECKSW

        add     esp, STACK_SIZE
        pop     edx
        ret
__fprem1_chk    ENDP
; end fprem1_chk

ifdef   DEBUG
        public  fpinit
; Debug helper: reinitialise the FPU.
fpinit  PROC    NEAR
        fninit
        ret
fpinit  ENDP
endif

_TEXT   ENDS
        END