Clib char & math functions

git-svn-id: svn://kolibrios.org@554 a494cfbc-eb01-0410-851d-a64ba20cac60
2007-06-26 11:19:49 +00:00
parent 836c97f0ac
commit 43bd1e645f
64 changed files with 3847 additions and 1 deletions
--- a/watcom/trunk/clib/fpu/chipa32.asm
+++ b/watcom/trunk/clib/fpu/chipa32.asm
@@ -0,0 +1,397 @@
+;*****************************************************************************
+;*
+;*                            Open Watcom Project
+;*
+;*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
+;*
+;*  ========================================================================
+;*
+;*    This file contains Original Code and/or Modifications of Original
+;*    Code as defined in and that are subject to the Sybase Open Watcom
+;*    Public License version 1.0 (the 'License'). You may not use this file
+;*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
+;*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
+;*    provided with the Original Code and Modifications, and is also
+;*    available at www.sybase.com/developer/opensource.
+;*
+;*    The Original Code and all software distributed under the License are
+;*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+;*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
+;*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
+;*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
+;*    NON-INFRINGEMENT. Please see the License for the specific language
+;*    governing rights and limitations under the License.
+;*
+;*  ========================================================================
+;*
+;* Description:  __fpatan_chk - FPATAN replacement whose divisions go through
+;*               __fdiv_fpr; special-case operands use the hardware FPATAN.
+;*
+;*****************************************************************************
+
+
+; static char sccs_id[] = "@(#)fpatan32.asm     1.7  12/21/94  08:33:45";
+;
+; This code is being published by Intel to users of the Pentium(tm)
+; processor.  Recipients are authorized to copy, modify, compile, use and
+; distribute the code.
+;
+; Intel makes no warranty of any kind with regard to this code, including
+; but not limited to, implied warranties or merchantability and fitness for
+; a particular purpose. Intel assumes no responsibility for any errors that
+; may appear in this code.
+;
+; No patent licenses are granted, express or implied.
+;
+;
+include mdef.inc
+; mdef.inc (above) presumably supplies the defpe macro used below -- verify.
+        .386                            ; enable 80386 instructions
+        .387                            ; enable 80387 FPU instructions
+
+; Empty declarations of the segments; CONST and CONST2 are later joined into DGROUP.
+_TEXT   SEGMENT PARA PUBLIC USE32 'CODE'
+_TEXT  ENDS
+
+CONST   SEGMENT DWORD PUBLIC USE32 'DATA'
+CONST   ENDS
+
+CONST2  SEGMENT DWORD PUBLIC USE32 'DATA'
+CONST2  ENDS
+
+DATA32   SEGMENT DWORD PUBLIC USE32 'DATA'
+; Stack-frame offsets (relative to esp after "sub esp, STACK_SIZE") used by __fpatan_chk.
+
+Y               EQU     0               ; saved Y operand (tbyte)
+X               EQU     12              ; saved X operand (tbyte)
+PREV_CW         EQU     24              ; caller's FPU control word
+PATCH_CW        EQU     28              ; modified control word loaded for the computation
+SPILL           EQU     32              ; integer k stored by fistp
+STACK_SIZE      EQU     36              ; total frame size in bytes
+
+; Doubles stored low dword first: 3FF00000 00000000h = +1.0, BFF00000 00000000h = -1.0.
+pos_1   DD   00000000H
+        DD   3ff00000H
+
+neg_1   DD   00000000H                  ; not referenced in the visible code
+        DD   0bff00000H
+
+
+dispatch_table  DD      offset label0   ; final fix-up targets, indexed by edx (0..7)
+                DD      offset label1
+                DD      offset label2
+                DD      offset label3
+                DD      offset label4
+                DD      offset label5
+                DD      offset label6
+                DD      offset label7
+;end dispatch table
+; pi and pi/2 as 10-byte extended values (mantissa C90FDAA22168C235h, exponents 4000h / 3FFFh).
+pi      DB      35H
+        DB      0c2H
+        DD      0daa22168H
+        DD      4000c90fH
+
+pi_by_2 DB      35H
+        DB      0c2H
+        DD      0daa22168H
+        DD      3fffc90fH
+
+flt_sixteen DD  41800000H               ; 16.0 (single precision)
+
+one_by_sixteen  DD 3d800000H            ; 1/16 (single precision)
+
+; B1..B6: 10-byte polynomial coefficients, roughly -1/3, 1/5, -1/7, 1/9, -1/11, 1/13.
+B1      DW      0AAA8H
+        DD      0AAAAAAAAH
+        DD      0BFFDAAAAH
+
+B2      DW      2D6EH
+        DD      0CCCCCCCCH
+        DD      3FFCCCCCH
+
+B3      DW      4892H
+        DD      249241F9H
+        DD      0BFFC9249H
+
+B4      DW      0C592H
+        DD      3897CDECH
+        DD      3FFBE38EH
+
+B5      DW      5DDDH
+        DD      0C17BC162H
+        DD      0BFFBBA2DH
+
+B6      DW      4854H
+        DD      77C7C78EH
+        DD      3FFB9C80H
+
+; atan(k/16) for k = 0..16: one 10-byte value per 16-byte slot (last entry = pi/4).
+atan_k_by_16    dd 000000000H, 000000000H, 000000000H, 000000000H
+                dd 067EF4E37H, 0FFAADDB9H, 000003FFAH, 000000000H
+                dd 0617B6E33H, 0FEADD4D5H, 000003FFBH, 000000000H
+                dd 072D81135H, 0BDCBDA5EH, 000003FFCH, 000000000H
+                dd 06406EB15H, 0FADBAFC9H, 000003FFCH, 000000000H
+                dd 03F5E5E6AH, 09B13B9B8H, 000003FFDH, 000000000H
+                dd 026F78474H, 0B7B0CA0FH, 000003FFDH, 000000000H
+                dd 0611FE5B6H, 0D327761EH, 000003FFDH, 000000000H
+                dd 00DDA7B45H, 0ED63382BH, 000003FFDH, 000000000H
+                dd 0D9867E2AH, 0832BF4A6H, 000003FFEH, 000000000H
+                dd 0F7F59F9BH, 08F005D5EH, 000003FFEH, 000000000H
+                dd 071BDDA20H, 09A2F80E6H, 000003FFEH, 000000000H
+                dd 034F70924H, 0A4BC7D19H, 000003FFEH, 000000000H
+                dd 0B4D8C080H, 0AEAC4C38H, 000003FFEH, 000000000H
+                dd 0C2319E74H, 0B8053E2BH, 000003FFEH, 000000000H
+                dd 0AC526641H, 0C0CE85B8H, 000003FFEH, 000000000H
+                dd 02168C235H, 0C90FDAA2H, 000003FFEH, 000000000H
+
+DATA32  ENDS
+
+BSS32   SEGMENT DWORD PUBLIC USE32 'BSS'
+BSS32   ENDS
+
+; Divide helper; __fpatan_chk calls it with a dispatch index in eax.
+EXTRN   __fdiv_fpr:NEAR
+; All data segments form one group; DS and ES are ASSUMEd to it in the code segment.
+DGROUP  GROUP CONST,CONST2,DATA32,BSS32
+
+
+_TEXT   SEGMENT PARA PUBLIC USE32 'CODE'
+        ASSUME CS:_TEXT,DS:DGROUP,ES:DGROUP, SS:nothing
+        public __fpatan_chk
+; __fpatan_chk: FPATAN stand-in. In: ST(0)=X, ST(1)=Y. Out: ST(0)=result, operands popped; eax/ecx/edx preserved.
+        defpe   __fpatan_chk
+        push    eax
+        push    ecx
+        push    edx
+        sub     esp, STACK_SIZE
+        fstp    tbyte ptr [esp+X]       ; save X
+        fstp    tbyte ptr [esp+Y]       ; save Y
+; Operands that are not normal finite numbers are handed to the hardware FPATAN.
+        mov     ecx, [esp+Y+4]          ; high dword of Y's mantissa
+        add     ecx, ecx                ; explicit integer bit -> carry
+        jnc     hw_fpatan               ; unnormals (explicit 1 missing)
+        mov     eax, [esp+X+4]          ; high dword of X's mantissa
+        add     eax, eax                ; explicit integer bit -> carry
+        jnc     hw_fpatan               ; unnormals (explicit 1 missing)
+        mov     ecx, [esp+Y+8]          ; sign/exponent word of Y
+        mov     eax, [esp+X+8]          ; sign/exponent word of X
+        and     ecx, 7fffh              ; Ey = exponent Y
+        jz      hw_fpatan               ; Ey = 0
+        and     eax, 7fffh              ; Ex = exponent X
+        jz      hw_fpatan               ; Ex = 0
+        cmp     ecx, 7fffh              ; check if Ey = 0x7fffh
+        je      hw_fpatan
+        cmp     eax, 7fffh              ; check if Ex = 0x7fffh
+        je      hw_fpatan
+; Work with |X| and |Y|; the signs are folded back in through dispatch_table at the end.
+        fld     tbyte ptr [esp+X]       ; reload X
+        fabs                            ; |X| = u
+        fld     tbyte ptr [esp+Y]       ; reload Y
+        fabs                            ; |Y| = v
+
+;  The following six lines turn off exceptions and set the
+;  precision control to 80 bits.  The former is necessary to
+;  force any traps to be taken at the divide instead of the scaling
+;  code.  The latter is necessary in order to get full precision for
+;  codes with incoming 32 and 64 bit precision settings.  If
+;  it can be guaranteed that before reaching this point, the underflow
+;  exception is masked and the precision control is at 80 bits, these
+;  six lines can be omitted.
+;
+        fnstcw  [PREV_CW+esp]           ; save caller's control word
+        mov     edx, [PREV_CW+esp]
+        or      edx, 033fh              ; mask exceptions, pc=80
+        and     edx, 0f3ffh             ; clear the rounding-control bits
+        mov     [PATCH_CW+esp], edx
+        fldcw   [PATCH_CW+esp]          ; mask exceptions & pc=80
+
+
+        xor     edx, edx                ; initialize sflag = 0
+        fcom                            ; compare |Y| (st0) with |X| (st1)
+        push    eax                     ; keep eax across fstsw ax
+        fstsw  ax
+        sahf
+        pop     eax
+        jb      order_X_Y_ok            ; |Y| < |X|: st0/st1 is already below 1
+        fxch                            ; otherwise divide |X| by |Y| instead
+        inc     edx                     ; sflag = 1
+order_X_Y_ok:
+        push    eax                     ; __fdiv_fpr takes its dispatch index in eax
+        mov     eax, 0fh                ; index 0fh: st(1) = st(0)/st(1), pop
+        call    __fdiv_fpr                  ; z = st0/st1, 0 <= z <= 1
+        pop     eax
+        fld     dword ptr flt_sixteen   ; 16.0
+        fmul    st, st(1)               ; z*16.0
+; Top of stack looks like k, z
+        fistp   dword ptr [SPILL+esp]   ; store k as int
+        mov     ecx, [SPILL+esp]
+        shl     ecx, 4                  ; k*16 = byte offset into atan_k_by_16
+        fild    dword ptr[SPILL+esp]
+        fmul    dword ptr one_by_sixteen; 1.0/16.0
+; Top of stack looks like g, z
+        fld     st(1)                   ; duplicate z
+        fsub    st, st(1)               ; z-g = r
+        fxch
+; Top of stack looks like g, r, z
+        fmulp   st(2), st               ; g*z
+; Top of stack looks like r, g*z
+        fld     qword ptr pos_1         ; load 1.0
+        faddp   st(2), st               ; 1+g*z
+; Top of stack looks like r, 1+g*z
+        push    eax
+        mov     eax, 0fh                ; index 0fh: st(1) = st(0)/st(1), pop
+        call    __fdiv_fpr                  ; s = r/(1+g*z)
+        pop     eax
+        fld     st(0)                   ; duplicate s
+        fmul    st,st(1)                ; t = s*s
+; Top of stack looks like t, s
+; Polynomial in t with coefficients B1..B6; the integer instructions mixed in build the dispatch index in edx.
+        fld     st(0)
+        fmul    st, st(1)
+; Top of stack looks like t2, t, s
+        fld     st(0)
+        fmul    st, st(1)
+        fld     tbyte ptr B6
+        fld     tbyte ptr B5
+; Top of stack looks like B5, B6, t4, t2, t, s
+        fxch
+        fmul    st, st(2)
+        fld     tbyte ptr B4
+        fxch    st(2)
+        fmul    st, st(3)
+; Top of stack looks like B5t4, B6t4, B4, t4, t2, t, s
+        fld     tbyte ptr B3
+        fxch    st(2)
+        fmul    st, st(5)
+; Top of stack looks like B6t6, B5t4, B3, B4, t4, t2, t, s
+        fxch    st(3)
+        fmulp   st(4), st
+        fld     tbyte ptr B2
+; Top of stack looks like B2, B5t4, B3, B6t6, B4t4, t2, t, s
+        fxch    st(3)
+        faddp   st(4), st
+        mov     eax, [esp+X+8]          ; sign/exponent word of X
+        fld     tbyte ptr B1
+        fxch
+        shl     eax, 16                 ; sign bit of X to bit 31
+; Top of stack looks like B5t4, B1, B3, B2, even, t2, t, s
+        fmul    st, st(6)
+        fxch    st(2)
+        add     eax, eax                ; carry = sign of X
+        fmul    st, st(5)
+; Top of stack looks like B3t2, B1, B5t5, B2, even, t2, t, s
+        fxch    st(3)
+        adc     edx, edx                ; |sflag|Sx|
+        fmulp   st(5), st
+        fxch    st(2)
+        mov     eax, [Y+8+esp]          ; sign/exponent word of Y
+        fmul    st, st(5)
+; Top of stack looks like B3t3, B5t5, B1, even, B2t2, t, s
+        fxch    st(2)
+        shl     eax, 16                 ; sign bit of Y to bit 31
+        fmulp   st(5), st
+; Top of stack looks like  B5t5, B3t3, even, B2t2, B1t, s
+        fxch    st(2)
+        faddp   st(3), st
+        add     eax, eax                ; carry = sign of Y
+        faddp   st(1), st
+        adc     edx, edx                ; |sflag|Sx|Sy|
+; Top of stack looks like  odd, even, B1t, s
+        faddp   st(2), st
+        faddp   st(1), st
+        fmul    st,st(1)                ; s*(odd+even)
+        faddp   st(1), st               ; poly
+; ecx = k*16 selects the 16-byte table slot holding atan(k/16).
+        fld     tbyte ptr atan_k_by_16[ecx]     ; arctan[k;16]
+        faddp   st(1), st               ; w = poly + arctan(g)
+
+        jmp     dword ptr dispatch_table[edx*4]
+; edx = 4*sflag + 2*Sx + Sy; every exit restores the caller's control word and frame.
+label0:                                 ; sflag=0 Sx=0 Sy=0: w
+        fldcw   [esp+PREV_CW]
+        add     esp, STACK_SIZE
+        pop     edx
+        pop     ecx
+        pop     eax
+        ret
+label1:                                 ; sflag=0 Sx=0 Sy=1: -w
+        fchs
+        fldcw   [esp+PREV_CW]
+        add     esp, STACK_SIZE
+        pop     edx
+        pop     ecx
+        pop     eax
+        ret
+label2:                                 ; sflag=0 Sx=1 Sy=0
+        fld     tbyte ptr pi
+        fsubrp  st(1), st               ; pi - w
+        fldcw   [esp+PREV_CW]
+        add     esp, STACK_SIZE
+        pop     edx
+        pop     ecx
+        pop     eax
+        ret
+label3:                                 ; sflag=0 Sx=1 Sy=1
+        fld     tbyte ptr pi
+        fsubrp  st(1), st               ; pi - w
+        fchs                            ; - (pi - w)
+        fldcw   [esp+PREV_CW]
+        add     esp, STACK_SIZE
+        pop     edx
+        pop     ecx
+        pop     eax
+        ret
+label4:                                 ; sflag=1 Sx=0 Sy=0
+        fld     tbyte ptr pi_by_2
+        fsubrp  st(1), st               ; pi/2 - w
+        fldcw   [esp+PREV_CW]
+        add     esp, STACK_SIZE
+        pop     edx
+        pop     ecx
+        pop     eax
+        ret
+label5:                                 ; sflag=1 Sx=0 Sy=1
+        fld     tbyte ptr pi_by_2
+        fsubrp  st(1), st               ; pi/2 - w
+        fchs                            ; - (pi/2 - w)
+        fldcw   [esp+PREV_CW]
+        add     esp, STACK_SIZE
+        pop     edx
+        pop     ecx
+        pop     eax
+        ret
+label6:                                 ; sflag=1 Sx=1 Sy=0
+        fld     tbyte ptr pi_by_2
+        faddp   st(1), st               ; pi/2 + w
+        fldcw   [esp+PREV_CW]
+        add     esp, STACK_SIZE
+        pop     edx
+        pop     ecx
+        pop     eax
+        ret
+label7:                                 ; sflag=1 Sx=1 Sy=1
+        fld     tbyte ptr pi_by_2
+        faddp   st(1), st               ; pi/2 + w
+        fchs                            ; -(pi/2+w)
+        fldcw   [esp+PREV_CW]
+        add     esp, STACK_SIZE
+        pop     edx
+        pop     ecx
+        pop     eax
+        ret
+
+; Hardware fallback: reached before the control word was changed, so only the frame is undone.
+hw_fpatan:
+        fld     tbyte ptr [esp+Y]       ; reload Y
+        fld     tbyte ptr [esp+X]       ; reload X
+        fpatan
+        add     esp, STACK_SIZE
+        pop     edx
+        pop     ecx
+        pop     eax
+        ret
+__fpatan_chk       ENDP
+
+_TEXT  ENDS
+        END
--- a/watcom/trunk/clib/fpu/chipd32.asm
+++ b/watcom/trunk/clib/fpu/chipd32.asm
@@ -0,0 +1,991 @@
+;*****************************************************************************
+;*
+;*                            Open Watcom Project
+;*
+;*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
+;*
+;*  ========================================================================
+;*
+;*    This file contains Original Code and/or Modifications of Original
+;*    Code as defined in and that are subject to the Sybase Open Watcom
+;*    Public License version 1.0 (the 'License'). You may not use this file
+;*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
+;*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
+;*    provided with the Original Code and Modifications, and is also
+;*    available at www.sybase.com/developer/opensource.
+;*
+;*    The Original Code and all software distributed under the License are
+;*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+;*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
+;*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
+;*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
+;*    NON-INFRINGEMENT. Please see the License for the specific language
+;*    governing rights and limitations under the License.
+;*
+;*  ========================================================================
+;*
+;* Description:  __fdiv_fpr - register-register FDIV/FDIVR replacement that
+;*               rescales risky denominators before the hardware divide.
+;*
+;*****************************************************************************
+
+
+; static char sccs_id[] = "@(#)patch32.asm      1.12  12/21/94  14:53:51";
+;
+; This code is being published by Intel to users of the Pentium(tm)
+; processor.  Recipients are authorized to copy, modify, compile, use and
+; distribute the code.
+;
+; Intel makes no warranty of any kind with regard to this code, including
+; but not limited to, implied warranties or merchantability and fitness for
+; a particular purpose. Intel assumes no responsibility for any errors that
+; may appear in this code.
+;
+; No patent licenses are granted, express or implied.
+;
+;
+include mdef.inc
+; mdef.inc (above) presumably supplies the defpe macro used below -- verify.
+.386
+.387
+; Frame offsets as seen by the macros (esp after "sub esp, STACK_SIZE").
+DENOM           EQU     0       ; denominator operand (tbyte)
+NUMER           EQU     12      ; numerator operand (tbyte)
+PREV_CW         EQU     28      ; 24 + 4 (return size)
+PATCH_CW        EQU     32      ; 28 + 4 (return size)
+
+DENOM_SAVE      EQU     32      ; fdiv_st / fdiv_sti keep the original denominator here
+; The same operands as seen inside fdiv_main_routine (+4 for its return address).
+MAIN_DENOM      EQU     4
+MAIN_NUMER      EQU     16
+
+SPILL_SIZE      EQU     12      ; not referenced in the visible part of this file
+MEM_OPERAND     EQU     8       ; not referenced in the visible part of this file
+STACK_SIZE      EQU     44      ; bytes reserved by __fdiv_fpr / __fdivp_sti_st
+SPILL_MEM_OPERAND       EQU     20      ; not referenced in the visible part of this file
+
+ONESMASK        EQU     0e000000h       ; the three bits after the 4-bit pattern, once "add eax,eax" shifted them
+
+SINGLE_NAN      EQU     07f800000h      ; not referenced in the visible part of this file
+DOUBLE_NAN      EQU     07ff00000h      ; not referenced in the visible part of this file
+
+ILLEGAL_OPC     EQU     6               ; interrupt raised for unsupported dispatch indices
+
+f_stsw  macro   where                   ; thin wrapper: store the FPU status word to `where`
+        fstsw   where
+endm
+
+fdivr_st        MACRO   reg_index, reg_index_minus1     ; ST(0) = ST(reg_index) / ST(0)
+        fstp    tbyte ptr [esp+DENOM]                   ; old ST(0) is the denominator
+IF      reg_index_minus1 GE 1
+        fxch    st(reg_index_minus1)                    ; bring old ST(reg_index) to the top
+ENDIF
+        fstp    tbyte ptr [esp+NUMER]                   ; old ST(reg_index) is the numerator
+        call    fdiv_main_routine                       ; quotient arrives in ST(0)
+IF      reg_index_minus1 GE 1
+        fxch    st(reg_index_minus1)                    ; put the displaced register back on top
+ENDIF
+        fld     tbyte ptr [esp+NUMER]                   ; restore old ST(reg_index)
+        fxch    st(reg_index)                           ; quotient to ST(0)
+        add     esp, STACK_SIZE                         ; drop the frame reserved before entry
+ENDM
+
+fdivr_sti       MACRO   reg_index, reg_index_minus1     ; ST(reg_index) = ST(0) / ST(reg_index)
+        fstp    tbyte ptr [esp+NUMER]                   ; old ST(0) is the numerator
+IF      reg_index_minus1 GE 1
+        fxch    st(reg_index_minus1)                    ; bring old ST(reg_index) to the top
+ENDIF
+        fstp    tbyte ptr [esp+DENOM]                   ; old ST(reg_index) is the denominator
+        call    fdiv_main_routine                       ; quotient arrives in ST(0)
+IF      reg_index_minus1 GE 1
+        fxch    st(reg_index_minus1)                    ; move the quotient into the target slot
+ENDIF
+        fld     tbyte ptr [esp+NUMER]                   ; restore old ST(0)
+        add     esp, STACK_SIZE                         ; drop the frame reserved before entry
+ENDM
+
+fdivrp_sti      MACRO   reg_index, reg_index_minus1     ; ST(reg_index) = ST(0) / ST(reg_index), then pop
+        fstp    tbyte ptr [esp+NUMER]                   ; old ST(0) is the numerator
+IF      reg_index_minus1 GE 1
+        fxch    st(reg_index_minus1)                    ; bring old ST(reg_index) to the top
+ENDIF
+        fstp    tbyte ptr [esp+DENOM]                   ; old ST(reg_index) is the denominator
+        call    fdiv_main_routine                       ; quotient arrives in ST(0)
+IF      reg_index_minus1 GE 1
+        fxch    st(reg_index_minus1)                    ; move the quotient into the target slot
+ENDIF
+        add     esp, STACK_SIZE                         ; drop the frame reserved before entry
+ENDM
+
+fdiv_st         MACRO   reg_index, reg_index_minus1     ; ST(0) = ST(0) / ST(reg_index)
+        fstp    tbyte ptr [esp+NUMER]                   ; old ST(0) is the numerator
+IF      reg_index_minus1 GE 1
+        fxch    st(reg_index_minus1)                    ; bring old ST(reg_index) to the top
+ENDIF
+        fld     st                                      ; two copies: one to divide by, one to restore
+        fstp    tbyte ptr [esp+DENOM]
+        fstp    tbyte ptr [esp+DENOM_SAVE]      ; save original denom,
+        call    fdiv_main_routine                       ; quotient arrives in ST(0)
+IF      reg_index_minus1 GE 1
+        fxch    st(reg_index_minus1)                    ; put the displaced register back on top
+ENDIF
+        fld     tbyte ptr [esp+DENOM_SAVE]              ; restore old ST(reg_index)
+        fxch    st(reg_index)                           ; quotient to ST(0)
+        add     esp, STACK_SIZE                         ; drop the frame reserved before entry
+ENDM
+
+fdiv_sti        MACRO   reg_index, reg_index_minus1     ; ST(reg_index) = ST(reg_index) / ST(0)
+        fxch    st(reg_index)                           ; numerator (old ST(reg_index)) to the top
+        fstp    tbyte ptr [esp+NUMER]
+IF      reg_index_minus1 GE 1
+        fxch    st(reg_index_minus1)                    ; bring old ST(0) back to the top
+ENDIF
+        fld     st                                      ; two copies: one to divide by, one to restore
+        fstp    tbyte ptr [esp+DENOM]
+        fstp    tbyte ptr [esp+DENOM_SAVE]      ; save original denom,
+        call    fdiv_main_routine                       ; quotient arrives in ST(0)
+IF      reg_index_minus1 GE 1
+        fxch    st(reg_index_minus1)                    ; move the quotient into the target slot
+ENDIF
+        fld     tbyte ptr [esp+DENOM_SAVE]              ; restore old ST(0)
+        add     esp, STACK_SIZE                         ; drop the frame reserved before entry
+ENDM
+
+fdivp_sti       MACRO   reg_index, reg_index_minus1     ; ST(reg_index) = ST(reg_index) / ST(0), then pop
+        fstp    tbyte ptr [esp+DENOM]                   ; old ST(0) is the denominator
+IF      reg_index_minus1 GE 1
+        fxch    st(reg_index_minus1)                    ; bring old ST(reg_index) to the top
+ENDIF
+        fstp    tbyte ptr [esp+NUMER]                   ; old ST(reg_index) is the numerator
+        call    fdiv_main_routine                       ; quotient arrives in ST(0)
+IF      reg_index_minus1 GE 1
+        fxch    st(reg_index_minus1)                    ; move the quotient into the target slot
+ENDIF
+        add     esp, STACK_SIZE                         ; drop the frame reserved before entry
+ENDM
+
+_TEXT  SEGMENT DWORD USE32 PUBLIC 'CODE'        ; declared empty here, reopened below for the code
+_TEXT  ENDS
+
+DATA32  SEGMENT DWORD USE32 PUBLIC 'DATA'       ; declared empty here, reopened below for the tables
+DATA32  ENDS
+
+CONST32 SEGMENT DWORD USE32 PUBLIC 'CONST'
+CONST32 ENDS
+
+BSS32   SEGMENT DWORD USE32 PUBLIC 'BSS'
+BSS32   ENDS
+; The data segments form one group; the code segment ASSUMEs DS and ES to it.
+DGROUP  GROUP CONST32, BSS32, DATA32
+
+
+DATA32  SEGMENT DWORD USE32 PUBLIC 'DATA'
+; fdiv_risc_table is indexed by the first four mantissa bits after the binary point; non-zero at 1, 4, 7, 10, 13.
+fdiv_risc_table DB      0, 1, 0, 0, 4, 0, 0, 7, 0, 0, 10, 0, 0, 13, 0, 0
+fdiv_scale_1    DD      03f700000h              ;0.9375
+fdiv_scale_2    DD      03f880000h              ;1.0625
+one_shl_63      DD      05f000000h              ; 2^63 (single precision); lifts a denormal denominator
+
+; Jump table used by __fdiv_fpr: entry n is label<n>, selected with eax*4.
+dispatch_table DD       offset label0
+        DD      offset label1
+        DD      offset label2
+        DD      offset label3
+        DD      offset label4
+        DD      offset label5
+        DD      offset label6
+        DD      offset label7
+        DD      offset label8
+        DD      offset label9
+        DD      offset label10
+        DD      offset label11
+        DD      offset label12
+        DD      offset label13
+        DD      offset label14
+        DD      offset label15
+        DD      offset label16
+        DD      offset label17
+        DD      offset label18
+        DD      offset label19
+        DD      offset label20
+        DD      offset label21
+        DD      offset label22
+        DD      offset label23
+        DD      offset label24
+        DD      offset label25
+        DD      offset label26
+        DD      offset label27
+        DD      offset label28
+        DD      offset label29
+        DD      offset label30
+        DD      offset label31
+        DD      offset label32
+        DD      offset label33
+        DD      offset label34
+        DD      offset label35
+        DD      offset label36
+        DD      offset label37
+        DD      offset label38
+        DD      offset label39
+        DD      offset label40
+        DD      offset label41
+        DD      offset label42
+        DD      offset label43
+        DD      offset label44
+        DD      offset label45
+        DD      offset label46
+        DD      offset label47
+        DD      offset label48
+        DD      offset label49
+        DD      offset label50
+        DD      offset label51
+        DD      offset label52
+        DD      offset label53
+        DD      offset label54
+        DD      offset label55
+        DD      offset label56
+        DD      offset label57
+        DD      offset label58
+        DD      offset label59
+        DD      offset label60
+        DD      offset label61
+        DD      offset label62
+        DD      offset label63
+
+DATA32  ENDS
+
+
+_TEXT  SEGMENT   DWORD USE32 PUBLIC 'CODE'
+
+
+        assume cs:_TEXT, ds:DGROUP, es:DGROUP, ss:nothing
+
+;
+;  PRELIMINARY VERSION for register-register divides.
+;
+
+
+                                        ; In this implementation the
+                                        ; fdiv_main_routine is called,
+                                        ; therefore all the stack frame
+                                        ; locations are adjusted for the
+                                        ; return pointer.
+
+fdiv_main_routine PROC  NEAR
+; In: tbyte numerator/denominator at [esp+MAIN_NUMER]/[esp+MAIN_DENOM]. Out: ST(0) = quotient. Clobbers eax.
+        fld     tbyte ptr [esp+MAIN_NUMER]      ; load the numerator
+        fld     tbyte ptr [esp+MAIN_DENOM]      ; load the denominator
+retry:
+
+;  The following three lines test for denormals and zeros.
+;  A denormal or zero has a 0 in the explicit digit to the left of the
+;  binary point.  Since that bit is the high bit of the word, adding
+;  it to itself will produce a carry if and only if the number is not
+;  denormal or zero.
+;
+        mov     eax, [esp+MAIN_DENOM+4] ; get mantissa bits 32-63
+        add     eax,eax                 ; shift the one's bit onto carry
+        jnc     denormal                ; if no carry, we're denormal
+
+;  The following three lines test the three bits after the four bit
+;  pattern (1,4,7,a,d).  If these three bits are not all one, then
+;  the denominator cannot expose the flaw.  This condition is tested by
+;  inverting the bits and testing that all are equal to zero afterward.
+
+        xor     eax, ONESMASK           ; invert the bits that must be ones
+        test    eax, ONESMASK           ; and make sure they are all ones
+        jz      scale_if_needed         ; if all are one scale numbers
+        fdivp   st(1), st               ; use of hardware is OK.
+        ret
+
+;
+;  Now we test the four bits for one of the five patterns.
+;
+scale_if_needed:
+        shr     eax, 28                 ; keep first 4 bits after point
+        cmp     byte ptr fdiv_risc_table[eax], 0        ; check for (1,4,7,a,d)
+        jnz     divide_scaled           ; are in potential problem area
+        fdivp   st(1), st               ; use of hardware is OK.
+        ret
+
+divide_scaled:
+        mov     eax, [esp + MAIN_DENOM+8]       ; test denominator exponent
+        and     eax, 07fffh             ; if pseudodenormal ensure that only
+        jz      invalid_denom           ; invalid exception flag is set
+        cmp     eax, 07fffh             ; if NaN or infinity  ensure that only
+        je      invalid_denom           ; invalid exception flag is set
+;
+;  The following six lines turn off exceptions and set the
+;  precision control to 80 bits.  The former is necessary to
+;  force any traps to be taken at the divide instead of the scaling
+;  code.  The latter is necessary in order to get full precision for
+;  codes with incoming 32 and 64 bit precision settings.  If
+;  it can be guaranteed that before reaching this point, the underflow
+;  exception is masked and the precision control is at 80 bits, these
+;  six lines can be omitted.
+;
+        fnstcw  [esp+PREV_CW]           ; save caller's control word
+        mov     eax, [esp+PREV_CW]
+        or      eax, 033fh              ; mask exceptions, pc=80
+        and     eax, 0f3ffh             ; set rounding mode to nearest
+        mov     [esp+PATCH_CW], eax
+        fldcw   [esp+PATCH_CW]          ; mask exceptions & pc=80
+
+;  The following lines check the numerator exponent before scaling.
+;  This is in order to prevent underflow when scaling the numerator,
+;  which will cause a denormal exception flag to be set when the
+;  actual divide is performed. This flag would not have been set
+;  normally. If there is a risk of underflow, the scale factor is
+;  17/16 instead of 15/16.
+;
+        mov     eax, [esp+MAIN_NUMER+8] ; test numerator exponent
+        and     eax, 07fffh
+        cmp     eax, 00001h
+        je      small_numer
+
+        fmul    fdiv_scale_1            ; scale denominator by 15/16
+        fxch
+        fmul    fdiv_scale_1            ; scale numerator by 15/16
+        fxch
+
+;
+;  The next line restores the users control word.  If the incoming
+;  control word had the underflow exception masked and precision
+;  control set to 80 bits, this line can be omitted.
+;
+
+        fldcw   [esp+PREV_CW]           ; restore caller's control word
+        fdivp   st(1), st               ; use of hardware is OK.
+        ret
+
+small_numer:
+        fmul    fdiv_scale_2            ; scale denominator by 17/16
+        fxch
+        fmul    fdiv_scale_2            ; scale numerator by 17/16
+        fxch
+
+;
+;  The next line restores the users control word.  If the incoming
+;  control word had the underflow exception masked and precision
+;  control set to 80 bits, this line can be omitted.
+;
+
+        fldcw   [esp+PREV_CW]           ; restore caller's control word
+        fdivp   st(1), st               ; use of hardware is OK.
+        ret
+
+denormal:
+        mov     eax, [esp+MAIN_DENOM]   ; test for whole mantissa == 0
+        or      eax, [esp+MAIN_DENOM+4] ; test for whole mantissa == 0
+        jnz     denormal_divide_scaled  ; denominator is not zero
+invalid_denom:                          ; zero or invalid denominator
+        fdivp   st(1), st               ; use of hardware is OK.
+        ret
+
+denormal_divide_scaled:
+        mov     eax, [esp + MAIN_DENOM + 8]     ; get exponent
+        and     eax, 07fffh             ; check for zero exponent
+        jnz     invalid_denom           ;
+;
+;  The following six lines turn off exceptions and set the
+;  precision control to 80 bits.  The former is necessary to
+;  force any traps to be taken at the divide instead of the scaling
+;  code.  The latter is necessary in order to get full precision for
+;  codes with incoming 32 and 64 bit precision settings.  If
+;  it can be guaranteed that before reaching this point, the underflow
+;  exception is masked and the precision control is at 80 bits, these
+;  six lines can be omitted.
+;
+
+        fnstcw  [esp+PREV_CW]           ; save caller's control word
+        mov     eax, [esp+PREV_CW]
+        or      eax, 033fh              ; mask exceptions, pc=80
+        and     eax, 0f3ffh             ; set rounding mode to nearest
+        mov     [esp+PATCH_CW], eax
+        fldcw   [esp+PATCH_CW]          ; mask exceptions & pc=80
+
+        mov     eax, [esp + MAIN_NUMER +8]      ; test numerator exponent
+        and     eax, 07fffh             ; check for denormal numerator
+        je      denormal_numer
+        cmp     eax, 07fffh             ; NaN or infinity
+        je      invalid_numer
+        mov     eax, [esp + MAIN_NUMER + 4]     ; get bits 32..63 of mantissa
+        add     eax, eax                ; shift the first bit into carry
+        jnc     invalid_numer           ; if there is no carry, we have an
+                                        ; invalid numer
+        jmp     numer_ok
+
+denormal_numer:
+        mov     eax, [esp + MAIN_NUMER + 4]     ; get bits 32..63 of mantissa
+        add     eax, eax                ; shift the first bit into carry
+        jc      invalid_numer           ; if there is a carry, we have an
+                                        ; invalid numer
+
+numer_ok:
+        fxch
+        fstp    st                      ; pop numerator
+        fld     st                      ; make copy of denominator
+        fmul    dword ptr[one_shl_63]   ; make denominator not denormal
+        fstp    tbyte ptr [esp+MAIN_DENOM]      ; save modified denominator
+        fld     tbyte ptr [esp+MAIN_NUMER]      ; load numerator
+        fxch                            ; restore proper order
+        fwait
+; Only the memory copy was rescaled (for the bit tests at retry); ST(0) still holds the original denominator.
+;  The next line restores the users control word.  If the incoming
+;  control word had the underflow exception masked and precision
+;  control set to 80 bits, this line can be omitted.
+;
+
+        fldcw   [esp+PREV_CW]           ; restore caller's control word
+        jmp     retry                   ; start the whole thing over
+
+invalid_numer:
+;
+;  The next line restores the users control word.  If the incoming
+;  control word had the underflow exception masked and precision
+;  control set to 80 bits, this line can be omitted.
+;
+        fldcw   [esp + PREV_CW]
+        fdivp   st(1), st               ; use of hardware is OK.
+        ret
+
+fdiv_main_routine       ENDP
+
+        public  __fdiv_fpr
+        defpe   __fdiv_fpr
+; In: eax = index (0..63) into dispatch_table, operands in FPU registers. eax is not preserved on the macro paths.
+        sub     esp, STACK_SIZE         ; frame used by the macros; every path releases it
+        jmp     dword ptr dispatch_table[eax*4]
+; Index = 8*i + op for register ST(i): op 0 fdiv_st, 2 fdivr_st, 4 fdiv_sti, 5 fdivp_sti, 6 fdivr_sti, 7 fdivrp_sti.
+; op 1 and 3 raise int ILLEGAL_OPC; for i = 0 (label0..label7) the plain hardware instruction is used.
+label0:
+        fdiv    st,st(0)                ; D8 F0         FDIV    ST,ST(0)
+        add     esp, STACK_SIZE
+        ret
+label1:
+        add     esp, STACK_SIZE
+        int     ILLEGAL_OPC             ; NOTE(review): no ret follows; falls into the next label if the handler returns
+label2:
+        fdivr   st,st(0)                ; D8 F8         FDIVR   ST,ST(0)
+        add     esp, STACK_SIZE
+        ret
+label3:
+        add     esp, STACK_SIZE
+        int     ILLEGAL_OPC
+label4:
+        fdiv    st(0),st                ; DC F8/D8 F0   FDIV    ST(0),ST
+        add     esp, STACK_SIZE
+        ret
+label5:
+        fdivp   st(0),st                ; DE F8         FDIVP   ST(0),ST
+        add     esp, STACK_SIZE
+        ret
+label6:
+        fdivr   st(0),st                ; DC F0/DE F0   FDIVR   ST(0),ST
+        add     esp, STACK_SIZE
+        ret
+label7:
+        fdivrp  st(0),st                ; DE F0         FDIVRP  ST(0),ST
+        add     esp, STACK_SIZE
+        ret
+label8:
+        fdiv_st 1, 0
+        ret
+label9:
+        add     esp, STACK_SIZE
+        int     ILLEGAL_OPC
+label10:
+        fdivr_st 1, 0
+        ret
+label11:
+        add     esp, STACK_SIZE
+        int     ILLEGAL_OPC
+label12:
+        fdiv_sti 1, 0
+        ret
+label13:
+        fdivp_sti 1, 0
+        ret
+label14:
+        fdivr_sti 1, 0
+        ret
+label15:
+        fdivrp_sti 1, 0
+        ret
+label16:
+        fdiv_st 2, 1
+        ret
+label17:
+        add     esp, STACK_SIZE
+        int     ILLEGAL_OPC
+label18:
+        fdivr_st 2, 1
+        ret
+label19:
+        add     esp, STACK_SIZE
+        int     ILLEGAL_OPC
+label20:
+        fdiv_sti 2, 1
+        ret
+label21:
+        fdivp_sti 2, 1
+        ret
+label22:
+        fdivr_sti 2, 1
+        ret
+label23:
+        fdivrp_sti 2, 1
+        ret
+label24:
+        fdiv_st 3, 2
+        ret
+label25:
+        add     esp, STACK_SIZE
+        int     ILLEGAL_OPC
+label26:
+        fdivr_st 3, 2
+        ret
+label27:
+        add     esp, STACK_SIZE
+        int     ILLEGAL_OPC
+label28:
+        fdiv_sti 3, 2
+        ret
+label29:
+        fdivp_sti 3, 2
+        ret
+label30:
+        fdivr_sti 3, 2
+        ret
+label31:
+        fdivrp_sti 3, 2
+        ret
+label32:
+        fdiv_st 4, 3
+        ret
+label33:
+        add     esp, STACK_SIZE
+        int     ILLEGAL_OPC
+label34:
+        fdivr_st 4, 3
+        ret
+label35:
+        add     esp, STACK_SIZE
+        int     ILLEGAL_OPC
+label36:
+        fdiv_sti 4, 3
+        ret
+label37:
+        fdivp_sti 4, 3
+        ret
+label38:
+        fdivr_sti 4, 3
+        ret
+label39:
+        fdivrp_sti 4, 3
+        ret
+label40:
+        fdiv_st 5, 4
+        ret
+label41:
+        add     esp, STACK_SIZE
+        int     ILLEGAL_OPC
+label42:
+        fdivr_st 5, 4
+        ret
+label43:
+        add     esp, STACK_SIZE
+        int     ILLEGAL_OPC
+label44:
+        fdiv_sti 5, 4
+        ret
+label45:
+        fdivp_sti 5, 4
+        ret
+label46:
+        fdivr_sti 5, 4
+        ret
+label47:
+        fdivrp_sti 5, 4
+        ret
+label48:
+        fdiv_st 6, 5
+        ret
+label49:
+        add     esp, STACK_SIZE
+        int     ILLEGAL_OPC
+label50:
+        fdivr_st 6, 5
+        ret
+label51:
+        add     esp, STACK_SIZE
+        int     ILLEGAL_OPC
+label52:
+        fdiv_sti 6, 5
+        ret
+label53:
+        fdivp_sti 6, 5
+        ret
+label54:
+        fdivr_sti 6, 5
+        ret
+label55:
+        fdivrp_sti 6, 5
+        ret
+label56:
+        fdiv_st 7, 6
+        ret
+label57:
+        add     esp, STACK_SIZE
+        int     ILLEGAL_OPC
+label58:
+        fdivr_st 7, 6
+        ret
+label59:
+        add     esp, STACK_SIZE
+        int     ILLEGAL_OPC
+label60:
+        fdiv_sti 7, 6
+        ret
+label61:
+        fdivp_sti 7, 6
+        ret
+label62:
+        fdivr_sti 7, 6
+        ret
+label63:
+        fdivrp_sti 7, 6
+        ret
+__fdiv_fpr      ENDP
+
+
+; __fdivp_sti_st - helper used by the register-memory divide routines
+; (__fdiv_m32 / __fdiv_m64).  Reserves STACK_SIZE bytes of scratch space
+; and expands the fdivp_sti macro with arguments 1, 0.
+; NOTE(review): there is no explicit "add esp, STACK_SIZE" before the ret,
+; so the macro presumably releases the scratch space itself (the callers'
+; comments say "esp is adjusted by" this routine) - confirm against the
+; macro definition.
+__fdivp_sti_st    PROC    NEAR
+                                ; for calling from mem routines
+        sub     esp, STACK_SIZE         ; scratch area used by the macro
+        fdivp_sti 1, 0
+        ret
+__fdivp_sti_st    ENDP
+
+; __fdivrp_sti_st - reverse-divide counterpart of __fdivp_sti_st, used by
+; the register-memory reverse divide routines (__fdiv_m32r / __fdiv_m64r).
+; Reserves STACK_SIZE bytes of scratch space and expands the fdivrp_sti
+; macro with arguments 1, 0.
+; NOTE(review): there is no explicit "add esp, STACK_SIZE" before the ret,
+; so the macro presumably releases the scratch space itself - confirm
+; against the macro definition.
+__fdivrp_sti_st   PROC    NEAR
+                                ; for calling from mem routines
+        sub     esp, STACK_SIZE         ; scratch area used by the macro
+        fdivrp_sti 1, 0
+        ret
+__fdivrp_sti_st   ENDP
+
+; __fdiv_chk - public entry point (declared through the defpe macro).
+; The body is identical to __fdivrp_sti_st: it reserves STACK_SIZE bytes
+; and expands the fdivrp_sti macro with arguments 1, 0.
+; NOTE(review): despite the name this performs the *reverse* divide-and-pop
+; form; presumably that matches the operand order the compiler uses when
+; it calls this helper - confirm against the caller.
+        public  __fdiv_chk
+        defpe __fdiv_chk
+                                ; for calling from mem routines
+        sub     esp, STACK_SIZE         ; scratch area used by the macro
+        fdivrp_sti 1, 0
+        ret
+__fdiv_chk   ENDP
+
+;
+;  PRELIMINARY VERSIONS of the routines for register-memory
+;  divide instructions
+;
+
+;;; FDIV_M32 - FDIV m32real FIX
+;;
+;;      Input : Value of the m32real in the top of STACK
+;;
+;;      Output: Result of FDIV in ST
+
+; __fdiv_m32 - checked replacement for "FDIV m32real".
+;   In:   the m32real divisor is on the CPU stack (4 bytes, removed by
+;         this routine with "ret 4"); the dividend is in ST.
+;   Out:  quotient in ST; eax is preserved.
+; Three paths:
+;   - operand has all SINGLE_NAN bits set: use the plain hardware FDIV;
+;   - TOP field of the status word is non-zero: load the operand and use
+;     the checked divide helper;
+;   - TOP field is zero (treated as "FP stack full"): spill the user's
+;     ST(1) to memory to make room, divide, then restore it.
+        PUBLIC  __fdiv_m32
+        defpe   __fdiv_m32
+
+        push    eax                             ; save eax
+        mov     eax, [esp + MEM_OPERAND]        ; check for
+        and     eax, SINGLE_NAN                 ; NaN
+        cmp     eax, SINGLE_NAN                 ;
+        je      memory_divide_m32               ;
+
+        f_stsw  ax                              ; get status word
+        and     eax, 3800h                      ; get top of stack
+        je      spill_fpstack                   ; is FP stack full?
+        fld     dword ptr[esp + MEM_OPERAND]    ; load m32real in ST
+        call    __fdivp_sti_st                    ; do actual divide
+        pop     eax
+        ret     4
+spill_fpstack:
+        fxch                                    ; bring user's ST(1) to the top
+        sub     esp, SPILL_SIZE                 ; make temp space
+        fstp    tbyte ptr[esp ]                 ; save user's ST(1)
+        fld     dword ptr[esp + SPILL_MEM_OPERAND] ; load m32 real
+        call    __fdivp_sti_st                    ; do actual divide
+        fld     tbyte ptr[esp]                  ; restore user's ST(1)
+                                                ;esp is adjusted by fdivp fn
+        fxch                                    ; quotient back in ST
+        add     esp, SPILL_SIZE
+        pop     eax
+        ret     4
+memory_divide_m32:
+        fdiv    dword ptr[esp + MEM_OPERAND]    ; do actual divide
+        pop     eax
+        ret     4
+
+__fdiv_m32        ENDP
+
+
+;;; FDIV_M64 - FDIV m64real FIX
+;;
+;;      Input : Value of the m64real in the top of STACK
+;;
+;;      Output: Result of FDIV in ST
+
+; __fdiv_m64 - checked replacement for "FDIV m64real".
+;   In:   the m64real divisor is on the CPU stack (8 bytes, removed by
+;         this routine with "ret 8"); the dividend is in ST.
+;   Out:  quotient in ST; eax is preserved.
+; Same three paths as __fdiv_m32; the NaN test looks at the high dword
+; of the operand only.
+        PUBLIC  __fdiv_m64
+        defpe   __fdiv_m64
+
+        push    eax                             ; save eax
+        mov     eax, [esp + MEM_OPERAND + 4]    ; check for
+        and     eax, DOUBLE_NAN                 ; NaN
+        cmp     eax, DOUBLE_NAN                 ;
+        je      memory_divide_m64               ;
+
+        f_stsw  ax                              ; get status word
+        and     eax, 3800h                      ; get top of stack
+        je      spill_fpstack_m64               ; is FP stack full?
+        fld     qword ptr[esp + MEM_OPERAND]    ; load m64real in ST
+        call    __fdivp_sti_st                    ; do actual divide
+        pop     eax
+        ret     8
+spill_fpstack_m64:
+        fxch                                    ; bring user's ST(1) to the top
+        sub     esp, SPILL_SIZE                 ; make temp space
+        fstp    tbyte ptr[esp]                  ; save user's ST(1)
+        fld     qword ptr[esp + SPILL_MEM_OPERAND] ; load m64real
+        call    __fdivp_sti_st                    ; do actual divide
+        fld     tbyte ptr[esp]                  ; restore user's ST(1)
+                                                ;esp is adjusted by fdivp fn
+        fxch                                    ; quotient back in ST
+        add     esp, SPILL_SIZE
+        pop     eax
+        ret     8
+
+memory_divide_m64:
+        fdiv    qword ptr[esp + MEM_OPERAND]    ; do actual divide
+        pop     eax
+        ret     8
+
+__fdiv_m64        ENDP
+
+
+
+;;; FDIVR_M32 - FDIVR m32real FIX
+;;
+;;      Input : Value of the m32real in the top of STACK
+;;
+;;      Output: Result of FDIVR in ST
+
+; __fdiv_m32r - checked replacement for "FDIVR m32real".
+;   In:   the m32real operand is on the CPU stack (4 bytes, removed by
+;         this routine with "ret 4"); the other operand is in ST.
+;   Out:  result of the reverse divide in ST; eax is preserved.
+; Same structure as __fdiv_m32 but uses the reverse-divide helper and the
+; hardware FDIVR on the NaN path.
+        PUBLIC  __fdiv_m32r
+        defpe   __fdiv_m32r
+        push    eax                             ; save eax
+        mov     eax, [esp + MEM_OPERAND]        ; check for
+        and     eax, SINGLE_NAN                 ; NaN
+        cmp     eax, SINGLE_NAN                 ;
+        je      memory_divide_m32r              ;
+
+        f_stsw  ax                              ; get status word
+        and     eax, 3800h                      ; get top of stack
+        je      spill_fpstack_m32r              ; is FP stack full?
+        fld     dword ptr[esp + MEM_OPERAND]    ; load m32real in ST
+        call    __fdivrp_sti_st                   ; do actual divide
+        pop     eax
+        ret     4
+spill_fpstack_m32r:
+        fxch                                    ; bring user's ST(1) to the top
+        sub     esp, SPILL_SIZE                 ; make temp space
+        fstp    tbyte ptr[esp ]                 ; save user's ST(1)
+        fld     dword ptr[esp + SPILL_MEM_OPERAND] ; load m32 real
+        call    __fdivrp_sti_st                   ; do actual divide
+        fld     tbyte ptr[esp]                  ; restore user's ST(1)
+                                                ;esp is adjusted by fdivrp fn
+        fxch                                    ; result back in ST
+        add     esp, SPILL_SIZE
+        pop     eax
+        ret     4
+memory_divide_m32r:
+        fdivr   dword ptr[esp + MEM_OPERAND]    ; do actual divide
+        pop     eax
+        ret     4
+
+__fdiv_m32r     ENDP
+
+
+;;; FDIVR_M64 - FDIVR m64real FIX
+;;
+;;      Input : Value of the m64real in the top of STACK
+;;
+;;      Output: Result of FDIVR in ST
+
+; __fdiv_m64r - checked replacement for "FDIVR m64real".
+;   In:   the m64real operand is on the CPU stack (8 bytes, removed by
+;         this routine with "ret 8"); the other operand is in ST.
+;   Out:  result of the reverse divide in ST; eax is preserved.
+; Same structure as __fdiv_m64 but uses the reverse-divide helper and the
+; hardware FDIVR on the NaN path.
+        PUBLIC  __fdiv_m64r
+        defpe   __fdiv_m64r
+        push    eax                             ; save eax
+        mov     eax, [esp + MEM_OPERAND + 4]    ; check for
+        and     eax, DOUBLE_NAN                 ; NaN
+        cmp     eax, DOUBLE_NAN                 ;
+        je      memory_divide_m64r              ;
+
+        f_stsw  ax                              ; get status word
+        and     eax, 3800h                      ; get top of stack
+        je      spill_fpstack_m64r              ; is FP stack full?
+        fld     qword ptr[esp + MEM_OPERAND]    ; load m64real in ST
+        call    __fdivrp_sti_st                   ; do actual divide
+        pop     eax
+        ret     8
+spill_fpstack_m64r:
+        fxch                                    ; bring user's ST(1) to the top
+        sub     esp, SPILL_SIZE                 ; make temp space
+        fstp    tbyte ptr[esp ]                 ; save user's ST(1)
+        fld     qword ptr[esp + SPILL_MEM_OPERAND] ; load m64real
+        call    __fdivrp_sti_st                   ; do actual divide
+        fld     tbyte ptr[esp]                  ; restore user's ST(1)
+                                                ;esp is adjusted by fdivrp fn
+        fxch                                    ; result back in ST
+        add     esp, SPILL_SIZE
+        pop     eax
+        ret     8
+memory_divide_m64r:
+        fdivr   qword ptr[esp + MEM_OPERAND]    ; do actual divide
+        pop     eax
+        ret     8
+
+
+__fdiv_m64r       ENDP
+
+comment ~******************************************************************
+;;; FDIV_M16I - FDIV m16int FIX
+;;
+;;      Input : Value of the m16int in the top of STACK
+;;
+;;      Output: Result of FDIV in ST
+
+        PUBLIC  FDIV_M16I
+FDIV_M16I       PROC    NEAR
+        push    eax                             ; save eax
+        f_stsw  ax                              ; get status word
+        and     eax, 3800h                      ; get top of stack
+        je      spill_fpstack_m16i              ; is FP stack full?
+        fild    word ptr[esp + MEM_OPERAND]     ; load m16int in ST
+        call    __fdivp_sti_st                    ; do actual divide
+        pop     eax
+        ret
+spill_fpstack_m16i:
+        fxch
+        sub     esp, SPILL_SIZE                 ; make temp space
+        fstp    tbyte ptr[esp ]                 ; save user's ST(1)
+        fild    word ptr[esp + SPILL_MEM_OPERAND] ; load m16int
+        call    __fdivp_sti_st                    ; do actual divide
+        fld     tbyte ptr[esp]                  ; restore user's ST(1)
+                                                ;esp is adjusted by fdivrp fn
+        fxch
+        add     esp, SPILL_SIZE
+        pop     eax
+        ret
+
+FDIV_M16I       ENDP
+
+;;; FDIV_M32I - FDIV m32int FIX
+;;
+;;      Input : Value of the m32int in the top of STACK
+;;
+;;      Output: Result of FDIV in ST
+
+        PUBLIC  FDIV_M32I
+FDIV_M32I       PROC    NEAR
+        push    eax                             ; save eax
+        f_stsw  ax                              ; get status word
+        and     eax, 3800h                      ; get top of stack
+        je      spill_fpstack_m32i              ; is FP stack full?
+        fild    dword ptr[esp + MEM_OPERAND]    ; load m32int in ST
+        call    __fdivp_sti_st                    ; do actual divide
+        pop     eax
+        ret
+spill_fpstack_m32i:
+        fxch
+        sub     esp, SPILL_SIZE                 ; make temp space
+        fstp    tbyte ptr[esp ]                 ; save user's ST(1)
+        fild    dword ptr[esp + SPILL_MEM_OPERAND] ; load m32int
+        call    __fdivp_sti_st                    ; do actual divide
+        fld     tbyte ptr[esp]                  ; restore user's ST(1)
+                                                ;esp is adjusted by fdivrp fn
+        fxch
+        add     esp, SPILL_SIZE
+        pop     eax
+        ret
+
+
+FDIV_M32I       ENDP
+
+
+;;; FDIVR_M16I - FDIVR m16int FIX
+;;
+;;      Input : Value of the m16int in the top of STACK
+;;
+;;      Output: Result of FDIVR in ST
+
+        PUBLIC  FDIVR_M16I
+FDIVR_M16I      PROC    NEAR
+        push    eax                             ; save eax
+        f_stsw  ax                              ; get status word
+        and     eax, 3800h                      ; get top of stack
+        je      spill_fpstack_m16ir             ; is FP stack full?
+        fild    word ptr[esp + MEM_OPERAND]     ; load m16int in ST
+        call    __fdivrp_sti_st                   ; do actual divide
+        pop     eax
+        ret
+spill_fpstack_m16ir:
+        fxch
+        sub     esp, SPILL_SIZE                 ; make temp space
+        fstp    tbyte ptr[esp ]                 ; save user's ST(1)
+        fild    word ptr[esp + SPILL_MEM_OPERAND] ; load m16int
+        call    __fdivrp_sti_st                   ; do actual divide
+        fld     tbyte ptr[esp]                  ; restore user's ST(1)
+                                                ;esp is adjusted by fdivp fn
+        fxch
+        add     esp, SPILL_SIZE
+        pop     eax
+        ret
+
+
+FDIVR_M16I      ENDP
+
+
+;;; FDIVR_M32I - FDIVR m32int FIX
+;;
+;;      Input : Value of the m32int in the top of STACK
+;;
+;;      Output: Result of FDIVR in ST
+
+        PUBLIC  FDIVR_M32I
+FDIVR_M32I      PROC    NEAR
+        push    eax                             ; save eax
+        f_stsw  ax                              ; get status word
+        and     eax, 3800h                      ; get top of stack
+        je      spill_fpstack_m32ir             ; is FP stack full?
+        fild    dword ptr[esp + MEM_OPERAND]    ; load m32int in ST
+        call    __fdivrp_sti_st                   ; do actual divide
+        pop     eax
+        ret
+spill_fpstack_m32ir:
+        fxch
+        sub     esp, SPILL_SIZE                 ; make temp space
+        fstp    tbyte ptr[esp ]                 ; save user's ST(1)
+        fild    dword ptr[esp + SPILL_MEM_OPERAND] ; load m32int
+        call    __fdivrp_sti_st                   ; do actual divide
+        fld     tbyte ptr[esp]                  ; restore user's ST(1)
+                                                ;esp is adjusted by fdivp fn
+        fxch
+        add     esp, SPILL_SIZE
+        pop     eax
+        ret
+
+FDIVR_M32I      ENDP
+**********************************************************************~
+
+
+
+_TEXT  ENDS
+
+        end
--- a/watcom/trunk/clib/fpu/chipr32.asm
+++ b/watcom/trunk/clib/fpu/chipr32.asm
@@ -0,0 +1,851 @@
+;*****************************************************************************
+;*
+;*                            Open Watcom Project
+;*
+;*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
+;*
+;*  ========================================================================
+;*
+;*    This file contains Original Code and/or Modifications of Original
+;*    Code as defined in and that are subject to the Sybase Open Watcom
+;*    Public License version 1.0 (the 'License'). You may not use this file
+;*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
+;*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
+;*    provided with the Original Code and Modifications, and is also
+;*    available at www.sybase.com/developer/opensource.
+;*
+;*    The Original Code and all software distributed under the License are
+;*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+;*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
+;*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
+;*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
+;*    NON-INFRINGEMENT. Please see the License for the specific language
+;*    governing rights and limitations under the License.
+;*
+;*  ========================================================================
+;*
+;* Description:  WHEN YOU FIGURE OUT WHAT THIS FILE DOES, PLEASE
+;*               DESCRIBE IT HERE!
+;*
+;*****************************************************************************
+
+
+; static char sccs_id[] = "@(#)fprem32.asm      1.5  12/22/94  12:48:07";
+;
+; This code is being published by Intel to users of the Pentium(tm)
+; processor.  Recipients are authorized to copy, modify, compile, use and
+; distribute the code.
+;
+; Intel makes no warranty of any kind with regard to this code, including
+; but not limited to, implied warranties or merchantability and fitness for
+; a particular purpose. Intel assumes no responsibility for any errors that
+; may appear in this code.
+;
+; No patent licenses are granted, express or implied.
+;
+;
+include mdef.inc
+
+        .386
+        .387
+
+;
+;  PRELIMINARY VERSION of the software patch for the floating
+;  point remainder.
+;
+
+
+; CHECKSW - debug aid.  When DEBUG is defined, stores the current FPU
+; status word in fpsw and the control word in fpcw (both declared in the
+; data segment under the same ifdef).  Expands to nothing otherwise.
+CHECKSW MACRO
+ifdef   DEBUG
+        fnstsw  [fpsw]
+        fnstcw  [fpcw]
+endif
+ENDM
+
+
+DATA32  SEGMENT DWORD USE32 PUBLIC 'DATA'
+
+;
+;  Stack variables for remainder routines.
+;
+;  Offsets into the STACK_SIZE-byte frame that the public entry points
+;  reserve with "sub esp, STACK_SIZE".  Each floating-point slot is
+;  FLT_SIZE (12) bytes and is accessed as a tbyte.
+;
+
+FLT_SIZE        EQU     12
+DENOM           EQU     0                       ; denominator (may be scaled)
+DENOM_SAVE      EQU     DENOM + FLT_SIZE        ; original denominator
+NUMER           EQU     DENOM_SAVE + FLT_SIZE   ; numerator / running result
+PREV_CW         EQU     NUMER + FLT_SIZE        ; caller's control word
+PATCH_CW        EQU     PREV_CW + 4             ; temporary control word
+FPREM_SW        EQU     PATCH_CW + 4            ; status word after fprem
+STACK_SIZE      EQU     FPREM_SW + 4            ; total frame size (48)
+RET_SIZE        EQU     4
+PUSH_SIZE       EQU     4
+
+; Inside the *_common routines the frame sits above the return address
+; and three pushed registers, hence the extra displacement.
+MAIN_FUDGE      EQU     RET_SIZE + PUSH_SIZE + PUSH_SIZE + PUSH_SIZE
+
+MAIN_DENOM              EQU     DENOM + MAIN_FUDGE
+MAIN_DENOM_SAVE         EQU     DENOM_SAVE + MAIN_FUDGE
+MAIN_NUMER              EQU     NUMER + MAIN_FUDGE
+MAIN_PREV_CW            EQU     PREV_CW + MAIN_FUDGE
+MAIN_PATCH_CW           EQU     PATCH_CW + MAIN_FUDGE
+MAIN_FPREM_SW           EQU     FPREM_SW + MAIN_FUDGE
+
+; Bits of the dword at DENOM+6 (high 16 mantissa bits in the low word)
+; that must all be one for the denominator to need the software path.
+ONESMASK        EQU     700h
+
+; Indexed by a 4-bit field of the denominator's high mantissa bits;
+; non-zero entries (indices 1, 4, 7, 10, 13) mark the risky patterns.
+fprem_risc_table        DB      0, 1, 0, 0, 4, 0, 0, 7, 0, 0, 10, 0, 0, 13, 0, 0
+; The following 8-byte constants are IEEE doubles, little-endian.
+fprem_scale             DB      0, 0, 0, 0, 0, 0, 0eeh, 03fh    ; 3FEE0000 00000000h = 0.9375
+one_shl_64              DB      0, 0, 0, 0, 0, 0, 0f0h, 043h    ; 2^64
+one_shr_64              DB      0, 0, 0, 0, 0, 0, 0f0h, 03bh    ; 2^-64
+one                     DB      0, 0, 0, 0, 0, 0, 0f0h, 03fh    ; 1.0
+half                    DB      0, 0, 0, 0, 0, 0, 0e0h, 03fh    ; 0.5
+; 10-byte extended value (exponent 7FFEh); loaded as the fprem numerator
+; in the large-argument path to force C2 to be set.
+big_number              DB      0, 0, 0, 0, 0, 0, 0ffh, 0ffh, 0feh, 07fh
+
+ifdef   DEBUG
+        public  fpcw
+        public  fpsw
+fpcw    dw      0                               ; written by CHECKSW
+fpsw    dw      0                               ; written by CHECKSW
+endif
+
+; Layout used to address the image stored by fnstenv / loaded by fldenv
+; (field sizes add up to ENV_SIZE = 28 bytes).
+FPU_STATE       STRUC
+        CONTROL_WORD    DW      ?
+        reserved_1      DW      ?
+        STATUS_WORD     DD      ?
+        TAG_WORD        DW      ?
+        reserved_3      DW      ?
+        IP_OFFSET       DD      ?
+        CS_SLCT         DW      ?
+        OPCODE          DW      ?
+        DATA_OFFSET     DD      ?
+        OPERAND_SLCT    DW      ?
+        reserved_4      DW      ?
+FPU_STATE       ENDS
+
+ENV_SIZE        EQU     28
+
+
+DATA32 ENDS
+
+; Segment skeleton: declare the code/data/const/bss segments (empty here),
+; group the three data-class segments into DGROUP, then open CODE32, in
+; which the routines below are assembled.
+_TEXT  SEGMENT DWORD USE32 PUBLIC 'CODE'
+_TEXT  ENDS
+
+DATA32  SEGMENT DWORD USE32 PUBLIC 'DATA'
+DATA32  ENDS
+
+CONST32 SEGMENT DWORD USE32 PUBLIC 'CONST'
+CONST32 ENDS
+
+BSS32   SEGMENT DWORD USE32 PUBLIC 'BSS'
+BSS32   ENDS
+
+DGROUP  GROUP CONST32, BSS32, DATA32
+
+
+
+CODE32  SEGMENT   DWORD USE32 PUBLIC 'CODE'
+
+; NOTE(review): the code is emitted into CODE32 while cs is assumed to be
+; _TEXT - presumably harmless in a flat model, confirm this is intended.
+        assume cs:_TEXT, ds:DGROUP, es:DGROUP, ss:nothing
+
+
+; fprem_common - core of the checked FPREM.
+;
+;   In:   the caller has reserved a STACK_SIZE frame and stored the
+;         numerator at NUMER and a non-denormal denominator at DENOM
+;         (addressed here through the MAIN_* offsets, which skip the
+;         return address and the three registers pushed below).
+;         edx bit 0 = result must be de-scaled by one_shr_64
+;         edx bit 1 = original denominator is already in DENOM_SAVE
+;   Out:  ST = remainder, ST(1) = denominator; when edx bits 0/1 are set
+;         the C0/C1/C3 bits captured right after the remainder step are
+;         re-installed in the status word.
+;   Preserves eax, ebx, ecx (pushed/popped here).
+;
+; If the denominator does not have the risky bit pattern, or either
+; operand is a special value, the hardware fprem is used directly.
+; Otherwise the numerator is reduced in safe steps (rem_patch_loop) or,
+; for very large exponent differences, by shift-and-subtract (rem_large).
+fprem_common    PROC    NEAR
+
+        push    eax
+        push    ebx
+        push    ecx
+        mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
+        xor     eax, ONESMASK           ; invert bits that have to be one
+        test    eax, ONESMASK           ; check bits that have to be one
+        jnz     remainder_hardware_ok
+        shr     eax, 11
+        and     eax, 0fh
+        cmp     byte ptr fprem_risc_table[eax], 0     ; check for (1,4,7,a,d)
+        jz      remainder_hardware_ok
+
+; The denominator has the bit pattern. Weed out the funny cases like NaNs
+; before applying the software version. Our caller guarantees that the
+; denominator is not a denormal. Here we check for:
+;       denominator     inf, NaN, unnormal
+;       numerator       inf, NaN, unnormal, denormal
+
+        mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
+        and     eax, 07fff0000h         ; mask the exponent only
+        cmp     eax, 07fff0000h         ; check for INF or NaN
+        je      remainder_hardware_ok
+        mov     eax, [MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa
+        and     eax, 07fff0000h         ; mask the exponent only
+        jz      remainder_hardware_ok   ; jif numerator denormal
+        cmp     eax, 07fff0000h         ; check for INF or NaN
+        je      remainder_hardware_ok
+; The explicit integer bit is bit 31 of the high mantissa dword; doubling
+; the dword moves it into CF.  The branch must test CF: testing ZF here
+; would always take the hardware path for the denominator, because the
+; ONESMASK bits checked above are known to be set.
+        mov     eax, [esp + MAIN_NUMER + 4]     ; high mantissa bits - numerator
+        add     eax, eax                ; set carry if explicit bit set
+        jnc     remainder_hardware_ok   ; jmp if numerator is unnormal
+        mov     eax, [esp + MAIN_DENOM + 4] ; high mantissa bits - denominator
+        add     eax, eax                ; set carry if explicit bit set
+        jnc     remainder_hardware_ok   ; jmp if denominator is unnormal
+
+rem_patch:
+        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
+        and     eax, 07fffh              ; clear sy
+        add     eax, 63                  ; evaluate ey + 63
+        mov     ebx, [MAIN_NUMER+8+esp]  ; sign and exponent of x (numerator)
+        and     ebx, 07fffh              ; clear sx
+        sub     ebx, eax                 ; evaluate the exponent difference (ex - ey)
+        ja      rem_large               ; if ex > ey + 63, case of large arguments
+; Reduce the numerator step by step against a copy of the denominator
+; whose exponent is raised to within 4..7 of the numerator's, until the
+; remaining exponent difference is small enough for the hardware.
+rem_patch_loop:
+        mov     eax, [MAIN_DENOM+8+esp]  ; sign and exponent of y (denominator)
+        and     eax, 07fffh             ; clear sy
+        add     eax, 10                 ; evaluate ey + 10
+        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
+        and     ebx, 07fffh             ; clear sx
+        sub     ebx, eax                ; evaluate the exponent difference (ex - ey)
+        js      remainder_hardware_ok   ; safe if ey + 10 > ex
+        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
+        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
+        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
+        and     ebx, 07fffh             ; clear sx
+        mov     ecx, ebx
+        sub     ebx, eax
+        and     ebx, 07h
+        or      ebx, 04h                ; ebx = step in 4..7
+        sub     ecx, ebx                ; ecx = ex - step
+        mov     ebx, eax
+        and     ebx, 08000h             ; keep sy
+        or      ecx, ebx                ; merge the sign of y
+        mov     dword ptr [MAIN_DENOM+8+esp], ecx
+        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the shifted denominator
+        mov     dword ptr [MAIN_DENOM+8+esp], eax       ; restore the initial denominator
+        fxch
+        fprem                           ; this rem is safe
+        fstp    tbyte ptr [MAIN_NUMER+esp]      ; update the numerator
+        fstp    st(0)                   ; pop the stack
+        jmp rem_patch_loop
+rem_large:
+        test    edx, 02h                ; is denominator already saved
+        jnz     already_saved
+        fld     tbyte ptr[esp + MAIN_DENOM]
+        fstp    tbyte ptr[esp + MAIN_DENOM_SAVE]        ; save denominator
+already_saved:
+        ; Save user's precision control and institute 80.  The fp ops in
+        ; rem_large_loop must not round to user's precision (if it is less
+        ; than 80) because the hardware would not have done so.  We are
+        ; aping the hardware here, which is all extended.
+
+        fnstcw  [esp+MAIN_PREV_CW]      ; save caller's control word
+        mov     eax, dword ptr[esp + MAIN_PREV_CW]
+        or      eax, 033fh              ; mask exceptions, pc=80
+        mov     [esp + MAIN_PATCH_CW], eax
+        fldcw   [esp + MAIN_PATCH_CW]
+
+        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
+        and     eax, 07fffh             ; clear sy
+        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
+        and     ebx, 07fffh             ; clear sx
+        sub     ebx, eax                ; evaluate the exponent difference
+        and     ebx, 03fh
+        or      ebx, 020h
+        add     ebx, 1
+        mov     ecx, ebx                ; ecx = loop count (33..64)
+        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
+        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
+        and     ebx, 07fffh             ; clear sx
+        and     eax, 08000h             ; keep sy
+        or      ebx, eax                ; merge the sign of y
+        mov     dword ptr[MAIN_DENOM+8+esp], ebx        ; make ey equal to ex (scaled denominator)
+        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the scaled denominator
+        fabs
+        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
+        fabs
+; Shift-and-subtract: ST = |numerator|, ST(1) = scaled |denominator|.
+; Each pass subtracts the denominator when ST >= ST(1), then halves it.
+rem_large_loop:
+        fcom
+        fstsw  ax
+        and     eax, 00100h             ; C0 set => ST < ST(1)
+        jnz     rem_no_sub
+        fsub    st, st(1)
+rem_no_sub:
+        fxch
+        fmul    qword ptr half
+        fxch
+        sub     ecx, 1                  ; decrement the loop counter
+        jnz     rem_large_loop
+        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
+        fstp    tbyte ptr[esp + MAIN_NUMER]     ; save result
+        fstp    st                      ; toss modified denom
+        fld     tbyte ptr[esp + MAIN_DENOM_SAVE]
+        fld     tbyte ptr[big_number]   ; force C2 to be set
+        fprem
+        fstp    st
+        fld     tbyte ptr[esp + MAIN_NUMER]     ; restore saved result
+
+        fldcw   [esp + MAIN_PREV_CW]    ; restore caller's control word
+        and     ebx, 08000h             ; keep sx
+        jz      rem_done
+        fchs                            ; result takes the numerator's sign
+        jmp     rem_done
+remainder_hardware_ok:
+        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the denominator
+        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
+        fprem                           ; and finally do a remainder
+; prem_main_routine end
+rem_done:
+        test    edx, 03h
+        jz      rem_exit                ; nothing scaled or saved: done
+        fnstsw  [esp + MAIN_FPREM_SW]   ; save Q0 Q1 and Q2
+        test    edx, 01h
+        jz      do_not_de_scale
+; De-scale the result. Go to pc=80 to prevent from fmul
+; from user precision (fprem does not round the result).
+        fnstcw  [esp + MAIN_PREV_CW]    ; save callers control word
+        mov     eax, [esp + MAIN_PREV_CW]
+        or      eax, 0300h              ; pc = 80
+        mov     [esp + MAIN_PATCH_CW], eax
+        fldcw   [esp + MAIN_PATCH_CW]
+        fmul    qword ptr one_shr_64
+        fldcw   [esp + MAIN_PREV_CW]    ; restore callers CW
+do_not_de_scale:
+        mov     eax, [esp + MAIN_FPREM_SW]
+        fxch
+        fstp    st                      ; drop the working denominator
+        fld     tbyte ptr[esp + MAIN_DENOM_SAVE] ; put back the original one
+        fxch
+        and     eax, 04300h             ; restore saved Q0, Q1, Q2
+        sub     esp, ENV_SIZE
+        fnstenv [esp]
+        and     [esp].STATUS_WORD, 0bcffh       ; clear C0, C1, C3
+        or      [esp].STATUS_WORD, eax          ; install the saved bits
+        fldenv  [esp]
+        add     esp, ENV_SIZE
+rem_exit:
+        pop     ecx
+        pop     ebx
+        pop     eax
+        CHECKSW                         ; debug only: save status
+        ret
+fprem_common    ENDP
+
+comment ~****************************************************************
+
+;
+; float frem_chk (float numer, float denom)
+;
+        public  frem_chk
+frem_chk        PROC    NEAR
+        push    edx
+        sub     esp, STACK_SIZE
+        fld     dword ptr [STACK_SIZE+8+esp]
+        fstp    tbyte ptr [NUMER+esp]
+        fld     dword ptr [STACK_SIZE+12+esp]
+        fstp    tbyte ptr [DENOM+esp]
+        mov     edx, 0                  ; dx = 1 if denormal extended divisor
+        call    fprem_common
+        fxch
+        fstp    st
+        add     esp, STACK_SIZE
+        pop     edx
+        ret
+frem_chk        ENDP
+; end frem_chk
+
+;
+; double drem_chk (double numer, double denom)
+;
+        public  drem_chk
+drem_chk        PROC    NEAR
+        push    edx
+        sub     esp, STACK_SIZE
+        fld     qword ptr [STACK_SIZE+8+esp]
+        fstp    tbyte ptr [NUMER+esp]
+        fld     qword ptr [STACK_SIZE+16+esp]
+        fstp    tbyte ptr [DENOM+esp]
+        mov     edx, 0                  ; dx = 1 if denormal extended divisor
+        call    fprem_common
+        fxch
+        fstp    st
+        add     esp, STACK_SIZE
+        pop     edx
+        ret
+
+drem_chk        ENDP
+; end drem_chk
+
+;
+; long double lrem_chk(long double number,long double denom)
+;
+        public  lrem_chk
+lrem_chk        PROC    NEAR
+        fld     tbyte ptr [20+esp]
+        fld     tbyte ptr [4+esp]
+        call    fprem_chk
+        fxch
+        fstp    st
+        ret
+lrem_chk        ENDP
+
+**********************************************************************~
+
+;
+; FPREM: ST = remainder(ST, ST(1))
+;
+; Compiler version of the FPREM must preserve the arguments in the floating
+; point stack.
+
+; __fprem_chk - checked replacement for the FPREM instruction.
+;
+;   In:   ST = numerator, ST(1) = denominator.
+;   Out:  ST = remainder, ST(1) = denominator.
+;   edx is preserved.
+;   NOTE(review): eax is overwritten below without being saved here -
+;   presumably the compiler treats eax as modified by this helper; confirm.
+;
+; Both operands are moved into a STACK_SIZE frame.  A denominator with a
+; non-zero exponent goes straight to fprem_common.  A denormal denominator
+; is scaled first (see below); a zero denominator is handed to the
+; hardware fprem unchanged.
+        public  __fprem_chk
+        defpe   __fprem_chk
+        push    edx
+        sub     esp, STACK_SIZE
+        fstp    tbyte ptr [NUMER+esp]
+        fstp    tbyte ptr [DENOM+esp]
+        xor     edx, edx                ; no scaling / saving flags yet
+; prem_main_routine begin
+        mov     eax,[DENOM+6+esp]       ; exponent and high 16 bits of mantissa
+        test    eax,07fff0000h          ; check for denormal
+        jz      denormal
+        call    fprem_common
+        add     esp, STACK_SIZE
+        pop     edx
+        ret
+
+denormal:
+        fld     tbyte ptr [DENOM+esp]   ; load the denominator
+        fld     tbyte ptr [NUMER+esp]   ; load the numerator
+        mov     eax, [DENOM+esp]        ; test for whole mantissa == 0
+        or      eax, [DENOM+4+esp]      ; test for whole mantissa == 0
+        jz      remainder_hardware_ok_l ; denominator is zero
+        fxch
+        fstp    tbyte ptr[esp + DENOM_SAVE]     ; save org denominator
+        fld     tbyte ptr[esp + DENOM]
+        fxch
+        or      edx, 02h                ; tell fprem_common DENOM_SAVE is valid
+;
+; For this we need pc=80.  Also, mask exceptions so we don't take any
+; denormal operand exceptions.  It is guaranteed that the descaling
+; later on will take underflow, which is what the hardware would have done
+; on a normal fprem.
+;
+        fnstcw  [PREV_CW+esp]           ; save caller's control word
+        mov     eax, [PREV_CW+esp]
+        or      eax, 0033fh             ; mask exceptions, pc=80
+        mov     [PATCH_CW+esp], eax
+        fldcw   [PATCH_CW+esp]          ; mask exceptions & pc=80
+
+; The denominator is a denormal.  For most numerators, scale both numerator
+; and denominator to get rid of denormals.  Then execute the common code
+; with the flag set to indicate that the result must be de-scaled.
+; For large numerators this won't work because the scaling would cause
+; overflow.  In this case we know the numerator is large, the denominator
+; is small (denormal), so the exponent difference is also large.  This means
+; the rem_large code will be used and this code depends on the difference
+; in exponents modulo 64.  Adding 64 to the denominators exponent
+; doesn't change the modulo 64 difference.  So we can scale the denominator
+; by 64, making it not denormal, and this won't affect the result.
+;
+; To start with, figure out if numerator is large
+
+        mov     eax, [esp + NUMER + 8]  ; load numerator exponent
+        and     eax, 7fffh              ; isolate numerator exponent
+        cmp     eax, 7fbeh              ; compare Nexp to Maxexp-64
+        ja      big_numer_rem_de        ; jif big numerator
+
+; So the numerator is not large scale both numerator and denominator
+
+        or      edx, 1                  ; edx = 1, if denormal extended divisor
+        fmul    qword ptr one_shl_64    ; make numerator not denormal
+        fstp    tbyte ptr[esp + NUMER]
+        fmul    qword ptr one_shl_64    ; make denominator not denormal
+        fstp    tbyte ptr[esp + DENOM]
+        jmp     scaling_done
+
+; The numerator is large.  Scale only the denominator, which will not
+; change the result which we know will be partial.  Set the scale flag
+; to false.
+big_numer_rem_de:
+        ; We must do this with pc=80 to avoid rounding to single/double.
+        ; In this case we do not mask exceptions so that we will take
+        ; denormal operand, as would the hardware.
+        ;
+        ; The caller's control word is already in PREV_CW (saved above).
+        ; It must NOT be stored again here: the FPU currently holds the
+        ; masked patch word, and re-saving it would both lose the caller's
+        ; control word (scaling_done restores from PREV_CW) and keep the
+        ; exceptions masked.
+        mov     eax, [PREV_CW+esp]      ; caller's control word
+        or      eax, 00300h             ; pc=80
+        mov     [PATCH_CW+esp], eax
+        fldcw   [PATCH_CW+esp]          ;  pc=80
+
+        fstp    st                      ; Toss numerator
+        fmul    qword ptr one_shl_64    ; make denominator not denormal
+        fstp    tbyte ptr[esp + DENOM]
+
+; Restore the control word which was fiddled to scale at 80-bit precision.
+; Then call the common code.
+scaling_done:
+        fldcw   [esp + PREV_CW]         ; restore callers control word
+        call    fprem_common
+        add     esp, STACK_SIZE
+        pop     edx
+        ret
+
+remainder_hardware_ok_l:
+        fprem                           ; and finally do a remainder
+
+        CHECKSW
+
+        add     esp, STACK_SIZE
+        pop     edx
+        ret
+__fprem_chk       ENDP
+; end fprem_chk
+
+
+;
+; FPREM1 code begins here
+;
+
+
+;
+; fprem1_common - core of the software work-around for FPREM1.
+;
+; In:   tbyte at [esp + MAIN_NUMER] = numerator
+;       tbyte at [esp + MAIN_DENOM] = denominator (caller guarantees that it
+;                                     is not a denormal)
+;       (the MAIN_* offsets are applied after the three pushes below)
+;       edx bit 0 set: operands were pre-scaled, the result is de-scaled here
+;       edx bit 1 set: the original denominator is already stored in
+;                      MAIN_DENOM_SAVE and is reloaded into ST(1) on exit
+; Out:  ST(0) = remainder, ST(1) = denominator
+; eax, ebx and ecx are saved and restored; edx is only tested.
+;
+fprem1_common   PROC    NEAR
+
+        push    eax
+        push    ebx
+        push    ecx
+        mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
+        xor     eax, ONESMASK           ; invert bits that have to be one
+        test    eax, ONESMASK           ; check bits that have to be one
+        jnz     remainder1_hardware_ok
+        shr     eax, 11
+        and     eax, 0fh
+        cmp     byte ptr fprem_risc_table[eax], 0     ; check for (1,4,7,a,d)
+        jz      remainder1_hardware_ok
+
+; The denominator has the bit pattern. Weed out the funny cases like NaNs
+; before applying the software version. Our caller guarantees that the
+; denominator is not a denormal. Here we check for:
+;       denominator     inf, NaN, unnormal
+;       numerator       inf, NaN, unnormal, denormal
+
+        mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
+        and     eax, 07fff0000h         ; mask the exponent only
+        cmp     eax, 07fff0000h         ; check for INF or NaN
+        je      remainder1_hardware_ok
+        mov     eax, [MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa
+        and     eax, 07fff0000h         ; mask the exponent only
+        jz      remainder1_hardware_ok  ; jif numerator denormal
+        cmp     eax, 07fff0000h         ; check for INF or NaN
+        je      remainder1_hardware_ok
+
+; An unnormal has a non-zero exponent but a clear explicit (integer) bit.
+; Doubling the high mantissa dword moves that bit into the carry flag, so the
+; test has to be on carry.  (Testing ZF here would send every operand with
+; any other mantissa bit set -- including every denominator that matched the
+; ONESMASK pattern above -- to the hardware path, leaving the patch dead.)
+        mov     eax, [esp + MAIN_NUMER + 4]     ; high mantissa bits - numerator
+        add     eax, eax                ; set carry if explicit bit set
+        jnc     remainder1_hardware_ok  ; jmp if numerator is unnormal
+        mov     eax, [esp + MAIN_DENOM + 4] ; high mantissa bits - denominator
+        add     eax, eax                ; set carry if explicit bit set
+        jnc     remainder1_hardware_ok  ; jmp if denominator is unnormal
+
+rem1_patch:
+        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
+        and     eax, 07fffh              ; clear sy
+        add     eax, 63                  ; evaluate ey + 63
+        mov     ebx, [MAIN_NUMER+8+esp]  ; sign and exponent of x (numerator)
+        and     ebx, 07fffh              ; clear sx
+        sub     ebx, eax                 ; evaluate the exponent difference (ex - ey)
+        ja      rem1_large              ; if ex > ey + 63, case of large arguments
+rem1_patch_loop:
+        mov     eax, [MAIN_DENOM+8+esp]  ; sign and exponent of y (denominator)
+        and     eax, 07fffh             ; clear sy
+        add     eax, 10                 ; evaluate ey + 10
+        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
+        and     ebx, 07fffh             ; clear sx
+        sub     ebx, eax                ; evaluate the exponent difference (ex - ey)
+        js      remainder1_hardware_ok  ; safe if ey + 10 > ex
+        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
+        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
+        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
+        and     ebx, 07fffh             ; clear sx
+        mov     ecx, ebx
+        sub     ebx, eax
+        and     ebx, 07h                ; only the low 3 bits of (ex - ey) matter
+        or      ebx, 04h                ; shifted exponent gap is 4..7
+        sub     ecx, ebx                ; ecx = exponent of the shifted denominator
+        mov     ebx, eax
+        and     ebx, 08000h             ; keep sy
+        or      ecx, ebx                ; merge the sign of y
+        mov     dword ptr [MAIN_DENOM+8+esp], ecx
+        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the shifted denominator
+        mov     dword ptr [MAIN_DENOM+8+esp], eax       ; restore the initial denominator
+        fxch
+        fprem                           ; this rem is safe
+        fstp    tbyte ptr [MAIN_NUMER+esp]      ; update the numerator
+        fstp    st(0)                   ; pop the stack
+        jmp rem1_patch_loop
+rem1_large:
+        ; The "denominator already saved" flag is bit 1 of edx (set by the
+        ; caller next to the store into its DENOM_SAVE slot and tested again
+        ; at rem1_done).  ebx only holds an exponent difference here.
+        test    edx, 02h                ; is denominator already saved
+        jnz     already_saved1
+        fld     tbyte ptr[esp + MAIN_DENOM]
+        fstp    tbyte ptr[esp + MAIN_DENOM_SAVE]        ; save denominator
+already_saved1:
+        ; Save user's precision control and institute 80.  The fp ops in
+        ; rem1_large_loop must not round to user's precision (if it is less
+        ; than 80) because the hardware would not have done so.  We are
+        ; aping the hardware here, which is all extended.
+
+        fnstcw  [esp+MAIN_PREV_CW]      ; save caller's control word
+        mov     eax, dword ptr[esp + MAIN_PREV_CW]
+        or      eax, 033fh              ; mask exceptions, pc=80
+        mov     [esp + MAIN_PATCH_CW], eax
+        fldcw   [esp + MAIN_PATCH_CW]
+
+        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
+        and     eax, 07fffh             ; clear sy
+        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
+        and     ebx, 07fffh             ; clear sx
+        sub     ebx, eax                ; evaluate the exponent difference
+        and     ebx, 03fh
+        or      ebx, 020h
+        add     ebx, 1
+        mov     ecx, ebx                ; ecx = iteration count for the loop below
+        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
+        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
+        and     ebx, 07fffh             ; clear sx
+        and     eax, 08000h             ; keep sy
+        or      ebx, eax                ; merge the sign of y
+        mov     dword ptr[MAIN_DENOM+8+esp], ebx        ; make ey equal to ex (scaled denominator)
+        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the scaled denominator
+        fabs
+        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
+        fabs
+rem1_large_loop:
+        fcom
+        fstsw  ax
+        and     eax, 00100h             ; C0 set: numerator < scaled denominator
+        jnz     rem1_no_sub
+        fsub    st, st(1)
+rem1_no_sub:
+        fxch
+        fmul    qword ptr half          ; halve the scaled denominator
+        fxch
+        sub     ecx, 1                  ; decrement the loop counter
+        jnz     rem1_large_loop
+        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
+        fstp    tbyte ptr[esp + MAIN_NUMER]     ; save result
+        fstp    st                      ; toss modified denom
+        fld     tbyte ptr[esp + MAIN_DENOM_SAVE]
+        fld     tbyte ptr[big_number]   ; force C2 to be set
+        fprem1
+        fstp    st
+        fld     tbyte ptr[esp + MAIN_NUMER]     ; restore saved result
+
+        fldcw   [esp + MAIN_PREV_CW]    ; restore caller's control word
+        and     ebx, 08000h             ; keep sx
+        jz      rem1_done
+        fchs                            ; result takes the sign of the numerator
+        jmp     rem1_done
+remainder1_hardware_ok:
+        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the denominator
+        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
+        fprem1                           ; and finally do a remainder
+; prem1_main_routine end
+rem1_done:
+        test    edx, 03h
+        jz      rem1_exit
+        fnstsw  [esp + MAIN_FPREM_SW]   ; save Q0 Q1 and Q2
+        test    edx, 01h
+        jz      do_not_de_scale1
+; De-scale the result. Go to pc=80 to prevent from fmul
+; from user precision (fprem does not round the result).
+        fnstcw  [esp + MAIN_PREV_CW]    ; save callers control word
+        mov     eax, [esp + MAIN_PREV_CW]
+        or      eax, 0300h              ; pc = 80
+        mov     [esp + MAIN_PATCH_CW], eax
+        fldcw   [esp + MAIN_PATCH_CW]
+        fmul    qword ptr one_shr_64
+        fldcw   [esp + MAIN_PREV_CW]    ; restore callers CW
+do_not_de_scale1:
+        mov     eax, [esp + MAIN_FPREM_SW]
+        fxch
+        fstp    st                      ; drop the working denominator
+        fld     tbyte ptr[esp + MAIN_DENOM_SAVE]        ; bring back the original one
+        fxch
+        and     eax, 04300h             ; restore saved Q0, Q1, Q2
+        sub     esp, ENV_SIZE
+        fnstenv [esp]
+        and     [esp].STATUS_WORD, 0bcffh
+        or      [esp].STATUS_WORD, eax
+        fldenv  [esp]
+        add     esp, ENV_SIZE
+rem1_exit:
+        pop     ecx
+        pop     ebx
+        pop     eax
+        CHECKSW                         ; debug only: save status
+        ret
+fprem1_common   ENDP
+
+
+comment ~***************************************************************
+;
+; float frem1_chk (float numer, float denom)
+;
+        public  frem1_chk
+frem1_chk       PROC    NEAR
+        push    edx
+        sub     esp, STACK_SIZE
+        fld     dword ptr [STACK_SIZE+8+esp]
+        fstp    tbyte ptr [NUMER+esp]
+        fld     dword ptr [STACK_SIZE+12+esp]
+        fstp    tbyte ptr [DENOM+esp]
+        mov     edx, 0                  ; dx = 1 if denormal extended divisor
+        call    fprem1_common
+        fxch
+        fstp    st
+        add     esp, STACK_SIZE
+        pop     edx
+        ret
+frem1_chk       ENDP
+; end frem1_chk
+
+;
+; double drem1_chk (double numer, double denom)
+;
+        public  drem1_chk
+drem1_chk       PROC    NEAR
+        push    edx
+        sub     esp, STACK_SIZE
+        fld     qword ptr [STACK_SIZE+8+esp]
+        fstp    tbyte ptr [NUMER+esp]
+        fld     qword ptr [STACK_SIZE+16+esp]
+        fstp    tbyte ptr [DENOM+esp]
+        mov     edx, 0                  ; dx = 1 if denormal extended divisor
+        call    fprem1_common
+        fxch
+        fstp    st
+        add     esp, STACK_SIZE
+        pop     edx
+        ret
+
+drem1_chk       ENDP
+; end drem1_chk
+
+;
+; long double lrem1_chk(long double number,long double denom)
+;
+        public  lrem1_chk
+lrem1_chk       PROC    NEAR
+        fld     tbyte ptr [20+esp]
+        fld     tbyte ptr [4+esp]
+        call    fprem1_chk
+        fxch
+        fstp    st
+        ret
+lrem1_chk       ENDP
+********************************************************************~
+
+;
+; FPREM1: ST = remainder(ST, ST(1)) - IEEE version of rounding
+;
+; Compiler version of the FPREM must preserve the arguments in the floating
+; point stack.
+;
+; In:   ST(0) = numerator, ST(1) = denominator
+; Out:  ST(0) = remainder, ST(1) = denominator
+; edx is saved and restored.  eax is used as scratch by this wrapper and is
+; not restored.  edx carries two flags into fprem1_common:
+;       bit 0 - operands were scaled, the result must be de-scaled
+;       bit 1 - the original denominator has been stored in DENOM_SAVE
+
+        public  __fprem1_chk
+        defpe   __fprem1_chk
+        push    edx
+        sub     esp, STACK_SIZE
+        fstp    tbyte ptr [NUMER+esp]
+        fstp    tbyte ptr [DENOM+esp]
+        mov     edx, 0
+; prem1_main_routine begin
+        mov     eax,[DENOM+6+esp]       ; exponent and high 16 bits of mantissa
+        test    eax,07fff0000h          ; check for denormal
+        jz      denormal1
+        call    fprem1_common
+        add     esp, STACK_SIZE
+        pop     edx
+        ret
+
+denormal1:
+        fld     tbyte ptr [DENOM+esp]   ; load the denominator
+        fld     tbyte ptr [NUMER+esp]   ; load the numerator
+        mov     eax, [DENOM+esp]        ; test for whole mantissa == 0
+        or      eax, [DENOM+4+esp]      ; test for whole mantissa == 0
+        jz      remainder1_hardware_ok_l ; denominator is zero
+        fxch
+        fstp    tbyte ptr[esp + DENOM_SAVE]     ; save org denominator
+        fld     tbyte ptr[esp + DENOM]
+        fxch
+        or      edx, 02h
+;
+; For this we need pc=80.  Also, mask exceptions so we don't take any
+; denormal operand exceptions.  It is guaranteed that the descaling
+; later on will take underflow, which is what the hardware would have done
+; on a normal fprem.
+;
+        fnstcw  [PREV_CW+esp]           ; save caller's control word
+        mov     eax, [PREV_CW+esp]
+        or      eax, 0033fh             ; mask exceptions, pc=80
+        mov     [PATCH_CW+esp], eax
+        fldcw   [PATCH_CW+esp]          ; mask exceptions & pc=80
+
+; The denominator is a denormal.  For most numerators, scale both numerator
+; and denominator to get rid of denormals.  Then execute the common code
+; with the flag set to indicate that the result must be de-scaled.
+; For large numerators this won't work because the scaling would cause
+; overflow.  In this case we know the numerator is large, the denominator
+; is small (denormal), so the exponent difference is also large.  This means
+; the rem1_large code will be used and this code depends on the difference
+; in exponents modulo 64.  Adding 64 to the denominators exponent
+; doesn't change the modulo 64 difference.  So we can scale the denominator
+; by 64, making it not denormal, and this won't effect the result.
+;
+; To start with, figure out if numerator is large
+
+        mov     eax, [esp + NUMER + 8]  ; load numerator exponent
+        and     eax, 7fffh              ; isolate numerator exponent
+        cmp     eax, 7fbeh              ; compare Nexp to Maxexp-64
+        ja      big_numer_rem1_de       ; jif big numerator
+
+; So the numerator is not large scale both numerator and denominator
+
+        or      edx, 1                  ; edx = 1, if denormal extended divisor
+        fmul    qword ptr one_shl_64    ; make numerator not denormal
+        fstp    tbyte ptr[esp + NUMER]
+        fmul    qword ptr one_shl_64    ; make denominator not denormal
+        fstp    tbyte ptr[esp + DENOM]
+        jmp     scaling_done1
+
+; The numerator is large.  Scale only the denominator, which will not
+; change the result which we know will be partial.  Set the scale flag
+; to false.
+big_numer_rem1_de:
+        ; We must do this with pc=80 to avoid rounding to single/double.
+        ; In this case we do not mask exceptions so that we will take
+        ; denormal operand, as would the hardware.
+        ;
+        ; PREV_CW already holds the caller's control word (stored above,
+        ; before exceptions were masked).  It must not be stored again here:
+        ; the FPU is currently running with the patched word, and saving
+        ; that would make scaling_done1 "restore" the patched word and lose
+        ; the caller's exception masks and precision setting.
+        mov     eax, [PREV_CW+esp]      ; caller's control word
+        or      eax, 00300h             ; pc=80, caller's exception masks kept
+        mov     [PATCH_CW+esp], eax
+        fldcw   [PATCH_CW+esp]          ;  pc=80
+
+        fstp    st                      ; Toss numerator
+        fmul    qword ptr one_shl_64    ; make denominator not denormal
+        fstp    tbyte ptr[esp + DENOM]
+
+; Restore the control word which was fiddled to scale at 80-bit precision.
+; Then call the common code.
+scaling_done1:
+        fldcw   [esp + PREV_CW]         ; restore callers control word
+        call    fprem1_common
+        add     esp, STACK_SIZE
+        pop     edx
+        ret
+
+; Zero denominator: both operands are already back on the FPU stack, let the
+; hardware produce the result.  This is the FPREM1 entry point, so the IEEE
+; remainder instruction is the one to execute here.
+remainder1_hardware_ok_l:
+        fprem1                          ; and finally do a remainder
+
+        CHECKSW
+
+        add     esp, STACK_SIZE
+        pop     edx
+        ret
+__fprem1_chk      ENDP
+; end fprem1_chk
+
+; Debug builds only: fpinit executes FNINIT and returns.  It takes no
+; arguments and touches no general registers.
+ifdef   DEBUG
+        public  fpinit
+fpinit  PROC    NEAR
+        fninit
+        ret
+fpinit  ENDP
+endif
+
+CODE32 ENDS
+       END
--- a/watcom/trunk/clib/fpu/chipt32.asm
+++ b/watcom/trunk/clib/fpu/chipt32.asm
@@ -0,0 +1,167 @@
+;*****************************************************************************
+;*
+;*                            Open Watcom Project
+;*
+;*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
+;*
+;*  ========================================================================
+;*
+;*    This file contains Original Code and/or Modifications of Original
+;*    Code as defined in and that are subject to the Sybase Open Watcom
+;*    Public License version 1.0 (the 'License'). You may not use this file
+;*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
+;*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
+;*    provided with the Original Code and Modifications, and is also
+;*    available at www.sybase.com/developer/opensource.
+;*
+;*    The Original Code and all software distributed under the License are
+;*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+;*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
+;*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
+;*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
+;*    NON-INFRINGEMENT. Please see the License for the specific language
+;*    governing rights and limitations under the License.
+;*
+;*  ========================================================================
+;*
+;* Description:  __fptan_chk - FPTAN replacement that computes the tangent
+;*               with FSINCOS and a divide guarded against risky divisors.
+;*
+;*****************************************************************************
+
+
+; static char sccs_id[] = "@(#)fptan32.asm      1.4  12/20/94  16:51:51";
+;
+; This code is being published by Intel to users of the Pentium(tm)
+; processor.  Recipients are authorized to copy, modify, compile, use and
+; distribute the code.
+;
+; Intel makes no warranty of any kind with regard to this code, including
+; but not limited to, implied warranties or merchantability and fitness for
+; a particular purpose. Intel assumes no responsibility for any errors that
+; may appear in this code.
+;
+; No patent licenses are granted, express or implied.
+;
+;
+;  The following code is a PRELIMINARY IMPLEMENTATION of a
+;  software patch for the floating point divide instructions.
+;
+;
+include mdef.inc
+
+.386
+.387
+
+; Offsets (from esp) into the scratch frame that __fptan_chk reserves with
+; "sub esp, STACK_SIZE".
+PATCH_CW        EQU     00ch            ; control word loaded while scaling
+PREV_CW         EQU     010h            ; caller's control word
+COSINE          EQU     0               ; These two are overlaid because they
+ANGLE           EQU     0               ; are not live at the same time.
+
+
+STACK_SIZE      EQU     014h            ; size in bytes of the scratch frame
+
+; Mask applied to the high mantissa dword of cos(x) after that dword has been
+; shifted left by one bit; the scaled divide is only considered when all of
+; these bits are set.
+ONESMASK        EQU     0e000000h
+
+
+DATA32  SEGMENT DWORD USE32 PUBLIC 'DATA'
+
+; Indexed by the 4-bit value __fptan_chk leaves in eax after "shr eax, 28".
+; A non-zero entry (indices 1, 4, 7, 10, 13) selects the scaled divide.
+fdiv_risk_table DB      0, 1, 0, 0, 4, 0, 0, 7, 0, 0, 10, 0, 0, 13, 0, 0
+fdiv_scale      DD      03f700000h              ; 0.9375
+; Single-precision 2^63: |angle| is compared against this limit before
+; FSINCOS is used.
+one_shl_63      DD      05f000000h
+
+DATA32  ENDS
+
+DGROUP GROUP DATA32
+
+_TEXT  SEGMENT   DWORD USE32 PUBLIC 'CODE'
+
+
+        assume cs:_TEXT, ds:DGROUP, es:DGROUP, ss:nothing
+        public  __fptan_chk
+
+;
+;  PRELIMINARY VERSION for register-register divides.
+;
+
+
+;
+; __fptan_chk - tangent with a guarded divide.
+;
+; In:   ST(0) = angle
+; Out:  software path: ST(0) = 1.0, ST(1) = sin(angle) / cos(angle)
+;       otherwise:     whatever the hardware FPTAN leaves on the stack
+; eax is the only general register used; it is saved and restored.
+;
+        defpe   __fptan_chk
+
+        push    eax
+        sub     esp, STACK_SIZE
+        fstp    tbyte ptr [esp+ANGLE]   ; spill the angle so its bits can be inspected
+
+; Anything that is not a normal, finite number goes straight to FPTAN.
+        mov     eax, [esp+ANGLE+8]      ; sign and exponent
+        and     eax, 07fffh             ; exponent only
+        jz      tan_use_fptan           ; zero exponent: denormals, ...
+        cmp     eax, 07fffh
+        je      tan_use_fptan           ; all-ones exponent: NaNs, infinities, ...
+        mov     eax, [esp+ANGLE+4]      ; high mantissa dword
+        add     eax, eax                ; explicit bit -> carry
+        jnc     tan_use_fptan           ; unnormals (explicit 1 missing)
+
+; Range check: only |angle| < 2^63 is handled here.
+        fld     tbyte ptr [esp+ANGLE]
+        fabs
+        fcomp   one_shl_63
+        fstsw  ax
+        sahf
+        jae     tan_use_fptan
+
+; tan = sin / cos.  The cosine is stored so that its mantissa can be examined
+; and then reloaded as the divisor.
+        fld     tbyte ptr [esp+ANGLE]
+        fsincos
+        fstp    tbyte ptr [esp+COSINE]
+        fld     tbyte ptr [esp+COSINE]  ; ST = cos(x), ST(1) = sin(x)
+        mov     eax, [esp+COSINE+4]     ; mantissa bits 32-63 of the divisor
+        add     eax, eax                ; drop the one's bit into carry
+        xor     eax, ONESMASK           ; flip the bits that must be ones
+        test    eax, ONESMASK           ; any of them clear?
+        jnz     tan_divide              ; yes: plain hardware divide is fine
+
+        shr     eax, 28                 ; first 4 bits after the point; ah is now 0
+        cmp     fdiv_risk_table[eax], ah        ; check for (1,4,7,a,d)
+        jz      tan_divide              ; not in the potential problem area
+
+; Risky divisor: scale both operands by 15/16 with exceptions masked and
+; 80-bit precision, then divide under the caller's control word.
+        fwait                           ; catch preceding exceptions
+        fstcw   [esp+PREV_CW]           ; save caller's control word
+        mov     eax, [esp+PREV_CW]
+        or      eax, 033fh              ; mask exceptions, pc=80
+        mov     [esp+PATCH_CW], eax
+        fldcw   [esp+PATCH_CW]          ; mask exceptions & pc=80
+        fmul    fdiv_scale              ; scale denominator by 15/16
+        fxch
+        fmul    fdiv_scale              ; scale numerator by 15/16
+        fxch
+
+; This assures correctly rounded result if pc=64 as well
+
+        fldcw   [esp+PREV_CW]           ; restore caller's control word
+
+tan_divide:
+        fdivp   st(1), st               ; use of hardware is OK.
+        fld1                            ; push 1.0 onto FPU stack
+        add     esp, STACK_SIZE
+        pop     eax
+        ret
+
+tan_use_fptan:
+        fld     tbyte ptr [esp+ANGLE]   ; reload the untouched argument
+        fptan
+        add     esp, STACK_SIZE
+        pop     eax
+        ret
+__fptan_chk       ENDP
+
+_TEXT  ENDS
+        end
--- a/watcom/trunk/clib/fpu/clearfpe.h
+++ b/watcom/trunk/clib/fpu/clearfpe.h
@@ -0,0 +1,32 @@
+/****************************************************************************
+*
+*                            Open Watcom Project
+*
+*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
+*
+*  ========================================================================
+*
+*    This file contains Original Code and/or Modifications of Original
+*    Code as defined in and that are subject to the Sybase Open Watcom
+*    Public License version 1.0 (the 'License'). You may not use this file
+*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
+*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
+*    provided with the Original Code and Modifications, and is also
+*    available at www.sybase.com/developer/opensource.
+*
+*    The Original Code and all software distributed under the License are
+*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
+*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
+*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
+*    NON-INFRINGEMENT. Please see the License for the specific language
+*    governing rights and limitations under the License.
+*
+*  ========================================================================
+*
+* Description:  prototype for _ClearFPE clib internal function
+*
+****************************************************************************/
+
+
+/* Clib-internal helper with no arguments and no return value.  The x87
+ * implementation in fclex387.c executes FNCLEX. */
+extern void _ClearFPE( void );
--- a/watcom/trunk/clib/fpu/cntrl87.c
+++ b/watcom/trunk/clib/fpu/cntrl87.c
@@ -0,0 +1,149 @@
+/****************************************************************************
+*
+*                            Open Watcom Project
+*
+*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
+*
+*  ========================================================================
+*
+*    This file contains Original Code and/or Modifications of Original
+*    Code as defined in and that are subject to the Sybase Open Watcom
+*    Public License version 1.0 (the 'License'). You may not use this file
+*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
+*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
+*    provided with the Original Code and Modifications, and is also
+*    available at www.sybase.com/developer/opensource.
+*
+*    The Original Code and all software distributed under the License are
+*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
+*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
+*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
+*    NON-INFRINGEMENT. Please see the License for the specific language
+*    governing rights and limitations under the License.
+*
+*  ========================================================================
+*
+* Description:  _control87() - read and optionally modify the 80x87
+*               control word (hardware FPU and, where present, the emulator).
+*
+****************************************************************************/
+
+
+#include "variety.h"
+#include <math.h>
+#include <float.h>
+#include "rtdata.h"
+
+/* Inline-assembly helpers; their bodies are supplied by the #pragma aux
+ * definitions further down.  Both are called with the address of a 16-bit
+ * control word variable. */
+extern  void    __fstcw();
+extern  void    __fldcw();
+
+#if defined(__WINDOWS__) && !defined(__WINDOWS_386__)
+
+/* 16-bit Windows: control word requests are also routed through the
+ * __fpmath entry point, with the function number in BX.
+ * NOTE(review): presumably this is the WIN87EM emulator interface - confirm. */
+extern void __far _fpmath();
+#pragma aux _fpmath "__fpmath";
+
+/* Calls __fpmath with BX = 4; the new control word is passed in AX. */
+void __win87em_fldcw(unsigned int);
+#pragma aux __win87em_fldcw = \
+        "push   bx"                                     \
+        "mov    bx, 4h"                                 \
+        "call   far ptr _fpmath"                        \
+        "pop    bx"                                     \
+        parm [ax]
+
+/* Calls __fpmath with BX = 5; the control word comes back in AX. */
+unsigned int __win87em_fstcw(void);
+#pragma aux __win87em_fstcw = \
+        "push   bx"                                     \
+        "mov    bx, 5h"                                 \
+        "call   far ptr _fpmath"                        \
+        "pop    bx"                                     \
+        value [ax]
+
+#elif defined( __DOS_086__ )
+
+/* 16-bit DOS: _control87() touches the hardware control word when
+ * __dos87real is non-zero and the emulator's when __dos87emucall is
+ * non-zero. */
+extern unsigned char __dos87real;
+#pragma aux __dos87real "*";
+
+extern unsigned short __dos87emucall;
+#pragma aux __dos87emucall "*";
+
+/* Emulator call with AX = 3; BX holds the pointer to the control word. */
+void _WCI86NEAR __dos_emu_fldcw( unsigned short * );
+#pragma aux __dos_emu_fldcw "*" = \
+        "mov    ax,3" \
+        "call   __dos87emucall" \
+        parm [bx];
+        
+/* Emulator call with AX = 4; BX holds the pointer to the control word. */
+void _WCI86NEAR __dos_emu_fstcw( unsigned short * );
+#pragma aux __dos_emu_fstcw "*" = \
+        "mov    ax,4" \
+        "call   __dos87emucall" \
+        parm [bx];
+
+#endif
+
+/* Inline bodies for __fstcw / __fldcw.
+ * 32-bit: the pointer arrives in EDI and is dereferenced through SS.
+ * 16-bit: the pointer arrives in AX and is swapped into BP for the access,
+ * then swapped back so that both registers keep their values. */
+#if defined(__386__)
+#pragma aux __fstcw = \
+        "fstcw ss:[edi]" \
+        "fwait"          \
+        parm caller [edi];
+#pragma aux __fldcw = \
+        "fldcw ss:[edi]" \
+        parm caller [edi];
+#else
+#pragma aux __fstcw = \
+        "xchg ax,bp"           \
+        "fstcw [bp]" \
+        "fwait"                \
+        "xchg ax,bp"           \
+        parm caller [ax];
+#pragma aux __fldcw = \
+        "xchg ax,bp"           \
+        "fldcw [bp]" \
+        "xchg ax,bp"           \
+        parm caller [ax];
+#endif
+
+/*
+ * _control87 - fetch the floating-point control word and, when mask is
+ * non-zero, first replace the bits selected by mask with the matching bits
+ * of new.
+ *
+ * Returns the control word read back after any update, or 0 when
+ * _RWD_8087 is zero.
+ */
+_WCRTLINK unsigned _control87( unsigned new, unsigned mask )
+/**********************************************************/
+{
+    short unsigned int cw = 0;
+
+    if( !_RWD_8087 ) {
+        return( cw );
+    }
+#if defined(__WINDOWS__) && !defined(__WINDOWS_386__)
+    __fstcw( &cw );
+    cw = __win87em_fstcw();
+    if( mask ) {
+        cw = (new & mask) | (cw & ~mask);
+        __fldcw( &cw );
+        __fstcw( &cw );                             /* 17-sep-91 */
+        __win87em_fldcw( cw );
+    }
+#elif defined( __DOS_086__ )
+    /* hardware control word */
+    if( __dos87real ) {
+        __fstcw( &cw );
+        if( mask ) {
+            cw = (new & mask) | (cw & ~mask);
+            __fldcw( &cw );
+            __fstcw( &cw );
+        }
+    }
+    /* emulator control word */
+    if( __dos87emucall ) {
+        __dos_emu_fstcw( &cw );
+        if( mask ) {
+            cw = (new & mask) | (cw & ~mask);
+            __dos_emu_fldcw( &cw );
+            __dos_emu_fstcw( &cw );
+        }
+    }
+#else
+    __fstcw( &cw );
+    if( mask ) {
+        cw = (new & mask) | (cw & ~mask);
+        __fldcw( &cw );
+        __fstcw( &cw );                             /* 17-sep-91 */
+    }
+#endif
+    return( cw );
+}
--- a/watcom/trunk/clib/fpu/cntrlfp.c
+++ b/watcom/trunk/clib/fpu/cntrlfp.c
@@ -0,0 +1,132 @@
+/****************************************************************************
+*
+*                            Open Watcom Project
+*
+*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
+*
+*  ========================================================================
+*
+*    This file contains Original Code and/or Modifications of Original
+*    Code as defined in and that are subject to the Sybase Open Watcom
+*    Public License version 1.0 (the 'License'). You may not use this file
+*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
+*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
+*    provided with the Original Code and Modifications, and is also
+*    available at www.sybase.com/developer/opensource.
+*
+*    The Original Code and all software distributed under the License are
+*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
+*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
+*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
+*    NON-INFRINGEMENT. Please see the License for the specific language
+*    governing rights and limitations under the License.
+*
+*  ========================================================================
+*
+* Description:  _controlfp() - floating-point control word access: forwards
+*               to _control87() on x86, maps to/from the FPCR on Alpha.
+*
+****************************************************************************/
+
+
+#include "variety.h"
+#include <math.h>
+#include <float.h>
+#include "rtdata.h"
+
+#if defined(__AXP__)
+
+/*
+ * FPCR Trap Disable Flags
+ *
+ * Bit positions are those of the 32-bit value exchanged with _GetFPCR() and
+ * _SetFPCR().
+ * NOTE(review): the positions suggest this is the upper half of the 64-bit
+ * Alpha FPCR - confirm against the implementation of the accessors.
+ */
+#define FPCR_INEXACT        0x40000000
+#define FPCR_UNDERFLOW      0x20000000
+#define FPCR_OVERFLOW       0x00080000
+#define FPCR_ZERODIVIDE     0x00040000
+#define FPCR_INVALID        0x00020000
+
+/* FPCR accessors, defined outside this file. */
+extern unsigned long _GetFPCR(void);
+extern void          _SetFPCR(unsigned long);
+
+/*
+ * MapToCW - translate an FPCR value into an 80x87-style control word.
+ *
+ * Only the rounding-control field and the five exception bits that have an
+ * FPCR counterpart are produced; every other bit of the result is zero.
+ * The exception-bit convention is the exact inverse of MapFromCW(): an _EM_
+ * bit is reported as set when the matching FPCR flag is clear.
+ *
+ * NOTE(review): the FPCR flags are described as trap *disable* bits, while a
+ * set _EM_ bit conventionally means "exception masked".  The polarity used
+ * by this pair of functions therefore looks inverted; it is kept as written
+ * so the two stay consistent - confirm against the Alpha runtime.
+ */
+static unsigned int MapToCW(unsigned long fpcr)
+{
+    unsigned int cw;
+
+    /*
+     * The rounding bits are identical but in the highword of the fpcr.
+     * Keep exactly that field (the previous "& ~_MCW_RC" threw the rounding
+     * mode away and let unrelated FPCR bits leak into the result).
+     */
+    cw = (unsigned int)(fpcr >> 16) & _MCW_RC;
+
+    if (!(fpcr & FPCR_INEXACT))
+        cw |= _EM_INEXACT;
+
+    if (!(fpcr & FPCR_ZERODIVIDE))
+        cw |= _EM_ZERODIVIDE;
+
+    if (!(fpcr & FPCR_OVERFLOW))
+        cw |= _EM_OVERFLOW;
+
+    if (!(fpcr & FPCR_UNDERFLOW))
+        cw |= _EM_UNDERFLOW;
+
+    if (!(fpcr & FPCR_INVALID))
+        cw |= _EM_INVALID;
+
+    return cw;
+} /* MapToCW() */
+
+
+/*
+ * MapFromCW - translate an 80x87-style control word into an FPCR value.
+ *
+ * The rounding-control field is moved into the high word; each FPCR flag is
+ * set when the matching _EM_ bit of cw is clear.  No other FPCR bit is set.
+ *
+ * NOTE(review): the FPCR flags are described as trap *disable* bits, while a
+ * set _EM_ bit conventionally means "exception masked"; the polarity below
+ * is kept as originally written - confirm against the Alpha runtime.
+ */
+static unsigned long MapFromCW(unsigned int cw)
+{
+    unsigned long fpcr;
+
+    /*
+     * The rounding bits are identical but in the highword of the fpcr.
+     * Keep exactly that field (the previous "& ~_MCW_RC" dropped the
+     * rounding mode and shifted the exception-mask bits of cw on top of the
+     * FPCR flag positions).
+     */
+    fpcr = (unsigned long)(cw & _MCW_RC) << 16;
+
+    if (!(cw & _EM_INEXACT))
+        fpcr |= FPCR_INEXACT;
+
+    if (!(cw & _EM_INVALID))
+        fpcr |= FPCR_INVALID;
+
+    if (!(cw & _EM_ZERODIVIDE))
+        fpcr |= FPCR_ZERODIVIDE;
+
+    if (!(cw & _EM_OVERFLOW))
+        fpcr |= FPCR_OVERFLOW;
+
+    if (!(cw & _EM_UNDERFLOW))
+        fpcr |= FPCR_UNDERFLOW;
+
+    return fpcr;
+} /* MapFromCW() */
+#endif
+
+
+/*
+ * _controlfp - read and optionally update the floating-point control word.
+ *
+ * new   replacement bits
+ * mask  selects which bits of the control word are replaced by new; with a
+ *       zero mask the control word is only read (Alpha branch; x86 forwards
+ *       both arguments to _control87())
+ *
+ * Returns the resulting control word.  Targets without an implementation
+ * return 0.
+ */
+_WCRTLINK unsigned _controlfp(unsigned new, unsigned mask)
+{
+#if defined(_M_IX86)
+    return _control87(new, mask);               /* JBS 99/09/16 */
+#elif defined(__AXP__)
+    unsigned int  cw;
+
+    cw = MapToCW(_GetFPCR());
+
+    if (mask)
+    {
+        cw = (cw & ~mask) | (new & mask);
+        _SetFPCR(MapFromCW(cw));
+    }
+
+    return cw;
+#elif defined(__PPC__)
+    // No idea yet
+    return( 0 );
+#elif defined(__MIPS__)
+    // No idea yet either
+    return( 0 );
+#else
+    // Any other target: return a defined value instead of running off the
+    // end of a non-void function.
+    return( 0 );
+#endif
+} /* _controlfp() */
--- a/watcom/trunk/clib/fpu/fclex387.c
+++ b/watcom/trunk/clib/fpu/fclex387.c
@@ -0,0 +1,43 @@
+/****************************************************************************
+*
+*                            Open Watcom Project
+*
+*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
+*
+*  ========================================================================
+*
+*    This file contains Original Code and/or Modifications of Original
+*    Code as defined in and that are subject to the Sybase Open Watcom
+*    Public License version 1.0 (the 'License'). You may not use this file
+*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
+*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
+*    provided with the Original Code and Modifications, and is also
+*    available at www.sybase.com/developer/opensource.
+*
+*    The Original Code and all software distributed under the License are
+*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
+*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
+*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
+*    NON-INFRINGEMENT. Please see the License for the specific language
+*    governing rights and limitations under the License.
+*
+*  ========================================================================
+*
+* Description:  _ClearFPE() - clears the x87 FPU exception flags by
+*               executing the no-wait FNCLEX instruction.
+*
+****************************************************************************/
+
+
+#include "rtdata.h"
+#include "variety.h"
+#include "clearfpe.h"
+
+extern void __ClearFPE(void);           /* body is the inline code supplied by the pragma below */
+#pragma aux __ClearFPE = "fnclex"
+/* _ClearFPE - executes "fnclex" (x87: clear exception flags, no-wait form); no arguments, no result */
+void _ClearFPE( void )
+{
+    __ClearFPE();
+} /* _ClearFPE() */
--- a/watcom/trunk/clib/fpu/ini87386.asm
+++ b/watcom/trunk/clib/fpu/ini87386.asm
@@ -0,0 +1,68 @@
+;*****************************************************************************
+;*
+;*                            Open Watcom Project
+;*
+;*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
+;*
+;*  ========================================================================
+;*
+;*    This file contains Original Code and/or Modifications of Original
+;*    Code as defined in and that are subject to the Sybase Open Watcom
+;*    Public License version 1.0 (the 'License'). You may not use this file
+;*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
+;*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
+;*    provided with the Original Code and Modifications, and is also
+;*    available at www.sybase.com/developer/opensource.
+;*
+;*    The Original Code and all software distributed under the License are
+;*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+;*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
+;*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
+;*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
+;*    NON-INFRINGEMENT. Please see the License for the specific language
+;*    governing rights and limitations under the License.
+;*
+;*  ========================================================================
+;*
+;* Description:  routine for checking FPU type
+;*
+;*****************************************************************************
+
+
+include mdef.inc
+
+        modstart init8087
+
+        xdefp   __x87id
+
+__x87id proc                            ; returns EAX = 0 (no x87), 2 (80287) or 3 (80387)
+        sub     EAX,EAX                 ; EAX = 0
+        push    EAX                     ; allocate zeroed slot for the control word
+        finit                           ; use default infinity mode
+        fstcw   word ptr [ESP]          ; save control word
+        fwait                           ; wait for fstcw to complete
+        pop     EAX                     ; AX = stored control word (slot stays 0 if nothing was stored)
+        mov     AL,0                    ; result so far: no coprocessor
+        cmp     AH,3                    ; finit leaves 03h in the control word's high byte
+        jnz     nox87                   ; anything else: report no coprocessor
+        push    EAX                     ; allocate space for status word
+        fld1                            ; generate infinity by
+        fldz                            ;   dividing 1 by 0
+        fdiv                            ; ...
+        fld     st                      ; form negative infinity
+        fchs                            ; ...
+        fcompp                          ; compare +/- infinity
+        fstsw   word ptr [ESP]          ; equal for 87/287
+        fwait                           ; wait fstsw to complete
+        pop     EAX                     ; get NDP status word
+        mov     AL,2                    ; assume 80287
+        sahf                            ; AH -> flags: C3 (operands equal) lands in ZF
+        jz      not387                  ; it's 287 if infinities equal
+        mov     AL,3                    ; indicate 80387
+not387: finit                           ; re-initialize the 8087
+nox87:  mov     AH,0                    ; leave only the type code, so EAX = AL
+        ret                             ; return
+__x87id endp
+
+        endmod
+        end