summaryrefslogtreecommitdiffstats
path: root/private/ntos/dll/i386
diff options
context:
space:
mode:
Diffstat (limited to 'private/ntos/dll/i386')
-rw-r--r--private/ntos/dll/i386/critsect.asm285
-rw-r--r--private/ntos/dll/i386/emarith.asm335
-rw-r--r--private/ntos/dll/i386/emdecode.asm39
-rw-r--r--private/ntos/dll/i386/emdisp.asm298
-rw-r--r--private/ntos/dll/i386/emerror.asm469
-rw-r--r--private/ntos/dll/i386/emf386.asm552
-rw-r--r--private/ntos/dll/i386/emfadd.asm396
-rw-r--r--private/ntos/dll/i386/emfcom.asm402
-rw-r--r--private/ntos/dll/i386/emfconst.asm126
-rw-r--r--private/ntos/dll/i386/emfdiv.asm473
-rw-r--r--private/ntos/dll/i386/emfinit.asm46
-rw-r--r--private/ntos/dll/i386/emfmisc.asm81
-rw-r--r--private/ntos/dll/i386/emfmul.asm238
-rw-r--r--private/ntos/dll/i386/emfprem.asm407
-rw-r--r--private/ntos/dll/i386/emfsqrt.asm267
-rw-r--r--private/ntos/dll/i386/emftran.asm1206
-rw-r--r--private/ntos/dll/i386/emload.asm416
-rw-r--r--private/ntos/dll/i386/emlsbcd.asm279
-rw-r--r--private/ntos/dll/i386/emlsenv.asm457
-rw-r--r--private/ntos/dll/i386/emround.asm712
-rw-r--r--private/ntos/dll/i386/emsincos.asm571
-rw-r--r--private/ntos/dll/i386/emstack.inc72
-rw-r--r--private/ntos/dll/i386/emstore.asm803
-rw-r--r--private/ntos/dll/i386/emtrig.asm863
-rw-r--r--private/ntos/dll/i386/emulator.asm267
-rw-r--r--private/ntos/dll/i386/emxtract.asm309
-rw-r--r--private/ntos/dll/i386/ldrthunk.asm89
-rw-r--r--private/ntos/dll/i386/npxemltr.def17
-rw-r--r--private/ntos/dll/i386/npxnp.c204
-rw-r--r--private/ntos/dll/i386/nt386npx.inc40
-rw-r--r--private/ntos/dll/i386/ntnapntr.asm449
-rw-r--r--private/ntos/dll/i386/sources14
32 files changed, 11182 insertions, 0 deletions
diff --git a/private/ntos/dll/i386/critsect.asm b/private/ntos/dll/i386/critsect.asm
new file mode 100644
index 000000000..4b1f87cb4
--- /dev/null
+++ b/private/ntos/dll/i386/critsect.asm
@@ -0,0 +1,285 @@
+ title "Critical Section Support"
+;++
+;
+; Copyright (c) 1991 Microsoft Corporation
+;
+; Module Name:
+;
+; critsect.asm
+;
+; Abstract:
+;
+; This module implements functions to support user mode critical sections.
+;
+; Author:
+;
+; Bryan M. Willman (bryanwi) 2-Oct-91
+;
+; Environment:
+;
+; Any mode.
+;
+; Revision History:
+;
+;
+;
+; WARNING!!!!!!!!!! This code is duplicated in
+; windows\base\client\i386\critsect.asm
+;
+; Some day we should put it in a .inc file that both include.
+;
+;--
+
+.486p
+ .xlist
+include ks386.inc
+include callconv.inc ; calling convention macros
+ .list
+
+;
+; _LdrpLockPrefixTable - zero-terminated table holding the address of every
+; LOCK-prefixed instruction in this module (labels Lock1 .. Lock5).
+; NOTE(review): presumably read by the loader so the lock prefixes can be
+; patched on uniprocessor systems -- confirm against the consumer.
+;
+_DATA SEGMENT DWORD PUBLIC 'DATA'
+ public _LdrpLockPrefixTable
+_LdrpLockPrefixTable label dword
+ dd offset FLAT:Lock1
+ dd offset FLAT:Lock2
+ dd offset FLAT:Lock3
+ dd offset FLAT:Lock4
+ dd offset FLAT:Lock5
+ dd 0
+_DATA ENDS
+
+_TEXT SEGMENT PARA PUBLIC 'CODE'
+ ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
+
+ EXTRNP _RtlpWaitForCriticalSection,1
+ EXTRNP _RtlpUnWaitCriticalSection,1
+if DEVL
+ EXTRNP _RtlpNotOwnerCriticalSection,1
+endif
+
+CriticalSection equ [esp + 4]
+
+ page , 132
+ subttl "RtlEnterCriticalSection"
+
+;++
+;
+; NTSTATUS
+; RtlEnterCriticalSection(
+; IN PRTL_CRITICAL_SECTION CriticalSection
+; )
+;
+; Routine Description:
+;
+; This function enters a critical section.
+;
+; Arguments:
+;
+; CriticalSection - supplies a pointer to a critical section.
+;
+; Return Value:
+;
+; STATUS_SUCCESS or raises an exception if an error occurred.
+;
+;--
+
+ align 16
+cPublicProc _RtlEnterCriticalSection,1
+cPublicFpo 1,0
+
+; Register roles on the fast path:
+; (ecx) = TEB, (edx) = CriticalSection, (eax) = current thread id
+ mov ecx,fs:PcTeb ; (ecx) == NtCurrentTeb()
+ mov edx,CriticalSection ; (edx) == CriticalSection argument
+ mov eax,TbClientId+4[ecx] ; (eax) == NtCurrentTeb()->ClientId.UniqueThread
+
+if DBG
+ cmp dword ptr TbSpare1[ecx],0 ; checked build: break if TEB Spare1 is nonzero
+ jz @f
+ int 3
+@@:
+endif ; DBG
+Lock1:
+ lock inc dword ptr CsLockCount[edx] ; interlocked inc of CriticalSection->LockCount
+ jnz @F ; nonzero result => section was already locked
+
+; LockCount went to zero: the section was free and is now ours.
+; Also entered by the jmp at the end of the wait path below.
+setowner:
+ mov CsOwningThread[edx],eax
+ mov dword ptr CsRecursionCount[edx],1
+
+if DBG
+ inc dword ptr TbCountOfOwnedCriticalSections[ecx]
+ push edi ; edi is used as scratch and restored
+ mov edi,CsDebugInfo[edx]
+ inc dword ptr CsEntryCount[edi]
+ pop edi
+endif ; DBG
+
+ xor eax,eax ; return 0 (STATUS_SUCCESS per the routine header)
+ stdRET _RtlEnterCriticalSection
+
+ align 16
+@@:
+; Section already locked: if the owner is this thread it is a recursive entry.
+ cmp CsOwningThread[edx],eax
+ jne @F
+ inc dword ptr CsRecursionCount[edx]
+if DBG
+ mov eax,CsDebugInfo[edx] ; eax is free here; it is zeroed just below
+ inc dword ptr CsEntryCount[eax]
+endif ; DBG
+ xor eax,eax
+ stdRET _RtlEnterCriticalSection
+
+@@:
+; Owned by another thread: wait, then reload the registers (the call may
+; have changed them) and take ownership via setowner.
+ stdCall _RtlpWaitForCriticalSection, <edx>
+ mov ecx,fs:PcTeb ; (ecx) == NtCurrentTeb()
+ mov eax,TbClientId+4[ecx] ; (eax) == NtCurrentTeb()->ClientId.UniqueThread
+ mov edx,CriticalSection
+ jmp setowner
+
+stdENDP _RtlEnterCriticalSection
+
+ page , 132
+ subttl "RtlLeaveCriticalSection"
+;++
+;
+; NTSTATUS
+; RtlLeaveCriticalSection(
+; IN PRTL_CRITICAL_SECTION CriticalSection
+; )
+;
+; Routine Description:
+;
+; This function leaves a critical section.
+;
+; Arguments:
+;
+; CriticalSection - supplies a pointer to a critical section.
+;
+; Return Value:
+;
+; STATUS_SUCCESS or raises an exception if an error occurred.
+;
+;--
+
+ align 16
+cPublicProc _RtlLeaveCriticalSection,1
+cPublicFpo 1,0
+
+; (edx) = CriticalSection for the whole routine; (eax) = return status.
+ mov edx,CriticalSection
+if DBG
+; Checked build only: refuse to leave a section this thread does not own.
+ mov ecx,fs:PcTeb ; (ecx) == NtCurrentTeb()
+ mov eax,TbClientId+4[ecx] ; (eax) == NtCurrentTeb()->ClientId.UniqueThread
+ cmp eax,CsOwningThread[edx]
+ je @F
+ stdCall _RtlpNotOwnerCriticalSection, <edx>
+ mov eax,STATUS_INVALID_OWNER
+ stdRET _RtlLeaveCriticalSection
+@@:
+endif ; DBG
+ xor eax,eax ; Assume STATUS_SUCCESS
+ dec dword ptr CsRecursionCount[edx]
+ jnz leave_recurs ; skip if only leaving recursion
+
+ mov CsOwningThread[edx],eax ; clear owning thread id
+
+if DBG
+ mov ecx,fs:PcTeb ; (ecx) == NtCurrentTeb()
+ dec dword ptr TbCountOfOwnedCriticalSections[ecx]
+endif ; DBG
+
+Lock2:
+ lock dec dword ptr CsLockCount[edx] ; interlocked dec of
+ ; ... CriticalSection->LockCount
+ jge @F ; count still >= 0 => go wake a waiter
+ stdRET _RtlLeaveCriticalSection ; eax is still 0 from above
+
+@@:
+ stdCall _RtlpUnWaitCriticalSection, <edx>
+ xor eax,eax ; return STATUS_SUCCESS
+ stdRET _RtlLeaveCriticalSection
+
+ align 16
+leave_recurs:
+; Recursive leave: ownership is kept, only the lock count is dropped.
+Lock3:
+ lock dec dword ptr CsLockCount[edx] ; interlocked dec of
+ ; ... CriticalSection->LockCount
+ stdRET _RtlLeaveCriticalSection ; eax is still 0 from above
+
+; Close with stdENDP, the counterpart of cPublicProc, as the other two
+; procedures in this module do.
+stdENDP _RtlLeaveCriticalSection
+
+ page ,132
+ subttl "RtlTryEnterCriticalSection"
+;++
+;
+; BOOL
+; RtlTryEnterCriticalSection(
+; IN PRTL_CRITICAL_SECTION CriticalSection
+; )
+;
+; Routine Description:
+;
+; This function attempts to enter a critical section without blocking.
+;
+; Arguments:
+;
+; CriticalSection (a0) - Supplies a pointer to a critical section.
+;
+; Return Value:
+;
+; If the critical section was successfully entered, then a value of TRUE
+; is returned as the function value. Otherwise, a value of FALSE is returned.
+;
+;--
+
+CriticalSection equ [esp + 4]
+
+cPublicProc _RtlTryEnterCriticalSection,1
+cPublicFpo 1,0
+
+; (ecx) = CriticalSection for the whole routine.
+ mov ecx,CriticalSection ; (ecx) == CriticalSection argument
+ mov eax, -1 ; set value to compare against
+ mov edx, 0 ; set value to set
+Lock4:
+; Atomically: if LockCount == -1 (eax) then LockCount = 0 (edx), ZF set.
+ lock cmpxchg dword ptr CsLockCount[ecx],edx ; Attempt to acquire critsect
+ jnz short tec10 ; if nz, critsect already owned
+
+ mov eax,fs:TbClientId+4 ; (eax) == NtCurrentTeb()->ClientId.UniqueThread
+ mov CsOwningThread[ecx],eax
+ mov dword ptr CsRecursionCount[ecx],1
+
+if DBG
+ mov eax,fs:PcTeb ; (eax) == NtCurrentTeb()
+ inc dword ptr TbCountOfOwnedCriticalSections[eax]
+endif ; DBG
+
+ mov eax, 1 ; set successful status
+
+ stdRET _RtlTryEnterCriticalSection
+
+tec10:
+;
+; The critical section is already owned. If it is owned by another thread,
+; return FALSE immediately. If it is owned by this thread, we must increment
+; the lock count here.
+;
+ mov eax, fs:TbClientId+4 ; (eax) == NtCurrentTeb()->ClientId.UniqueThread
+ cmp CsOwningThread[ecx], eax
+ jz tec20 ; if eq, this thread is already the owner
+ xor eax, eax ; set failure status
+ stdRET _RtlTryEnterCriticalSection
+
+tec20:
+;
+; This thread is already the owner of the critical section. Perform an atomic
+; increment of the LockCount and a normal increment of the RecursionCount and
+; return success.
+;
+Lock5:
+ lock inc dword ptr CsLockCount[ecx]
+ inc dword ptr CsRecursionCount[ecx]
+ mov eax, 1
+ stdRET _RtlTryEnterCriticalSection
+
+stdENDP _RtlTryEnterCriticalSection
+
+
+_TEXT ends
+ end
diff --git a/private/ntos/dll/i386/emarith.asm b/private/ntos/dll/i386/emarith.asm
new file mode 100644
index 000000000..3b09de0b6
--- /dev/null
+++ b/private/ntos/dll/i386/emarith.asm
@@ -0,0 +1,335 @@
+ subttl emarith.asm - Arithmetic Operations
+ page
+;*******************************************************************************
+;emarith.asm - Arithmetic Operations
+;
+; Microsoft Confidential
+;
+; Copyright (c) Microsoft Corporation 1991
+; All Rights Reserved
+;
+;Purpose:
+; Arithmetic Operations
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+;
+;*******************************************************************************
+
+;
+; eFPREM entry and the shared two-operand dispatch chain.
+;
+; The labels below fall through into one another; other entry points jump
+; into the chain at the label matching what they have already set up:
+; PremPointTopTwo - normal-return address already pushed
+; PointTopTwo - ebp = dispatch table
+; TwoOpSiDi - esi/edi point at the source/destination registers
+; TwoOpSetResult - ecx = source ExpSgn, ebx:esi = source mantissa
+; TwoOpResultSet - [Result] already stored
+; TwoOpDispAh - ah = destination tag
+; TwoOpDispatch - al = source tag, ah = destination tag
+; The final index is (source tag shl TAG_SHIFT) or destination tag, scaled
+; by 4 into the dword table addressed by ebp.
+;
+ NextStackWrap esi,TwoOp ;Tied to NextStackElem below
+
+EM_ENTRY eFPREM
+eFPREM:
+ and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3
+ push offset PremCont ;Return address if normal
+PremPointTopTwo:
+ push offset PremSpclDone ;Return address if special
+ mov ebp,offset tFpremDisp
+PointTopTwo:
+ mov esi,edi
+ NextStackElem esi,TwoOp
+TwoOpSiDi:
+ mov ecx,EMSEG:[esi].ExpSgn ;ecx = source exponent/sign/tag
+ mov ebx,EMSEG:[esi].lManHi ;ebx:esi = source mantissa
+ mov esi,EMSEG:[esi].lManLo
+TwoOpSetResult:
+ mov EMSEG:[Result],edi ;Save result pointer
+TwoOpResultSet:
+ mov ah,EMSEG:[edi].bTag ;ah = destination tag
+TwoOpDispAh:
+ mov al,cl ;al = source tag
+TwoOpDispatch:
+ and eax,TAG_MASK + 100H*TAG_MASK ;Look at internal tags only
+ shl al,TAG_SHIFT
+ or al,ah
+ xor ah,ah ;Zero ah
+;UNDONE: masm bug! ebp + scaled index requires a displacement.
+;UNDONE: No displacement is needed here, so masm should generate a
+;UNDONE: zero. It doesn't! dec eax so we can add 4*1 back.
+ dec eax ;UNDONE
+ jmp dword ptr cs:[ebp+4*eax+4];UNDONE Go to appropriate routine.
+
+;eFPREM1 differs from eFPREM only in the normal-return address it pushes
+;(Prem1Cont); it then joins the eFPREM path at PremPointTopTwo.
+EM_ENTRY eFPREM1
+eFPREM1:
+ and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3
+ push offset Prem1Cont ;Return address if normal
+ jmp PremPointTopTwo
+
+;eFSCALE: select the FSCALE dispatch table and use the common two-operand
+;setup. No return address is pushed here, unlike eFPREM.
+EM_ENTRY eFSCALE
+eFSCALE:
+ mov ebp,offset tFscaleDisp
+ jmp PointTopTwo
+
+;eFPATAN / TopTwoPop: two-operand setup for operations that return through
+;PopWhenDone. The source is the register edi points at on entry; the
+;destination is the next register, wrapping from ENDstk back to BEGstk.
+;TopTwoPop is also entered with ebp already holding a dispatch table.
+EM_ENTRY eFPATAN
+eFPATAN:
+ mov ebp,offset tFpatanDisp
+TopTwoPop:
+ mov esi,edi ;esi = source register
+ push offset PopWhenDone ;Return address
+ add edi,Reg87Len ;edi = ST(1)
+ cmp edi,ENDstk
+ jae TopTwoPopWrap ;Stepped past the end of the stack area
+ jmp TwoOpSiDi
+TopTwoPopWrap:
+ mov edi,BEGstk
+ jmp TwoOpSiDi
+
+;eFYL2X: select the FYL2X dispatch table, then share the TopTwoPop setup.
+EM_ENTRY eFYL2X
+eFYL2X:
+ mov ebp,offset tFyl2xDisp
+ jmp TopTwoPop
+
+;eFYL2XP1: select the FYL2XP1 dispatch table, then share the TopTwoPop setup.
+EM_ENTRY eFYL2XP1
+eFYL2XP1:
+ mov ebp,offset tFyl2xp1Disp
+ jmp TopTwoPop
+
+;*******************************************************************************
+
+page
+;-----------------------------------------------------------;
+; ;
+; Special Case Routines for Arithmetic Functions ;
+; ;
+;-----------------------------------------------------------;
+
+;There are four kinds of "specials", encoded in the tag:
+;
+; Empty
+; Infinity
+; NAN (which can be QNAN or SNAN)
+; Denormal
+;
+;Empty always results in an Invalid Operation exception with Stack Flag set
+;and C1 (O/U#) bit clear, and returns Indefinite (a specific QNAN).
+;
+;Operations on NAN return the same NAN except it is always modified to a
+;QNAN. If both operands are NAN, the one with the larger mantissa is
+;returned. An SNAN causes an Invalid Operation exception except for
+;internal FP stack operations, FCHS, and FABS. A QNAN does not cause
+;an exception.
+;
+;Operations on Infinity return a result depending on the operation.
+;
+;UNDONE: Old code plays with sign of NAN when two NANs with equal
+;mantissas are used. Why?
+
+;"***" means entry point from dispatch tables
+
+;***
+;Divide, source operand is special. cl = source tag, ch = source sign,
+;edi = destination register.
+DivSpclSource:
+ cmp cl,bTAG_INF
+ jnz SpclSource
+;Division by infinity always returns zero
+ xor ch,EMSEG:[edi].bSgn ;Result sign = source sign xor dest sign
+ jmp SignedZero ;in emfmul.asm
+
+;***
+;Multiply, source operand is special. cl = source tag, ch = source sign,
+;edi = destination register.
+MulSpclSource:
+ cmp cl,bTAG_INF
+ jnz SpclSource
+MulByInf:
+ cmp EMSEG:[edi].bTag,bTAG_ZERO ;Infinity * zero?
+ jz ReturnIndefinite
+XorSourceSign:
+;Store the source operand as the result, with its sign xor'ed with the
+;destination's sign.
+ xor ch,EMSEG:[edi].bSgn
+ jmp SaveResultEdi
+
+;***
+;Add/subtract, source operand is special. cl = source tag, ch = source sign.
+;dl is xor'ed into the source sign (the "subtracting source" flag; see
+;AddTwoInf).
+AddSpclSource:
+ cmp cl,bTAG_INF
+ jnz SpclSource
+ xor ch,dl ;Flip sign of infinity if subtracting
+ jmp SaveResultEdi
+
+;Source operand is a denormal: record the Denormal exception. If it is
+;masked, retag the source as VALID and dispatch again; otherwise abort.
+;Note CURerr is overwritten (mov), not or'ed.
+DenormalSource:
+ mov cl,bTAG_VALID ;Change denormal to DOUBLE
+ mov EMSEG:[CURerr],Denormal
+ test EMSEG:[CWmask],Denormal ;Is denormal exception masked?
+ jnz TwoOpResultSet
+AbortOp:
+;Common exit for unmasked exceptions: the operation is not performed.
+ mov cl,bTAG_NOPOP ;Unmasked, don't pop stack
+ ret
+
+;Both operands are denormals and the Denormal exception is masked (reached
+;from TwoOpBothSpcl): jump to the table entry for two VALID operands.
+DenormalDisp:
+;Repeat dispatch, but for normal ops
+ jmp dword ptr cs:[ebp+4*(TAG_VALID + TAG_VALID shl TAG_SHIFT)]
+
+;***
+;Reverse divide, source operand is special. Falls into the generic
+;special-source classifier SpclSource, which is shared by the Div/Mul/Add
+;source handlers. cl = source tag, ebx = high dword of source mantissa.
+DivrSpclSource:
+ cmp cl,bTAG_INF
+ jz XorSourceSign ;Return infinity
+SpclSource:
+ cmp cl,bTAG_DEN
+ jz DenormalSource
+ cmp cl,bTAG_EMPTY
+ jz StackError
+;Must be a NAN
+SourceNAN:
+;Bit 30 of the high mantissa dword is the quiet bit: set = QNAN, clear = SNAN.
+ test ebx,1 shl 30 ;Check for SNAN
+ jnz SaveResultEdi ;If QNAN, just use it as result
+SourceSNAN:
+ or EMSEG:[CURerr],Invalid ;Flag the error
+ or ebx,1 shl 30 ;Make it into a QNAN
+ test EMSEG:[CWmask],Invalid ;Is it masked?
+ jnz SaveResultEdi ;If so, update with masked response
+ mov cl,bTAG_NOPOP ;Unmasked, don't pop stack
+ ret
+
+
+;***
+;Reverse divide, destination operand is special. Loads the whole ExpSgn
+;dword so that al = destination tag and ah = destination sign byte.
+DivrSpclDest:
+ mov eax,EMSEG:[edi].ExpSgn ;Pick up tag
+ cmp al,bTAG_INF
+ jnz SpclDest
+;Division by infinity always returns zero
+ xor ch,ah ;Result sign = source sign xor dest sign
+ jmp SignedZero ;in emfmul.asm
+
+;***
+;Multiply, destination operand is special. cl = source tag, ch = source sign.
+MulSpclDest:
+ mov al,EMSEG:[edi].bTag ;Pick up tag
+ cmp al,bTAG_INF
+ jnz SpclDest
+ cmp cl,bTAG_ZERO ;Infinity * zero?
+ jz ReturnIndefinite
+XorDestSign:
+;The destination is left in place as the result; only its sign is updated.
+ xor EMSEG:[edi].bSgn,ch ;Xor signs
+ ret
+
+;***
+;Add/subtract, destination operand is special. dh is xor'ed into the
+;destination sign (the "subtracting dest" flag; see AddTwoInf).
+AddSpclDest:
+ mov al,EMSEG:[edi].bTag ;Pick up tag
+ cmp al,bTAG_INF
+ jnz SpclDest
+ xor EMSEG:[edi].bSgn,dh ;Flip sign of infinity if subtracting
+ ret
+
+;Destination operand is a denormal: record the Denormal exception. If it
+;is masked, treat the destination as VALID (ah) and dispatch again;
+;otherwise abort. Note CURerr is overwritten (mov), not or'ed.
+DenormalDest:
+ mov ah,bTAG_VALID ;Change denormal to DOUBLE
+ mov EMSEG:[CURerr],Denormal
+ test EMSEG:[CWmask],Denormal ;Is denormal exception masked?
+ jnz TwoOpDispAh
+ mov cl,bTAG_NOPOP ;Unmasked, don't pop stack
+ ret
+
+;***
+;Divide, destination operand is special. Falls into the generic
+;special-destination classifier SpclDest, which is shared by the
+;Divr/Mul/Add destination handlers. al = destination tag.
+DivSpclDest:
+ mov al,EMSEG:[edi].bTag ;Pick up tag
+ cmp al,bTAG_INF
+ jz XorDestSign ;Return infinity
+SpclDest:
+ cmp al,bTAG_DEN
+ jz DenormalDest
+SpclDestNotDen:
+ cmp al,bTAG_EMPTY
+ jz StackError
+;Must be a NAN
+DestNAN:
+;40H in the top mantissa byte is the quiet bit (bit 30 of lManHi).
+ test EMSEG:[edi].bMan7,40H ;Check for SNAN
+ jnz ReturnDest ;If QNAN, just use it as result
+DestSNAN:
+ or EMSEG:[CURerr],Invalid ;Flag the error
+ test EMSEG:[CWmask],Invalid ;Is it masked?
+ jz AbortOp ;No - preserve value
+ or EMSEG:[edi].bMan7,40H ;Make it into a QNAN
+ ret
+
+;StackError: an operand was EMPTY. Sets Invalid+StackFlag (overwriting
+;CURerr) and falls into ReturnIndefinite.
+;ReturnIndefinite: flag Invalid; if it is masked, write the Indefinite
+;QNAN (mantissa C0000000:00000000, sign set, tag NAN) into the destination.
+;ReturnDest: plain return, destination left as it is.
+StackError:
+ mov EMSEG:[CURerr],Invalid+StackFlag
+ReturnIndefinite:
+ or EMSEG:[CURerr],Invalid
+ test EMSEG:[CWmask],Invalid ;Is it masked?
+ jz AbortOp ;No - preserve value
+ mov EMSEG:[edi].lManLo,0
+ mov EMSEG:[edi].lManHi,0C0000000H
+ mov EMSEG:[edi].ExpSgn,TexpMax shl 16 + bSign shl 8 + bTAG_NAN
+ReturnDest:
+ ret
+
+
+AddTwoInf:
+;Adding two infinities.
+;If signs are the same, return that infinity. Otherwise, Invalid Operation.
+;On entry ch = source sign, ah = destination sign (loaded by TwoInfs);
+;dl / dh are xor'ed into the source / destination sign.
+ xor ch,dl ;Possibly subtracting source
+ xor ah,dh ;Possibly subtracting dest
+ xor ch,ah ;Compare signs
+ js ReturnIndefinite
+ mov EMSEG:[edi].bSgn,ah ;Correct the sign if subtracting
+ ret
+
+;***
+;TwoOpBothSpcl - both operands carry a special tag (EMPTY, NAN, INF or
+;denormal). Sorts out which combination is present and forwards to the
+;matching handler.
+;
+;Entry: ebp = dispatch table address
+; cl = source tag
+; edi = destination register
+;Uses: al = destination tag, ah = source tag
+TwoOpBothSpcl:
+;ebp = dispatch table address
+ mov al,EMSEG:[edi].bTag
+ mov ah,cl
+ cmp ax,(bTAG_NAN shl 8) + bTag_NAN ;Are both NAN?
+ jz TwoNANs
+ cmp cl,bTAG_EMPTY
+ jz StackError
+ cmp al,bTAG_EMPTY
+ jz StackError
+ cmp cl,bTAG_NAN
+ jz SourceNAN
+ cmp al,bTAG_NAN
+ jz DestNAN
+ cmp ax,(bTAG_INF shl 8) + bTag_INF ;Are both infinity?
+ jz TwoInfs
+;At least one of the operands is a denormal
+ mov EMSEG:[CURerr],Denormal
+ test EMSEG:[CWmask],Denormal ;Is denormal exception masked?
+ jz AbortOp ;If not, don't do operation
+;Denormal exception is masked, treat denormals as VALID
+;Dispatch through operation table in ebp again
+ cmp ax,(bTAG_DEN shl 8) + bTag_DEN ;Are both denormal?
+ jz DenormalDisp
+;Have an infinity and a denormal
+ cmp al,bTAG_INF ;al = destination tag
+ jz DestInf
+;Dest is denormal, Source is infinity: take the "special destination" entry.
+;The table is addressed through cs:, like the other dispatches in this
+;file; a bare [ebp+...] operand would default to the SS segment.
+ jmp dword ptr cs:[ebp+4*(TAG_SPCL + TAG_VALID shl TAG_SHIFT)]
+
+DestInf:
+;Dest is infinity, Source is denormal: take the "special source" entry.
+ jmp dword ptr cs:[ebp+4*(TAG_VALID + TAG_SPCL shl TAG_SHIFT)]
+
+;TwoNANs - both operands are NANs; the one with the larger mantissa wins.
+;
+;Entry: ebx:esi = source mantissa (high:low), edi = destination register.
+;The quiet bit is bit 30 of the high mantissa dword (40H in byte bMan7);
+;clear means SNAN. If the operand that loses is an SNAN, the winner is
+;routed through the SNAN path so Invalid is still raised.
+TwoNANs:
+;Two NANs. Use largest mantissa
+ cmp ebx,EMSEG:[edi].lManHi
+ ja BiggerNAN
+ jb DestBigger
+;Now we know they're both the same type, SNAN or QNAN
+ cmp esi,EMSEG:[edi].lManLo
+ ja SourceNAN
+;UNDONE: Old code did funny business with signs when mantissas were equal
+ jmp DestNAN
+
+BiggerNAN:
+;Source mantissa is larger; the destination is the smaller one.
+ test EMSEG:[edi].bMan7,40H ;Is smaller one SNAN?
+ jz SourceSNAN
+ jmp SourceNAN
+
+DestBigger:
+;Destination mantissa is larger; the source (ebx) is the smaller one.
+;ebx holds the full high dword, so the quiet bit is bit 30 here, not 40H.
+ test ebx,1 shl 30 ;Is smaller one SNAN?
+ jz DestSNAN
+ jmp DestNAN
+
+;TwoInfs - both operands are infinity. Loads the destination sign into ah
+;and jumps through entry 16 of the dispatch table in ebp.
+;The table is addressed through cs:, like the other dispatches in this
+;file; a bare [ebp+...] operand would default to the SS segment.
+TwoInfs:
+ mov ah,EMSEG:[edi].bSgn
+ jmp dword ptr cs:[ebp+4*16] ;Go do code for two infinities
+
+
+;***
+;DivideByMinusZero - as DivideByZero, but first forces the source sign
+;byte (ch) to negative.
+DivideByMinusZero:
+ mov ch,bSign
+;***
+;DivideByZero - raise ZeroDivide. If the exception is unmasked the
+;operation is abandoned (AbortOp); otherwise an infinity whose sign is
+;source sign xor destination sign is stored through SaveResultEdi.
+DivideByZero:
+ or EMSEG:[CURerr],ZeroDivide
+ test EMSEG:[CWmask],ZeroDivide ;Masked?
+ jnz DivZeroMasked
+ jmp AbortOp ;Unmasked - leave the operands alone
+DivZeroMasked:
+ mov ebx,80000000H ;Mantissa high dword: top bit only
+ xor ch,EMSEG:[edi].bSgn ;Result sign
+ and ecx,8000H ;Discard everything except the sign bit
+ or ecx,(4000H+TexpBias) shl 16 + bTAG_INF ;Exponent and tag of infinity
+ xor esi,esi ;Mantissa low dword = 0
+ jmp SaveResultEdi
diff --git a/private/ntos/dll/i386/emdecode.asm b/private/ntos/dll/i386/emdecode.asm
new file mode 100644
index 000000000..242a8aa69
--- /dev/null
+++ b/private/ntos/dll/i386/emdecode.asm
@@ -0,0 +1,39 @@
+ subttl emdecode.asm - Instruction decoding
+ page
+;***
+;emdecode.asm - Instruction decoding
+;
+; Microsoft Confidential
+;
+; Copyright (c) Microsoft Corporation 1991
+;
+; All Rights Reserved
+;
+;Purpose:
+; Further decoding of instructions done here.
+;
+;Revision History:
+;
+; 8/23/91 TP Rewritten for 32 bits
+;
+;*******************************************************************************
+
+;Second-level decode stubs. Each one is reached with eax = (r/m bits) * 4,
+;which is already the byte offset of a dword entry, and jumps through the
+;per-group table to the handler for that instruction.
+
+GroupFCHS:
+ jmp [tGroupFCHSdisp+eax]
+
+GroupFLD1:
+ jmp [tGroupFLD1disp+eax]
+
+GroupF2XM1:
+ jmp [tGroupF2XM1disp+eax]
+
+GroupFPREM:
+ jmp [tGroupFPREMdisp+eax]
+
+GroupFENI:
+ jmp [tGroupFENIdisp+eax]
+
+
diff --git a/private/ntos/dll/i386/emdisp.asm b/private/ntos/dll/i386/emdisp.asm
new file mode 100644
index 000000000..7e7402f66
--- /dev/null
+++ b/private/ntos/dll/i386/emdisp.asm
@@ -0,0 +1,298 @@
+ subttl emdisp.asm - Emulator Dispatch Tables
+ page
+;
+; IBM/Microsoft Confidential
+;
+; Copyright (c) IBM Corporation 1987, 1989
+; Copyright (c) Microsoft Corporation 1987, 1989
+;
+; All Rights Reserved
+;
+;Revision History: (also see emulator.hst)
+;
+; 1/21/92 JWM Minor modifications for DOSX32 emulator
+; 8/23/91 TP Direct dispatch off of 6-bit opcode
+; 10/30/89 WAJ Added this header.
+;
+;*******************************************************************************
+
+;*********************************************************************;
+; ;
+; Dispatch Tables ;
+; ;
+;*********************************************************************;
+
+
+; These tables are based upon the layout of the 8087 instructions
+;
+; 8087 instruction fields: |escape|MF|Arith|MOD|Op|r/m|disp1|disp2|
+; field length in bits: 5 2 1 2 3 3 8 8
+;
+; Disp1 and Disp2 are optional address bytes present only if MOD <> 11.
+; When (MOD <> 11) r/m describes which regs (SI,DI,BX,BP) are added to
+; Disp1 and Disp2 to calculate the effective address. This form
+; (memory format) is used for Loads, Stores, Compares, and Arithmetic
+; When using memory format MF determines the Type of the Memory operand
+; i.e. Single Real, Double real, Single Integer, or Double Integer
+; Arith is 0 for Arithmetic operations (and compares), set to 1 otherwise
+; Op mostly determines which type of operation to do though when not in
+; memory format some of that is coded into MF and r/m
+; All of the tables are set up to do a jump based upon one or more of the
+; above fields. The outline for decoding instructions is:
+;
+; IF (memory format) THEN
+; Assemble Effective Address (using MOD and r/m and EffectiveAddressTab)
+; Jump through table to operation, using MF, Arith and Op bits
+; ELSE (Register format)
+; Jump through table to operation, using MF, Arith and Op bits
+
+ ALIGN 4
+
+;*********************************************************************;
+;
+; Memory address calculation tables
+
+;EA386Tab - effective-address routine table, indexed by |r/m|MOD+1|.
+;One row of four dwords per r/m value; within a row the columns are the
+;index values 0..3 of the low two bits. Column 0 is NoEffectiveAddress
+;in every row.
+EA386Tab label dword ; Uses |r/m|MOD+1| for indexing
+ dd NoEffectiveAddress, Exx00, Exx01, Exx10 ; eax
+ dd NoEffectiveAddress, Exx00, Exx01, Exx10 ; ecx
+ dd NoEffectiveAddress, Exx00, Exx01, Exx10 ; edx
+ dd NoEffectiveAddress, Exx00, Exx01, Exx10 ; ebx
+ dd NoEffectiveAddress, SIB00, SIB01, SIB10 ; esp (S-I-B follows)
+ dd NoEffectiveAddress, Direct386, Exx01, Exx10 ; ebp (00 = direct addressing)
+ dd NoEffectiveAddress, Exx00, Exx01, Exx10 ; esi
+ dd NoEffectiveAddress, Exx00, Exx01, Exx10 ; edi
+
+;*********************************************************************;
+;
+;Opcode dispatching tables
+;Indexed by | op1 | op2 |0 0| (op1 = MF|Arith)
+
+;tOpRegDisp - dispatch table for register-format instructions.
+;64 dword entries in eight groups of eight: the group is selected by op1
+;(MF|Arith) and the entry within the group by op2, per the indexing note
+;above. Entries are handler addresses, second-level Group* decode stubs,
+;or UNUSED.
+ public tOpRegDisp
+tOpRegDisp label dword
+;op1 = 0
+ dd eFADDtop
+ dd eFMULtop
+ dd eFCOM
+ dd eFCOMP
+ dd eFSUBtop
+ dd eFSUBRtop
+ dd eFDIVtop
+ dd eFDIVRtop
+
+;op1 = 1
+ dd eFLDreg
+ dd eFXCH
+ dd eFNOP ;UNDONE: also reserved on 387
+ dd eFSTP ;Special form 1
+ dd GroupFCHS ;FCHS,FABS,FTST,FXAM
+ dd GroupFLD1 ;FLD1,FLDL2T,FLDL2E,FLDPI,FLDLG2,FLDLN2,FLDZ
+ dd GroupF2XM1 ;F2XM1,FYL2X,FPTAN,FPATAN,FXTRACT,FPREM1,FDECSTP,FINCSTP
+ dd GroupFPREM ;FPREM,FYL2XP1,FSQRT,FSINCOS,FRNDINT,FSCALE,FSIN,FCOS
+
+;op1 = 2
+ dd UNUSED
+ dd UNUSED
+ dd UNUSED
+ dd UNUSED
+ dd UNUSED
+ dd eFUCOMPP ;UNDONE: also reserved on 387
+ dd UNUSED
+ dd UNUSED
+
+;op1 = 3
+ dd UNUSED
+ dd UNUSED
+ dd UNUSED
+ dd UNUSED
+ dd GroupFENI ;FENI,FDISI,FCLEX,FINIT
+ dd UNUSED
+ dd UNUSED
+ dd UNUSED
+
+;op1 = 4 (note SUBR/SUB and DIVR/DIV appear in the opposite order to op1 = 0)
+ dd eFADDreg
+ dd eFMULreg
+ dd eFCOM ;Special form 2
+ dd eFCOMP ;Special form 3
+ dd eFSUBRreg
+ dd eFSUBreg
+ dd eFDIVRreg
+ dd eFDIVreg
+
+;op1 = 5
+ dd eFFREE
+ dd eFXCH ;Special form 4
+ dd eFST
+ dd eFSTP
+ dd eFUCOM
+ dd eFUCOMP
+ dd UNUSED
+ dd UNUSED
+
+;op1 = 6
+ dd eFADDPreg
+ dd eFMULPreg
+ dd eFCOMP ;Special form 5
+ dd eFCOMPP ;UNDONE: also reserved on 387
+ dd eFSUBRPreg
+ dd eFSUBPreg
+ dd eFDIVRPreg
+ dd eFDIVPreg
+
+;op1 = 7
+ dd eFFREE ;Special form 6 UNDONE: "and pop stack"?
+ dd eFXCH ;Special form 7
+ dd eFSTP ;Special form 8
+ dd eFSTP ;Special form 9
+ dd eFSTSWax ;UNDONE: also reserved on 387
+ dd UNUSED
+ dd UNUSED
+ dd UNUSED
+
+
+;tOpMemDisp - dispatch table for memory-format instructions.
+;64 dword entries: eight groups (MF, Arith) of eight entries, written here
+;as two rows of four per group.
+tOpMemDisp label dword
+;MF = 00 (32-bit Real), Arith = 0
+ dd eFADD32, eFMUL32, eFCOM32, eFCOMP32
+ dd eFSUB32, eFSUBR32, eFDIV32, eFDIVR32
+;MF = 00 (32-bit Real), Arith = 1
+ dd eFLD32, UNUSED, eFST32, eFSTP32
+ dd eFLDENV, eFLDCW, eFSTENV, eFSTCW
+;MF = 01 (32-bit Int), Arith = 0
+ dd eFIADD32, eFIMUL32, eFICOM32, eFICOMP32
+ dd eFISUB32, eFISUBR32, eFIDIV32, eFIDIVR32
+;MF = 01 (32-bit Int), Arith = 1
+ dd eFILD32, UNUSED, eFIST32, eFISTP32
+ dd UNUSED, eFLD80, UNUSED, eFSTP80
+;MF = 10 (64-bit Real), Arith = 0
+ dd eFADD64, eFMUL64, eFCOM64, eFCOMP64
+ dd eFSUB64, eFSUBR64, eFDIV64, eFDIVR64
+;MF = 10 (64-bit Real), Arith = 1
+ dd eFLD64, UNUSED, eFST64, eFSTP64
+ dd eFRSTOR, UNUSED, eFSAVE, eFSTSW
+;MF = 11 (16-bit Int), Arith = 0
+ dd eFIADD16, eFIMUL16, eFICOM16, eFICOMP16
+ dd eFISUB16, eFISUBR16, eFIDIV16, eFIDIVR16
+;MF = 11 (16-bit Int), Arith = 1
+ dd eFILD16, UNUSED, eFIST16, eFISTP16
+ dd eFBLD, eFILD64, eFBSTP, eFISTP64
+
+
+;Constant-load group: eight entries selected by the r/m field.
+tGroupFLD1disp label dword
+ dd eFLD1, eFLDL2T, eFLDL2E, eFLDPI
+ dd eFLDLG2, eFLDLN2, eFLDZ, UNUSED
+
+
+;F2XM1 group: eight entries selected by the r/m field.
+tGroupF2XM1disp label dword
+ dd eF2XM1, eFYL2X, eFPTAN, eFPATAN
+ dd eFXTRACT, eFPREM1, eFDECSTP, eFINCSTP
+
+
+;FCHS group: eight entries selected by the r/m field.
+tGroupFCHSdisp label dword
+ dd eFCHS, eFABS, UNUSED, UNUSED
+ dd eFTST, eFXAM, UNUSED, UNUSED
+
+
+;FPREM group: eight entries selected by the r/m field.
+tGroupFPREMdisp label dword
+ dd eFPREM, eFYL2XP1, eFSQRT, eFSINCOS
+ dd eFRNDINT, eFSCALE, eFSIN, eFCOS
+
+
+;FENI (control) group: eight entries selected by the r/m field.
+tGroupFENIdisp label dword
+ dd eFENI, eFDISI, eFCLEX, eFINIT
+ dd eFSETPM, UNUSED, UNUSED, UNUSED
+
+
diff --git a/private/ntos/dll/i386/emerror.asm b/private/ntos/dll/i386/emerror.asm
new file mode 100644
index 000000000..3fc135a06
--- /dev/null
+++ b/private/ntos/dll/i386/emerror.asm
@@ -0,0 +1,469 @@
+ page ,132
+ subttl emerror.asm - Emulator error handler
+;***
+;emerror.asm - Emulator error handler
+;
+; Microsoft Confidential
+;
+; Copyright (c) Microsoft Corporation 1987, 1991
+;
+; All Rights Reserved
+;
+;Purpose:
+; Emulator error handler
+;
+;Revision History: (also see emulator.hst)
+;
+; 10/30/89 WAJ Added this header.
+; 11/15/89 WAJ Major changes for Dos32RaiseExcpetion().
+; 12/01/89 WAJ Now set cbExceptionInfo correctly.
+; 02/08/90 WAJ Fixed GP fault in 32 bit exception handler.
+; 09/03/91 JWM Modified entry/exit sequence for DOSX32.
+; 02/15/92 JWM Adapted for NT.
+;
+;*******************************************************************************
+
+ifdef _DOS32EXT
+include except32.inc
+endif
+
+;*** error_return - return to user code (regardless of error)
+;
+; This macro returns to user code. It goes to some lengths
+; to restore the flags on the instruction immediately before
+; the return so that any pending trace trap will be
+; acknowledged immediately after the retfd (and before the
+; next user instruction) instead of after the instruction
+; following the return as would be the case if we returned
+; using iretd.
+;
+; ENTRY ((SS:ESP)) = user's EAX
+; ((SS:ESP)+4) = return EIP
+; ((SS:ESP)+8) = return CS
+; ((SS:ESP)+12) = user's EFLAGS
+; EXIT return to user program, above arguments
+; popped off stack, user's EAX and EFLAGS
+; restored.
+;
+; NOTE(review): the description above does not match the macro body below.
+; The body never pops EAX, reloads the flags from [esp+8] and returns with
+; iretd, so it expects the stack to hold return EIP, return CS, EFLAGS at
+; (ESP), (ESP)+4, (ESP)+8. The flag reload is only assembled for _DOS32EXT
+; and for checked (DBG) NT386 builds. The "noerror" parameter is not
+; referenced in the body. Confirm and update the header.
+
+error_return macro noerror
+ifdef _DOS32EXT
+ sti ; JWM, 9/3/91
+ push dword ptr [esp+8] ; JWM, 9/6/91
+ popfd ; JWM, 9/6/91
+endif ; DOS32EXT
+
+ifdef NT386
+if DBG
+ push dword ptr [esp+8] ; On checked build, allow
+ popfd ; single step to work
+endif
+endif
+ iretd
+ endm
+
+
+;TESTif nam - load bl with the error number err<nam>, test the exception
+;bit <nam> in ax (ah if the bit value is 100h or above, else al) and
+;branch to signalerror through the JSNZ macro.
+;NOTE(review): no use of this macro is visible in this part of the file.
+TESTif macro nam
+ mov bl,err&nam ; default error number
+ if (nam ge 100h)
+ test ah,nam/256
+ else ;not (nam ge 100h)
+ test al,nam
+ endif ;(nam ge 100h)
+ JSNZ signalerror
+ endm
+
+;CommonExceptions - exception exit path of the emulator.
+;Merges the saved status bits into the emulator status word, unwinds the
+;register frame saved on the stack (eax, ecx, edx, ebx, esp slot, ebp,
+;esi, edi, two more dwords, ds), then calls EmExcept to raise the
+;exception and finally returns to the user with error_return.
+EM_ENTRY eCommonExceptions
+CommonExceptions:
+ mov ebx,[esp].[OldLongStatus]
+ and ebx,LongSavedFlags ;preserve condition codes, error flags
+ or EMSEG:[LongStatusWord],ebx ;merge saved status word, condition codes
+ pop eax
+ pop ecx
+ pop edx
+ pop ebx
+ add esp,4 ; toss esp value
+ pop ebp
+ pop esi
+ pop edi
+ add esp,8 ;toss old PrevCodeOff and StatusWord
+ pop ds
+ call Emexcept ;stack now holds only the return frame
+ error_return noerror
+
+ifdef _DOS32EXT
+
+;EmExcept (DOS32EXT build) - build a _DX32_CONTEXT describing the user
+;state at the faulting instruction, call DOS32RAISEEXCEPTION with it, and
+;if that returns nonzero resume execution from the (possibly modified)
+;context.
+;
+;Entry: general registers and segment registers hold the user's values;
+; OldEIP/OldCS/OldFlags are the interrupt return frame.
+;NOTE(review): when DOS32RAISEEXCEPTION returns zero, control reaches
+;ExceptNotHandled, which has no instruction before ENDP -- confirm what
+;is meant to execute there.
+
+EmExcept PROC C, OldEIP:DWORD, OldCS:DWORD, OldFlags:DWORD
+
+LOCAL SSAR:DWORD
+LOCAL ec:_DX32_CONTEXT
+
+ ;*
+ ;* Set up SS access rights.
+ ;*
+
+ push ds ; popped again below to fill in SegDs
+ mov [ec.R_Eax], eax ; save user eax before eax is used as scratch
+ GetEmData ds,ax
+
+ mov eax, ss
+ lar eax, eax
+ mov [SSAR], eax
+
+ ;*
+ ;* Fill in ExceptionContext structure.
+ ;*
+
+
+ mov [ec.NPXContextFlags], NPX_CONTEXT_FULL
+ mov [ec.R_Edi], edi
+ mov [ec.R_Esi], esi
+
+ mov eax, [ebp] ; caller's ebp, saved by the PROC prologue
+ mov [ec.R_Ebp], eax
+
+ lea eax, [OldFlags+4] ; user esp = first byte above the return frame
+ mov [ec.R_Esp], eax
+
+ mov [ec.R_Ebx], ebx
+ mov [ec.R_Edx], edx
+ mov [ec.R_Ecx], ecx
+
+ mov eax, EMSEG:[PrevCodeOff]
+
+ mov [ec.R_Eip], eax
+ mov eax, [OldFlags]
+ mov [ec.EFlags], eax
+
+ mov eax, [OldCS]
+ movzx eax,ax
+ mov [ec.SegCs], eax
+ mov ax,ss
+ movzx eax,ax
+ mov [ec.SegSs], eax
+
+ pop eax
+ movzx eax,ax
+ mov [ec.SegDs], eax ; ds was pushed on entry.
+
+ mov ax,es
+ movzx eax,ax
+ mov [ec.SegEs], eax
+
+ mov ax,fs
+ movzx eax,ax
+ mov [ec.SegFs], eax
+
+ mov ax,gs
+ movzx eax,ax
+ mov [ec.SegGs], eax
+
+ lea esi, [ec]
+ add esi, 4 ; esi = ec + 4, passed to SaveState
+
+ push ebp
+ call SaveState
+ pop ebp
+
+ lea eax, [ec]
+ push ds
+ push es
+
+ mov bx, seg FLAT:CURstk
+ mov ds, ebx
+ mov es, ebx
+ push eax ; argument: address of the context
+
+ call DOS32RAISEEXCEPTION
+
+ add esp, 4
+
+ pop es
+ pop ds
+
+RaiseExceptRet:
+ or eax, eax
+ JZ ExceptNotHandled
+
+ ;*
+ ;* Copy new flags, cs, eip to new stack.
+ ;*
+
+ mov ds, [ec.SegSs]
+ mov esi, [ec.R_Esp] ; ds:esi == new ss:esp
+
+ mov eax, [ec.Eflags] ; set up iretd frame
+ mov [esi-4], eax
+
+ mov eax, [ec.SegCs]
+ mov [esi-8], eax
+
+ mov eax, [ec.R_Eip]
+ mov [esi-12], eax
+
+ ;*
+ ;* Put new stack pointer on stack.
+ ;*
+
+ push ds
+ sub esi, 12
+ push esi
+
+ ;*
+ ;* Reset other registers.
+ ;*
+
+ mov edi, [ec.R_Edi]
+ mov esi, [ec.R_Esi]
+ mov ebx, [ec.R_Ebx]
+ mov edx, [ec.R_Edx]
+ mov ecx, [ec.R_Ecx]
+ mov eax, [ec.R_Eax]
+ mov ds, [ec.SegDs]
+ mov es, [ec.SegEs]
+ mov fs, [ec.SegFs]
+ mov gs, [ec.SegGs]
+
+ mov ebp, [ec.R_Ebp] ; must do this last.
+
+ lss esp, fword ptr [esp] ; reset ss:esp
+
+ sti ; JWM, 9/3/91
+ push [esp+8] ; JWM, 9/6/91
+ popfd ; JWM, 9/6/91
+
+ iretd ; reset flags, cs, eip
+
+ExceptNotHandled:
+EmExcept ENDP
+
+endif ; ifdef _DOS32EXT
+
+ifdef NT386
+
+;EmExcept (NT386 build) - build a context frame and an exception record
+;on the stack for the pending NPX error and raise it through
+;_NtRaiseException (first chance), followed by _RtlRaiseStatus.
+;
+;Stack frame (ebp-relative):
+; [ebp+16] OldFlags \
+; [ebp+12] OldCS > interrupt return frame
+; [ebp+8] OldEIP /
+; [ebp+4] return address into CommonExceptions
+; [ebp] caller's ebp
+; [ebp-4] SSAR
+; ec context frame, ContextFrameLength rounded up to ISIZE
+; eStruct exception record, ExceptionRecordLength rounded up to ISIZE
+;
+;NOTE(review): the code from RaiseExceptRet on mirrors the DOS32EXT
+;version; whether it can be reached depends on _RtlRaiseStatus returning,
+;which is not visible here. ExceptNotHandled has no instruction before
+;ENDP -- confirm.
+
+ISIZE equ 4
+ISizeEC equ (ContextFrameLength + ISIZE - 1) and (not (ISIZE - 1))
+ISizeExceptStruct equ (ExceptionRecordLength + ISIZE - 1) and (not (ISIZE - 1))
+
+ec_off EQU 4+ISizeEc
+estruct_off EQU ec_off+ISizeExceptStruct
+
+SSAR EQU <[ebp][-4]>
+ec EQU <[ebp][-ec_off]>
+eStruct EQU <[ebp][-estruct_off]>
+
+OldEIP EQU <ebp+8>
+OldCS EQU <ebp+12>
+OldFlags EQU <ebp+16>
+
+
+EmExcept PROC NEAR
+
+ push ebp
+ mov ebp,esp
+ sub esp,estruct_off ; room for SSAR, ec and eStruct
+
+
+ ;*
+ ;* Set up SS access rights.
+ ;*
+
+ push ds ; popped again below to fill in ctx_SegDs
+ mov [ec.ctx_RegEax], eax ; save user eax before eax is used as scratch
+ GetEmData ds,ax
+
+ mov eax, ss
+ lar eax, eax
+ mov [SSAR], eax
+
+ ;*
+ ;* Fill in ExceptionContext structure.
+ ;*
+
+
+ mov dword ptr [ec.ContextFlags], NPX_CONTEXT_FULL
+ mov dword ptr [ec.ctx_Cr0NpxState], CR0_EM
+ mov [ec.ctx_RegEdi], edi
+ mov [ec.ctx_RegEsi], esi
+
+ mov eax, [ebp] ; caller's ebp, saved by the prologue above
+ mov [ec.ctx_RegEbp], eax
+
+ lea eax, [OldFlags+4] ; user esp = first byte above the return frame
+ mov [ec.ctx_RegEsp], eax
+
+ mov [ec.ctx_RegEbx], ebx
+ mov [ec.ctx_RegEdx], edx
+ mov [ec.ctx_RegEcx], ecx
+
+ mov eax, [OldEIP]
+
+ mov [ec.ctx_RegEip], eax
+ mov eax, [OldFlags]
+ mov [ec.ctx_EFlags], eax
+
+ mov eax, [OldCS]
+ movzx eax,ax
+ mov [ec.ctx_SegCs], eax
+ mov ax,ss
+ movzx eax,ax
+ mov [ec.ctx_SegSs], eax
+
+ pop eax
+ movzx eax,ax
+ mov [ec.ctx_SegDs], eax ; ds was pushed on entry.
+
+ mov ax,es
+ movzx eax,ax
+ mov [ec.ctx_SegEs], eax
+
+ mov ax,fs
+ movzx eax,ax
+ mov [ec.ctx_SegFs], eax
+
+ mov ax,gs
+ movzx eax,ax
+ mov [ec.ctx_SegGs], eax
+
+ lea esi, [ec]
+ add esi, ctx_env ; esi = NPX environment area inside the context
+
+ or EMSEG:[StatusWord], 8000H ; set 'busy' bit
+ or EMSEG:[SWerr], Summary ; set Summary bit
+ or EMSEG:[CURerr], Summary
+
+ mov cl, EMSEG:[ErrMask]
+ push ecx ; keep the mask (cl) across SaveState
+ push ebp
+ call SaveState
+ pop ebp
+ pop ecx
+
+ call GetEMSEGStatusWord ; EAX = status word
+ test al, cl ; test status word against mask
+ jne short Err00
+
+; None of the status bits selected by the mask is set.
+ifdef TRACENPX
+ mov edx, 0C1020304h ; Raise bogus exception code, to trace with
+ jmp short Err50
+endif
+ mov al, Invalid ; default: report it as Invalid Operation
+
+;
+; According to the floating error priority, we test what is the cause of
+; the NPX error and raise an appropriate exception.
+;
+
+Err00:
+ test al, Invalid ; Invalid Op?
+ jz short Err10 ; No, go check next
+
+ mov edx, XCPT_FLOAT_INVALID_OPERATION
+ test al, StackFlag ; Stack fault?
+ jz short Err50 ; No, go raise invalid op
+ mov edx, XCPT_FLOAT_STACK_CHECK
+ jmp short Err50 ; Go raise stack fault
+
+; Each code is loaded before its test, so the first matching bit wins and
+; Inexact is what remains when none of the tests matches.
+Err10: mov edx, XCPT_FLOAT_DIVIDE_BY_ZERO
+ test al, ZeroDivide
+ jnz short Err50
+ mov edx, XCPT_FLOAT_DENORMAL_OPERAND
+ test al, Denormal
+ jnz short Err50
+ mov edx, XCPT_FLOAT_OVERFLOW
+ test al, Overflow
+ jnz short Err50
+ mov edx, XCPT_FLOAT_UNDERFLOW
+ test al, Underflow
+ jnz short Err50
+ mov edx, XCPT_FLOAT_INEXACT_RESULT
+
+; edx = exception code; fill in the exception record.
+Err50: mov [eStruct.ExceptionNum], edx
+
+ xor eax,eax
+ mov [eStruct.fHandlerFlags], eax
+ mov [eStruct.NestedExceptionReportRecord], eax
+ mov dword ptr [eStruct.CParameters], 1 ; GeorgioP convention
+ mov [eStruct.ErExceptionInformation], eax ; GeorgioP convention
+
+ mov eax, EMSEG:[PrevCodeOff]
+ mov [eStruct.ExceptionAddress], eax
+
+ lea edx, [eStruct]
+
+ lea eax, [ec]
+ push ds
+ push es
+
+
+;TRUE, this is a first-chance exception
+
+ stdCall _NtRaiseException,<edx, eax, 1>
+ stdCall _RtlRaiseStatus, <eax> ; raise the status NtRaiseException returned
+
+ pop es
+ pop ds
+
+RaiseExceptRet:
+ or eax, eax
+ JZ ExceptNotHandled
+
+ ;*
+ ;* Copy new flags, cs, eip to new stack.
+ ;*
+
+ mov ds, [ec.ctx_SegSs]
+ mov esi, [ec.ctx_RegEsp] ; ds:esi == new ss:esp
+
+ mov eax, [ec.ctx_Eflags] ; set up iretd frame
+ mov [esi-4], eax
+
+ mov eax, [ec.ctx_SegCs]
+ mov [esi-8], eax
+
+ mov eax, [ec.ctx_RegEip]
+ mov [esi-12], eax
+
+ ;*
+ ;* Put new stack pointer on stack.
+ ;*
+
+ push ds
+ sub esi, 12
+ push esi
+
+ ;*
+ ;* Reset other registers.
+ ;*
+
+ mov edi, [ec.ctx_RegEdi]
+ mov esi, [ec.ctx_RegEsi]
+ mov ebx, [ec.ctx_RegEbx]
+ mov edx, [ec.ctx_RegEdx]
+ mov ecx, [ec.ctx_RegEcx]
+ mov eax, [ec.ctx_RegEax]
+ mov ds, [ec.ctx_SegDs]
+ mov es, [ec.ctx_SegEs]
+ mov fs, [ec.ctx_SegFs]
+ mov gs, [ec.ctx_SegGs]
+
+ mov ebp, [ec.ctx_RegEbp] ; must do this last.
+
+ lss esp, fword ptr [esp] ; reset ss:esp
+
+ sti ; JWM, 9/3/91
+ push [esp+8] ; JWM, 9/6/91
+ popfd ; JWM, 9/6/91
+
+ iretd ; reset flags, cs, eip
+
+ExceptNotHandled:
+EmExcept ENDP
+
+endif ; ifdef NT386
+
+ifdef DEBUG
+
+;PageFault (DEBUG builds only) - reads one byte through cs:[iax] into al
+;and returns.
+;NOTE(review): presumably used to touch an address so a page fault is
+;taken at a known place; "iax" is defined elsewhere -- confirm.
+lab PageFault
+ mov al, byte ptr cs:[iax]
+ ret
+endif
diff --git a/private/ntos/dll/i386/emf386.asm b/private/ntos/dll/i386/emf386.asm
new file mode 100644
index 000000000..40e92abda
--- /dev/null
+++ b/private/ntos/dll/i386/emf386.asm
@@ -0,0 +1,552 @@
+ subttl emf386.asm - 32 bit Emulator Interrupt Handler
+ page
+;***
+;emf386.asm - 32 bit Emulator Interrupt Handler
+;
+; IBM/Microsoft Confidential
+;
+; Copyright (c) IBM Corporation 1987, 1989
+; Copyright (c) Microsoft Corporation 1987, 1989
+;
+; All Rights Reserved
+;
+;Purpose:
+; 32 bit Emulator Interrupt Handler
+;
+;Revision History: (also see emulator.hst)
+;
+; 1/21/92 JWM Minor modifications for DOSX32 emulator
+; 8/23/91 TP Reduce to only two decoding steps
+;
+;*******************************************************************************
+
+
+;*********************************************************************;
+; ;
+; Main Entry Point and Address Calculation Procedure ;
+; ;
+; 80386 version ;
+; ;
+;*********************************************************************;
+;
+; This routine fetches the 8087 instruction, calculates memory address
+; if necessary into ES:ESI and calls a routine to emulate the instruction.
+; Most of the dispatching is done through tables. (see comments in CONST)
+;
+; The instruction dispatching is designed to favor the 386 addressing modes
+
+
+ifdef _DOS32EXT ; JWM
+public __astart
+__astart:
+ mov eax, 1
+ ret
+
+public _Ms32KrnlHandler
+_Ms32KrnlHandler:
+endif
+
+ifdef NT386
+
+;
+; NPXEmulatorTable is a table read by the Windows/NT kernel in
+; order to support the R3 emulator
+;
+; Layout (two dwords, in this order):
+; +0 offset of NpxNpHandler
+; +4 offset of tRoundMode
+;
+public _NPXEMULATORTABLE
+_NPXEMULATORTABLE label dword
+ dd offset NpxNpHandler ; Address of Ring3 Trap7 handler
+ dd offset tRoundMode ; Address of rounding vector table
+endif
+
+public NPXNPHandler
+NPXNPHandler:
+
+ifdef DEBUG
+ int 3
+endif
+ cld ; clear direction flag forever
+
+ifdef NT386
+
+
+;-- BUGBUG - bryanwi - 16Oct91 - Hack FP fix, not pointing IDT:7 at this
+; routine for 16bit code is the right thing to do.
+;
+; Check to see if we are running on flat SS. If so, assume things
+; are OK and proceed. (If a 16bit app loads the flat SS and then
+; does an FP instruction, they're hosed, no skin off our nose.)
+;
+; If SS is not what we expect, then either (a) a flat app is *very*
+; confused, or (b) a 16 bit app has hit an FP instruction. In either
+; case, this emulator is not going to work. Therefore, raise an exception.
+;
+
+; Compare SS (with the RPL bits forced on) against the flat ring 3 data
+; selector. Only ax is used, and it is saved and restored around the test.
+ push ax ; use form that will work with any SS
+ mov ax,ss
+ or ax,RPL_MASK
+ cmp ax,(KGDT_R3_DATA OR RPL_MASK)
+ pop ax ; pop leaves the flags from the cmp intact
+ jz OK_Segment ; Segments are OK, proceed normally.
+
+ jmp Around ; not flat SS: jump over the inline data below
+
+_DATA SEGMENT DWORD USE32 PUBLIC 'DATA'
+
+ align 4
+
+EmerStk db 1024 dup (?) ; *** SaveContext is assumed to be
+SaveContext db size ContextFrameLength dup (?) ; *** at the top of the EmerStk by
+SaveException db size ExceptionRecordLength dup (?) ; *** the function @ 13f:0
+
+_DATA ENDS
+
+Around:
+;
+; Trap occurred in 16bit code, get to flat environment and raise exception
+;
+; On entry the old stack holds the interrupt frame (eip, cs, eflags).
+; eax and ds are pushed on top of it, DS is switched to the flat selector,
+; and everything is popped into SaveContext. The frame is then pushed back
+; so the old stack still contains it when CsEsp is recorded.
+;
+; NOTE(review): SaveContext, SaveException and EmerStk are single static
+; buffers in _DATA, so this path looks non-reentrant - confirm that only
+; one thread can be here at a time.
+;
+
+ push eax ; save EAX on old stack
+ mov ax, ds ; only the low word of eax is DS
+ push eax ; Save DS on old stack
+
+ mov ax,(KGDT_R3_DATA OR RPL_MASK)
+ mov ds,ax
+ ASSUME DS:FLAT
+
+ pop dword ptr [SaveContext.CsSegDs] ; remove ds from old stack
+ pop dword ptr [SaveContext.CsEax] ; remove eax from old stack
+ pop dword ptr [SaveContext.CsEip] ; copy eip from old stack
+ pop dword ptr [SaveContext.CsSegCs] ; copy cs from old stack
+ pop dword ptr [SaveContext.CsEflags] ; copy eflag from old stack
+
+ push dword ptr [SaveContext.CsEFlags] ; restore eflag to old stack
+ push dword ptr [SaveContext.CsSegCs] ; restore cs to old stack
+ push dword ptr [SaveContext.CsEip] ; restore eip to old stack
+ mov dword ptr [SaveContext.CsEsp], esp ; esp still points at the frame
+
+;
+; Build rest of context frame
+;
+
+ mov dword ptr [SaveContext.CsContextFlags],CONTEXT_CONTROL OR CONTEXT_SEGMENTS OR CONTEXT_INTEGER
+ mov dword ptr [SaveContext.CsEbx], ebx
+ mov dword ptr [SaveContext.CsEcx], ecx
+ mov dword ptr [SaveContext.CsEdx], edx
+ mov dword ptr [SaveContext.CsEsi], esi
+ mov dword ptr [SaveContext.CsEdi], edi
+ mov dword ptr [SaveContext.CsEbp], ebp
+ mov dword ptr [SaveContext.CsSegEs], es
+ mov dword ptr [SaveContext.CsSegFs], fs
+ mov dword ptr [SaveContext.CsSegGs], gs
+ mov dword ptr [SaveContext.CsSegSs], ss
+
+; ax still holds the flat data selector loaded above
+ mov ss,ax ; Switch to new stack
+ mov esp,(OFFSET FLAT:EmerStk) + 1024
+ ASSUME SS:FLAT
+
+;
+; ss: flat, esp -> EmerStk
+;
+
+ mov ax,KGDT_R3_TEB OR RPL_MASK
+ mov fs, ax
+ mov ecx, fs:[TbVdm]
+ or ecx, ecx
+ jne short DoVdmFault
+
+ mov ecx, offset SaveContext ; (ecx) -> context record
+ mov edx, offset SaveException ; (edx) -> exception record
+
+ mov dword ptr [edx.ErExceptionCode],STATUS_ILLEGAL_FLOAT_CONTEXT
+ mov dword ptr [edx.ErExceptionFlags],0
+ mov dword ptr [edx.ErExceptionRecord],0
+ mov ebx, [ecx.CsEip]
+ mov [edx.ErExceptionAddress],ebx
+ mov [edx.ErNumberParameters],0
+
+;
+; ZwRaiseException(edx=ExceptionRecord, ecx=ContextRecord, TRUE=FirstChance)
+;
+
+ stdCall _ZwRaiseException, <edx, ecx, 1>
+
+;
+; If we come back HERE, things are hosed. We cannot bugcheck because
+; we are in user space, so int-3 and loop forever instead.
+;
+
+Forever:
+ int 3
+ jmp short Forever
+
+DoVdmFault:
+;
+; Does the VDM want the fault, or should the instruction be skipped
+;
+ test ds:[ecx].VtVdmContext.CsFloatSave.FpCr0NpxState, CR0_EM
+ jz short SkipNpxInstruction
+
+ add dword ptr [SaveContext.CsEsp], 12 ; remove from old stack
+
+; jump to the dos extender NPX exception handler
+
+; jmp far ptr 013fh:0
+ db 0eah
+ dd 0
+ dw 013fh
+
+;
+; Reached from DoVdmFault when CR0_EM is clear in the VDM's saved NPX state.
+; Calls NpxNpSkipInstruction on SaveContext, then reloads every register
+; from SaveContext and returns to the interrupted code with iretd.
+; NOTE(review): presumably NpxNpSkipInstruction advances CsEip past the
+; FP instruction - confirm in npxnp.c.
+;
+SkipNpxInstruction:
+ mov ax,(KGDT_R3_DATA OR RPL_MASK)
+ mov es,ax ; flat ES before the call
+
+ stdCall _NpxNpSkipInstruction, <offset SaveContext>
+
+ mov ebx, dword ptr [SaveContext.CsEbx]
+ mov ecx, dword ptr [SaveContext.CsEcx]
+ mov edx, dword ptr [SaveContext.CsEdx]
+ mov edi, dword ptr [SaveContext.CsEdi]
+ mov esi, dword ptr [SaveContext.CsEsi]
+ mov ebp, dword ptr [SaveContext.CsEbp]
+ mov gs, dword ptr [SaveContext.CsSegGs]
+ mov fs, dword ptr [SaveContext.CsSegFs]
+ mov es, dword ptr [SaveContext.CsSegEs]
+
+ mov eax, dword ptr [SaveContext.CsEsp]
+ mov ss, dword ptr [SaveContext.CsSegSs] ; switch to original stack
+ mov esp, eax ; must directly follow the ss load
+
+; Replace the frame left on the old stack with the values now in SaveContext
+ add esp, 12 ; remove eflag, cs, eip
+ push dword ptr [SaveContext.CsEflags]
+ push dword ptr [SaveContext.CsSegCs]
+ push dword ptr [SaveContext.CsEip]
+ mov eax, dword ptr [SaveContext.CsEax]
+ mov ds, dword ptr [SaveContext.CsSegDs] ; last: SaveContext is addressed via DS
+
+ iretd ; restore eflag, cs, eip
+
+OK_Segment:
+endif
+
+
+ push ds ; save segment registers
+
+ GetEmData ds
+
+ push EMSEG:[LongStatusWord] ;In case we're saving status
+ push EMSEG:[PrevCodeOff] ;In case we save environment
+;Save registers in order of their index number
+ push edi
+ push esi
+ push ebp
+ push esp
+ add dword ptr [esp],regFlg-regESP ; adjust to original esp
+ push ebx
+ push edx
+ push ecx
+ push eax
+
+ cmp EMSEG:[Einstall], 0 ; Make sure emulator is initialized.
+ je InstalEm
+
+EmInstalled:
+ mov edi,[esp].regEIP ;edi = 387 instruction address
+ movzx edx, word ptr cseg:[edi] ;dx = esc and opcode
+
+; Check for unmasked errors
+ mov al, EMSEG:[CURerr] ; fetch errors
+ and al, EMSEG:[ErrMask]
+ jnz short PossibleException
+
+; UNDONE: rip test for FWAIT in final version
+ cmp dl, 9bh ;FWAIT?
+ je sawFWAIT
+
+NoException:
+Execute387inst:
+;Enter here if look-ahead found another 387 instruction
+ mov EMSEG:[PrevCodeOff],edi
+ mov EMSEG:[CurErrCond],0 ;clear error and cond. codes, show busy
+ add edi, 2 ; point past opcode
+
+;CONSIDER: remove the two instruction below and muck with EA386Tab
+;CONSIDER: to optimize for mem ops instead of reg ops.
+ add dh,40h ; No effective address?
+ jc NoEffectiveAddress0 ; yes, go do instruction
+ rol dh,2 ; rotate MOD field next to r/m field
+ mov bl,dh
+ and ebx,1FH ; Mask to MOD and r/m fields
+MemModeDispatch: ;Label for debugging
+ jmp EA386Tab[4*ebx]
+
+
+;
+; First-use initialization, entered when [Einstall] is zero. Initializes
+; the emulator, loads the default control word, then rejoins the normal
+; path at EmInstalled (which reloads edi from the saved frame).
+; NOTE(review): relies on SetControlWord preserving edi - confirm.
+;
+InstalEm:
+ call EmulatorInit
+ mov edi,DefaultControlWord ; Default mode to start in
+ mov eax, edi
+ call SetControlWord ; Set it
+ mov EMSEG:[LongControlWord], edi ; reset reserved bits
+ jmp EmInstalled
+
+; ************************
+
+;
+; We are about to execute a new FP instruction and there is an
+; unmasked exception. Check to see if the new FP instruction is
+; a "no wait" instruction. If so, let it proceed; otherwise, raise
+; the exception.
+;
+
+PossibleException:
+;Inputs:
+; edx = first two bytes of the instruction, zero extended
+; (dl = escape opcode byte, dh = mod r/m byte)
+;Exits:
+; NoException if the instruction is one of the no-wait forms
+; (fninit, fnclex, fnstsw ax, fnstenv, fnstcw, fnsave, fnstsw m16)
+; CommonExceptions otherwise
+;Uses bl as scratch.
+
+ cmp edx, 0E3DBh ; if fninit, no exception
+ je short NoException
+
+ cmp edx, 0E2DBh ; if fnclex, no exception
+ je short NoException
+
+ cmp edx, 0E0DFh ; if "fnstsw ax", no exception
+ je short NoException
+
+ cmp dl, 0D9h ; possible encoding for fnstenv or fnstcw?
+ je short pe20 ; yes, check mod r/m
+ cmp dl, 0DDh ; possible encoding for fnsave or fnstsw?
+ jne short pe30
+
+;The no-wait stores are D9 /6, D9 /7, DD /6 and DD /7 with a memory
+;operand. With mod = 11b the same reg values encode register-form
+;instructions (D9 F0-FF are f2xm1 ... fsqrt, fsin, fcos), which must
+;report a pending exception, so the mod field has to be checked too.
+pe20: cmp dh, 0C0h ; mod = 11b (register form)?
+ jae short pe30 ; yes, not a no-wait store - raise it
+ mov bl, dh ; bl = mod r/m byte
+ and bl, 6 shl 3 ; keep upper two bits of reg field
+ cmp bl, 6 shl 3 ; is reg a 6 or 7?
+ je short NoException ; yes, no exception
+
+pe30:
+ jmp CommonExceptions ; unmasked exception is pending, raise it
+
+; ************************
+
+
+
+; 386 address modes
+
+; SIB does not handle SS overrides for ebp
+
+;SIB - decode a scale/index/base byte.
+;Inputs:
+; modval = mod field of the instruction (only 0 is treated specially)
+; edi = address of the SIB byte in the code stream
+; [esp] = saved registers, stored in order of their index number
+;Outputs:
+; esi = base + (index shl scale); any displacement that follows the SIB
+; byte for mod 1 or 2 is added by the caller
+; edi = advanced past the SIB byte (and past disp32 when mod = 0, base = 5)
+;Uses eax, ebx, cl. The saved esp slot in the frame is overwritten with 0.
+SIB macro modval
+ local SIBindex,SIBbase
+
+ movzx ebx,byte ptr cseg:[edi] ; ebx = SIB field
+ inc edi ; bump past SIB field
+ mov eax,ebx
+ and al,7 ; mask down to base register
+
+if modval eq 0
+ cmp al,5 ; base field = 5 (ebp encoding)?
+ jne short SIBbase ; no - get base register value
+ mov eax,cseg:[edi] ; eax = disp32, used in place of a base
+ add edi,4 ; bump past displacement
+ SKIP 3,SIBindex ; NOTE(review): presumably skips the 3-byte mov at SIBbase - confirm SKIP
+endif
+
+SIBbase:
+ mov eax,[esp+4*eax] ; eax = base register value
+
+SIBindex:
+; Index field 4 (the esp encoding) means "no index". The base has already
+; been read above, so the saved esp slot can be zeroed to make that case
+; contribute nothing.
+ mov [esp].regESP,0 ; no esp indexing allowed
+ mov cl,bl
+ shr cl,6 ; cl = scale factor (shift count 0-3)
+ and bl,7 shl 3 ; ebx = 8 * index register
+ shr bl,1 ; ebx = 4 * index register
+ mov esi,[esp+1*ebx] ; esi = index register value
+ shl esi,cl ; esi = scaled index register value
+ add esi,eax ; esi = SIB address value
+ endm
+
+
+ ALIGN 4
+
+SIB00:
+ SIB 00 ; decode SIB field
+ jmp CommonMemory
+
+ ALIGN 4
+
+SIB01:
+ SIB 01 ; decode SIB field
+ movsx eax,byte ptr cseg:[edi]
+ inc edi
+ add esi,eax
+ jmp short CommonMemory
+
+ ALIGN 4
+
+SIB10:
+ SIB 10 ; decode SIB field
+ mov eax,cseg:[edi]
+ add edi,4
+ add esi,eax
+ jmp short CommonMemory
+
+
+; 386 single register addressing
+
+ ALIGN 4
+
+Exx00:
+ and bl,7 shl 2 ; mask off mod bits
+ mov esi,[esp+1*ebx]
+ jmp short CommonMemory
+
+ ALIGN 4
+
+Exx01:
+ and bl,7 shl 2 ; mask off mod bits
+ mov esi,[esp+1*ebx]
+ movsx eax,byte ptr cseg:[edi]
+ inc edi
+ add esi,eax
+ jmp short CommonMemory
+
+ ALIGN 4
+
+Exx10:
+ and bl,7 shl 2 ; mask off mod bits
+ mov esi,[esp+1*ebx]
+ add esi,cseg:[edi]
+ add edi,4
+ jmp short CommonMemory
+
+
+; 386 direct addressing
+
+ ALIGN 4
+
+Direct386:
+ mov esi,cseg:[edi]
+ add edi,4
+
+CommonMemory:
+ MOV [esp].regEIP,edi ; final return offset
+
+
+; At this point ESI = memory address, dx = |Op|r/m|MOD|escape|MF|Arith|
+; Current format of opcode and address mode bytes (after rol dh,2)
+;
+; 7 6 5 4 3 2 1 0
+; |1 1 0 1 1| op1 | dl
+;
+; 7 6 5 4 3 2 1 0
+; | op2 | r/m |mod| dh
+;
+;op1 and op2 fields together make the FP opcode
+
+ rol dx,5 ; dl = | op1 | op2 |? ?|
+ and edx,0FCH ;Keep only op1 & op2 bits
+ push offset EMLFINISH
+ mov edi,EMSEG:[CURstk]
+MemOpDisp: ;Debugging label
+;edi = [CURstk]
+ jmp tOpMemDisp[edx]
+
+
+ ALIGN 4
+
+
+NoEffectiveAddress0:
+ rol dh,2
+NoEffectiveAddress: ; Either Register op or Miscellaneous
+ mov [esp].regEIP,edi ; final return offset
+
+;Current format of opcode and address mode bytes (after rol dh,2)
+;
+; 7 6 5 4 3 2 1 0
+; |1 1 0 1 1| op1 | dl
+;
+; 7 6 5 4 3 2 1 0
+; | op2 | r/m |mod| dh
+;
+;op1 and op2 fields together make the FP opcode
+
+ mov al,dh ;Save r/m bits (contains reg. no.)
+ rol dx,5 ; dl = | op1 | op2 |? ?|
+ and edx,0FCH ;Keep only op1 & op2 bits
+ push offset EMLFINISH
+ and eax,7 shl 2 ;Mask to register number * 4
+ mov edi,EMSEG:[CURstk]
+ lea esi,[2*eax+eax] ;Register no. * 12
+ add esi,edi
+ cmp esi,ENDstk ;Run past end?
+ jae RegWrap
+RegOpDisp: ;Debugging label
+;eax = r/m bits * 4
+;esi = FP register address
+;edi = [CURstk]
+ jmp tOpRegDisp[edx]
+
+ ALIGN 4
+RegWrap:
+ sub esi,ENDstk - BEGstk ;Wrap around JWM
+RegOpDispWrap: ;Debugging label
+ jmp tOpRegDisp[edx]
+
+
+SawFwait:
+ inc edi ; bump past FWAIT
+ mov [esp].regEIP,edi ; final return offset
+ mov EMSEG:[CURErr],0 ; clear current error and cond. codes
+
+; return from routine; restore registers and return
+
+ align 4
+;EMLFINISH - common completion point. Its address is pushed before each
+;dispatch, so instruction routines arrive here by ret; SawFwait falls in
+;from above. On entry esp points at the saved register frame.
+EMLFINISH:
+; check for errors
+ mov al, EMSEG:[CURerr] ; fetch errors
+ or al, EMSEG:[SWerr]
+ mov EMSEG:[SWerr],al ; set errors in sticky error flag
+ and al,EMSEG:[ErrMask]
+ jnz CommonExceptions
+
+ifdef TRACENPX
+ jmp CommonExceptions
+endif
+
+if DBG eq 0
+
+;
+; On a free build, look ahead to next instruction
+;
+
+;09BH is FWAIT - just skip it
+;0D8H - 0DFH is 387 instruction, emulate it
+ mov edi,[esp].regEIP ;edi = 387 instruction address
+ mov dx,cseg:[edi]
+ cmp dl,09BH ;FWAIT?
+ jz short SawFwait
+; dl is reduced by 0D8H here; the decode at Execute387inst only keeps the
+; low three bits of dl, which the subtraction leaves unchanged for D8-DF.
+ sub dl,0D8H
+ cmp dl,8
+ jb ReExecute
+endif
+ mov ebx,[esp].[OldLongStatus]
+ and ebx,LongSavedFlags ;preserve condition codes, error flags
+ or EMSEG:[LongStatusWord],ebx ;merge saved status word, condition codes
+
+; Unwind the frame in the reverse of the order it was pushed at entry
+ pop eax
+ pop ecx
+ pop edx
+ pop ebx
+ add esp,4 ; toss esp value
+ pop ebp
+ pop esi
+ pop edi
+ add esp,8 ;toss old PrevCodeOff and StatusWord
+ mov EMSEG:[CURerr],Summary ;Indicate we are not busy
+ pop ds
+ error_return ; common exit sequence
+
+;ReExecute - the look-ahead in EMLFINISH found another 387 instruction.
+;Refreshes the frame so it looks as if that instruction had just trapped,
+;then re-enters the decoder without leaving the emulator.
+;On entry: edi = address of the next instruction, dx = its first two bytes
+;(dl already reduced by 0D8H).
+ReExecute:
+ mov eax,EMSEG:[LongStatusWord]
+ mov ebx,[esp].[OldLongStatus]
+ and ebx,LongSavedFlags ;preserve condition codes, error flags
+ or eax,ebx ;merge saved status word, condition codes
+ mov [esp].OldLongStatus,eax
+ mov eax,EMSEG:[PrevCodeOff]
+ mov [esp].OldCodeOff,eax
+; The SIB decoder zeroes the saved esp slot; put back the same value the
+; entry code computed (frame address + regFlg + 4).
+ lea eax,[esp+regFlg+4] ;must restore "saved" esp
+ mov [esp].RegEsp,eax
+ jmp Execute387inst
diff --git a/private/ntos/dll/i386/emfadd.asm b/private/ntos/dll/i386/emfadd.asm
new file mode 100644
index 000000000..cdd0a8f58
--- /dev/null
+++ b/private/ntos/dll/i386/emfadd.asm
@@ -0,0 +1,396 @@
+ subttl emfadd.asm - Addition and Subtraction
+ page
+;*******************************************************************************
+; Copyright (c) Microsoft Corporation 1991
+; All Rights Reserved
+;
+;emfadd.asm - long double add and subtract
+; by Tim Paterson
+;
+;Purpose:
+; Long double add/subtract.
+;Outputs:
+; Jumps to [RoundMode] to round and store result.
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+;
+;*******************************************************************************
+
+;*******************************************************************************
+; Dispatch for Add/Sub/Subr
+;
+; Signs are passed in dx:
+; xor source sign with dl
+; xor dest sign with dh
+;
+;One operand has been loaded into ecx:ebx:esi ("source"), the other is
+;pointed to by edi ("dest").
+;
+;Tag of source is shifted. Tag values are as follows:
+.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero
+.erre TAG_VALID eq 1
+.erre TAG_ZERO eq 2
+.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty
+;Any special case routines not found in this file are in emarith.asm
+tFaddDisp label dword ;Source (reg) Dest (*[di])
+ dd AddDouble ;single single
+ dd AddDouble ;single double
+ dd AddSourceSign ;single zero
+ dd AddSpclDest ;single special
+ dd AddDouble ;double single
+ dd AddDouble ;double double
+ dd AddSourceSign ;double zero
+ dd AddSpclDest ;double special
+ dd AddDestSign ;zero single
+ dd AddDestSign ;zero double
+ dd AddZeroZero ;zero zero
+ dd AddSpclDest ;zero special
+ dd AddSpclSource ;special single
+ dd AddSpclSource ;special double
+ dd AddSpclSource ;special zero
+ dd TwoOpBothSpcl ;special special
+ dd AddTwoInf ;Two infinities
+
+EM_ENTRY eFISUB16
+eFISUB16:
+ call Load16Int
+ mov dx,bSign ;Change sign of source
+ jmp AddSetResult
+
+EM_ENTRY eFISUBR16
+eFISUBR16:
+ call Load16Int
+ mov dx,bSign shl 8 ;Change sign of dest
+ jmp AddSetResult
+
+EM_ENTRY eFIADD16
+eFIADD16:
+ call Load16Int
+ xor edx,edx ;Both signs positive
+ jmp AddSetResult
+
+EM_ENTRY eFISUB32
+eFISUB32:
+ call Load32Int
+ mov dx,bSign ;Change sign of source
+ jmp AddSetResult
+
+EM_ENTRY eFISUBR32
+eFISUBR32:
+ call Load32Int
+ mov dx,bSign shl 8 ;Change sign of dest
+ jmp AddSetResult
+
+EM_ENTRY eFIADD32
+eFIADD32:
+ call Load32Int
+ xor edx,edx ;Both signs positive
+ jmp AddSetResult
+
+EM_ENTRY eFSUB32
+eFSUB32:
+ call Load32Real
+ mov dx,bSign ;Change sign of source
+ jmp AddSetResult
+
+EM_ENTRY eFSUBR32
+eFSUBR32:
+ call Load32Real
+ mov dx,bSign shl 8 ;Change sign of dest
+ jmp AddSetResult
+
+EM_ENTRY eFADD32
+eFADD32:
+ call Load32Real
+ xor edx,edx ;Both signs positive
+ jmp AddSetResult
+
+EM_ENTRY eFSUB64
+eFSUB64:
+ call Load64Real
+ mov dx,bSign ;Change sign of source
+ jmp AddSetResult
+
+EM_ENTRY eFSUBR64
+eFSUBR64:
+ call Load64Real
+ mov dx,bSign shl 8 ;Change sign of dest
+ jmp AddSetResult
+
+EM_ENTRY eFADD64
+eFADD64:
+ call Load64Real
+ xor edx,edx ;Both signs positive
+ jmp AddSetResult
+
+
+PolyAddDouble:
+;This entry point is used by polynomial evaluator.
+;It checks the operand in registers for zero, and doesn't require
+;signs to be set up in dx.
+;
+;op1 mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl
+;edi = pointer to op2 in ds
+ xor edx,edx ;Addition
+ cmp cl,bTAG_ZERO ;Adding to zero?
+ jnz AddDouble
+;Number in registers is zero, so just return value from memory.
+ mov ecx,EMSEG:[edi].ExpSgn
+ mov ebx,EMSEG:[edi].lManHi
+ mov esi,EMSEG:[edi].lManLo
+ ret
+
+EM_ENTRY eFSUBPreg
+eFSUBPreg:
+ push offset PopWhenDone
+
+EM_ENTRY eFSUBreg
+eFSUBreg:
+ xchg esi,edi
+
+EM_ENTRY eFSUBtop
+eFSUBtop:
+ mov dx,bSign ;Change sign of source
+ jmp AddHaveSgn
+
+EM_ENTRY eFSUBRPreg
+eFSUBRPreg:
+ push offset PopWhenDone
+
+EM_ENTRY eFSUBRreg
+eFSUBRreg:
+ xchg esi,edi
+
+EM_ENTRY eFSUBRtop
+eFSUBRtop:
+ mov dx,bSign shl 8 ;Change sign of dest
+ jmp AddHaveSgn
+
+
+;InsignifAdd - the exponents differ by more than 65, so the smaller operand
+;can only affect rounding. Returns the larger operand with a sticky
+;indication in eax.
+;On entry:
+; ebx:esi = mantissa of larger operand, high half of ecx = its exponent
+; ch bit 7 = set if the operand signs differ, ch bit 6 = sign of result
+InsignifAdd:
+ mov eax,1 ;Set sticky bit
+ shl ch,1 ;Get sign, CY set IFF subtracting mant.
+ jnc ReturnOp1
+;Subtracting: result is just below the larger operand, so borrow one
+;from the mantissa and make the extension all ones.
+;NOTE(review): if ebx:esi was exactly 80000000:00000000 the borrow clears
+;the MSB - confirm the rounding routines tolerate that.
+ sub esi,eax ;Subtract 1 from mantissa
+ sbb ebx,0
+ neg eax ;eax = 0FFFFFFFFH
+ReturnOp1:
+;ebx:esi:eax = normalized unrounded mantissa
+;high half of ecx = exponent
+;high bit of ch = sign
+ jmp EMSEG:[RoundMode]
+
+EM_ENTRY eFADDPreg
+eFADDPreg:
+ push offset PopWhenDone
+
+EM_ENTRY eFADDreg
+eFADDreg:
+ xchg esi,edi
+
+EM_ENTRY eFADDtop
+eFADDtop:
+ xor edx,edx ;Both signs positive
+AddHaveSgn:
+ mov ecx,EMSEG:[esi].ExpSgn
+ mov ebx,EMSEG:[esi].lManHi
+ mov esi,EMSEG:[esi].lManLo
+AddSetResult:
+ mov ebp,offset tFaddDisp
+ mov EMSEG:[Result],edi ;Save result pointer
+ mov al,cl
+ mov ah,EMSEG:[edi].bTag
+ test ax,ZEROorSPCL * 100H + ZEROorSPCL
+ jnz TwoOpDispatch
+
+;.erre AddDouble eq $ ;Fall into AddDouble
+
+;*********
+AddDouble:
+;*********
+;
+;op1 mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
+;dl = sign change for op1
+;dh = sign change for op2
+;edi = pointer to op2
+
+ xor ch,dl ;Flip sign if subtracting
+ mov eax,EMSEG:[edi].ExpSgn
+ xor ah,dh ;Flip sign if subtracting
+ mov edx,EMSEG:[edi].lManHi
+ mov edi,EMSEG:[edi].lManLo
+
+AddDoubleReg:
+;op1 mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
+;op2 mantissa in edx:edi, exponent in high eax, sign in ah bit 7
+
+ cmp eax,ecx ;Compare exponents
+.erre TexpBias eq 0 ;Not biased, use signed jump
+ jle short HavLg ;op1 is larger, we have the right order
+ xchg esi,edi
+ xchg ebx,edx
+ xchg eax,ecx
+HavLg:
+;Larger in ebx:esi. Note that if the exponents were equal, things like
+;the sign bit or tag may have determined which is "larger". It doesn't
+;matter which is which if the exponents are equal, however.
+ and ah,80H ;Keep sign bit
+ sar ch,1 ;Extend sign into bit 6 of byte
+ xor ch,ah ;See if signs are the same
+ xor ax,ax ;Clear out sign and tag
+ neg eax ;ax still 0
+ add eax,ecx ;Get exponent difference
+ shr eax,16 ;Bring exp. difference down to low end
+ jz short Aligned
+ cmp eax,64+1 ;Is difference in range?
+;CONSIDER: tell me again why 1/4 LSB could have effect. It seems like
+;CONSIDER: 1/2 LSB is the limit.
+ ja short InsignifAdd ; (Even 1/4 LSB could have effect)
+ mov cl,al ;Shift count to cl
+;High half ecx = exponent
+;ch bit 7 = sign difference
+;ch bit 6 = sign
+;cl = shift count
+ xor eax,eax ;Prepare to take bits shifted out
+ cmp cl,32 ;More than a whole word?
+ jb short ShortShift
+ xchg eax,edx ;Save bits shifted out in eax
+ xchg edi,eax
+ sub cl,32
+ cmp cl,8 ;Safe to shift this much
+ jb short ShortSticky
+;Collapse all (sticky) bits of eax into LSB of edi
+ neg eax ;Sets CY if eax was not zero
+ sbb eax,eax ;-1 if CY was set, zero otherwise
+ neg eax ;Sticky bit in LSB only
+ or di,ax ;Move sticky bit up
+ cmp cl,32 ;Less than another Dword?
+ jb short ShortShift
+ mov eax,edi
+ xor edi,edi ;edx = edi = 0
+ShortSticky:
+;Shift will not be more than 8 bits
+ or ah,al ;Move up sticky bits
+ShortShift:
+ shrd eax,edi,cl ;Save bits shifted out in eax
+ shrd edi,edx,cl
+ shr edx,cl
+Aligned:
+ shl ch,1 ;Were signs the same?
+ jc short SubMant ;No--go subtract mantissas
+;Add mantissas
+ add esi,edi
+ adc ebx,edx
+ jnc short AddExit
+;Addition of mantissas overflowed. Bump exponent and shift right
+ shrd eax,esi,1
+ shrd esi,ebx,1 ;Faster than RCR
+ sar ebx,1
+ or ebx,1 shl 31 ;Set MSB
+ add ecx,1 shl 16
+AddExit:
+;ebx:esi:eax = normalized unrounded mantissa
+;high half of ecx = exponent
+;high bit of ch = sign
+ jmp EMSEG:[RoundMode]
+
+NegMant:
+;To get here, exponents must have been equal and op2 was bigger than op1.
+;Note that this means nothing ever got shifted into eax.
+ not ch ;Change sign of result
+ not ebx
+ neg esi
+ sbb ebx,-1
+ js short AddExit ;Already normalized?
+ test ebx,40000000H ;Only one bit out of normal?
+ jz short NormalizeAdd
+ jmp short NormOneBit
+
+SubMant:
+;Subtract mantissas
+ neg eax ;Pretend minuend is zero extended
+ sbb esi,edi
+ sbb ebx,edx
+ jc short NegMant
+ js short AddExit ;Already normalized?
+NormChk:
+ test ebx,40000000H ;Only one bit out of normal?
+ jz short NormalizeAdd
+;One bit normalization
+NormOneBit:
+ sub ecx,1 shl 16 ;Adjust exponent
+ShiftOneBit: ;Entry point from emfmul.asm
+ shld ebx,esi,1
+ shld esi,eax,1
+ shl eax,1
+ jmp EMSEG:[RoundMode]
+
+;***********
+AddZeroZero: ;Entry point for adding two zeros
+;***********
+;On entry:
+; ch = sign of source, dl/dh = sign flips for source/dest
+; edi = pointer to dest
+;If the effective signs agree, the dest keeps that sign and is the result.
+;If they differ, FindZeroSign picks the sign from the rounding mode.
+ mov ah,EMSEG:[edi].bSgn ;Get sign of op
+ xor ch,dl ;Possibly subtracting source
+ xor ah,dh ;Possibly subtracting dest
+ xor ch,ah ;Do signs match?
+ js FindZeroSign ;No - use rounding mode to set sign
+ mov EMSEG:[edi].bSgn,ah ;Correct the sign if subtracting
+ ret ;Result at [edi] is now correct
+
+ZeroChk:
+;Upper 64 bits were all zero, but there could be 1 bit in the MSB
+;of eax.
+ or eax,eax
+ jnz short OneBitLeft
+ mov ebx,eax
+ mov esi,eax ;Zero mantissa
+FindZeroSign:
+;Round to -0 if "round down" mode, round to +0 otherwise
+ xor ecx,ecx ;Zero exponent, positive sign
+ mov dl,EMSEG:[CWcntl] ;Get control word
+ and dl,RoundControl
+ cmp dl,RCdown ;Rounding down?
+ jnz ZeroJmp
+ mov ch,80H ;Set sign bit
+ZeroJmp:
+ mov cl,bTAG_ZERO
+ jmp EMSEG:[ZeroVector]
+
+OneBitLeft:
+ xchg ebx,eax ;Bit now normalized
+ sub ecx,64 shl 16 ;Adjust exponent
+ jmp EMSEG:[RoundMode]
+
+NormalizeAdd:
+;Inputs:
+; ebx:esi:eax = 65-bit number
+; ecx high half = exponent
+;
+;Since we are more than 1 bit out of normalization, exponents must have
+;differed by 0 or 1. Thus rounding will not be necessary for 64 bits.
+;
+;Exits:
+; to [RoundMode] with ebx:esi normalized, eax = 0 and the exponent in
+; the high half of ecx reduced by the number of bits shifted
+; to ZeroChk if ebx and esi are both zero
+;cl is used as the shift count.
+ bsr edx,ebx ;Scan for MSB
+ jnz short ShortNorm
+ bsr edx,esi
+ jz short ZeroChk
+ sub ecx,32 shl 16 ;Adjust exponent
+ mov ebx,esi ;Push it up 32 bits
+ mov esi,eax
+;The extension bits now live in esi. Clear eax so the second shld below
+;shifts in zeros instead of feeding the same bits into esi again.
+ xor eax,eax
+ShortNorm:
+;Bit number in edx ranges from 0 to 31
+ mov cl,dl
+ not cl ;Convert bit number to shift count
+ shld ebx,esi,cl
+ shld esi,eax,cl
+ shl edx,16 ;Move exp. adjustment to high end
+ lea ecx,[ecx+edx-(31 shl 16)] ;Adjust exponent
+ xor eax,eax ;No extra bits
+ jmp EMSEG:[RoundMode]
+
+AddDestSign:
+ xor EMSEG:[edi].bSgn,dh
+ ret
+
+AddSourceSign:
+ xor ch,dl
+ jmp SaveResult
diff --git a/private/ntos/dll/i386/emfcom.asm b/private/ntos/dll/i386/emfcom.asm
new file mode 100644
index 000000000..43b5672c6
--- /dev/null
+++ b/private/ntos/dll/i386/emfcom.asm
@@ -0,0 +1,402 @@
+ subttl emfcom.asm - Comparison Instructions
+ page
+;*******************************************************************************
+;emfcom.asm - Comparison Instructions
+;
+; Microsoft Confidential
+;
+; Copyright (c) Microsoft Corporation 1991
+; All Rights Reserved
+;
+;Purpose:
+; FCOM,FCOMP,FCOMPP,FUCOM,FUCOMP,FUCOMPP,FTST,FXAM instructions
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+;*******************************************************************************
+;Dispatch table for compare
+;
+;One operand has been loaded into ecx:ebx:esi ("source"), the other is
+;pointed to by edi ("dest").
+;
+;Tag of source is shifted. Tag values are as follows:
+.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero
+.erre TAG_VALID eq 1
+.erre TAG_ZERO eq 2
+.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty
+;Any special case routines not found in this file are in emarith.asm
+tFcomDisp label dword ;Source (reg) Dest (*[di] = ST)
+ dd ComDouble ;single single
+ dd ComDouble ;single double
+ dd ComDestZero ;single zero
+ dd ComSpclDest ;single special
+ dd ComDouble ;double single
+ dd ComDouble ;double double
+ dd ComDestZero ;double zero
+ dd ComSpclDest ;double special
+ dd ComSrcZero ;zero single
+ dd ComSrcZero ;zero double
+ dd ComEqual ;zero zero
+ dd ComSpclDest ;zero special
+ dd ComSpclSource ;special single
+ dd ComSpclSource ;special double
+ dd ComSpclSource ;special zero
+ dd ComBothSpcl ;special special
+
+
+EM_ENTRY eFICOMP16
+eFICOMP16:
+ and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3
+ push offset PopWhenDone
+ push offset ComOpLoaded
+ jmp Load16Int ;Returns to ComOpLoaded
+
+EM_ENTRY eFICOM16
+eFICOM16:
+ and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3
+ push offset ComOpLoaded
+ jmp Load16Int ;Returns to ComOpLoaded
+
+EM_ENTRY eFICOMP32
+eFICOMP32:
+ and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3
+ push offset PopWhenDone
+ push offset ComOpLoaded
+ jmp Load32Int ;Returns to ComOpLoaded
+
+EM_ENTRY eFICOM32
+eFICOM32:
+ and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3
+ push offset ComOpLoaded
+ jmp Load32Int ;Returns to ComOpLoaded
+
+EM_ENTRY eFCOMP32
+eFCOMP32:
+ and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3
+ push offset PopWhenDone
+ push offset ComOpLoaded
+ jmp Load32Real ;Returns to ComOpLoaded
+
+EM_ENTRY eFCOM32
+eFCOM32:
+ and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3
+ push offset ComOpLoaded
+ jmp Load32Real ;Returns to ComOpLoaded
+
+EM_ENTRY eFCOMP64
+eFCOMP64:
+ and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3
+ push offset PopWhenDone
+ push offset ComOpLoaded
+ jmp Load64Real ;Returns to ComOpLoaded
+
+EM_ENTRY eFCOM64
+eFCOM64:
+ and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3
+ push offset ComOpLoaded
+ jmp Load64Real ;Returns to ComOpLoaded
+
+EM_ENTRY eFUCOMPP
+eFUCOMPP:
+ and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3
+ push offset ComPop2
+ jmp eFUCOM0
+
+EM_ENTRY eFUCOMP
+eFUCOMP:
+ and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3
+ push offset PopWhenDone
+ jmp eFUCOM0
+
+EM_ENTRY eFUCOM
+eFUCOM:
+ and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3
+eFUCOM0:
+;esi = pointer to st(i) from instruction field
+;edi = [CURstk]
+ mov ecx,EMSEG:[esi].ExpSgn
+ mov ebx,EMSEG:[esi].lManHi
+ mov esi,EMSEG:[esi].lManLo
+ mov dl,40H ;Flag FUCOM - Look for SNAN
+ jmp UComOpLoaded
+
+EM_ENTRY eFCOMPP
+eFCOMPP:
+ and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3
+ push offset ComPop2
+ jmp eFCOM0
+
+EM_ENTRY eFCOMP
+eFCOMP:
+ and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3
+ push offset PopWhenDone
+ jmp eFCOM0
+
+EM_ENTRY eFCOM
+eFCOM:
+ and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3
+eFCOM0:
+;esi = pointer to st(i) from instruction field
+;edi = [CURstk]
+ mov ecx,EMSEG:[esi].ExpSgn
+ mov ebx,EMSEG:[esi].lManHi
+ mov esi,EMSEG:[esi].lManLo
+
+ComOpLoaded:
+; mov EMSEG:[UpdateCCodes],1
+ mov dl,0 ;flag FCOM - Look for any NAN
+UComOpLoaded:
+ mov ebp,offset tFcomDisp
+ mov al,cl
+ mov ah,EMSEG:[edi].bTag
+ test ax,ZEROorSPCL * 100H + ZEROorSPCL
+ jnz TwoOpDispatch
+
+;.erre ComDouble eq $ ;Fall into ComDouble
+
+;*********
+ComDouble:
+;*********
+;
+;ebx:esi = op1 mantissa
+;ecx = op1 sign in bit 15, exponent in high half
+;edi = pointer to op2
+;
+;Compares ST (op2, at [edi]) with op1 and stores the result in [SWcc].
+;Order of tests: sign, then exponent, then mantissa high, then low.
+ mov eax,EMSEG:[edi].ExpSgn
+ and ax,bSign shl 8 ;Keep sign only
+ and cx,bSign shl 8
+ cmp ah,ch ;Are signs the same?
+ jnz StBigger
+;Signs match, so the low words are equal and this signed compare is
+;decided by the exponents in the high halves.
+ cmp eax,ecx ;Are exponents the same?
+ jl StSmaller
+ jg StBigger
+ cmp EMSEG:[edi].lManHi,ebx ;Compare mantissas
+ jnz MantDif
+ cmp EMSEG:[edi].lManLo,esi ;Set flags for ST - src
+ jz ComEqual
+MantDif:
+;CY = ST mantissa below op1 mantissa. al is zero here (cleared by the
+;mask above), so adc leaves just the carry in bit 0.
+ adc al,al ;Copy CY flag to bit 0
+ rol ah,1 ;Rotate sign to bit 0
+ xor al,ah ;Flip saved CY bit if negative
+ mov EMSEG:[SWcc],al ;Set condition code
+ ret
+
+StSmaller:
+ not ah
+StBigger:
+;ah = sign of ST
+;ch = sign of other operand
+;ST is bigger if it is positive (smaller if it is negative).
+;Use the sign bit directly as the "less than" bit C0.
+.erre C0 eq 1
+ shr ah,7 ;Bring sign down to bit 0, clear CY
+ mov EMSEG:[SWcc],ah ;Bit set if ST smaller (negative)
+ ret
+
+ComEqual:
+ mov EMSEG:[SWcc],CCequal
+ ret
+
+
+
+PopWhenDone:
+.erre bTAG_NOPOP eq -1
+ inc cl ;OK to pop?
+ jz ComPopX ;No - had unmasked Invalid Operation
+
+ POPSTret
+
+;ComPop2 - pop the register stack twice after a compare, unless cl holds
+;bTAG_NOPOP. Marks ST and ST(1) empty and advances [CURstk] by two
+;registers; the two wrap-around cases are finished by PopOneOver and
+;PopTwoOver. Uses esi.
+ComPop2:
+.erre bTAG_NOPOP eq -1
+ inc cl ;OK to pop?
+ jz ComPopX ;No - had unmasked Invalid Operation
+ mov esi,EMSEG:[CURstk]
+ mov EMSEG:[esi].bTag,bTAG_EMPTY ;ST is now empty
+ add esi,Reg87Len*2
+ cmp esi,ENDstk ;JWM
+ je PopOneOver ;New top wraps to the start of the stack
+ ja PopTwoOver ;ST(1) had already wrapped
+ mov EMSEG:[esi-Reg87Len].bTag,bTAG_EMPTY ;ST(1) is now empty
+ mov EMSEG:[CURstk],esi
+ComPopX:
+ ret
+
+PopOneOver:
+ mov EMSEG:[CURstk],BEGstk ;JWM
+ifdef NT386
+ mov EMSEG:[INITstk].bTAG,bTAG_EMPTY
+else
+ mov EMSEG:[XINITstk].bTAG,bTAG_EMPTY
+endif
+ ret
+
+PopTwoOver:
+ mov EMSEG:[CURstk],BEGstk+Reg87Len ;JWM
+ifdef NT386
+ mov EMSEG:[BEGstk].bTAG,bTAG_EMPTY
+else
+ mov EMSEG:[XBEGstk].bTAG,bTAG_EMPTY
+endif
+ ret
+
+;*******************************************************************************
+;Special cases for FCOM/FUCOM.
+;These don't share with those in emarith.asm because NANs are treated
+;differently.
+ComDestZero:
+;ST is zero, so Src is bigger if it is positive (smaller if it is negative).
+;Use the inverted sign bit of Src directly as the "less than" bit C0.
+;Also used when Src is infinity, where only the sign of Src matters.
+;ch = sign of Src in bit 7.
+ not ch ;C0 is 1 if ST < Src
+.erre C0 eq 1
+ shr ch,7 ;Bring inverted sign down to bit 0
+ mov EMSEG:[SWcc],ch ;Bit set if Src is positive (ST < Src)
+ ret
+
+ComSrcZero:
+;ST is bigger if it is positive (smaller if it is negative).
+;Use the sign bit directly as the "less than" bit C0.
+ mov al,EMSEG:[edi].bSgn
+.erre C0 eq 1
+ shr al,7 ;Bring sign down to bit 0
+ mov EMSEG:[SWcc],al ;Bit set if ST smaller (negative)
+ ret
+
+ComSpclSource:
+ cmp cl,bTAG_NAN
+ jz ComSrcNAN
+ cmp cl,bTAG_INF
+ jz ComDestZero
+ cmp cl,bTAG_DEN
+ jz ComDenormal
+;Must be empty
+ComEmpty:
+ mov EMSEG:[CURerr],Invalid+StackFlag
+ jmp ComChkMask
+
+ComSrcNAN:
+ shl edx,24 ;Move dl to high byte
+ test ebx,edx ;See if we report error with this NAN
+ComChkNAN:
+ jnz Incomp
+ComInvalid:
+ mov EMSEG:[CURerr],Invalid ;Flag the error
+ComChkMask:
+ test EMSEG:[CWmask],Invalid ;Is exception masked?
+ jnz Incomp
+ mov cl,bTAG_NOPOP ;Unmasked, don't pop stack
+Incomp:
+ mov EMSEG:[SWcc],CCincomprable
+ ret
+
+ComSpclDest:
+ mov al,EMSEG:[edi].bTag
+ cmp al,bTAG_INF
+ jz ComSrcZero
+ cmp al,bTAG_Empty
+ jz ComEmpty
+ cmp al,bTAG_DEN
+ jz ComDenormal
+;Must be NAN
+ComDestNAN:
+ test EMSEG:[edi].bMan7,dl ;See if we report error with this NAN
+ jmp ComChkNAN
+
+ComBothSpcl:
+ mov al,EMSEG:[edi].bTag
+ cmp cl,bTAG_EMPTY
+ jz ComEmpty
+ cmp al,bTAG_EMPTY
+ jz ComEmpty
+ cmp cl,bTAG_NAN
+ jz ComSrcNAN
+ cmp al,bTAG_NAN
+ jz ComDestNAN
+ mov ah,cl
+ cmp ax,(bTAG_INF shl 8) + bTag_INF ;Are both Infinity?
+ jz ComDouble ;If so, compare their signs
+;Must have at least one denormal
+ComDenormal:
+ or EMSEG:[CURerr],Denormal
+ jmp ComDouble
+
+;*******************************************************************************
+
+;FXAM classification codes, as condition code bits. The sign bit is
+;reported separately in C1 and is merged in by eFXAM.
+XAM_Unsupported equ 0
+XAM_NAN equ C0
+XAM_Norm equ C2
+XAM_Inf equ C2+C0
+XAM_Zero equ C3
+XAM_Empty equ C3+C0
+XAM_Den equ C3+C2
+
+;tXamTag - 16-entry byte table indexed by the low four bits of the tag.
+;Each .erre checks that the entry which follows sits at the offset equal
+;to its tag value. Tag values with no entry of their own are filled with
+;XAM_Unsupported.
+tXamTag label byte
+.erre TAG_SNGL eq $-tXamTag
+ db XAM_Norm ;TAG_SNGL
+.erre TAG_VALID eq $-tXamTag
+ db XAM_Norm ;TAG_VALID
+.erre TAG_ZERO eq $-tXamTag
+ db XAM_Zero ;TAG_ZERO
+.erre TAG_EMPTY eq $-tXamTag
+ db XAM_Empty ;TAG_EMPTY
+ db 3 dup (XAM_Unsupported) ;Unused tag values
+.erre TAG_INF eq $-tXamTag
+ db XAM_Inf ;TAG_INF
+ db 3 dup (XAM_Unsupported) ;Unused tag values
+.erre TAG_NAN eq $-tXamTag
+ db XAM_NAN ;TAG_NAN
+ db 3 dup (XAM_Unsupported) ;Unused tag values
+.erre TAG_DEN eq $-tXamTag
+ db XAM_Den ;TAG_DEN
+
+;eFXAM - classify ST. Looks up the class bits for ST's tag in tXamTag,
+;puts the sign of ST in C1, and stores the result in [SWcc].
+;Uses eax, bl.
+EM_ENTRY eFXAM
+eFXAM:
+;edi = [CURstk]
+; (+4 skips the return address that sits on top of the saved frame)
+ and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3
+ mov eax,EMSEG:[edi].ExpSgn ;Get sign and tag
+ mov bl,ah ;Save sign
+ and bl,bSign ;Keep only sign bit
+ and eax,0FH ;Save low 4 bits of tag
+ mov al,tXamTag[eax] ;Lookup cond. codes for this tag
+.erre C1 eq 2 ;Bit 1
+.erre bSign eq 80H ;Bit 7
+ shr bl,7-1 ;Move sign bit to CC C1
+ or al,bl
+ mov EMSEG:[SWcc],al
+ ret
+
+;*******************************************************************************
+
+EM_ENTRY eFTST
+eFTST:
+;edi = [CURstk]
+ and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3
+ mov eax,EMSEG:[edi].ExpSgn
+ cmp al,bTAG_ZERO
+ jz ComEqual
+ ja TestSpcl
+;Either single or double, non-zero. Just check sign.
+TestSign:
+ shr ah,7 ;Bring sign down to bit 0
+ mov EMSEG:[SWcc],ah ;Bit set if negative
+ ret
+
+TestSpcl:
+ cmp al,bTAG_INF
+ jz TestSign ;Normal test for Infinity
+ cmp al,bTAG_EMPTY
+ jz ComEmpty
+ cmp al,bTAG_NAN
+ jz ComInvalid
+;Must be denormal
+ mov EMSEG:[CURerr],Denormal
+ jmp TestSign
diff --git a/private/ntos/dll/i386/emfconst.asm b/private/ntos/dll/i386/emfconst.asm
new file mode 100644
index 000000000..210cccf65
--- /dev/null
+++ b/private/ntos/dll/i386/emfconst.asm
@@ -0,0 +1,126 @@
+ subttl emfconst.asm - Loading of 387 on chip constants
+ page
+;*******************************************************************************
+;emfconst.asm - Loading of 387 on chip constants
+;
+; Microsoft Confidential
+;
+; Copyright (c) Microsoft Corporation 1991
+; All Rights Reserved
+;
+;Purpose:
+; FLDZ, FLD1, FLDPI, FLDL2T, FLDL2E, FLDLG2, FLDLN2 instructions
+;Inputs:
+; edi = [CURstk]
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+ PrevStackWrap edi,Ld1 ;Tied to PrevStackElem below
+
+EM_ENTRY eFLD1
+eFLD1: ;FLD1: push the constant +1.0
+;edi = [CURstk]
+ PrevStackElem edi,Ld1 ;Point to receiving location
+ cmp EMSEG:[edi].bTag,bTAG_EMPTY ;Is it empty?
+ jnz FldErr ;in emload.asm
+ mov EMSEG:[CURstk],edi ;Receiving location becomes the new stack top
+ mov EMSEG:[edi].lManLo,0 ;Low mantissa dword is zero
+ mov EMSEG:[edi].lManHi,1 shl 31 ;Only the most significant mantissa bit set
+ mov EMSEG:[edi].ExpSgn,bTAG_SNGL ;Exponent and sign are zero
+ ret
+
+
+ PrevStackWrap edi,Ldz ;Tied to PrevStackElem below
+
+EM_ENTRY eFLDZ
+eFLDZ: ;FLDZ: push the constant +0.0
+;edi = [CURstk]
+ PrevStackElem edi,Ldz ;Point to receiving location
+ cmp EMSEG:[edi].bTag,bTAG_EMPTY ;Is it empty?
+ jnz FldErr ;in emload.asm
+ mov EMSEG:[CURstk],edi ;Receiving location becomes the new stack top
+ mov EMSEG:[edi].lManLo,0 ;Mantissa is all zero
+ mov EMSEG:[edi].lManHi,0
+ mov EMSEG:[edi].ExpSgn,bTAG_ZERO ;Exponent and sign are zero
+ ret
+
+;*******************************************************************************
+
+;The 5 irrational constants need to be adjusted according to rounding mode.
+
+DefConst macro cName,low,high,expon,round ;Defines c<name>lo/hi/exp/rnd equates
+c&cName&lo equ low
+c&cName&hi equ high
+c&cName&exp equ expon
+c&cName&rnd equ round
+ endm
+
+DefConst FLDL2T,0CD1B8AFEH,0D49A784BH,00001H,0 ;log2(10); nearest rounded down
+
+DefConst FLDL2E,05C17F0BCH,0B8AA3B29H,00000H,1 ;log2(e); nearest rounded up
+
+DefConst FLDLG2,0FBCFF799H,09A209A84H,0FFFEH,1 ;log10(2); nearest rounded up
+
+DefConst FLDLN2,0D1CF79ACH,0B17217F7H,0FFFFH,1 ;ln(2); nearest rounded up
+
+DefConst FLDPI,02168C235H,0C90FDAA2H,00001H,1 ;pi; nearest rounded up
+
+
+LoadConstant macro cName,nojmp ;Emits entry e<name>; non-blank nojmp omits the jmp
+EM_ENTRY e&cName
+e&cName:
+ mov ebx,c&cName&hi
+ mov edx,c&cName&lo
+ mov ecx,c&cName&exp shl 16 + c&cName&rnd
+ifb <nojmp>
+ jmp CommonConst
+endif
+ endm
+
+LoadConstant FLDL2T
+
+LoadConstant FLDL2E
+
+LoadConstant FLDLG2
+
+LoadConstant FLDLN2
+
+LoadConstant FLDPI,nojmp ;No jmp emitted: falls into CommonConst below
+
+CommonConst:
+;ebx:edx = mantissa of constant, rounded to nearest
+;high ecx = exponent
+;ch = sign
+;cl = rounding flag: 1 indicates roundup occurred for round nearest, else 0
+;edi = [CURstk]
+ test EMSEG:[CWcntl],RoundControl ;Check rounding control bits
+.erre RCnear eq 0
+ jnz NotNearConst ;Adjust constant if not round nearest
+StoreConst:
+ mov cl,bTAG_VALID ;Rounding flag no longer needed; cl becomes the tag
+ mov esi,edx ;Low mantissa dword to esi for FldCont
+ jmp FldCont ;In emload.asm
+
+NotNearConst:
+;It is known that the five constants are positive irrational numbers.
+;This means they are never exact, and chop and round down always
+;produce the same answer. It is also known that the values are such
+;that rounding only alters bits in the last byte.
+;
+;A flag in cl indicates if the number has been rounded up for round
+;nearest (1 = rounded up, 0 = rounded down). In chop and round down
+;modes, this flag can be directly subtracted to reverse the rounding.
+;In round up mode, we want to add (1-flag) = -(flag-1).
+.erre RCchop eq 0CH ;Two bits set only for chop
+ test EMSEG:[CWcntl],RCdown ;DOWN bit set?
+ jnz DirectRoundConst ;If so, it's chop or down
+;Round Up mode
+ dec cl ;-1 if round up needed, else 0
+DirectRoundConst:
+ sub dl,cl ;Directed rounding
+ jmp StoreConst
diff --git a/private/ntos/dll/i386/emfdiv.asm b/private/ntos/dll/i386/emfdiv.asm
new file mode 100644
index 000000000..a06b910f2
--- /dev/null
+++ b/private/ntos/dll/i386/emfdiv.asm
@@ -0,0 +1,473 @@
+ subttl emfdiv.asm - Division
+ page
+;*******************************************************************************
+; Copyright (c) Microsoft Corporation 1991
+; All Rights Reserved
+;
+;emfdiv.asm - long double divide
+; by Tim Paterson
+;
+;Purpose:
+; Long double division.
+;Inputs:
+; ebx:esi = op1 mantissa
+; ecx = op1 sign in bit 15, exponent in high half
+; edi = pointer to op2 and result location
+; [Result] = edi
+;
+; Exponents are unbiased. Denormals have been normalized using
+; this expanded exponent range. Neither operand is allowed to be zero.
+;Outputs:
+; Jumps to [RoundMode] to round and store result.
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+;Dispatch tables for division
+;
+;One operand has been loaded into ecx:ebx:esi ("source"), the other is
+;pointed to by edi ("dest"). edi points to dividend for fdiv,
+;to divisor for fdivr.
+;
+;Tag of source is shifted. Tag values are as follows:
+;
+.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero
+.erre TAG_VALID eq 1
+.erre TAG_ZERO eq 2
+.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty
+
+;dest = dest / source
+tFdivDisp label dword ;Source (reg) Dest (*[edi])
+ dd DivSingle ;single single
+ dd DivSingle ;single double
+ dd XorDestSign ;single zero
+ dd DivSpclDest ;single special
+ dd DivDouble ;double single
+ dd DivDouble ;double double
+ dd XorDestSign ;double zero
+ dd DivSpclDest ;double special
+ dd DivideByZero ;zero single
+ dd DivideByZero ;zero double
+ dd ReturnIndefinite ;zero zero
+ dd DivSpclDest ;zero special
+ dd DivSpclSource ;special single
+ dd DivSpclSource ;special double
+ dd DivSpclSource ;special zero
+ dd TwoOpBothSpcl ;special special
+ dd ReturnIndefinite ;Two infinities (extra 17th entry)
+
+;dest = source / dest
+tFdivrDisp label dword ;Source (reg) Dest (*[edi])
+ dd DivrSingle ;single single
+ dd DivrDouble ;single double
+ dd DivideByZero ;single zero
+ dd DivrSpclDest ;single special
+ dd DivrSingle ;double single
+ dd DivrDouble ;double double
+ dd DivideByZero ;double zero
+ dd DivrSpclDest ;double special
+ dd XorSourceSign ;zero single
+ dd XorSourceSign ;zero double
+ dd ReturnIndefinite ;zero zero
+ dd DivrSpclDest ;zero special
+ dd DivrSpclSource ;special single
+ dd DivrSpclSource ;special double
+ dd DivrSpclSource ;special zero
+ dd TwoOpBothSpcl ;special special
+ dd ReturnIndefinite ;Two infinities (extra 17th entry)
+
+
+EM_ENTRY eFIDIV16
+eFIDIV16:
+ push offset DivSetResult
+ jmp Load16Int ;Returns to DivSetResult
+
+EM_ENTRY eFIDIVR16
+eFIDIVR16:
+ push offset DivrSetResult
+ jmp Load16Int ;Returns to DivrSetResult
+
+EM_ENTRY eFIDIV32
+eFIDIV32:
+ push offset DivSetResult
+ jmp Load32Int ;Returns to DivSetResult
+
+EM_ENTRY eFIDIVR32
+eFIDIVR32:
+ push offset DivrSetResult
+ jmp Load32Int ;Returns to DivrSetResult
+
+EM_ENTRY eFDIV32
+eFDIV32:
+ push offset DivSetResult
+ jmp Load32Real ;Returns to DivSetResult
+
+EM_ENTRY eFDIVR32
+eFDIVR32:
+ push offset DivrSetResult
+ jmp Load32Real ;Returns to DivrSetResult
+
+EM_ENTRY eFDIV64
+eFDIV64:
+ push offset DivSetResult
+ jmp Load64Real ;Returns to DivSetResult
+
+EM_ENTRY eFDIVR64
+eFDIVR64:
+ push offset DivrSetResult
+ jmp Load64Real ;Returns to DivrSetResult
+
+
+EM_ENTRY eFDIVRPreg
+eFDIVRPreg:
+ push offset PopWhenDone ;Extra return address: finish through PopWhenDone
+
+EM_ENTRY eFDIVRreg
+eFDIVRreg:
+ xchg esi,edi ;Swap source and dest pointers
+
+EM_ENTRY eFDIVRtop
+eFDIVRtop:
+ mov ecx,EMSEG:[esi].ExpSgn ;Load source: exponent, sign, tag
+ mov ebx,EMSEG:[esi].lManHi ;Source mantissa to ebx:esi
+ mov esi,EMSEG:[esi].lManLo
+DivrSetResult:
+;cl has tag of dividend
+ mov ebp,offset tFdivrDisp ;Table for TwoOpResultSet (presumably indexed by both tags)
+ mov EMSEG:[Result],edi ;Save result pointer
+ mov ah,cl ;ah = dividend tag
+ mov al,EMSEG:[edi].bTag ;al = divisor tag
+ and ah,not 1 ;Ignore single vs. double on dividend
+ cmp ax,1 ;ax=0: divisor single; ax=1: divisor double
+.erre bTAG_VALID eq 1
+.erre bTAG_SNGL eq 0
+ jz DivrDouble ;Divisor was double
+ ja TwoOpResultSet ;Some operand is zero or special
+;.erre DivrSingle eq $ ;Fall into DivrSingle
+
+;*********
+DivrSingle:
+;*********
+;Computes op1/op2
+;Op1 is double, op2 is single (low 32 bits are zero)
+ mov edx,ebx
+ mov eax,esi ;Mantissa in edx:eax
+ mov ebx,EMSEG:[edi].ExpSgn ;Divisor exponent and sign to ebx
+ mov edi,EMSEG:[edi].lManHi ;Divisor high mantissa to edi (low is zero)
+ jmp DivSingleReg
+
+
+SDivBigUnderflow:
+;Overflow flag set could only occur with denormals (true exp < -32768)
+ or EMSEG:[CURerr],Underflow
+ test EMSEG:[CWmask],Underflow ;Is exception masked?
+ jnz UnderflowZero ;Yes, return zero (in emfmul.asm)
+ add ecx,Underbias shl 16 ;Fix up exponent
+ jmp ContSdiv ;Continue with divide
+
+
+EM_ENTRY eFDIVPreg
+eFDIVPreg:
+ push offset PopWhenDone ;Extra return address: finish through PopWhenDone
+
+EM_ENTRY eFDIVreg
+eFDIVreg:
+ xchg esi,edi ;Swap source and dest pointers
+
+EM_ENTRY eFDIVtop
+eFDIVtop:
+ mov ecx,EMSEG:[esi].ExpSgn ;Load source: exponent, sign, tag
+ mov ebx,EMSEG:[esi].lManHi ;Source mantissa to ebx:esi
+ mov esi,EMSEG:[esi].lManLo
+DivSetResult:
+;cl has tag of divisor
+ mov ebp,offset tFdivDisp ;Table for TwoOpResultSet (presumably indexed by both tags)
+ mov EMSEG:[Result],edi ;Save result pointer
+ mov al,cl ;al = divisor tag
+ mov ah,EMSEG:[edi].bTag ;ah = dividend tag
+ and ah,not 1 ;Ignore single vs. double on dividend
+ cmp ax,1 ;ax=0: divisor single; ax=1: divisor double
+.erre bTAG_VALID eq 1
+.erre bTAG_SNGL eq 0
+ jz DivDouble ;Divisor was double
+ ja TwoOpResultSet ;Some operand is zero or special
+;.erre DivSingle eq $ ;Fall into DivSingle
+
+;*********
+DivSingle:
+;*********
+;Computes op2/op1
+;Op2 is double, op1 is single (low 32 bits are zero)
+ xchg edi,ebx ;Mantissa in edi, op2 ptr to ebx
+ xchg ebx,ecx ;ExpSgn to ebx, op2 ptr to ecx
+ mov edx,EMSEG:[ecx].lManHi
+ mov eax,EMSEG:[ecx].lManLo
+ mov ecx,EMSEG:[ecx].ExpSgn ;Op2 loaded
+
+DivSingleReg:
+;dividend mantissa in edx:eax, exponent in high ecx, sign in ch bit 7
+;divisor mantissa in edi, exponent in high ebx, sign in bh bit 7
+
+ xor ch,bh ;Compute result sign
+ xor bx,bx ;Clear out sign and tag
+ sub ecx,1 shl 16 ;Exponent adjustment needed
+ sub ecx,ebx ;Compute result exponent
+.erre TexpBias eq 0 ;Exponents not biased
+ jo SDivBigUnderflow ;Dividing denormal by large number
+ContSdiv:
+
+;If dividend >= divisor, the DIV instruction will overflow. Check for
+;this condition and shift the dividend right one bit if necessary.
+;
+;In previous versions of this algorithm for 24-bit and 53-bit mantissas,
+;this shift was always performed without a test. This meant that a 1-bit
+;normalization might be required at the end. This worked fine because
+;32 or 64 bits were calculated, so extra precision was available for
+;normalization. However, this version needs all 64 bits that are calculated,
+;so we can't afford a normalization shift at the end. This test tells us
+;up front how to align so we'll be normalized.
+ xor ebx,ebx ;Extend dividend
+ cmp edi,edx ;Will DIV overflow?
+ ja DoSdiv ;No, we're safe
+ shrd ebx,eax,1 ;Shift edx:eax:ebx right one bit
+ shrd eax,edx,1
+ shr edx,1
+ add ecx,1 shl 16 ;Bump exponent to account for shift
+DoSdiv:
+ div edi ;High quotient dword; remainder in edx
+ xchg ebx,eax ;Save quotient in ebx, extend remainder
+ div edi ;Low quotient dword; final remainder in edx
+ mov esi,eax
+;We have a 64-bit quotient in ebx:esi. Now compare remainder*2 with divisor
+;to compute round and sticky bits.
+ mov eax,-1 ;Set round and sticky bits
+ shl edx,1 ;Double remainder
+ jc RoundJmp ;If too big, round & sticky set
+ cmp edx,edi ;Is remainder*2 > divisor?
+ ja RoundJmp
+
+;Observe, oh wondering one, how you can assume the result of this last
+;compare is not equality. Use the following notation: n=numerator,
+;d=denominator,q=quotient,r=remainder,b=base(2^64 here). If
+;initially we had n < d then there was no shift and we will find q and r
+;so that q*d+r=n*b, if initially we had n >= d then there was a shift and
+;we will find q and r so that q*d+r=n*b/2. If we have equality here
+;then r=d/2 ==> n={possibly 2*}(2*q+1)*d/(2*b), since this can only
+;be integral if d is a multiple of b, but by definition b/2 <= d < b, we
+;have a contradiction. Equality is thus impossible at this point.
+
+ cmp edx,1 ;Check for zero remainder
+ sbb eax,-2 ;eax==0 if CY, ==1 if NC (was -1)
+RoundJmp:
+ jmp EMSEG:[RoundMode] ;Round and store: ebx:esi quotient, eax round/sticky
+
+;*******************************************************************************
+
+DDivBigUnderflow:
+;Overflow flag set could only occur with denormals (true exp < -32768)
+ or EMSEG:[CURerr],Underflow
+ test EMSEG:[CWmask],Underflow ;Is exception masked?
+ jnz UnderflowZero ;Yes, return zero (in emfmul.asm)
+ add ecx,Underbias shl 16 ;Fix up exponent
+ jmp ContDdiv ;Continue with divide
+
+DivrDoubleSetFlag:
+;Special entry point used by FPATAN to set bit 6 of flag dword pushed
+;on stack before call.
+ or byte ptr [esp+4],40H ;[esp] is the return address, flag dword is above it
+;*********
+DivrDouble:
+;*********
+;Computes op1/op2
+ mov edx,ebx
+ mov eax,esi ;Mantissa in edx:eax
+ mov ebx,EMSEG:[edi].ExpSgn ;Divisor exponent and sign to ebx
+ mov esi,EMSEG:[edi].lManHi ;Divisor mantissa to esi:edi
+ mov edi,EMSEG:[edi].lManLo
+ jmp short DivDoubleReg
+
+HighHalfEqual:
+;edx:eax:ebp = dividend
+;esi:edi = divisor
+;ecx = exponent and sign of result
+;
+;High half of dividend is equal to high half of divisor. This will cause
+;the DIV instruction to overflow. If whole dividend >= whole divisor, then
+;we just shift the dividend right 1 bit.
+ cmp eax,edi ;Is dividend >= divisor?
+ jae ShiftDividend ;Yes, divide it by two
+;DIV instruction would overflow, so skip it and calculate the effective
+;result. Assume a quotient of 2^32-1 and calculate the remainder. See
+;detailed comments under MaxQuo below--this is a copy of that code.
+ push ecx ;Save exp. and sign
+ mov ebx,-1 ;Max quotient digit
+ sub eax,edi ;Calculate correct remainder
+;Currently edx == esi, but the next instruction ensures that is no longer
+;true, since eax != 0. This will allow us to skip the MaxQuo check at
+;DivFirstDigit.
+ add edx,eax ;Should set CY if quotient fit
+ mov eax,edi ;edx:eax has new remainder
+ jc ComputeSecond ;Remainder was positive
+;Quotient doesn't fit. Note that we can no longer ensure that edx != esi
+;after making a correction.
+ mov ecx,edx ;Need remainder in ecx:eax
+ jmp DivCorrect1
+
+;*********
+DivDouble:
+;*********
+;Computes op2/op1
+ mov eax,edi ;Move op2 pointer
+ mov edi,esi
+ mov esi,ebx ;Mantissa in esi:edi
+ mov ebx,ecx ;ExpSgn to ebx
+ mov ecx,EMSEG:[eax].ExpSgn ;Op2 loaded
+ mov edx,EMSEG:[eax].lManHi ;Op2 mantissa to edx:eax
+ mov eax,EMSEG:[eax].lManLo ;Pointer no longer needed; eax overwritten
+
+DivDoubleReg:
+;dividend mantissa in edx:eax, exponent in high ecx, sign in ch bit 7
+;divisor mantissa in esi:edi, exponent in high ebx, sign in bh bit 7
+
+ xor ch,bh ;Compute result sign
+ xor bx,bx ;Clear out sign and tag
+ sub ecx,1 shl 16 ;Exponent adjustment needed
+ sub ecx,ebx ;Compute result exponent
+.erre TexpBias eq 0 ;Exponents not biased
+ jo DDivBigUnderflow ;Dividing denormal by large number
+ContDdiv:
+
+;If dividend >= divisor, we must shift the dividend right one bit.
+;This will ensure the result is normalized.
+;
+;In previous versions of this algorithm for 24-bit and 53-bit mantissas,
+;this shift was always performed without a test. This meant that a 1-bit
+;normalization might be required at the end. This worked fine because
+;32 or 64 bits were calculated, so extra precision was available for
+;normalization. However, this version needs all 64 bits that are calculated,
+;so we can't afford a normalization shift at the end. This test tells us
+;up front how to align so we'll be normalized.
+ xor ebp,ebp ;Extend dividend
+ cmp esi,edx ;Divisor high half > dividend high half?
+ ja DoDdiv ;Yes, no shift needed
+ jz HighHalfEqual ;Go compare low halves
+ShiftDividend:
+ shrd ebp,eax,1 ;Shift edx:eax:ebp right one bit
+ shrd eax,edx,1
+ shr edx,1
+ add ecx,1 shl 16 ;Bump exponent to account for shift
+DoDdiv:
+ push ecx ;Save exp. and sign
+
+;edx:eax:ebp = dividend
+;esi:edi = divisor
+;
+;Division algorithm from Knuth vol. 2, p. 237, using 32-bit "digits":
+;Guess a quotient digit by dividing two MSDs of dividend by the MSD of
+;divisor. If divisor is >= 1/2 the radix (radix = 2^32 in this case), then
+;this guess will be no more than 2 larger than the correct value of that
+;quotient digit (and never smaller). Divisor meets magnitude condition
+;because it's normalized.
+
+ div esi ;Guess first quotient "digit"
+
+;Check out our guess.
+;Currently, remainder in edx = dividend - (quotient * high half divisor).
+;The definition of remainder is dividend - (quotient * all divisor). So
+;if we subtract (quotient * low half divisor) from edx, we'll get
+;the true remainder. If it's negative, our guess was too big.
+
+ mov ebx,eax ;Save quotient
+ mov ecx,edx ;Save remainder
+ mul edi ;Quotient * low half divisor
+ sub ebp,eax ;Subtract from dividend extension
+ sbb ecx,edx ;Subtract from remainder
+ mov eax,ebp ;Low remainder to eax
+ jnc DivFirstDigit ;Was quotient OK?
+DivCorrect1:
+ dec ebx ;Quotient was too big
+ add eax,edi ;Add divisor back into remainder
+ adc ecx,esi
+ jnc DivCorrect1 ;Repeat if quotient is still too big
+DivFirstDigit:
+ cmp ecx,esi ;Would DIV instruction overflow?
+ jae short MaxQuo ;Yes, figure alternate quotient
+ mov edx,ecx ;Remainder back to edx:eax
+
+;Compute 2nd quotient "digit"
+
+ComputeSecond:
+ div esi ;Guess 2nd quotient "digit"
+ mov ebp,eax ;Save quotient
+ mov ecx,edx ;Save remainder
+ mul edi ;Quotient * low half divisor
+ neg eax ;Subtract from dividend extended with 0
+ sbb ecx,edx ;Subtract from remainder
+ jnc DivSecondDigit ;Was quotient OK?
+DivCorrect2:
+ dec ebp ;Quotient was too big
+ add eax,edi ;Add divisor back into remainder
+ adc ecx,esi
+ jnc DivCorrect2 ;Repeat if quotient is still too big
+DivSecondDigit:
+;ebx:ebp = quotient
+;ecx:eax = remainder
+;esi:edi = divisor
+;Now compare remainder*2 with divisor to compute round and sticky bits.
+ mov edx,-1 ;Set round and sticky bits
+ shld ecx,eax,1 ;Double remainder
+ jc DDivEnd ;If too big, round & sticky set
+ shl eax,1 ;Finish doubling: low half
+ sub edi,eax
+ sbb esi,ecx ;Subtract remainder*2 from divisor
+ jb DDivEnd ;If <0, use round & sticky bits set
+
+;Observe, oh wondering one, how you can assume the result of this last
+;compare is not equality. Use the following notation: n=numerator,
+;d=denominator,q=quotient,r=remainder,b=base(2^64 here). If
+;initially we had n < d then there was no shift and we will find q and r
+;so that q*d+r=n*b, if initially we had n >= d then there was a shift and
+;we will find q and r so that q*d+r=n*b/2. If we have equality here
+;then r=d/2 ==> n={possibly 2*}(2*q+1)*d/(2*b), since this can only
+;be integral if d is a multiple of b, but by definition b/2 <= d < b, we
+;have a contradiction. Equality is thus impossible at this point.
+
+;No round bit, but set sticky bit if remainder != 0.
+ or eax,ecx ;Is remainder zero?
+ add eax,-1 ;Set CY if non-zero
+ adc edx,1 ;edx==0 if NC, ==1 if CY (was -1)
+DDivEnd:
+ mov esi,ebp ;Result in ebx:esi
+ mov eax,edx ;Round/sticky bits to eax
+ pop ecx ;Recover sign/exponent
+ jmp EMSEG:[RoundMode] ;Round and store result
+
+
+MaxQuo:
+;ebx = first quotient "digit"
+;ecx:eax = remainder
+;esi:edi = divisor
+;On exit, ebp = second quotient "digit"
+;
+;Come here if divide instruction would overflow. This must mean that ecx == esi,
+;i.e., the high halves of the dividend and divisor are equal. Assume a result
+;of 2^32-1, thus remainder = dividend - ( divisor * (2^32-1) )
+; = dividend - divisor * 2^32 + divisor. Since the high halves of the dividend
+;and divisor are equal, dividend - divisor * 2^32 can be computed by
+;subtracting only the low halves. When adding divisor (in esi) to this, note
+;that ecx == esi, and we want the result in ecx anyway.
+;
+;Note also that since the dividend is a previous remainder, the
+;dividend - divisor * 2^32 calculation must always be negative. Thus the
+;addition of divisor back to it should generate a carry if it goes positive.
+
+ mov ebp,-1 ;Max quotient digit
+ sub eax,edi ;Calculate correct remainder
+ add ecx,eax ;Should set CY if quotient fit
+ mov eax,edi ;ecx:eax has new remainder
+ jc DivSecondDigit ;Remainder was positive
+ jmp DivCorrect2 ;Quotient too big: correct it
diff --git a/private/ntos/dll/i386/emfinit.asm b/private/ntos/dll/i386/emfinit.asm
new file mode 100644
index 000000000..baf0f0598
--- /dev/null
+++ b/private/ntos/dll/i386/emfinit.asm
@@ -0,0 +1,46 @@
+ subttl emfinit.asm - Emulator initialization and FINIT instruction
+ page
+;*******************************************************************************
+;emfinit.asm - Emulator initialization and FINIT instruction
+;
+; Microsoft Confidential
+;
+; Copyright (c) Microsoft Corporation 1991
+; All Rights Reserved
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+EM_ENTRY eEmulatorInit
+EmulatorInit:
+EM_ENTRY eFINIT
+eFINIT: ;Emulator init and FINIT share this code
+ mov esi,BEGstk ;Start at first register slot
+ mov EMSEG:[CURstk],INITstk ;Reset stack-top pointer
+ mov ecx,Numlev ;Loop count: number of stack levels
+ xor eax,eax ;eax = 0 for the stores below
+
+EmInitLoop:
+ mov EMSEG:[esi].ExpSgn,bTAG_EMPTY ;Exponent and sign are zero
+ mov EMSEG:[esi].lManHi,eax ;Zero the mantissa
+ mov EMSEG:[esi].lManLo,eax
+
+ add esi, Reg87Len ;Advance to next register slot
+ loop EmInitLoop
+
+ mov EMSEG:[StatusWord],ax ; clear status word
+ mov [esp+4].OldStatus,ax ; clear saved status word.
+ mov EMSEG:[PrevCodeOff],eax ; clear previous code offset
+ mov EMSEG:[PrevDataOff],eax ; clear previous data offset
+ mov EMSEG:[LongControlWord],InitControlWord
+ mov eax,offset Round64near ;All round vectors start at Round64near
+ mov EMSEG:[RoundMode],eax ;Address of round routine
+ mov EMSEG:[TransRound],eax ;Address of round routine
+ mov EMSEG:[SavedRoundMode],eax
+ mov EMSEG:[ZeroVector],offset SaveResult ;Vector jumped through by SignedZero
+ mov EMSEG:[Einstall], 1
+ ret
diff --git a/private/ntos/dll/i386/emfmisc.asm b/private/ntos/dll/i386/emfmisc.asm
new file mode 100644
index 000000000..6adc08dc3
--- /dev/null
+++ b/private/ntos/dll/i386/emfmisc.asm
@@ -0,0 +1,81 @@
+ subttl emfmisc.asm - FABS, FCHS, FFREE, FXCH
+ page
+;*******************************************************************************
+;emfmisc.asm - FABS, FCHS, FFREE, FXCH
+;
+; Microsoft Confidential
+;
+; Copyright (c) Microsoft Corporation 1991
+; All Rights Reserved
+;
+;Purpose:
+; FABS, FCHS, FFREE, FXCH instructions
+;Inputs:
+; edi = [CURstk]
+; esi = pointer to st(i) from instruction field
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+;******
+EM_ENTRY eFABS
+eFABS: ;FABS: clear the sign of ST(0); edi = [CURstk]
+;******
+ cmp EMSEG:[edi].bTag,bTAG_EMPTY ;Register empty?
+ jz StackError ;in emarith.asm
+ mov EMSEG:[edi].bSgn,0 ;Turn sign bit off
+ ret
+
+;******
+EM_ENTRY eFCHS
+eFCHS: ;FCHS: negate ST(0); edi = [CURstk]
+;******
+ cmp EMSEG:[edi].bTag,bTAG_EMPTY ;Register empty?
+ jz StackError ;in emarith.asm
+ not EMSEG:[edi].bSgn ;Flip the sign (inverts the whole sign byte)
+ ret
+
+;******
+EM_ENTRY eFFREE
+eFFREE: ;FFREE: esi = pointer to st(i)
+;******
+ mov EMSEG:[esi].bTag,bTAG_EMPTY ;Tag st(i) empty; only the tag byte is written
+ ret
+
+;******
+EM_ENTRY eFXCH
+eFXCH: ;FXCH: swap [edi] (ST(0)) with [esi] (st(i))
+;******
+ cmp EMSEG:[esi].bTag,bTAG_EMPTY
+ jz XchDestEmpty ;st(i) is empty
+XchgChkSrc:
+ cmp EMSEG:[edi].bTag,bTAG_EMPTY
+ jz XchSrcEmpty ;ST(0) is empty
+DoSwap:
+;Swap [esi] with [edi]
+ mov eax,EMSEG:[edi] ;Exchange three dwords (12 bytes), one at a time
+ xchg eax,EMSEG:[esi]
+ mov EMSEG:[edi],eax
+ mov eax,EMSEG:[edi+4]
+ xchg eax,EMSEG:[esi+4]
+ mov EMSEG:[edi+4],eax
+ mov eax,EMSEG:[edi+8]
+ xchg eax,EMSEG:[esi+8]
+ mov EMSEG:[edi+8],eax
+ ret
+
+XchDestEmpty:
+ call ReturnIndefinite ;in emarith.asm - ZF set if unmasked
+ jnz XchgChkSrc ;Continue if masked
+ ret ;Unmasked: leave without swapping
+
+XchSrcEmpty:
+ xchg edi,esi ;pass pointer in esi
+ call ReturnIndefinite ;in emarith.asm - ZF set if unmasked
+ xchg edi,esi ;Restore pointers (XCHG leaves flags alone)
+ jnz DoSwap ;Continue if masked
+ ret ;Unmasked: leave without swapping
diff --git a/private/ntos/dll/i386/emfmul.asm b/private/ntos/dll/i386/emfmul.asm
new file mode 100644
index 000000000..2a5fcca9f
--- /dev/null
+++ b/private/ntos/dll/i386/emfmul.asm
@@ -0,0 +1,238 @@
+ subttl emfmul.asm - Multiplication
+ page
+;*******************************************************************************
+; Copyright (c) Microsoft Corporation 1991
+; All Rights Reserved
+;
+;emfmul.asm - long double multiply
+; by Tim Paterson
+;
+;Purpose:
+; Long double multiplication.
+;Inputs:
+; ebx:esi = op1 mantissa
+; ecx = op1 sign in bit 15, exponent in high half
+; edi = pointer to op2 and result location
+; [Result] = edi
+;
+; Exponents are unbiased. Denormals have been normalized using
+; this expanded exponent range. Neither operand is allowed to be zero.
+;Outputs:
+; Jumps to [RoundMode] to round and store result.
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+;
+;*******************************************************************************
+
+;Dispatch table for multiply
+;
+;One operand has been loaded into ecx:ebx:esi ("source"), the other is
+;pointed to by edi ("dest").
+;
+;Tag of source is shifted. Tag values are as follows:
+
+.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero
+.erre TAG_VALID eq 1
+.erre TAG_ZERO eq 2
+.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty
+
+;Any special case routines not found in this file are in emarith.asm
+
+tFmulDisp label dword ;Source (reg) Dest (*[edi])
+ dd MulSingle ;single single
+ dd MulDouble ;single double
+ dd XorDestSign ;single zero
+ dd MulSpclDest ;single special
+ dd MulDouble ;double single
+ dd MulDouble ;double double
+ dd XorDestSign ;double zero
+ dd MulSpclDest ;double special
+ dd XorSourceSign ;zero single
+ dd XorSourceSign ;zero double
+ dd XorDestSign ;zero zero
+ dd MulSpclDest ;zero special
+ dd MulSpclSource ;special single
+ dd MulSpclSource ;special double
+ dd MulSpclSource ;special zero
+ dd TwoOpBothSpcl ;special special
+ dd XorDestSign ;Two infinities (extra 17th entry)
+
+
+EM_ENTRY eFIMUL16
+eFIMUL16: ;Operand is fetched by Load16Int
+ push offset MulSetResult
+ jmp Load16Int ;Returns to MulSetResult
+
+EM_ENTRY eFIMUL32
+eFIMUL32: ;Operand is fetched by Load32Int
+ push offset MulSetResult
+ jmp Load32Int ;Returns to MulSetResult
+
+EM_ENTRY eFMUL32
+eFMUL32: ;Operand is fetched by Load32Real
+ push offset MulSetResult
+ jmp Load32Real ;Returns to MulSetResult
+
+EM_ENTRY eFMUL64
+eFMUL64: ;Operand is fetched by Load64Real
+ push offset MulSetResult
+ jmp Load64Real ;Returns to MulSetResult
+
+EM_ENTRY eFMULPreg
+eFMULPreg:
+ push offset PopWhenDone ;Extra return address: finish through PopWhenDone
+
+EM_ENTRY eFMULreg
+eFMULreg:
+ xchg esi,edi ;Swap source and dest pointers
+
+EM_ENTRY eFMULtop
+eFMULtop:
+ mov ecx,EMSEG:[esi].ExpSgn ;Load source: exponent, sign, tag
+ mov ebx,EMSEG:[esi].lManHi ;Source mantissa to ebx:esi
+ mov esi,EMSEG:[esi].lManLo
+MulSetResult:
+ mov ebp,offset tFmulDisp ;Table for TwoOpResultSet (presumably indexed by both tags)
+ mov EMSEG:[Result],edi ;Save result pointer
+ mov al,cl ;Source tag
+ or al,EMSEG:[edi].bTag ;Combine with dest tag
+ cmp al,bTAG_VALID ;0: both single; 1: at least one double
+.erre bTAG_VALID eq 1
+.erre bTAG_SNGL eq 0
+ jz MulDouble
+ ja TwoOpResultSet ;Some operand is zero or special
+;.erre MulSingle eq $ ;Fall into MulSingle
+
+
+;*********
+MulSingle:
+;*********
+
+ mov edx,EMSEG:[edi].ExpSgn
+ mov eax,EMSEG:[edi].lManHi
+
+;op1 mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
+;op2 high mantissa in eax, exponent in high edx, sign in dh bit 7
+
+ xor ch,dh ;Compute result sign
+ xor dx,dx ;Clear out sign and tag
+ add ecx,edx ;Result exponent
+.erre TexpBias eq 0 ;Exponents not biased
+ jo SMulBigUnderflow ;Multiplying two denormals
+ContSmul:
+
+;Value in ecx is correct exponent if result is not normalized.
+;If result comes out normalized, 1 will be added.
+
+ mul ebx ;Compute product
+ mov ebx,edx ;Product to ebx:esi
+ mov esi,eax
+ xor eax,eax ;Extend with zero
+
+;Result in ebx:esi:eax
+;ecx = exponent minus one in high half, sign in ch
+ or ebx,ebx ;Check for normalization
+ jns ShiftOneBit ;In emfadd.asm
+ add ecx,1 shl 16 ;Adjust exponent
+ jmp EMSEG:[RoundMode] ;Round and store result
+
+SMulBigUnderflow:
+ or EMSEG:[CURerr],Underflow ;Flag underflow
+ add ecx,Underbias shl 16 ;Fix up exponent
+ test EMSEG:[CWmask],Underflow ;Is exception masked?
+ jz ContSmul ;No, continue with multiply
+UnderflowZero:
+ or EMSEG:[CURerr],Precision ;Result replaced by zero: flag precision too
+SignedZero:
+ and ecx,bSign shl 8 ;Preserve sign bit
+ xor ebx,ebx ;Zero mantissa in ebx:esi
+ mov esi,ebx
+ mov cl,bTAG_ZERO ;Tag result as zero
+ jmp EMSEG:[ZeroVector]
+
+;*******************************************************************************
+
+DMulBigUnderflow:
+;Overflow flag set could only occur with denormals (true exp < -32768)
+ or EMSEG:[CURerr],Underflow ;Flag underflow
+ test EMSEG:[CWmask],Underflow ;Is exception masked?
+ jnz UnderflowZero ;Yes, return zero
+ add ecx,Underbias shl 16 ;Fix up exponent
+ jmp ContDmul ;Continue with multiply
+
+PolyMulToZero:
+ ret ;Return the zero in registers
+
+PolyMulDouble:
+;This entry point is used by polynomial evaluator.
+;It checks the operand in registers for zero.
+ cmp cl,bTAG_ZERO ;Multiplying by zero?
+ jz PolyMulToZero
+
+;*********
+MulDouble:
+;*********
+
+ mov eax,EMSEG:[edi].ExpSgn
+ mov edx,EMSEG:[edi].lManHi
+ mov edi,EMSEG:[edi].lManLo
+
+MulDoubleReg: ;Entry point used by transcendentals
+;op1 mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
+;op2 mantissa in edx:edi, exponent in high eax, sign in ah bit 7
+
+ xor ch,ah ;Compute result sign
+ xor ax,ax ;Clear out sign and tag
+ add ecx,eax ;Result exponent
+.erre TexpBias eq 0 ;Exponents not biased
+ jo DMulBigUnderflow ;Multiplying two denormals
+ContDmul:
+
+;Value in ecx is correct exponent if result is not normalized.
+;If result comes out normalized, 1 will be added.
+
+ mov ebp,edx ;edx is used by MUL instruction
+
+;Generate and sum partial products, from least to most significant
+
+ mov eax,edi
+ mul esi ;Lowest partial product
+ add eax,-1 ;CY set IFF eax<>0
+ sbb cl,cl ;Sticky bit: 0 if zero, -1 if nz
+ xchg edi,edx ;Save high result
+
+;First product: cl reflects low dword non-zero (sticky bit), edi has high dword
+
+ mov eax,ebx ;op1 high
+ mul edx ;times op2 low (moved to edx by the xchg above)
+ add edi,eax
+ adc edx,0 ;Sum first results
+ xchg edx,esi ;High result to esi
+
+;Second product: accumulated in esi:edi:cl
+
+ mov eax,ebp ;Next mult. to eax
+ mul edx ;op2 high times op1 low
+ add edi,eax ;Sum low results
+ adc esi,edx ;Sum high results
+ mov eax,ebx
+ mov ebx,0 ;Preserve CY flag
+ adc ebx,ebx ;Keep carry out of high sum
+
+;Third product: accumulated in ebx:esi:edi:cl
+
+ mul ebp ;op1 high times op2 high
+ add esi,eax
+ adc ebx,edx
+ mov eax,edi
+ or al,cl ;Collapse sticky bits into eax
+
+;Result in ebx:esi:eax
+;ecx = exponent minus one in high half, sign in ch
+MulDivNorm:
+ or ebx,ebx ;Check for normalization
+ jns ShiftOneBit ;In emfadd.asm
+ add ecx,1 shl 16 ;Adjust exponent
+ jmp EMSEG:[RoundMode] ;Round and store result
diff --git a/private/ntos/dll/i386/emfprem.asm b/private/ntos/dll/i386/emfprem.asm
new file mode 100644
index 000000000..3cb8670bb
--- /dev/null
+++ b/private/ntos/dll/i386/emfprem.asm
@@ -0,0 +1,407 @@
+ subttl emfprem.asm - FPREM and FPREM1 instructions
+ page
+;*******************************************************************************
+;emfprem.asm - FPREM and FPREM1 instructions
+; by Tim Paterson
+;
+; Microsoft Confidential
+;
+; Copyright (c) Microsoft Corporation 1991
+; All Rights Reserved
+;
+;Inputs:
+; edi = [CURstk]
+; ST(1) loaded into ebx:esi & ecx
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+;
+;*******************************************************************************
+
+;Dispatch table for remainder
+;
+;One operand has been loaded into ecx:ebx:esi ("source"), the other is
+;pointed to by edi ("dest").
+;
+;Tag of source is shifted. Tag values are as follows:
+
+.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero
+.erre TAG_VALID eq 1
+.erre TAG_ZERO eq 2
+.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty
+
+;Any special case routines not found in this file are in emarith.asm
+
+;Each entry is the handler for one (source tag, dest tag) combination;
+;the entries are grouped by source tag, four dest tags per group, in the
+;tag order SNGL, VALID, ZERO, SPCL asserted above.
+;NOTE(review): the 17th entry lies beyond the 16 tag combinations;
+;presumably it is selected by TwoOpBothSpcl when both operands are
+;infinity -- confirm in emarith.asm.
+ ;Divisor Dividend
+tFpremDisp label dword ;Source(ST(1)) Dest (ST(0))
+ dd PremDouble ;single single
+ dd PremDouble ;single double
+ dd PremX ;single zero
+ dd PremSpclDest ;single special
+ dd PremDouble ;double single
+ dd PremDouble ;double double
+ dd PremX ;double zero
+ dd PremSpclDest ;double special
+ dd ReturnIndefinite ;zero single
+ dd ReturnIndefinite ;zero double
+ dd ReturnIndefinite ;zero zero
+ dd PremSpclDest ;zero special
+ dd PremSpclSource ;special single
+ dd PremSpclSource ;special double
+ dd PremSpclSource ;special zero
+ dd TwoOpBothSpcl ;special special
+ dd ReturnIndefinite ;Two infinities
+
+
+PremSpclDone:
+;Discard the return address intended for the normal (PremDouble) path,
+;then return to the next address on the stack.
+;NOTE(review): presumably reached by the "ret" of a special-case handler;
+;confirm against the FPREM dispatcher.
+;
+;The full 32-bit esp is adjusted. "add sp,4" changes only the low 16 bits
+;and does not carry into the upper half of esp, which corrupts the stack
+;pointer when the low word is 0FFFCH or above.
+ add esp,4 ;Clean off return address for normal
+ ret
+
+;***
+PremSpclDest:
+;Dest (the dividend, pointed to by edi) has a special tag.
+;An infinite dividend is an invalid operation; every other special tag
+;is handed to the generic SpclDest handler with the tag still in al.
+ mov al,EMSEG:[edi].bTag ;Pick up tag
+ cmp al,bTAG_INF ;Dividing infinity?
+ jz ReturnIndefinite ;Invalid operation if so
+ jmp SpclDest ;In emarith.asm
+
+;***
+PremSpclSource:
+;Source (the divisor, tag in cl) has a special tag.
+;Any special other than infinity goes to the generic SpclSource handler.
+;Dividing by infinity falls into PremX.
+ cmp cl,bTAG_INF ;Dividing by infinity?
+ jnz SpclSource ;in emarith.asm
+PremX:
+;Return Dest unchanged, quotient = 0
+;Also the dispatch-table target for a zero dividend with a non-special
+;divisor. Only the condition codes are written.
+ mov EMSEG:[SWcc],0
+ ret
+;*******************************************************************************
+
+;Map quotient bits to condition codes
+;MapQuo is indexed by the low three bits of the quotient (SavePremResult
+;does "and ebp,7" followed by "mov al,MapQuo[ebp]"); the byte fetched is
+;stored in [SWcc]. Qn names the condition-code bit that reports quotient
+;bit n.
+
+Q0 equ C1
+Q1 equ C3
+Q2 equ C0
+
+MapQuo label byte
+ db 0
+ db Q0
+ db Q1
+ db Q1+Q0
+ db Q2
+ db Q2+Q0
+ db Q2+Q1
+ db Q2+Q1+Q0
+
+Prem1Cont:
+
+;edx:eax = remainder, normalized
+;ebx:esi = divisor
+;ebp = quotient
+;edi = exponent difference, zero or less
+;ecx = 0 (positive sign)
+;
+;At this point, 0 <= remainder < divisor. However, for FPREM1 we need
+; -divisor/2 <= remainder <= divisor/2. If remainder = divisor/2, whether
+;we choose + or - is dependent on whichever gives us an even quotient
+;(the usual IEEE rounding rule). Quotient must be incremented if we
+;use negative remainder.
+;
+;On exit (falling into PremCont): edx:eax = normalized remainder,
+;edi = adjusted exponent difference, ebp = quotient, ch = sign to apply.
+
+ cmp edi,-1 ;Signed compare: edi is an exponent difference
+ jl PremCont ;Remainder < divisor/2
+ jg NegRemainExp0 ;Remainder > divisor/2
+;Exponent is -1
+;The mantissas are unsigned 64-bit values, so these compares must use the
+;unsigned conditions (jb/ja). The high dwords both have their MSB set
+;(normalized), but the low dwords are arbitrary: a signed compare there
+;gives the wrong answer whenever bit 31 of eax and esi differ.
+ cmp edx,ebx
+ jb PremCont ;Remainder < divisor/2
+ ja NegRemain ;Remainder > divisor/2
+ cmp eax,esi
+ jb PremCont ;Remainder < divisor/2
+ ja NegRemain ;Remainder > divisor/2
+;Remainder = divisor/2. Ensure quotient is even
+ test ebp,1 ;Even?
+ jz PremCont
+NegRemain:
+;Theoretically we subtract divisor from remainder once more, leaving us
+;with a negative remainder. But since we use sign/magnitude representation,
+;we want the abs() of that with sign bit set--so subtract remainder from
+;(larger) divisor. Note that exponent difference is -1, so we must align
+;binary points first.
+ add esi,esi
+ adc ebx,ebx ;Double divisor to align binary points
+NegRemainExp0:
+ sub esi,eax
+ sbb ebx,edx ;Subtract remainder
+ mov eax,esi
+ mov edx,ebx ;Result in edx:eax
+ inc ebp ;Increase quotient
+;Must normalize result of subtraction
+;bsr overwrites all of ecx with the bit index, so the sign is loaded
+;into ch only after the shift count in cl has been used.
+ bsr ecx,edx ;Look for 1 bit
+ jnz @F
+ sub edi,32
+ xchg edx,eax ;Shift left 32 bits
+ bsr ecx,edx
+@@:
+ lea edi,[edi+ecx-31] ;Fix up exponent for normalization
+ not cl ;Low 5 bits = 31 - bit index = shift count
+ shld edx,eax,cl
+ shl eax,cl
+ mov ch,bSign ;Flip sign of remainder
+
+PremCont:
+;edx:eax = remainder, normalized
+;ebp = quotient
+;edi = exponent difference, zero or less
+;ch = sign
+;
+;Store the remainder into the stack entry at [CURstk], choosing its tag
+;from whether the low mantissa dword is zero, and report the low three
+;quotient bits in [SWcc].
+ or eax,eax ;Low bits zero?
+.erre bTAG_VALID eq 1
+.erre bTAG_SNGL eq 0
+ setnz cl ;if low half==0 then cl=0 else cl=1
+ mov esi,EMSEG:[CURstk]
+ mov ebx,esi
+;NOTE(review): NextStackElem is a macro defined elsewhere; presumably it
+;advances ebx to the following stack element (the divisor, ST(1)) and
+;pairs with the NextStackWrap expansion below -- confirm in emstack.inc.
+ NextStackElem ebx,Prem
+ add di,EMSEG:[ebx].wExp ;Compute result exponent
+ cmp di,IexpMin-IexpBias
+ jle PremUnderflow
+SavePremResult:
+;cl = tag to store, ch = sign bits to flip, di = exponent
+ mov EMSEG:[esi].lManLo,eax
+ xor EMSEG:[esi].bSgn,ch
+ mov EMSEG:[esi].lManHi,edx
+ and ebp,7 ;Keep last 3 bits of quotient only
+ ; and give write buffers a break
+ mov EMSEG:[esi].wExp,di
+ mov EMSEG:[esi].bTag,cl
+ mov al,MapQuo[ebp] ;Get cond. codes for this quotient
+ mov EMSEG:[SWcc],al
+ ret
+
+ NextStackWrap ebx,Prem ;Tied to NextStackElem above
+
+PremUnderflow:
+;Result exponent is at or below IexpMin-IexpBias.
+;A set bit in [CWmask] means the exception is masked: the result is
+;stored with the denormal tag and its mantissa left as is.
+ test EMSEG:[CWmask],Underflow ;Is exception unmasked?
+ jz UnmaskedPremUnder
+ mov cl,bTAG_DEN
+ jmp SavePremResult
+
+UnmaskedPremUnder:
+;Unmasked: bias the exponent, record the error, keep the tag in cl.
+ add edi,UnderBias ;Additional exp. bias for unmasked resp.
+ or EMSEG:[CURerr],Underflow
+ jmp SavePremResult
+
+;*******************************************************************************
+
+PremDouble:
+;edi = [CURstk]
+;ebx:esi = ST(1) mantissa, ecx = ExpSgn
+;
+;Normal-operand entry for FPREM/FPREM1. Loads the dividend from [edi],
+;computes the exponent difference and selects the reduction stage.
+;Returns (from stage 2 or ExitPremLoop) with edx:eax = remainder,
+;ebp = quotient, edi = exponent difference, ecx = 0.
+
+;The full 32-bit esp is adjusted. "add sp,4" changes only the low 16 bits
+;and does not carry into the upper half of esp.
+ add esp,4 ;Clean off return address for special
+ mov eax,EMSEG:[edi].lManLo
+ mov edx,EMSEG:[edi].lManHi
+ movsx edi,EMSEG:[edi].wExp
+ xor ebp,ebp ;Quotient, in case we skip stage 1
+ sar ecx,16 ;Bring exponent down
+ sub edi,ecx ;Get exponent difference
+ jl ExitPremLoop ;If dividend is smaller, return it.
+
+;FPREM is performed in two stages. The first stage is used only if the
+;exponent difference is greater than 31. It reduces the exponent difference
+;by 32, and repeats until the difference is less than 32. Note that
+;unlike the hardware FPREM instruction, we are not limited to reducing
+;the exponent by only 63--we just keep looping until it's done.
+;
+;The second stage performs ordinary 1-bit-at-a-time long division.
+;It stops when the exponent difference is zero, meaning we have an
+;integer quotient and the final remainder.
+;
+;edx:eax = dividend
+;ebx:esi = divisor
+;edi = exponent difference
+;ebp = 0 (initial quotient)
+
+ cmp edi,32 ;Do we need to do stage 1?
+ jl FitDivisor ;No, start stage 2
+
+;FPREM stage 1
+;
+;Exponent difference is at least 32. Use 32-bit division to compute
+;quotient and exact remainder, reducing exponent difference by 32.
+
+;DIV instruction will overflow if dividend >= divisor. In this case,
+;subtract divisor from dividend to ensure no overflow. This will change
+;the quotient, but that doesn't matter because we only need the last
+;3 bits of the quotient (and we're about to calculate 32 quotient bits).
+;This subtraction will not affect the remainder.
+
+ sub eax,esi
+ sbb edx,ebx
+ jnc FpremReduce32 ;Was dividend big?
+ add eax,esi ;Restore dividend, it was smaller
+ adc edx,ebx
+
+;Division algorithm from Knuth vol. 2, p. 237, using 32-bit "digits":
+;Guess a quotient digit by dividing two MSDs of dividend by the MSD of
+;divisor. If divisor is >= 1/2 the radix (radix = 2^32 in this case), then
+;this guess will be no more than 2 larger than the correct value of that
+;quotient digit (and never smaller). Divisor meets magnitude condition
+;because it's normalized.
+;
+;This loop typically takes 117 clocks.
+
+;edx:eax = dividend
+;ebx:esi = divisor
+;edi = exponent difference
+;ebp = quotient (zero)
+
+FpremReduce32:
+;We know that dividend < divisor, but it is still possible that
+;high dividend == high divisor, which will cause the DIV instruction
+;to overflow.
+ cmp edx,ebx ;Will DIV instruction overflow?
+ jae PremOvfl
+ div ebx ;Guess a quotient "digit"
+
+;Currently, remainder in edx = dividend - (quotient * high half divisor).
+;The definition of remainder is dividend - (quotient * all divisor). So
+;if we subtract (quotient * low half divisor) from edx, we'll get
+;the true remainder. If it's negative, our guess was too big.
+
+ mov ebp,eax ;Save quotient
+ mov ecx,edx ;Save remainder
+ mul esi ;Quotient * low half divisor
+ neg eax ;Subtract from dividend extended with 0
+ sbb ecx,edx ;Subtract from remainder
+ mov edx,ecx ;Remainder back to edx:eax
+ jnc HavPremQuo ;Was quotient OK?
+FpremCorrect:
+;Each pass lowers the quotient digit by one and adds the divisor back;
+;a carry out of the add means the remainder is no longer negative.
+ dec ebp ;Quotient was too big
+ add eax,esi ;Add divisor back into remainder
+ adc edx,ebx
+ jnc FpremCorrect ;Repeat if quotient is still too big
+HavPremQuo:
+;edx:eax = remainder (less than divisor), ebp = latest quotient digit
+ sub edi,32 ;Exponent reduced
+ cmp edi,32 ;Exponent difference within 31?
+ jl PremNormalize ;Do it a bit a time
+ or edx,edx ;Check for zero remainder
+ jnz FpremReduce32
+ or eax,eax ;Remainder 0?
+ jz ExactPrem
+;High dword is zero: shifting left by 32 supplies 32 zero quotient bits.
+ xchg edx,eax ;Shift left 32 bits
+ sub edi,32 ;Another 32 bits reduced
+ cmp edi,32
+ jge FpremReduce32
+ xor ebp,ebp ;No quotient bits are valid
+ jmp PremNormalize
+
+PremOvfl:
+;edx:eax = dividend
+;ebx:esi = divisor
+;On exit, ebp = second quotient "digit"
+;
+;Come here if divide instruction would overflow. This must mean that edx == ebx,
+;i.e., the high halves of the dividend and divisor are equal. Assume a result
+;of 2^32-1, thus remainder = dividend - ( divisor * (2^32-1) )
+; = dividend - divisor * 2^32 + divisor. Since the high halves of the dividend
+;and divisor are equal, dividend - divisor * 2^32 can be computed by
+;subtracting only the low halves. When adding divisor (in ebx) to this, note
+;that edx == ebx, and we want the result in edx anyway.
+;
+;Note also that since dividend < divisor, the
+;dividend - divisor * 2^32 calculation must always be negative. Thus the
+;addition of divisor back to it should generate a carry if it goes positive.
+;
+;Exits to HavPremQuo when the assumed digit fits, otherwise to
+;FpremCorrect, which lowers the digit and adds the divisor back.
+
+ mov ebp,-1 ;Max quotient digit
+ sub eax,esi ;Calculate correct remainder
+ add edx,eax ;Should set CY if quotient fit
+ mov eax,esi ;edx:eax has new remainder
+ jc HavPremQuo ;Remainder was positive
+ jmp FpremCorrect
+
+ExactPrem:
+;eax = 0
+;
+;Remainder is exactly zero. Write a zero mantissa, zero exponent and the
+;zero tag into the entry at [CURstk], discard one return address and
+;return to the next one. The sign byte of the entry is not touched.
+;NOTE(review): [SWcc] is not written on this path, although PremX and
+;SavePremResult both store it -- confirm whether the quotient condition
+;codes are meant to be reported here as well.
+ mov esi,EMSEG:[CURstk]
+ mov EMSEG:[esi].lManLo,eax
+ mov EMSEG:[esi].lManHi,eax
+;The full 32-bit esp is adjusted. "add sp,4" changes only the low 16 bits
+;and does not carry into the upper half of esp.
+ add esp,4 ;Clean off first return address
+ mov EMSEG:[esi].wExp,ax
+ mov EMSEG:[esi].bTag,bTAG_ZERO
+ ret
+
+
+;FPREM stage 2
+;
+;Exponent difference is less than 32. Use restoring long division to
+;compute quotient bits until exponent difference is zero. Note that we
+;often get more than one bit/loop: BSR is used to scan off leading
+;zeros each time around. Since the divisor is normalized, we can
+;instantly compute a zero quotient bit for each leading zero bit.
+;
+;For reductions of 1 to 31 bits per loop, this loop requires 41 or 59 clocks
+;plus 3 clocks/bit (BSR time). If we had to use this for 32-bit reductions
+;(without stage 1), we could expect (50+6)*16 = 896 clocks typ (2 bits/loop)
+;instead of the 112 required by stage 1!
+
+FpremLoop:
+;edx:eax = dividend (remainder) minus divisor
+;ebx:esi = divisor
+;ebp = quotient
+;edi = exponent difference, less than 32
+;
+;If R is current remainder and d is divisor, then we have edx:eax = R - d,
+;which is negative. We want 2*R - d, which is positive.
+;2*R - d = 2*(R - d) + d.
+ add eax,eax ;2*(R - d)
+ adc edx,edx
+ add eax,esi ;2*(R-d) + d = 2*R - d
+ adc edx,ebx
+ add ebp,ebp ;Double quotient too
+ dec edi ;Decrement exponent difference
+DivisorFit:
+;Divisor has just been subtracted successfully: record a 1 quotient bit.
+ inc ebp ;Count one in quotient
+PremNormalize:
+;Shift the remainder left until its MSB is set, charging the shift count
+;against the exponent difference and shifting the quotient to match.
+ bsr ecx,edx ;Find first 1 bit
+ jz PremHighZero
+ not cl
+ and cl,1FH ;Convert bit no. to shift count
+ shld edx,eax,cl ;Normalize
+ shl eax,cl
+ sub edi,ecx ;Reduce exponent difference
+ jl PremTooFar
+ shl ebp,cl ;Shift quotient
+FitDivisor:
+;Dividend could be larger or smaller than divisor
+;Also the entry from PremDouble when the exponent difference is 0..31.
+ sub eax,esi
+ sbb edx,ebx
+ jnc DivisorFit
+;Couldn't subtract divisor from dividend.
+ or edi,edi ;Is exponent difference zero or less?
+ jg FpremLoop
+;Done: undo the trial subtraction and return the remainder.
+ add eax,esi ;Restore dividend
+ adc edx,ebx
+ xor ecx,ecx ;Sign is positive
+ ret
+
+PremTooFar:
+;Exponent difference in edi went negative when reduced by shift count in ecx.
+;We need a quotient corresponding to exponent difference of zero.
+;ecx + edi is the exponent difference as it was before the shift count
+;was subtracted, so the quotient is shifted by that amount only.
+ add ecx,edi ;Restore exponent difference
+ shl ebp,cl ;Fix up quotient
+ExitPremLoop:
+;edx:eax = remainder, normalized
+;ebp = quotient
+;edi = exponent difference, zero or less
+;Also reached directly from PremDouble when the dividend's exponent is
+;smaller than the divisor's; the dividend is then returned unchanged.
+ xor ecx,ecx ;Sign is positive
+ ret
+
+PremHighZero:
+;High half of remainder is all zero, so we've reduced exponent difference
+;by 32 bits and overshot. We need a quotient corresponding to exponent
+;difference of zero, so we just shift it by the original difference. Then
+;we need to normalize the low half remainder.
+;
+;On entry edx = 0, eax = low half of remainder, edi = exponent difference.
+;If eax is zero too, the remainder is exact and ExactPrem finishes up.
+ mov ecx,edi
+ shl ebp,cl ;Fix up quotient
+ bsr ecx,eax
+ jz ExactPrem
+ lea edi,[edi+ecx-63] ;Fix up exponent for normalization
+ xchg eax,edx ;Shift by 32 bits
+ not cl ;Low 5 bits = 31 - bit index = shift count
+ shl edx,cl ;Normalize remainder
+ xor ecx,ecx ;Sign is positive
+ ret
diff --git a/private/ntos/dll/i386/emfsqrt.asm b/private/ntos/dll/i386/emfsqrt.asm
new file mode 100644
index 000000000..cc36f7b0a
--- /dev/null
+++ b/private/ntos/dll/i386/emfsqrt.asm
@@ -0,0 +1,267 @@
+ subttl emfsqrt.asm - FSQRT instruction
+ page
+;*******************************************************************************
+;emfsqrt.asm - FSQRT instruction
+; by Tim Paterson
+;
+; Microsoft Confidential
+;
+; Copyright (c) Microsoft Corporation 1991
+; All Rights Reserved
+;
+;Inputs:
+; edi = [CURstk]
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+;A linear approximation of the square root function is used to get the
+;initial value for Newton-Raphson iteration. This approximation gives
+;nearly 5-bit accuracy over the required input interval, [1,4). The
+;equation for the linear approximation of y = sqrt(x) is y = mx + b,
+;where m is the slope (named SQRT_COEF) and b is the y-intercept (named
+;SQRT_INTERCEPT).
+;
+;(The values for m and b were computed with Excel Solver in two passes:
+;the first pass computed them full precision, minimizing absolute error;
+;the second computed only b after m was rounded to an 8-bit value.)
+;
+;The resulting values have the following maximum error:
+;
+;inp. value --> 1 2.18972 3.82505
+;----------------------------------------------------------------
+;abs. err., full prec. 0.04544 -0.03233 0.04423
+;abs. err., truncated 0.04544 -0.04546 0.04423
+;
+;The three input values shown represent the left end point, the maximum
+;error (derivative of absolute error == 0), and the right end point.
+;The right end point is not 4 because the approximation reaches 2.000
+;at the value given--we abandon the linear approximation at that point
+;and use that same value for all greater input values. This linear
+;approximation is computed with 8-bit operations, so truncations can
+;add a negative error. This increases maximum error only when it is
+;already negative, as shown in the table.
+;
+;Each iteration of Newton-Raphson approximation more than doubles the
+;number of bits of accuracy. Suppose the current guess is A, and it has
+;an absolute error of e (i.e., A+e or A-e is the root). Then the absolute
+;error after the next iteration is e^2/2A. This error is always positive.
+;However, the divide instruction truncates, which introduces an error
+;that is always negative. Sometimes a constant or rounding bit is added
+;to balance the positive and negative errors. The maximum possible error
+;is given in comments below for each iteration. (Note that when we compute
+;the error from e^2/2A, A could be in the range 1 to 2--we use 1 to get
+;max error.) Remember that the binary point is to the RIGHT of the MSB
+;when looking at these error numbers.
+
+
+;SQRT_INTERCEPT is used when the binary point is to the right of the MSB.
+;Multiplying it by 64K would put the binary point to the left of the MSB,
+;so it must be divided by two to be aligned.
+;23185 = 5A91H; it is added to a 16-bit product in bx.
+SQRT_INTERCEPT equ 23185 ; 0.70755 * 65536 / 2
+
+;SQRT_COEF would have the binary point to the left of the MSB if multiplied
+;by 256. However, this would leave it with a leading zero, so we multiply
+;it by two more to normalize it.
+;173 = 0ADH; it multiplies the high byte of the mantissa.
+SQRT_COEF equ 173 ; 0.33789 * 256 * 2
+
+SqrtSpcl:
+;Operand has a special tag. al = tag, ah = sign byte (from ExpSgn).
+;Denormals go to SqrtDen; anything other than infinity goes to the
+;generic SpclDestNotDen handler. Negative infinity is an invalid
+;operation; positive infinity is returned unchanged.
+ cmp al,bTAG_DEN
+ jz SqrtDen
+ cmp al,bTAG_INF
+ jnz SpclDestNotDen
+;Have infinity
+ or ah,ah ;Is it negative?
+ js ReturnIndefinite
+SqrtRet:
+;Common return; the operand on the stack is left as is.
+ ret
+
+
+MaxStartRoot:
+;Reached when adding SQRT_INTERCEPT carried out of bx, i.e. the linear
+;approximation reached 2.0.
+;The first iteration is calculated as (ax / bh) * 100H + bx. The first
+;trial root in bx should be 10000H (which is too big). But it's very
+;easy to calculate (ax / 100H) * 100H + 10000H = ax.
+ mov bx,ax
+ cmp ax,-1 ;Would subsequent DIV overflow?
+ jb FirstTrialRoot
+;The reduced argument is so close to 4.0 that the 16-bit DIV instruction
+;used in the next iteration would overflow. If the argument is 4-A
+;then a guess of 2.0 is in error by approximately A/4. [This is not
+;an upper bound. The error is a little more than this, by an
+;additional term with the magnitude of A^2. This is an insignificant amount
+;when A is small.] This means that the first guess of 2.0 is quite
+;accurate, and we'll use it to bypass some of the iteration steps.
+;This will eliminate the DIV overflow by skipping the DIV.
+;
+;One iteration is performed by: (Arg/Guess + Guess)/2. When Guess = 2,
+;this becomes (Arg/2 + 2)/2 = Arg/4 + 1. We get Arg/2 just by assuming
+;the binary point is one bit further left; then a single right shift is
+;needed to get Arg/4. By shifting in a 1 bit on the left, we account for
+;adding 1 at the same time. [Note that if Arg = 4 - A, then Arg/4 + 1
+; = (4 - A)/4 + 1 = 1 - A/4 + 1 = 2 - A/4. In other words, we just
+;subtract out exactly what we estimate our error to be, A/4.]
+;
+;Since the upper 16 bits are 0FFFFH, A <= 2^-14, so error <= 2^-16 =
+; +0.00001526, -0.
+ mov ebx,esi ;Return root in ebx
+;The MSB of esi is set here, so the arithmetic shift supplies the 1 bit.
+ sar ebx,1 ;Trial root = arg/2
+ cmp esi,ebx ;Will 32-bit division overflow?
+ jb StartThirdIteration ;No, our 32-bit guess is good
+;Argument is really, really close to 4.0: with an initial trial root of
+;2.0, max absolute error is 2^-32 = +2.328E-10, -0. One trivial
+;iteration will get us 65-bit accuracy, max abs. error = +2.71E-20, -0.
+ mov ebx,esi
+ mov eax,ecx ;65-bit root*2 in ebx:eax (MSB implied)
+ shl ecx,2 ;ecx = low half*4
+ jmp RoundRoot
+
+SqrtDen:
+;Operand is a denormal. eax still holds its ExpSgn (loaded at eFSQRT;
+;SqrtSpcl does not change it) and edi still points to it.
+;
+;A set bit in [CWmask] means the exception is masked (PremUnderflow in
+;emfprem.asm tests [CWmask] the same way). If the denormal exception is
+;unmasked we quit, leaving the operand untouched; if it is masked we carry
+;on and take the root. The jump goes past the tag dispatch at eFSQRT:
+;re-entering it would send the denormal tag straight back here.
+;NOTE(review): this assumes denormal-tagged operands hold a normalized
+;mantissa internally, as the result stored by PremUnderflow does --
+;confirm against the load routines.
+ mov EMSEG:[CURerr],Denormal
+ test EMSEG:[CWmask],Denormal ;Is denormal exception masked?
+ jz SqrtRet ;If not, quit
+ jmp SqrtDenOk ;Masked: treat as an ordinary operand
+
+;******
+EM_ENTRY eFSQRT
+eFSQRT:
+;******
+;FSQRT: replace the stack entry at [edi] with its square root.
+;Zero is returned unchanged, special tags go to SqrtSpcl, and a negative
+;operand is an invalid operation.
+ mov eax,EMSEG:[edi].ExpSgn
+ cmp al,bTAG_ZERO
+ jz SqrtRet
+ ja SqrtSpcl
+SqrtDenOk:
+ or ah,ah
+ js ReturnIndefinite
+ mov esi,EMSEG:[edi].lManHi
+ mov ecx,EMSEG:[edi].lManLo
+ sar EMSEG:[edi].wExp,1 ;Divide exponent by two
+;MOV leaves the flags alone, so CY from the SAR above is still valid.
+ mov edi,0 ;Extend mantissa
+ jc RootAligned ;If odd exponent, leave it normalized
+ shrd edi,ecx,1
+ shrd ecx,esi,1
+ shr esi,1 ;Denormalize, extending into edi
+RootAligned:
+;esi:ecx:edi has mantissa, 2 MSBs are left of binary point. Range is [1,4).
+;
+;Build the starting guess from the linear approximation, then refine it
+;with successively wider divides (8-, 16- and 32-bit divisors).
+ shld eax,esi,16 ;Get high word of mantissa
+ movzx ebx,ah ;High byte to bl
+;UNDONE: MASM 6 bug!!
+;UNDONE: SQRT_COEF (=0ADH) get sign extended!!
+ mov dx,SQRT_COEF ;UNDONE
+ imul bx,dx ;UNDONE
+;UNDONE imul bx,SQRT_COEF ;Product in bx
+;Multiply by SQRT_COEF causes binary point to shift left 1 bit.
+ add bx,SQRT_INTERCEPT ;5-bit approx. square root in bh
+ jc MaxStartRoot
+;Max absolute error is +/- 0.04546
+ div bh ;See how close we are
+ add bh,al ;quotient + divisor (always sets CY)
+FirstTrialRoot:
+;Avoid RCR because it takes 9 clocks on 386. Use SHRD (3 clocks) instead.
+;The 1 in dl stands in for the carry that RCR would have rotated in.
+ mov dl,1 ;Need bit set
+ shrd bx,dx,1 ;(quotient + divisor)/2
+;bx has 9-bit approx. square root, normalized
+;Max absolute error is +0.001033, -0.003906
+ movzx eax,si
+ shld edx,esi,16 ;dx:ax has high half mantissa
+ div bx ;Test our approximation
+ add ebx,eax ;quotient + divisor
+ shl ebx,15 ;Normalize (quotient + divisor)/2
+;ebx has 17-bit approx. square root, normalized
+;Max absolute error is +0.000007629, -0.00001526
+;Add adjustment factor to center the error range at +/-0.00001144
+ or bh,20H ;Add in 0.000003815
+StartThirdIteration:
+;ebx = 32-bit trial root, esi:ecx = high 64 bits of the argument
+ mov edx,esi
+ mov eax,ecx
+ div ebx ;Test approximation
+ stc ;Set bit for rounding (= 2.328E-10)
+ adc ebx,eax ;quotient + divisor + round bit
+;Avoid RCR because it takes 9 clocks on 386. Use SHRD (3 clocks) instead.
+ mov dl,1 ;Need bit set
+ shrd ebx,edx,1 ;(quotient + divisor)/2, rounded
+;ebx has 32-bit approx. square root, normalized
+;Max absolute error is +2.983E-10, -2.328E-10
+ mov edx,esi ;Last time we need high half
+ mov eax,ecx
+ shld ecx,edi,2 ;ecx = low half*4, w/extension back in
+ div ebx ;Test approximation
+ xchg edi,eax ;Save 1st quotient, get extension
+ mov esi,eax
+ or esi,edx ;Any remainder?
+ jz HaveRoot ;Result is ebx:esi
+ div ebx ;edi:eax is 64-bit quotient
+ add ebx,edi ;quotient + divisor (always sets CY)
+RoundRoot:
+ mov esi,eax ;Save low half root*2
+
+;We have 65-bit root*2 in ebx:esi (eax==esi) (MSB is implied one).
+;Max absolute error is +4.450E-20, -5.421E-20. This maximum error
+;corresponds to just less than +/- 1 in the last (65th) bit.
+;
+;We have to determine if this error is positive or negative so
+;we can tell if we rounded up or down (and set the status bit
+;accordingly). This is done by squaring the root and comparing
+;that result with the input.
+;
+;Squaring the sample root requires summing partial products:
+; lo*lo + lo*hi + hi*lo + hi*hi. lo*hi == hi*lo, so only one multiply
+;is needed there. The low half of lo*lo isn't relevant, we know it
+;is non-zero. Only the low few bits of hi*hi are needed, so we can use
+;an 8-bit multiply there. Since the MSB is implied, we need to add in
+;two 1*lo products (shifted up 64 bits). We only need bits 64 - 71 of
+;the 130-bit product (the action happens near bit 65). What we're
+;squaring is root*2, so the result is square*4. ecx already has arg*4.
+
+ mul eax ;Low partial product of square
+ mov edi,edx ;Only high half counts
+ mov eax,ebx
+ mul esi ;Middle partial product of square
+ add eax,eax ;There are two of these
+ adc edx,edx
+ add edi,eax
+ adc edx,0 ;edx:edi = lo*lo + lo*hi + hi*lo
+ add edx,esi ;lo*implied msb
+ add edx,esi ;lo*implied msb again
+ mov al,bl
+ mul al ;hi*hi - only low 8 bits are valid
+ add al,dl ;Bits 64 - 71 of product
+ or al,1 ;Account for sticky bits 0 - 63
+ sub cl,al ;Compare product with argument
+;Sign flag set if product is larger. In this case, subtract 1 from root.
+ add cl,cl ;Set CY if sign is set
+SubOneFromRoot:
+;Entered with CY set when the root must be reduced by one.
+ sbb esi,0 ;Reduce root if product was too big
+ sbb ebx,0
+ShiftRoot:
+;ebx:esi = root*2
+;Absolute error is in the range (0, -5.421E-20). This is equivalent to
+;less than +1, -0 in last bit. Thus LSB is correct rounding bit as
+;long as we set a sticky bit below it.
+;
+;Now divide root*2 by 2, preserving LSB as rounding bit and filling
+;eax with 1's as sticky bits.
+;
+;Avoid RCR because it takes 9 clocks on 386. Use SHRD (3 clocks) instead.
+ mov eax,-1
+ shrd eax,esi,1 ;Move round bit to MSB of eax
+ shrd esi,ebx,1
+ shrd ebx,eax,1 ;Shift 1 into MSB of ebx
+StoreRoot:
+;Hand the root to the rounding routine selected by [RoundMode], with the
+;current stack entry as the destination. The entry's exponent was already
+;halved in place at eFSQRT.
+ mov edi,EMSEG:[CURstk]
+ mov EMSEG:[Result],edi
+ mov ecx,EMSEG:[edi].ExpSgn
+;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+ jmp EMSEG:[RoundMode]
+
+HaveRoot:
+;esi = eax = edx = 0
+;ebx = divisor (32-bit trial root), edi = quotient from the final divide,
+;which left no remainder.
+ cmp edi,ebx ;Does quotient == divisor?
+ jz StoreRoot ;If so, we're done
+;Quotient != divisor, so answer is not exact. Since remainder is zero,
+;the division was exact. The only error in the result is e^2/2A, which
+;is always positive. We need the error to be only negative so that
+;the rounding routine can properly tell if it rounded up.
+ add ebx,edi ;quotient + divisor (always sets CY)
+ jmp SubOneFromRoot ;Reduce root to ensure negative error
diff --git a/private/ntos/dll/i386/emftran.asm b/private/ntos/dll/i386/emftran.asm
new file mode 100644
index 000000000..116c3a29f
--- /dev/null
+++ b/private/ntos/dll/i386/emftran.asm
@@ -0,0 +1,1206 @@
+ subttl emftran.asm - Transcendental instructions
+ page
+;*******************************************************************************
+; Copyright (c) Microsoft Corporation 1991
+; All Rights Reserved
+;
+;emftran.asm - Transcendental instructions
+; by Tim Paterson
+;
+;Purpose:
+; F2XM1, FPATAN, FYL2X, FYL2XP1 instructions
+;Inputs:
+; edi = [CURstk]
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+;********************* Polynomial Coefficients *********************
+
+;These polynomial coefficients were all taken from "Computer Approximations"
+;by J.F. Hart (reprinted 1978 w/corrections). All calculations and
+;conversions to hexadecimal were done with a character-string calculator
+;written in Visual Basic with precision set to 30 digits. Once the constants
+;were typed into this file, all transfers were done with cut-and-paste
+;operations to and from the calculator to help eliminate any typographical
+;errors.
+
+
+tAtanPoly label word
+
+;These constants are from Hart #5056: atan(x) = x * P(x^2) / Q(x^2),
+;accurate to 20.78 digits over interval [0, tan(pi/12)].
+;
+;Table layout: a dword holding the degree, followed by the coefficients.
+;Each coefficient is 12 bytes: a qword mantissa (the hex value rounded to
+;64 bits), a word holding the tag, and a word holding the hex exponent
+;minus one.
+
+ dd 4 ;P() is degree four
+
+; Hart constant
+;
+;+.16241 70218 72227 96595 08 E0
+;Hex value: 0.A650A5D5050DE43A2C25A8C00 HFFFE
+ dq 0A650A5D5050DE43AH
+ dw bTAG_VALID,0FFFEH-1
+
+;+.65293 76545 29069 63960 675 E1
+;Hex value: 0.D0F0A714A9604993AC4AC49A0 H3
+ dq 0D0F0A714A9604994H
+ dw bTAG_VALID,03H-1
+
+;+.39072 57269 45281 71734 92684 E2
+;Hex value: 0.9C4A507F16530AC3CDDEFA3DE H6
+ dq 09C4A507F16530AC4H
+ dw bTAG_VALID,06H-1
+
+;+.72468 55912 17450 17145 90416 9 E2
+;Hex value: 0.90EFE6FB30465042CF089D1310 H7
+ dq 090EFE6FB30465043H
+ dw bTAG_VALID,07H-1
+
+;+.41066 29181 34876 24224 77349 62 E2
+;Hex value: 0.A443E2004BB000B84A5154D44 H6
+ dq 0A443E2004BB000B8H
+ dw bTAG_VALID,06H-1
+
+ dd 4 ;Q() is degree four
+ ;Only four coefficients follow: as with the
+ ;other Q() tables in this file, the first
+ ;coefficient is 1.0 and is not listed.
+
+; Hart constant
+;
+;+.15023 99905 56978 85827 4928 E2
+;Hex value: 0.F0624CD575B782643AFB912D0 H4
+ dq 0F0624CD575B78264H
+ dw bTAG_VALID,04H-1
+
+;+.59578 42201 83554 49303 22456 E2
+;Hex value: 0.EE504DDC907DEAEB7D7473B82 H6
+ dq 0EE504DDC907DEAEBH
+ dw bTAG_VALID,06H-1
+
+;+.86157 32305 95742 25062 42472 E2
+;Hex value: 0.AC508CA5E78E504AB2032E864 H7
+ dq 0AC508CA5E78E504BH
+ dw bTAG_VALID,07H-1
+
+;+.41066 29181 34876 24224 84140 84 E2
+;Hex value: 0.A443E2004BB000B84F542813C H6
+ dq 0A443E2004BB000B8H
+ dw bTAG_VALID,06H-1
+
+
+;For each constant below, Hi and Lo are the upper and lower dwords of the
+;64-bit mantissa taken from the hex value, and the exponent symbol is the
+;hex exponent minus one. Where an extra low dword is defined (3*pi), Mid is
+;truncated rather than rounded and the extra dword carries the leading
+;bits of the remainder.
+;
+;tan(pi/12) = tan(15 deg.) = 2 - sqrt(3)
+;= 0.26794 91924 31122 70647 25536 58494 12763 ;From Hart appendix
+;Hex value: 0.8930A2F4F66AB189B517A51F2 HFFFF
+Tan15Hi equ 08930A2F4H
+Tan15Lo equ 0F66AB18AH
+Tan15exp equ 0FFFFH-1
+
+;1/tan(pi/6) = sqrt(3) = 1.73205 08075 68877 29352 74463 41505 87236 ;From Hart appendix
+;Hex value: 0.DDB3D742C265539D92BA16B8 H1
+Sqrt3Hi equ 0DDB3D742H
+Sqrt3Lo equ 0C265539EH
+Sqrt3exp equ 01H-1
+
+;pi = +3.14159265358979323846264338328
+;Hex value: 0.C90FDAA22168C234C4C6628B8 H2
+PiHi equ 0C90FDAA2H
+PiLo equ 02168C235H
+PiExp equ 02H-1
+
+;3*pi = +9.42477796076937971538793014984
+;Hex value: 0.96CBE3F9990E91A79394C9E890 H4
+XThreePiHi equ 096CBE3F9H
+XThreePiMid equ 0990E91A7H
+XThreePiLo equ 090000000H
+ThreePiExp equ 04H-1
+
+
+;This is a table of multiples of pi/6. It is used to adjust the
+;final result angle after atan(). Derived from Hart appendix
+;pi/180 = 0.01745 32925 19943 29576 92369 07684 88612
+;
+;When the reduced argument for atan() is very small, these correction
+;constants simply become the result. These constants have all been
+;rounded to nearest, but the user may have selected a different rounding
+;mode. The tag byte is not needed for these constants, so its space
+;is used to indicate if it was rounded. To determine if a constant
+;was rounded, 7FH is subtracted from this flag; CY set means it was
+;rounded up.
+
+;Flag values stored in the tag word of each tAtanPiFrac entry.
+;Subtracting 7FH from 040H borrows (CY set); from 0C0H it does not.
+RoundedUp equ 040H
+RoundedDown equ 0C0H
+
+;Each entry is 12 bytes: qword mantissa, word rounding flag, word
+;exponent (hex exponent minus one). The "none" entry is 12 zero bytes.
+tAtanPiFrac label dword
+;pi/2 = +1.57079632679489661923132169163
+;Hex value: 0.C90FDAA22168C234C4C6628B0 H1
+ dq 0C90FDAA22168C235H
+ dw RoundedUp,01H-1
+
+;2*pi/3 = +2.09439510239319549230842892218
+;Hex value: 0.860A91C16B9B2C232DD997078 H2
+ dq 0860A91C16B9B2C23H
+ dw RoundedDown,02H-1
+
+;none
+ dd 0,0,0
+
+;pi/6 = +0.523598775598298873077107230544E0
+;Hex value: 0.860A91C16B9B2C232DD99707A H0
+ dq 0860A91C16B9B2C23H
+ dw RoundedDown,00H-1
+
+;pi/2 = +1.57079632679489661923132169163
+;Hex value: 0.C90FDAA22168C234C4C6628B0 H1
+ dq 0C90FDAA22168C235H
+ dw RoundedUp,01H-1
+
+;pi/3 = +1.04719755119659774615421446109
+;Hex value: 0.860A91C16B9B2C232DD997078 H1
+ dq 0860A91C16B9B2C23H
+ dw RoundedDown,01H-1
+
+;pi = +3.14159265358979323846264338328
+;Hex value: 0.C90FDAA22168C234C4C6628B8 H2
+ dq 0C90FDAA22168C235H
+ dw RoundedUp,02H-1
+
+;5*pi/6 = +2.61799387799149436538553615272
+;Hex value: 0.A78D3631C681F72BF94FFCC96 H2
+ dq 0A78D3631C681F72CH
+ dw RoundedUp,02H-1
+
+;*********************
+
+tExpPoly label word
+
+;These constants are from Hart #1324: 2^x - 1 =
+; 2 * x * P(x^2) / ( Q(x^2) - x * P(x^2) )
+;accurate to 21.54 digits over interval [0, 0.5].
+;
+;Table layout: a dword holding the degree, followed by the coefficients.
+;Each coefficient is 12 bytes: a qword mantissa (the hex value rounded to
+;64 bits), a word holding the tag, and a word holding the hex exponent
+;minus one.
+
+ dd 2 ;P() is degree two
+
+; Hart constant
+;
+;+.60613 30790 74800 42574 84896 07 E2
+;Hex value: 0.F27406FCF405189818F68BB78 H6
+ dq 0F27406FCF4051898H
+ dw bTAG_VALID,06H-1
+
+;+.30285 61978 21164 59206 24269 927 E5
+;Hex value: 0.EC9B3D5414E1AD0852E432A18 HF
+ dq 0EC9B3D5414E1AD08H
+ dw bTAG_VALID,0FH-1
+
+;+.20802 83036 50596 27128 55955 242 E7
+;Hex value: 0.FDF0D84AC3A35FAF89A690CC4 H15
+ dq 0FDF0D84AC3A35FB0H
+ dw bTAG_VALID,015H-1
+
+ dd 3 ;Q() is degree three. First
+ ;coefficient is 1.0 and is not listed.
+; Hart constant
+;
+;+.17492 20769 51057 14558 99141 717 E4
+;Hex value: 0.DAA7108B387B776F212ECFBEC HB
+ dq 0DAA7108B387B776FH
+ dw bTAG_VALID,0BH-1
+
+;+.32770 95471 93281 18053 40200 719 E6
+;Hex value: 0.A003B1829B7BE85CC81BD5309 H13
+ dq 0A003B1829B7BE85DH
+ dw bTAG_VALID,013H-1
+
+;+.60024 28040 82517 36653 36946 908 E7
+;Hex value: 0.B72DF814E709837E066855BDD H17
+ dq 0B72DF814E709837EH
+ dw bTAG_VALID,017H-1
+
+
+;For each constant below, Hi and Lo are the upper and lower dwords of the
+;64-bit mantissa taken from the hex value, and the Exp symbol is the hex
+;exponent minus one. XSqrt2m1Lo carries the leading bits that follow the
+;64-bit mantissa of sqrt(2) - 1.
+;
+;sqrt(2) = 1.41421 35623 73095 04880 16887 24209 69808 ;From Hart appendix
+;Hex value: 0.B504F333F9DE6484597D89B30 H1
+Sqrt2Hi equ 0B504F333H
+Sqrt2Lo equ 0F9DE6484H
+Sqrt2Exp equ 01H-1
+
+;sqrt(2) - 1 = +0.4142135623730950488016887242E0
+;Hex value: 0.D413CCCFE779921165F626CC4 HFFFF
+Sqrt2m1Hi equ 0D413CCCFH
+Sqrt2m1Lo equ 0E7799211H
+XSqrt2m1Lo equ 060000000H
+Sqrt2m1Exp equ 0FFFFH-1
+
+;2 - sqrt(2) = +0.5857864376269049511983112758E0
+;Hex value: 0.95F619980C4336F74D04EC9A0 H0
+TwoMinusSqrt2Hi equ 095F61998H
+TwoMinusSqrt2Lo equ 00C4336F7H
+TwoMinusSqrt2Exp equ 00H-1
+
+;*********************
+
+tLogPoly label dword
+
+;These constants are derived from Hart #2355: log2(x) = z * P(z^2) / Q(z^2),
+; z = (x-1) / (x+1) accurate to 19.74 digits over interval
+;[1/sqrt(2), sqrt(2)]. The original Hart coefficients were for log10();
+;the P() coefficients have been scaled by log2(10) to compute log2().
+;
+;log2(10) = 3.32192 80948 87362 34787 03194 29489 39017 ;From Hart appendix
+;
+;Table layout: a dword holding the degree, followed by the coefficients.
+;Each coefficient is 12 bytes: a qword mantissa (the hex value rounded to
+;64 bits), a word holding the tag in its low byte and, for negative
+;coefficients, bSign in its high byte, and a word holding the hex exponent
+;minus one.
+
+ dd 3 ;P() is degree three
+
+; Original Hart constant Scaled value
+;
+;+.18287 59212 09199 9337 E0 +0.607500660543248917834110566373E0
+;Hex value: 0.9B8529CD54E72022A12BAEC53 H0
+ dq 09B8529CD54E72023H
+ dw bTAG_VALID,00H-1
+
+;-.41855 96001 31266 20633 E1 -13.9042489506087332809657007634
+;Hex value: 0.DE77CDBF64E8C53F0DCD458D0 H4
+ dq 0DE77CDBF64E8C53FH
+ dw bSign shl 8 + bTAG_VALID,04H-1
+
+;+.13444 58152 27503 62236 E2 +44.6619330844279438866067340334
+;Hex value: 0.B2A5D1C95708A0C9FE50F6F97 H6
+ dq 0B2A5D1C95708A0CAH
+ dw bTAG_VALID,06H-1
+
+;-.10429 11213 72526 69497 44122 E2 -34.6447606134704282123622236943
+;Hex value: 0.8A943C20526AE439A98B30F6A H6
+ dq 08A943C20526AE43AH
+ dw bSign shl 8 + bTAG_VALID,06H-1
+
+
+ dd 3 ;Q() is degree three. First
+ ;coefficient is 1.0 and is not listed.
+; Hart constant
+;
+;-.89111 09060 90270 85654 E1
+;Hex value: 0.8E93E7183AA998D74F45CDFF0 H4
+ dq 08E93E7183AA998D7H
+ dw bSign shl 8 + bTAG_VALID,04H-1
+
+;+.19480 96618 79809 36524 155 E2
+;Hex value: 0.9BD904CCFEE118D4BEF319716 H5
+ dq 09BD904CCFEE118D5H
+ dw bTAG_VALID,05H-1
+
+;-.12006 95907 02006 34243 4218 E2
+;Hex value: 0.C01C811D2EC1B5806304B1858 H4
+ dq 0C01C811D2EC1B580H
+ dw bSign shl 8 + bTAG_VALID,04H-1
+
+;Log2(e) = 1.44269 50408 88963 40735 99246 81001 89213 ;From Hart appendix
+;Hex value: 0.B8AA3B295C17F0BBBE87FED04 H1
+;Hi and Lo are the upper and lower dwords of the 64-bit mantissa rounded
+;from the hex value; the exponent symbol is the hex exponent minus one.
+Log2OfEHi equ 0B8AA3B29H
+Log2OfELo equ 05C17F0BCH
+Log2OfEexp equ 01H-1
+
+
+;********************* Generic polynomial evaluation *********************
+;
+;EvalPoly, EvalPolyAdd, EvalPolySetup, Eval2Poly
+;
+;Inputs:
+; ebx:esi,ecx = floating point number, internal format
+; edi = pointer to polynomial degree and coefficients
+;Outputs:
+; result in ebx:esi,ecx
+; edi incremented to start of last coefficient in list
+;
+;EvalPoly is the basic polynomial evaluator, using Horner's rule. The
+;polynomial pointer in edi points to a list: the first dword in the list
+;is the degree of the polynomial (n); it is followed by the n+1
+;coefficients in internal (12-byte) format. The argument for EvalPoly
+;must be stored in the static FloatTemp in addition to being in
+;registers.
+;
+;EvalPolyAdd is an alternate entry point into the middle of EvalPoly.
+;It is used when the first coefficient is 1.0, so it skips the first
+;multiplication. It requires that the degree of the polynomial be
+;already loaded into ebp.
+;
+;EvalPolySetup stores a copy of the argument in the static ArgTemp,
+;and stores the square of the argument in the static FloatTemp.
+;Then it falls into EvalPoly to evaluate the polynomial on the square.
+;
+;Eval2Poly evaluates two polynomials on its argument. The first
+;polynomial is x * P(x^2), and its result is left at [[CURstk]].
+;The second polynomial is Q(x^2), and its result is left in registers.
+;The most significant coefficient of Q() is 1.
+;
+;Polynomial evaluation uses a slight variation on the standard add
+;and multiply routines. PolyAddDouble and PolyMulDouble both check
+;to see if the argument in registers (the current accumulation) is
+;zero. The argument pointed to by edi is a coefficient and is never
+;zero.
+;
+;In addition, the [RoundMode] and [ZeroVector] vectors are "trapped",
+;i.e., redirected to special handlers for polynomial evaluation.
+;[RoundMode] ordinarily points to the routine that handles
+;the current rounding mode and precision control; however, during
+;polynomial evaluation, we always want full precision and round
+;nearest. The normal rounding routines also store their result
+;at [[Result]], but we want the result left in registers.
+;[ZeroVector] exists solely so polynomial evaluation can trap
+;when AddDouble results in zero. The normal response is to store
+;a zero at [[Result]], but we need the zero left in registers.
+;PolyRound and PolyZero handle these traps.
+
+
+EvalPolySetup:
+;Entry: ebx:esi,ecx = argument x, edi = pointer to polynomial list.
+;Redirect rounding and zero-result handling to the polynomial versions,
+;which leave their results in registers.
+ mov EMSEG:[ZeroVector],offset PolyZero
+ mov EMSEG:[RoundMode],offset PolyRound
+;Keep a copy of x in ArgTemp
+ mov EMSEG:[ArgTemp].lManLo,esi
+ mov EMSEG:[ArgTemp].lManHi,ebx
+ mov EMSEG:[ArgTemp].ExpSgn,ecx
+ push edi ;Polynomial pointer is needed again below
+;Duplicate x as the second operand: edx:edi = mantissa, eax = exp/sign
+ mov eax,ecx
+ mov edx,ebx
+ mov edi,esi
+ call MulDoubleReg ;ebx:esi,ecx = x * x
+ pop edi ;Recover polynomial pointer
+;Keep x^2 in FloatTemp, where the evaluation loop expects the argument
+ mov EMSEG:[FloatTemp].lManLo,esi
+ mov EMSEG:[FloatTemp].lManHi,ebx
+ mov EMSEG:[FloatTemp].ExpSgn,ecx
+;Fall into EvalPoly with x^2 in registers and in FloatTemp
+EvalPoly:
+;Entry: ebx:esi,ecx = argument (a copy must also be in FloatTemp)
+; edi = pointer to degree dword followed by the coefficients
+;Start Horner's rule by multiplying the argument by the first coefficient,
+;then join the common add/multiply loop at EvalPolyAdd.
+ push edi ;edi is about to carry the low mantissa
+ mov edx,cs:[edi+4].lManHi
+ mov eax,cs:[edi+4].ExpSgn
+ mov edi,cs:[edi+4].lManLo ;Load last: overwrites the base pointer
+ call MulDoubleReg ;arg * first coefficient
+ pop edi
+ mov ebp,cs:[edi] ;ebp = degree = number of adds remaining
+ add edi,4+Reg87Len ;Step over degree and first coefficient
+ jmp EvalPolyAdd
+
+PolyLoop:
+;One Horner step: multiply the running value in ebx:esi,ecx by the
+;argument held in FloatTemp, then fall into EvalPolyAdd to add the next
+;coefficient.
+;
+;On entry the stack holds the pointer to the coefficient just added
+;(pushed by EvalPolyAdd) and ebp holds the number of adds remaining.
+ push ebp ;Save loop count
+ifdef NT386
+ mov edi,YFloatTemp
+else
+ mov edi,offset edata:FloatTemp
+endif
+ call PolyMulDouble
+ pop ebp
+ pop edi ;Pointer to coefficient just used
+;Advance the full 32-bit pointer. A 16-bit "add di" would drop the carry
+;out of bit 15 and wrap the pointer if the coefficient table happened to
+;straddle a 64K boundary.
+ add edi,Reg87Len
+EvalPolyAdd:
+;Entry: ebx:esi,ecx = running value, edi = pointer to coefficient to add,
+; ebp = number of coefficients still to add (including this one)
+;Exit: result in ebx:esi,ecx; edi = pointer to the last coefficient used
+ push edi
+ mov eax,cs:[edi].ExpSgn
+ mov edx,cs:[edi].lManHi
+ mov edi,cs:[edi].lManLo ;Load last: overwrites the base pointer
+ cmp cl,bTAG_ZERO ;Adding to zero?
+ jz AddToZero
+ call AddDoubleReg ;ebp preserved
+ContPolyLoop:
+ dec ebp
+ jnz PolyLoop
+ pop edi ;Leave edi at last coefficient
+ ret
+
+AddToZero:
+;The running value is zero, so the sum is simply the coefficient that
+;was loaded into edx:edi,eax. Copy it into the result registers.
+ mov esi,edi
+ mov ebx,edx
+ mov ecx,eax
+ jmp ContPolyLoop
+
+
+Eval2Poly:
+;Entry: ebx:esi,ecx = x, edi = pointer to the P() list, which is
+; immediately followed by the Q() list (leading 1.0 omitted)
+;Exit: x * P(x^2) stored at [[CURstk]], Q(x^2) in ebx:esi,ecx
+ call EvalPolySetup ;P(x^2); x kept in ArgTemp, x^2 in FloatTemp
+ push edi ;edi = last coefficient of P()
+ifdef NT386
+ mov edi,YArgTemp
+else
+ mov edi,offset edata:ArgTemp
+endif
+ call PolyMulDouble ;x * P(x^2)
+ pop edi
+;Park the first result on the emulated stack top
+ mov edx,EMSEG:[CURstk]
+ mov EMSEG:[edx].lManLo,esi
+ mov EMSEG:[edx].lManHi,ebx
+ mov EMSEG:[edx].ExpSgn,ecx
+;Reload x^2 as the starting value for Q()
+ mov esi,EMSEG:[FloatTemp].lManLo
+ mov ebx,EMSEG:[FloatTemp].lManHi
+ mov ecx,EMSEG:[FloatTemp].ExpSgn
+;The degree of Q() sits just past the last coefficient of P()
+ mov ebp,cs:[edi+Reg87Len] ;Get polynomial degree
+ add edi,4+Reg87Len ;Point to first listed coefficient of Q()
+ jmp EvalPolyAdd ;Leading coefficient is 1: start with an add
+
+
+PolyRound:
+;This routine handles all rounding during polynomial evaluation.
+;It performs 64-bit round nearest, with result left in registers.
+;
+;Inputs:
+; mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+;Outputs:
+; same, plus tag in cl.
+;
+;To perform "round even" when the round bit is set and the sticky bits
+;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB
+;is set, that will always force a round up (to even) if the round bit is
+;set. If the LSB is zero, then the sticky bits remain zero and we always
+;round down. This rounding rule is implemented by adding RoundBit-1
+;(7F..FFH), setting CY if round up.
+;
+;This routine needs to be reversible in case we're at the last step
+;in the polynomial and final rounding uses a different rounding mode.
+;We do this by copying the LSB of esi into al. While the rounding is
+;reversible, you can't tell if the answer was exact.
+
+ mov edx,esi
+ and dl,1 ;Look at LSB
+ or al,dl ;Set LSB as sticky bit
+ add eax,(1 shl 31)-1 ;Sum LSB & sticky bits--CY if round up
+ adc esi,0 ;Propagate the round-up through the mantissa
+ adc ebx,0
+ jc PolyBumpExponent ;Overflowed, increment exponent
+ or esi,esi ;Any bits in low half?
+;The setnz below produces the tag directly, so it relies on these values
+.erre bTAG_VALID eq 1
+.erre bTAG_SNGL eq 0
+ setnz cl ;if low half==0 then cl=0 else cl=1
+ ret
+
+PolyBumpExponent:
+;Carry out of ebx means the mantissa wrapped to zero; the result is the
+;next power of two.
+ add ecx,1 shl 16 ;Mantissa overflowed, bump exponent
+ or ebx,1 shl 31 ;Set MSB
+ mov cl,bTAG_SNGL ;Low 32 bits are zero
+PolyZero:
+;Enter here when result is zero
+;(installed in [ZeroVector] during polynomial evaluation; it returns
+;without storing anything, leaving the registers as they are)
+ ret
+
+;*******************************************************************************
+
+;FPATAN instruction
+
+;Actual instruction entry point is in emarith.asm
+
+;Dispatch table for FPATAN, one entry per combination of operand classes
+;(single, double, zero, special), source class varying slowest, plus a
+;final entry for the case where both operands are infinity.
+tFpatanDisp label dword ;Source (ST(0)) Dest (*[di] = ST(1))
+ dd AtanDouble ;single single
+ dd AtanDouble ;single double
+ dd AtanZeroDest ;single zero
+ dd AtanSpclDest ;single special
+ dd AtanDouble ;double single
+ dd AtanDouble ;double double
+ dd AtanZeroDest ;double zero
+ dd AtanSpclDest ;double special
+ dd AtanZeroSource ;zero single
+ dd AtanZeroSource ;zero double
+ dd AtanZeroDest ;zero zero
+ dd AtanSpclDest ;zero special
+ dd AtanSpclSource ;special single
+ dd AtanSpclSource ;special double
+ dd AtanSpclSource ;special zero
+ dd TwoOpBothSpcl ;special special
+ dd AtanTwoInf ;Two infinities
+
+;Compute atan( st(1)/st(0) ). Neither st(0) nor st(1) is zero or
+;infinity at this point.
+;
+;Argument reduction starts by dividing the smaller by the larger,
+;ensuring that the result x is <= 1. The absolute value of the quotient
+;is used and the quadrant is fixed up later. If x = st(0)/st(1), then
+;the final atan result is subtracted from pi/2 (and normalized for the
+;correct range of -pi to +pi).
+;
+;The range of x is further reduced using the formulas:
+; t = (x - k) / (1 + kx)
+; atan(x) = atan(k) + atan(t)
+;
+;Given that x <= 1, if we choose k = tan(pi/6) = 1/sqrt(3), then we
+;are assured that t <= tan(pi/12) = 2 - sqrt(3), and
+;for x >= tan(pi/12) = 2 - sqrt(3), t >= -tan(pi/12).
+;Thus we can always reduce the argument to abs(t) <= tan(pi/12).
+;
+;Since k = 1/sqrt(3), it is convenient to multiply the numerator
+;and denominator of t by 1/k, which gives
+;t = (x/k - 1) / (1/k + x) = ( x*sqrt(3) - 1 ) / ( sqrt(3) + x ).
+;This is the form found in Cody and Waite and in previous versions
+;of the emulator. It requires one each add, subtract, multiply, and
+;divide.
+;
+;Hart has derived a simpler version of this formula:
+;t = 1/k - (1/k^2 + 1) / (1/k + x) = sqrt(3) - 4 / ( sqrt(3) + x ).
+;Note that this computation requires one each add, subtract, and
+;divide, but no multiply.
+
+;st(0) mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
+;[edi] points to st(1), where result is returned
+
+AtanDouble:
+ mov EMSEG:[Result],edi
+ mov EMSEG:[RoundMode],offset PolyRound
+ mov EMSEG:[ZeroVector],offset PolyZero
+;Build the flag dword that stays on the stack until AtanSetSign:
+;ah = sign byte of ST(1), al = sign bit of ST(0) with the other bits
+;cleared so they can record the reductions performed.
+ mov ah,EMSEG:[edi].bSgn ;Sign of result
+ mov al,ch ;Affects quadrant of result
+ and al,bSign ;Zero other bits, used as flags
+ push eax ;Save flag
+;First figure out which is larger
+;(magnitudes are compared: exponents first, then mantissa high and low)
+ push offset AtanQuo ;Return address for DivDouble
+ shld edx,ecx,16 ;Get exponent to dx
+ cmp dx,EMSEG:[edi].wExp ;Compare exponents
+ jl DivrDoubleSetFlag ;ST(0) is smaller, make it dividend
+ jg DivDouble ; ...is bigger, make it divisor
+;Exponents are equal, compare mantissas
+ cmp ebx,EMSEG:[edi].lManHi
+ jb DivrDoubleSetFlag ;ST(0) is smaller, make it dividend
+ ja DivDouble ; ...is bigger, make it divisor
+ cmp esi,EMSEG:[edi].lManLo
+ jbe DivrDoubleSetFlag ;ST(0) is smaller, make it dividend
+ jmp DivDouble
+
+TinyAtan:
+;Come here if the angle was reduced to zero, or the divide resulted in
+;unmasked underflow so that the quotient exponent was biased.
+;Note that an angle of zero means reduction was performed, and the
+;result will be corrected to a non-zero value.
+;
+;The flag dword pushed by AtanDouble is on top of the stack here.
+ mov dl,[esp] ;Get flag byte
+ or dl,dl ;No correction needed?
+ jz AtanSetSign ;Just return result of divide
+ and EMSEG:[CURerr],not Underflow ;Result will not be tiny after all
+;Angle in registers is too small to affect correction amount. Just
+;load up correction angle instead of adding it in.
+;The index arithmetic matches AtanCorrection: flag bits 5-7 become a
+;multiple of 4 in edx, and edx+2*edx scales that to 12 bytes per entry.
+ add dl,40H ;Change flags for correction lookup
+ shr dl,5-2 ;Now in bits 2,3,4
+ and edx,7 shl 2
+ mov ebx,[edx+2*edx+tAtanPiFrac].lManHi
+ mov esi,[edx+2*edx+tAtanPiFrac].lManLo
+ mov ecx,[edx+2*edx+tAtanPiFrac].ExpSgn
+;Moves the low byte of the table entry's ExpSgn into the top byte of eax
+ shrd eax,ecx,8 ;Copy rounding flag to high eax
+ jmp AtanSetSign
+
+AtanQuo:
+;Return here after divide. Underflow flag is set only for "big underflow",
+;meaning the (15-bit) exponent couldn't even be kept in 16 bits. This can
+;only happen dividing a denormal by one of the largest numbers.
+;
+;Rounded mantissa in ebx:esi:eax, exp/sign in high ecx
+;The flag dword pushed by AtanDouble is on top of the stack from here
+;until it is popped at AtanSetSign.
+ test EMSEG:[CURerr],Underflow;Did we underflow?
+ jnz TinyAtan
+;Now compare quotient in ebx:esi,ecx with tan(pi/12) = 2 - sqrt(3)
+ xor cx,cx ;Use absolute value
+ cmp ecx,Tan15exp shl 16
+ jg AtnNeedReduce
+ jl AtnReduced
+ cmp ebx,Tan15Hi
+ ja AtnNeedReduce
+ jb AtnReduced
+ cmp esi,Tan15Lo
+ jbe AtnReduced
+AtnNeedReduce:
+ or byte ptr [esp],20H ;Note reduction in flags on stack
+;Compute t = sqrt(3) - 4 / ( sqrt(3) + x ).
+ mov eax,Sqrt3exp shl 16
+ mov edx,Sqrt3Hi
+ mov edi,Sqrt3Lo
+ call AddDoubleReg ;x + sqrt(3)
+;Move the sum into the divisor registers and load 4.0 as the dividend
+ mov edi,esi
+ mov esi,ebx ;Mantissa in esi:edi
+ mov ebx,ecx ;ExpSgn to ebx
+ mov ecx,(2+TexpBias) shl 16
+ mov edx,1 shl 31
+ xor eax,eax ;edx:eax,ecx = 4.0
+;dividend mantissa in edx:eax, exponent in high ecx, sign in ch bit 7
+;divisor mantissa in esi:edi, exponent in high ebx, sign in bh bit 7
+ call DivDoubleReg ;4 / ( x + sqrt(3) )
+ not ch ;Flip sign
+ mov eax,Sqrt3exp shl 16
+ mov edx,Sqrt3Hi
+ mov edi,Sqrt3Lo
+ call AddDoubleReg ;sqrt(3) - 4 / ( x + sqrt(3) )
+;Result in ebx:esi,ecx could be very small (or zero) if arg was near tan(pi/6).
+ cmp cl,bTAG_ZERO
+ jz TinyAtan
+AtnReduced:
+;If angle is small, skip the polynomial. atan(x) = x when x - x^3/3 = x
+;[or 1 - x^2/3 = 1], which happens when x < 2^-32. This prevents underflow
+;in computing x^2.
+TinyAtanArg equ -32
+ cmp ecx,TinyAtanArg shl 16
+ jl AtanCorrection
+ mov edi,offset tAtanPoly
+ call Eval2Poly
+ mov edi,EMSEG:[CURstk] ;Point to first result
+ call DivDouble ;x * P(x^2) / Q(x^2)
+AtanCorrection:
+;Rounded mantissa in ebx:esi:eax, exp/sign in high ecx
+;
+;Correct sign and add fraction of pi to account for various angle reductions:
+;
+; flag bit indicates correction
+;----------------------------------------------------
+; 5 arg > tan(pi/12) add pi/6
+; 6 st(1) > st(0) sub from pi/2
+; 7 st(0) < 0 sub from pi
+;
+;This results in the following correction for the result R:
+;
+;bit 7 6 5 correction
+;---------------------------
+; 0 0 0 none
+; 0 0 1 pi/6 + R
+; 0 1 0 pi/2 - R
+; 0 1 1 pi/3 - R
+; 1 0 0 pi - R
+; 1 0 1 5*pi/6 - R
+; 1 1 0 pi/2 + R
+; 1 1 1 2*pi/3 + R
+
+ mov dl,[esp] ;Get flag byte
+ or dl,dl ;No correction needed?
+ jz AtanSetSign
+ add dl,40H ;Set bit 7 for all -R cases
+
+;This changes the meaning of the flag bits to the following:
+;
+;bit 7 6 5 correction
+;---------------------------
+; 0 0 0 pi/2 + R
+; 0 0 1 2*pi/3 + R
+; 0 1 0 none
+; 0 1 1 pi/6 + R
+; 1 0 0 pi/2 - R
+; 1 0 1 pi/3 - R
+; 1 1 0 pi - R
+; 1 1 1 5*pi/6 - R
+
+ xor ch,dl ;Flip sign bit in cases 4 - 7
+;Convert flag bits 5-7 into a table offset: a multiple of 4 in edx,
+;scaled by 3 via edx+2*edx to give 12 bytes per entry.
+ shr dl,5-2 ;Now in bits 2,3,4
+ and edx,7 shl 2
+ mov eax,[edx+2*edx+tAtanPiFrac].ExpSgn
+ mov edi,[edx+2*edx+tAtanPiFrac].lManLo
+ mov edx,[edx+2*edx+tAtanPiFrac].lManHi ;Load last: overwrites index
+ call AddDoubleReg ;Add in correction angle
+AtanSetSign:
+ pop edx ;Get flags again
+ mov ch,dh ;Set sign to original ST(1)
+;Rounded mantissa in ebx:esi:eax, exp/sign in ecx
+ jmp TransUnround
+
+
+;***
+AtanSpclDest:
+;ST(1) (the dividend, at [edi]) is tagged special. Anything other than
+;infinity goes to the common handler with the tag in al.
+ mov al,EMSEG:[edi].bTag ;Tag of dividend
+ cmp al,bTAG_INF ;Infinite dividend?
+ jnz SpclDest ;In emarith.asm
+AtanZeroSource:
+;Infinite dividend or zero divisor: the answer is pi/2 carrying the
+;dividend's sign.
+ mov ecx,(PiExp-1) shl 16 + bTAG_VALID ;Exponent for pi/2
+PiMant:
+;Shared tail for results that are pi scaled by a power of two.
+;Entry: ecx = exponent and tag of the result, [edi] = dividend.
+ mov eax,XPiLo
+ mov esi,XPiMid
+ mov ebx,XPiHi
+ mov ch,EMSEG:[edi].bSgn ;Result takes the dividend's sign
+;A jump through [TransRound] is only valid if the number is known not to
+;underflow. Unmasked underflow requires [RoundMode] be set.
+ jmp EMSEG:[TransRound]
+
+;***
+AtanSpclSource:
+;ST(0) (the divisor, in registers) is tagged special. Anything other
+;than infinity goes to the common handler.
+ cmp cl,bTAG_INF ;Infinite divisor?
+ jnz SpclSource ;in emarith.asm
+AtanZeroDest:
+;Infinite divisor or zero dividend. A positive divisor gives zero, a
+;negative divisor gives pi; either way the result keeps the sign of the
+;dividend.
+ test ch,ch ;Sign of divisor
+ mov ecx,PiExp shl 16 + bTAG_VALID ;Exponent for pi (flags untouched)
+ js PiMant ;Negative divisor: store pi
+;Positive divisor: overwrite the dividend with zero, leaving its sign
+ mov EMSEG:[edi].bTAG,bTAG_ZERO
+ mov EMSEG:[edi].wExp,0
+ mov EMSEG:[edi].lManLo,0
+ mov EMSEG:[edi].lManHi,0
+ ret
+
+;***
+AtanTwoInf:
+;Both operands are infinity. The result is pi/4 when the divisor is
+;+infinity and 3*pi/4 when it is -infinity, with the sign of the
+;dividend infinity.
+ test ch,ch ;Sign of divisor
+ mov ecx,(PiExp-2) shl 16 + bTAG_VALID ;Exponent for pi/4 (flags untouched)
+ jns PiMant ;Positive divisor: store pi/4
+;Negative divisor: build 3*pi/4 from the 3*pi constant
+ mov ecx,(ThreePiExp-2) shl 16 + bTAG_VALID ;Exponent for 3*pi/4
+ mov ch,EMSEG:[edi].bSgn ;Result takes the dividend's sign
+ mov eax,XThreePiLo
+ mov esi,XThreePiMid
+ mov ebx,XThreePiHi
+;A jump through [TransRound] is only valid if the number is known not to
+;underflow. Unmasked underflow requires [RoundMode] be set.
+ jmp EMSEG:[TransRound]
+
+;*******************************************************************************
+
+ExpSpcl:
+;F2XM1 argument is tagged special.
+;Entry: ecx = ExpSgn of the argument, edi = pointer to it.
+ cmp cl,bTAG_DEN
+ jz ExpDenorm
+ mov al,cl ;Common handler takes the tag in al
+ cmp al,bTAG_INF
+ jnz SpclDestNotDen ;Check for Empty or NAN
+;Infinity: +infinity is returned unchanged, -infinity gives -1.
+ test ch,ch ;Check sign
+ jns ExpRet ;Just return the +infinity
+ mov EMSEG:[edi].ExpSgn,bSign shl 8 + bTAG_SNGL ;-1.0 (exponent is zero)
+ mov EMSEG:[edi].lManHi,1 shl 31
+ mov EMSEG:[edi].lManLo,0
+ ret
+
+ExpDenorm:
+;Denormal argument: record the exception, and carry on with the normal
+;computation only when the exception is masked.
+ mov EMSEG:[CURerr],Denormal
+ test EMSEG:[CWmask],Denormal ;Is denormal exception masked?
+ jnz ExpCont ;Yes, continue
+ExpRet:
+ ret
+
+EM_ENTRY eF2XM1
+eF2XM1:
+;edi = [CURstk]
+;Computes 2^x - 1 for the value at [edi]. Zero is returned unchanged;
+;special tags are handled at ExpSpcl.
+ mov ecx,EMSEG:[edi].ExpSgn
+ cmp cl,bTAG_ZERO
+ jz ExpRet ;Return same zero
+ ja ExpSpcl
+ExpCont:
+
+;The input range specified for the function is (-1, +1). The polynomial
+;used for this function is valid only over the range [0, +0.5], so range
+;reduction is needed. Range reduction is based on the identity:
+;
+; 2^(a+b) = 2^a * 2^b
+;
+;1.0 or 0.5 can be added/subtracted from the argument to bring it into
+;range. We calculate 2^x - 1 with a polynomial, and then adjust the
+;result according to the amount added or subtracted, as shown in the table:
+;
+;Arg range Adj Polynomial result Required result, 2^x - 1
+;
+; (-1, -0.5] +1 P = 2^(x+1) - 1 (P - 1)/2
+;
+; (-0.5, 0) +0.5 P = 2^(x+0.5) - 1 P * sqrt(2)/2 + (sqrt(2)/2 - 1)
+;
+; (0, 0.5) 0 P = 2^x - 1 P
+;
+; [0.5, 1) -0.5 P = 2^(x-0.5) - 1 P * sqrt(2) + (sqrt(2)-1)
+;
+;Since the valid input range does not include +1.0 or -1.0, and zero is
+;handled separately, the precision exception will always be set.
+
+ mov EMSEG:[Result],edi
+ mov EMSEG:[RoundMode],offset PolyRound
+ mov EMSEG:[ZeroVector],offset PolyZero
+ push offset TransUnround ;Always exit through here
+ mov ebx,EMSEG:[edi].lManHi
+ mov esi,EMSEG:[edi].lManLo
+;Check for small argument, so that x^2 does not underflow. Note that
+;e^x = 1+x for small x, where small x means x + x^2/2 = x [or 1 + x/2 = 1],
+;which happens when x < 2^-64, so 2^x - 1 = x * ln(2) for small x.
+TinyExpArg equ -64
+ cmp ecx,TinyExpArg shl 16
+ jl TinyExp
+;NOTE(review): the next test is a signed compare of the whole ExpSgn
+;dword against the encoding of -0.5, so the branch is taken for every
+;argument whose exponent is below -1 (either sign) and for positive
+;arguments whose exponent is -1, not only for positive x < 0.5. Confirm
+;this matches the range over which tExpPoly is accurate, and whether the
+;0.5 reductions below are reachable.
+ cmp ecx,-1 shl 16 + bSign shl 8 ;See if positive, < 0.5
+ jl ExpReduced
+;Argument was not in range (0, 0.5), so we need some kind of reduction
+ or ecx,ecx ;Exp >= 0 means arg >= 1.0 --> too big
+;CONSIDER: this returns through TransUnround which restores the rounding
+;vectors, but it also randomly rounds the result because eax is not set.
+ jge ExpRet ;Give up if arg out of range
+;We're going to need to add/subtract 1.0 or 0.5, so load up the constant
+;ebp receives the address of the matching post-polynomial fix-up routine.
+ mov edx,1 shl 31
+ xor edi,edi
+ mov eax,-1 shl 16 + bSign shl 8 ;edx:edi,eax = -0.5
+ mov ebp,offset ExpReducedMinusHalf
+ or ch,ch ;If it's positive, must be [0.5, 1)
+ jns ExpReduction
+ xor ah,ah ;edx:edi,eax = +0.5
+ mov ebp,offset ExpReducedPlusHalf
+ cmp ecx,eax ;See if abs(arg) >= 0.5
+ jl ExpReduction ;No, adjust by .5
+ xor eax,eax ;edx:edi,eax = 1.0
+ mov ebp,offset ExpReducedPlusOne
+ExpReduction:
+ call AddDoubleReg ;Argument now in range [0, 0.5]
+ cmp cl,bTAG_ZERO ;Did reduction result in zero?
+ jz ExpHalf ;If so, must have been exactly 0.5
+;The fix-up routine is pushed above TransUnround, so the final divide
+;returns to it first and it in turn returns to TransUnround.
+ push ebp ;Address of reduction cleanup
+ExpReduced:
+ mov edi,offset tExpPoly
+ call Eval2Poly
+;2^x - 1 is approximated with 2 * x*P(x^2) / ( Q(x^2) - x*P(x^2) )
+;Q(x^2) is in registers, x*P(x^2) is at [[CURstk]]
+ mov edi,EMSEG:[CURstk]
+ mov dx,bSign shl 8 ;Subtract memory operand
+;Note that Q() and P() have no roots over the input range
+;(they will never be zero).
+ call AddDouble ;Q(x^2) - x*P(x^2)
+;Halving the divisor doubles the quotient, supplying the factor of 2
+ sub ecx,1 shl 16 ;Divide by two
+ mov edi,EMSEG:[CURstk]
+ jmp DivDouble ;2 * x*P(x^2) / ( Q(x^2) - x*P(x^2) )
+;Returns to correct argument reduction correction routine or TransUnround
+
+TinyExp:
+;Argument too small for the polynomial (and not reduced): the result is
+;simply x * ln(2). Load ln(2) as the second operand.
+ mov eax,cFLDLN2exp shl 16
+ mov edi,cFLDLN2lo
+ mov edx,cFLDLN2hi
+;This could underflow (but not big time)
+ jmp MulDoubleReg ;Returns to TransUnround
+
+ExpHalf:
+;The reduced argument came out as zero, i.e. the input was exactly 0.5.
+;Return the constant sqrt(2) - 1, with eax holding the extension bits
+;in the form TransUnround expects.
+ mov ecx,Sqrt2m1Exp shl 16
+ mov eax,XSqrt2m1Lo + 1 shl 31 - 1
+ mov esi,Sqrt2m1Lo
+ mov ebx,Sqrt2m1Hi
+ ret ;Exit through TransUnround
+
+ExpReducedPlusOne:
+;1.0 was added to the argument, so the required result is
+;(P - 1)/2 = P/2 - 0.5.
+ sub ecx,1 shl 16 ;P/2: drop the exponent by one
+ mov eax,-1 shl 16 + bSign shl 8 ;edx:edi,eax = -0.5
+ mov edx,1 shl 31
+ xor edi,edi
+ jmp AddDoubleReg ;P/2 + (-0.5)
+
+ExpReducedPlusHalf:
+;0.5 was added to the argument before the polynomial.
+;Correct result is P * sqrt(2)/2 - (1 - sqrt(2)/2)
+;
+;Entry: ebx:esi,ecx = P, the polynomial result
+;Exit: via AddDoubleReg, which returns to the caller's return address
+ mov edx,Sqrt2Hi
+ mov edi,Sqrt2Lo
+;SHL binds tighter than binary minus, so the exponent adjustment must be
+;parenthesized; without the parentheses the value is only right when
+;Sqrt2exp happens to be zero.
+ mov eax,(Sqrt2exp-1) shl 16 ;sqrt(2)/2
+ call MulDoubleReg
+ mov edx,TwoMinusSqrt2Hi
+ mov edi,TwoMinusSqrt2Lo
+ mov eax,(TwoMinusSqrt2Exp-1) shl 16 + bSign shl 8 ;(2-sqrt(2))/2
+ jmp AddDoubleReg
+
+ExpReducedMinusHalf:
+;0.5 was subtracted from the argument, so the required result is
+;P * sqrt(2) + (sqrt(2) - 1).
+ mov eax,Sqrt2exp shl 16
+ mov edi,Sqrt2Lo
+ mov edx,Sqrt2Hi
+ call MulDoubleReg ;P * sqrt(2)
+ mov eax,Sqrt2m1Exp shl 16
+ mov edi,Sqrt2m1Lo
+ mov edx,Sqrt2m1Hi
+ jmp AddDoubleReg ;... + (sqrt(2) - 1)
+
+;*******************************************************************************
+
+;Dispatch table for log(x+1)
+;
+;One operand has been loaded into ecx:ebx:esi ("source"), the other is
+;pointed to by edi ("dest").
+;
+;Tag of source is shifted. Tag values are as follows:
+
+.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero
+.erre TAG_VALID eq 1
+.erre TAG_ZERO eq 2
+.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty
+
+;Any special case routines not found in this file are in emarith.asm
+
+;Entries are ordered by source class (slowest) then dest class, using the
+;tag values asserted above, with one extra entry for two infinities.
+tFyl2xp1Disp label dword ;Source (ST(0)) Dest (*[di] = ST(1))
+ dd LogP1Double ;single single
+ dd LogP1Double ;single double
+ dd LogP1ZeroDest ;single zero
+ dd LogP1SpclDest ;single special
+ dd LogP1Double ;double single
+ dd LogP1Double ;double double
+ dd LogP1ZeroDest ;double zero
+ dd LogP1SpclDest ;double special
+ dd XorSourceSign ;zero single
+ dd XorSourceSign ;zero double
+ dd XorDestSign ;zero zero
+ dd LogP1SpclDest ;zero special
+ dd LogSpclSource ;special single
+ dd LogSpclSource ;special double
+ dd LogSpclSource ;special zero
+ dd TwoOpBothSpcl ;special special
+ dd LogTwoInf ;Two infinities
+
+
+LogP1Double:
+;st(0) mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
+;[edi] points to st(1), where result is returned
+;
+;This instruction is defined only for x+1 in the range [1/sqrt(2), sqrt(2)]
+;The approximation used (valid over exactly this range) is
+; log2(x) = z * P(z^2) / Q(z^2), z = (x-1) / (x+1), which is
+; log2(x+1) = r * P(r^2) / Q(r^2), r = x / (x+2)
+;
+;We're not too picky about this range check because the function is simply
+;"undefined" if out of range--EXCEPT, we're supposed to check for -1 and
+;signal Invalid if less, -infinity if equal.
+ or ecx,ecx ;abs(x) >= 1.0?
+ jge LogP1OutOfRange ;Valid range is approx [-0.3, +0.4]
+ mov EMSEG:[Result],edi
+ mov EMSEG:[RoundMode],offset PolyRound
+ mov EMSEG:[ZeroVector],offset PolyZero
+ mov eax,1 shl 16 ;Exponent of 1 for adding 2.0
+;BasicLog's ret will continue at TotalLog, which multiplies by st(1)
+ push offset TotalLog ;Return address for BasicLog
+; jmp BasicLog ;Fall into BasicLog
+;.erre BasicLog eq $
+
+;BasicLog is used by eFYL2X and eFYL2XP1.
+;eax has exponent and sign to add 1.0 or 2.0 to argument
+;ebx:esi,ecx has argument, non-zero, tag not set
+;ST has argument to take log2 of, minus 1. (This is the actual argument
+;of eFYL2XP1, or argument minus 1 of eFYL2X.)
+;
+;Returns the log2 value in ebx:esi,ecx via the final DivDouble or
+;MulDoubleReg, which return to BasicLog's caller.
+
+BasicLog:
+;Mantissa of 1.0 combined with the caller's exponent in eax
+ mov edx,1 shl 31
+ xor edi,edi ;edx:edi,eax = +1.0 or +2.0
+ call AddDoubleReg
+ mov edi,EMSEG:[CURstk] ;Point to x-1
+ call DivDouble ;Compute (x-1) / (x+1)
+;Result in registers is z = (x-1)/(x+1). For tiny z, ln(x) = 2*z, so
+; log2(x) = 2 * log2(e) * z. Tiny z is such that z + z^3/3 = z.
+ cmp ecx,-32 shl 16 ;Smallest exponent to bother with
+ jl LogSkipPoly
+ mov edi,offset tLogPoly
+ call Eval2Poly
+ mov edi,EMSEG:[CURstk] ;Point to first result, r * P(r^2)
+ jmp DivDouble ;Compute r * P(r^2) / Q(r^2)
+
+LogSkipPoly:
+;z is too small for the polynomial to matter: the result is
+;2 * log2(e) * z. The factor of two is folded into the exponent.
+ mov eax,(Log2OfEexp+1) shl 16
+ mov edi,Log2OfELo
+ mov edx,Log2OfEHi
+ jmp MulDoubleReg
+
+LogP1OutOfRange:
+;The argument is outside the defined range, so any result is acceptable,
+;with two exceptions: an argument below -1 must signal Invalid Operation
+;and an argument of exactly -1 must signal Divide By Zero. In every
+;other case the second operand is left untouched as the result (an
+;effective log of one).
+;
+;Entry: exponent in ecx >= 0 ( abs(x) >= 1 )
+ test ch,ch ;Positive argument?
+ jns LogP1Ret ;Then nothing to do
+;Negative: -1.0 has a zero exponent and only the mantissa MSB set.
+;Collect every other bit into ebx to see whether any is set.
+ sub ebx,1 shl 31 ;Kill MSB
+ and ecx,0FFFFH shl 16 ;Look at exponent only: 0 for -1.0
+ or ebx,ecx
+ or ebx,esi
+ jz DivideByMinusZero ;Exactly -1.0
+ jmp ReturnIndefinite ;Must be < -1.0
+
+LogP1Ret:
+ ret
+
+;***
+LogP1ZeroDest:
+;ST(1) is zero. A positive x leaves the zero alone; a negative x with
+;abs(x) < 1 flips the zero's sign; x <= -1 is invalid.
+ test ch,ch ;Negative argument?
+ jns LogP1Ret ;No: leave the zero as it is
+ test ecx,ecx ;abs(x) >= 1.0?
+ jge ReturnIndefinite ;Have 0 * log( <=0 )
+ jmp XorDestSign ;Flip sign of zero
+
+;***
+LogP1SpclDest:
+;ST(1) is tagged special. Anything other than infinity goes to the
+;common handler with the tag in al.
+ mov al,EMSEG:[edi].bTag ;Tag of ST(1)
+ cmp al,bTAG_INF ;Infinity?
+ jnz SpclDest ;In emarith.asm
+;Computing log(x+1) * infinity:
+; x > 0 keep the infinity as it is
+; -1 <= x < 0 flip the infinity's sign
+; x < -1 or x == 0 invalid operation
+ cmp cl,bTAG_ZERO
+ jz ReturnIndefinite
+ test ch,ch ;Positive argument?
+ jns LogP1Ret
+;Negative: classify by exponent, then by mantissa when exponent is zero
+ test ecx,0FFFFH shl 16 ;Is exponent zero?
+ jl XorDestSign ;abs(x) < 1
+ jg ReturnIndefinite ;abs(x) >= 2
+ sub ebx,1 shl 31 ;Kill MSB
+ or ebx,esi
+ jz XorDestSign ;Exactly -1.0
+ jmp ReturnIndefinite ;Must be < -1.0
+
+;***
+LogSpclSource:
+;ST(0) is tagged special. Only infinity is handled here: -infinity is
+;invalid, +infinity is treated as a multiplication by infinity.
+ cmp cl,bTAG_INF ;Infinite argument?
+ jnz SpclSource ;in emarith.asm
+ test ch,ch ;Sign of the infinity
+ jns MulByInf
+ jmp ReturnIndefinite
+
+;***
+LogTwoInf:
+;Both operands are infinity: a negative ST(0) is invalid; otherwise go
+;to XorDestSign.
+ test ch,ch ;Sign of ST(0)
+ jns XorDestSign
+ jmp ReturnIndefinite
+
+;*******************************************************************************
+
+;Dispatch table for log(x)
+;
+;One operand has been loaded into ecx:ebx:esi ("source"), the other is
+;pointed to by edi ("dest").
+;
+;Tag of source is shifted. Tag values are as follows:
+
+.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero
+.erre TAG_VALID eq 1
+.erre TAG_ZERO eq 2
+.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty
+
+;Any special case routines not found in this file are in emarith.asm
+
+;Entries are ordered by source class (slowest) then dest class, using the
+;tag values asserted above, with one extra entry for two infinities.
+tFyl2xDisp label dword ;Source (ST(0)) Dest (*[di] = ST(1))
+ dd LogDouble ;single single
+ dd LogDouble ;single double
+ dd LogZeroDest ;single zero
+ dd LogSpclDest ;single special
+ dd LogDouble ;double single
+ dd LogDouble ;double double
+ dd LogZeroDest ;double zero
+ dd LogSpclDest ;double special
+ dd DivideByMinusZero ;zero single
+ dd DivideByMinusZero ;zero double
+ dd ReturnIndefinite ;zero zero
+ dd LogSpclDest ;zero special
+ dd LogSpclSource ;special single
+ dd LogSpclSource ;special double
+ dd LogSpclSource ;special zero
+ dd TwoOpBothSpcl ;special special
+ dd LogTwoInf ;Two infinities
+
+
+LogDouble:
+;st(0) mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
+;[edi] points to st(1), where result is returned
+;
+;Must reduce the argument to the range [1/sqrt(2), sqrt(2)]
+;The exponent is split off as the integer part of log2; the log of the
+;remaining mantissa is computed by BasicLog and the two are summed
+;before the multiply by st(1) at TotalLog.
+ or ch,ch ;Is it positive?
+ js ReturnIndefinite ;Can't take log of negative number
+ mov EMSEG:[Result],edi
+ mov EMSEG:[RoundMode],offset PolyRound
+ mov EMSEG:[ZeroVector],offset PolyZero
+ shld eax,ecx,16 ;Save exponent in ax as int part of log2
+ xor ecx,ecx ;Zero exponent: 1 <= x < 2
+ cmp ebx,Sqrt2Hi ;x > sqrt(2)?
+ jb LogReduced
+ ja LogReduceOne
+ cmp esi,Sqrt2Lo
+ jb LogReduced
+LogReduceOne:
+;Mantissa is at or above sqrt(2): halve it and count one more in the
+;integer part.
+ sub ecx,1 shl 16 ;1/sqrt(2) < x < 1
+ inc eax
+LogReduced:
+ push eax ;Save integer part of log2
+ mov ebp,ecx ;Save reduced exponent (tag is wrong!)
+ mov edx,1 shl 31
+ mov eax,bSign shl 8 ;Exponent of 0, negative
+ xor edi,edi ;edx:edi,eax = -1.0
+ call AddDoubleReg
+ cmp cl,bTAG_ZERO ;Was it exact power of two?
+ jz LogDone ;Skip log if power of two
+;Save (x - 1), reload x with reduced exponent
+ mov edi,EMSEG:[CURstk] ;Point to original x again
+ xchg EMSEG:[edi].lManHi,ebx
+ xchg EMSEG:[edi].lManLo,esi
+ mov EMSEG:[edi].ExpSgn,ecx
+ mov ecx,ebp ;Get reduced exponent
+ xor eax,eax ;Exponent of 0, positive
+ call BasicLog
+LogDone:
+ pop eax ;Get integer part back
+ cwde
+ or eax,eax ;Is it zero?
+ jz TotalLog
+;Next 3 instructions take abs() of integer
+;(edx is left holding the sign extension: zero or all ones)
+ cdq ;Extend sign through edx
+ xor eax,edx ;Complement...
+ sub eax,edx ; and increment if negative
+ bsr dx,ax ;Look for MSB to normalize integer
+;Bit number in dx ranges from 0 to 15
+;The shift count is taken modulo 32, so "not" of the bit number shifts
+;by 31 minus the bit number and lands the MSB in bit 31.
+ mov cl,dl
+ not cl ;Convert to shift count
+ shl eax,cl ;Normalize
+.erre TexpBias eq 0
+;The bit number is the exponent; the sign extension left by cdq in the
+;high half of edx becomes the sign after the rotate.
+ rol edx,16 ;Move exponent high, sign low
+ or ebx,ebx ;Was log zero?
+ jz ExactPower
+ xchg edx,eax ;Exp/sign to eax, mantissa to edx
+ xor edi,edi ;Extend with zero
+ call AddDoubleReg
+TotalLog:
+;Registers could be zero if input was exactly 1.0
+ cmp cl,bTAG_ZERO
+ jz ZeroLog
+TotalLogNotZero:
+;Multiply the log by st(1) and finish through TransUnround
+ mov edi,EMSEG:[Result] ;Point to second arg
+ push offset TransUnround
+ jmp MulDouble
+
+ExactPower:
+;The argument was an exact power of two, so its log is an exact,
+;non-zero integer. Entry: eax = normalized mantissa of that integer,
+;edx = its exponent and sign.
+ xor esi,esi ;Low mantissa is zero
+ mov ecx,edx ;Exponent to ecx
+ mov ebx,eax ;Mantissa to ebx
+;log2(arg) is now normalized in ebx:esi,ecx
+;
+;Because the log is exact, TransUnround must not be used: it is designed
+;to ensure the result is never exact. Point [RoundMode] at the
+;[TransRound] routine for the final multiply instead.
+ mov edi,EMSEG:[Result] ;Point to second arg
+ mov eax,EMSEG:[TransRound]
+ mov EMSEG:[RoundMode],eax
+ push offset RestoreRound ;Return addr. for MulDouble in emtrig.asm
+ jmp MulDouble
+
+ZeroLog:
+;The log came out as zero. Undo the polynomial traps on [ZeroVector] and
+;[RoundMode], then store the result through the normal path.
+ mov EMSEG:[ZeroVector],offset SaveResult
+ mov eax,EMSEG:[SavedRoundMode]
+ mov EMSEG:[RoundMode],eax
+ jmp SaveResult
+
+;***
+LogZeroDest:
+;ST(1) is zero. A negative x is invalid. Otherwise the zero keeps its
+;sign when log2(x) is non-negative (exponent >= 0) and has it flipped
+;when log2(x) is negative.
+ test ch,ch ;Negative argument?
+ js ReturnIndefinite ;Can't take log of negative numbers
+ test ecx,ecx ;Is exponent >= 0?
+ jge LogRet ;Yes: keep present zero sign
+FlipDestSign:
+ not EMSEG:[edi].bSgn
+ ret
+
+;***
+LogSpclDest:
+;ST(1) is tagged special. Anything other than infinity goes to the
+;common handler with the tag in al.
+ mov al,EMSEG:[edi].bTag ;Tag of ST(1)
+ cmp al,bTAG_INF ;Infinity?
+ jnz SpclDest ;In emarith.asm
+;Computing log(x) * infinity:
+; x > 1 keep the infinity as it is
+; 0 <= x < 1 flip the infinity's sign
+; x < 0 or x == 1 invalid operation
+ cmp cl,bTAG_ZERO
+ jz FlipDestSign
+ test ch,ch ;Positive argument?
+ js ReturnIndefinite
+;Positive: classify by exponent, then by mantissa when exponent is zero
+ test ecx,0FFFFH shl 16 ;Is exponent zero?
+ jl FlipDestSign ;x < 1
+ jg LogRet ;x >= 2, just return infinity
+ sub ebx,1 shl 31 ;Kill MSB
+ or ebx,esi
+ jz ReturnIndefinite ;x == 1.0
+LogRet:
+ ret
diff --git a/private/ntos/dll/i386/emload.asm b/private/ntos/dll/i386/emload.asm
new file mode 100644
index 000000000..c2e68c561
--- /dev/null
+++ b/private/ntos/dll/i386/emload.asm
@@ -0,0 +1,416 @@
+ subttl emload.asm - FLD and FILD instructions
+ page
+;*******************************************************************************
+;emload.asm - FLD and FILD instructions
+;
+; Microsoft Confidential
+;
+; Copyright (c) Microsoft Corporation 1991
+; All Rights Reserved
+;
+;Purpose:
+; FLD and FILD instructions
+;Inputs:
+; edi = [CURstk]
+; dseg:esi = pointer to memory operand
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+ PrevStackWrap edi,LdStk ;Tied to PrevStackElem below
+
+;*******
+EM_ENTRY eFLDreg
+eFLDreg:
+;******* Copy st(i) into the previous stack slot and make that slot the new top
+; edi = [CURstk]
+; esi = pointer to st(i) from instruction field
+
+ PrevStackElem edi,LdStk ;Point to receiving location
+ cmp EMSEG:[edi].bTag,bTAG_EMPTY ;Is it empty?
+ jnz FldErr ;Receiving slot in use: take the error path
+ mov ecx,EMSEG:[esi].ExpSgn ;Source exponent/sign, tag in cl
+ cmp cl,bTAG_EMPTY ;Is the source register empty?
+ jz FldErr
+ mov ebx,EMSEG:[esi].lManHi
+ mov esi,EMSEG:[esi].lManLo ;esi no longer points at the source after this
+ mov EMSEG:[CURstk],edi ;Commit the new top of stack
+ mov EMSEG:[edi].lManLo,esi
+ mov EMSEG:[edi].lManHi,ebx
+ mov EMSEG:[edi].ExpSgn,ecx
+ ret
+
+
+;This is common code that stores a value into the stack after being loaded
+;into registers by the appropriate routine (the loaders push its address or jump here).
+
+ PrevStackWrap edi,Load ;Tied to PrevStackElem below
+
+FldCont:
+;mantissa in ebx:esi, exp/sign in ecx (tag in cl)
+;edi = [CURstk]
+ PrevStackElem edi,Load ;Point to receiving location
+ cmp EMSEG:[edi].bTag,bTAG_EMPTY ;Is it empty?
+ jnz FldErr
+ cmp cl,bTAG_NAN ;Returning a NAN?
+ jz FldNAN
+SaveStack:
+ mov EMSEG:[CURstk],edi ;edi becomes the new top of stack
+ mov EMSEG:[edi].lManLo,esi
+ mov EMSEG:[edi].lManHi,ebx
+ mov EMSEG:[edi].ExpSgn,ecx
+ ret
+
+FldErr:
+ or EMSEG:[SWcc],C1 ;Signal overflow
+ mov EMSEG:[CURerr],StackFlag;Kills possible denormal exception
+Unsupported:
+ call ReturnIndefinite ;in emarith.asm
+ jz FldExit ;Unmasked, do nothing
+ mov EMSEG:[CURstk],edi ;Update top of stack
+FldExit:
+ ret
+
+FldNAN:
+;Is it a signaling NAN? Bit 30 of the high mantissa is the quiet bit.
+ test ebx,1 shl 30 ;Check for SNAN
+ jnz SaveStack ;If QNAN, just use it as result
+ or EMSEG:[CURerr],Invalid ;Flag the error
+ or ebx,1 shl 30 ;Make it into a QNAN
+ test EMSEG:[CWmask],Invalid ;Is it masked?
+ jnz SaveStack ;If so, update with masked response
+ ret ;Unmasked: return without storing anything
+
+
+;****************
+;Load Single Real
+;****************
+
+EM_ENTRY eFLD32
+eFLD32:
+ push offset FldCont ;Return address
+ ;Fall into Load32Real
+Load32Real:
+;dseg:esi points to IEEE 32-bit real number
+;On exit:
+; mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl
+;preserves edi.
+
+ mov EMSEG:[PrevDataOff],esi ;Save operand pointer
+ mov ecx,dseg:[esi] ;Get number
+ mov ebx,ecx ;Save copy of mantissa
+ shl ebx,8 ;Normalize
+ shr ecx,7 ;Bring exponent down
+ and ecx,0FFH shl 16 ;Look at just exponent
+ mov ch,dseg:[esi+3] ;Get sign again (mov leaves the flags from the AND intact)
+ jz short ZeroOrDenorm32 ;Exponent is zero
+ xor esi,esi ;Zero out the low bits
+ or ebx,1 shl 31 ;Set implied bit
+ cmp ecx,SexpMax shl 16
+ jge NANorInf ;Max exp., must be NAN or Infinity
+ add ecx,(TexpBias-SexpBias) shl 16 ;Change to extended format bias
+ mov cl,bTAG_SNGL ;Low mantissa half is zero, so tag as single
+ ret
+
+ZeroOrDenorm32:
+;Exponent is zero. Number is either zero or denormalized
+ xor esi,esi ;Zero out the low bits
+ and ebx,not (1 shl 31) ;Keep just mantissa
+ jnz Norm32 ;Non-zero mantissa: denormal
+ mov cl,bTAG_ZERO
+ ret
+
+Norm32:
+ add ecx,(TexpBias-SexpBias+1-31) shl 16 ;Fix up bias
+ jmp FixDenorm ;Shared normalization code in the double-real loader
+
+
+NANorInf:
+;Shared by single and double real. Mantissa in ebx:esi, sign in ch bit 7.
+ and ecx,bSign shl 8 ;Save only sign in ch
+ or ecx,TexpMax shl 16 + bTAG_NAN ;Max exp.
+ cmp ebx,1 shl 31 ;Only 1 bit set means infinity
+ jnz @F ;Other high bits set: stays tagged NAN
+ or esi,esi
+ jnz @F ;Low bits set: stays tagged NAN
+ mov cl,bTAG_INF
+@@:
+ ret
+
+;****************
+;Load Double Real
+;****************
+
+EM_ENTRY eFLD64
+eFLD64:
+ push offset FldCont ;Return address
+ ;Fall into Load64Real
+Load64Real:
+;dseg:esi points to IEEE 64-bit real number
+;On exit:
+; mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl
+;preserves edi.
+
+ mov EMSEG:[PrevDataOff],esi ;Save operand pointer
+ mov ecx,dseg:[esi+4] ;Get sign, exp., and high mantissa
+ mov ebx,ecx ;Save copy of mantissa
+ shr ecx,4 ;Bring exponent down
+ and ecx,7FFH shl 16 ;Look at just exponent
+ mov ch,dseg:[esi+7] ;Get sign again
+ mov esi,dseg:[esi] ;Get low 32 bits of op (flags still from the AND)
+ jz short ZeroOrDenorm64 ;Exponent is zero
+ shld ebx,esi,31-20
+ shl esi,31-20 ;Normalize
+ or ebx,1 shl 31 ;Set implied bit
+ cmp ecx,DexpMax shl 16
+ jge NANorInf ;Max exp., must be NAN or Infinity
+ add ecx,(TexpBias-DexpBias) shl 16 ;Change to extended format bias
+SetNormTag:
+ or esi,esi ;Any bits in low half?
+.erre bTAG_VALID eq 1
+.erre bTAG_SNGL eq 0
+ setnz cl ;if low half==0 then cl=0 else cl=1
+ ret
+
+ZeroOrDenorm64:
+;Exponent is zero. Number is either zero or denormalized
+ and ebx,0FFFFFH ;Keep just mantissa
+ jnz ShortNorm64 ;Are top 20 bits zero?
+ or esi,esi ;Are low 32 bits zero too?
+ jnz LongNorm64
+ mov cl,bTAG_ZERO
+ ret
+
+LongNorm64:
+ xchg ebx,esi ;Shift up 32 bits
+ sub ecx,32 shl 16 ;Correct exponent
+ShortNorm64:
+ add ecx,(TexpBias-DexpBias+12-31) shl 16 ;Fix up bias
+FixDenorm:
+ or EMSEG:[CURerr],Denormal ;Set Denormal Exception
+ bsr edx,ebx ;Scan for MSB
+;Bit number in edx ranges from 0 to 31
+ mov cl,dl
+ not cl ;Convert bit number to shift count
+ shld ebx,esi,cl
+ shl esi,cl
+ shl edx,16 ;Move exp. adjustment to high end
+ add ecx,edx ;Adjust exponent
+ jmp SetNormTag ;cl (used as the shift count) is rewritten there
+
+
+;******************
+;Load Short Integer
+;******************
+
+EM_ENTRY eFILD16
+eFILD16:
+ push offset FldCont ;Return address
+ ;Fall into Load16Int
+Load16Int:
+;dseg:esi points to 16-bit integer
+;On exit:
+; mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl
+;preserves edi.
+
+ mov EMSEG:[PrevDataOff],esi ;Save operand pointer
+ mov ax,dseg:[esi] ;Only ax is loaded; upper half of eax is stale
+NormInt16:
+ xor esi,esi ;Extend with zero
+ cwd ;extend sign through dx
+ xor ax,dx
+ sub ax,dx ;Take ABS() of integer
+ bsr cx,ax ;Find MSB
+ jz ZeroInt
+;Bit number in cx ranges from 0 to 15
+ not ecx ;Convert to shift count
+ shl eax,cl ;Normalize (count is 16..31, so stale upper bits of eax fall out)
+ not ecx ;Back to the bit number
+.erre TexpBias eq 0
+ shl ecx,16 ;Move exponent to high half
+ mov ch,dh ;Set sign
+ mov ebx,eax ;Mantissa to ebx
+ mov cl,bTAG_SNGL
+ ret
+
+ZeroInt:
+ xor ebx,ebx ;Callers in this file have already zeroed esi
+ mov ecx,ebx
+ mov cl,bTAG_ZERO
+ ret
+
+
+;******************
+;Load Long Integer
+;******************
+
+EM_ENTRY eFILD32
+eFILD32:
+ push offset FldCont ;Return address
+ ;Fall into Load32Int
+Load32Int:
+;dseg:esi points to 32-bit integer
+;On exit:
+; mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl
+;preserves edi.
+
+ mov EMSEG:[PrevDataOff],esi ;Save operand pointer
+ mov eax,dseg:[esi]
+ xor esi,esi ;Extend with zero
+ or eax,eax ;Is it zero?
+ jz ZeroInt
+ cdq ;extend sign through edx
+ xor eax,edx
+ sub eax,edx ;Take ABS() of integer
+ mov ebx,eax ;Mantissa to ebx
+;BSR uses 3 clocks/bit, so speed it up by checking the top half
+;This saves 36 clocks on 386 (42 on 486sx)
+;Cost is 13 clocks on 386 if high word isn't zero (5 on 486sx)
+.erre TexpBias eq 0
+ xor eax,eax ;Initialize exponent
+ cmp ebx,0FFFFH ;Upper bits zero?
+ ja @F
+ shl ebx,16 ;Pre-shift by 16 and account for it in the exponent
+ sub eax,16
+@@:
+ bsr ecx,ebx ;Find MSB
+ add eax,ecx ;Compute exponent
+ not cl ;Convert bit number to shift count
+ shl ebx,cl ;Normalize
+ shrd ecx,eax,16 ;Move exponent to high half of ecx
+ mov ch,dh ;Set sign
+ mov cl,bTAG_SNGL
+ ret
+
+
+;*****************
+;Load Quad Integer
+;*****************
+
+EM_ENTRY eFILD64
+eFILD64:
+ mov EMSEG:[PrevDataOff],esi ;Save operand pointer
+ mov ebx,dseg:[esi+4] ;Get high 32 bits
+ mov eax,ebx ;Make copy of sign
+ mov esi,dseg:[esi] ;Get low 32 bits
+ mov ecx,ebx
+ or ecx,esi ;Is it zero?
+ jz ZeroQuad
+NormQuadInt:
+;Entry point from eFBLD
+;eax bit 31 = sign
+;ebx:esi = integer
+;edi = [CURstk]
+.erre TexpBias eq 0
+ mov ax,32 ;Initialize exponent
+ or ebx,ebx ;Check sign
+ jz LongNormInt
+ jns FindBit
+ not ebx ;Negate ebx:esi (two's complement)
+ neg esi ;CY set if non-zero
+ sbb ebx,-1 ;Add one if esi == 0
+ jnz FindBit ;Check for high bits zero
+LongNormInt:
+ xchg ebx,esi ;Normalize 32 bits
+ xor ax,ax ;Reduce exponent by 32
+FindBit:
+;BSR uses 3 clocks/bit, so speed it up by checking the top half
+;This saves 35 clocks on 386 (41 on 486sx)
+;Cost is 11 clocks on 386 if high word isn't zero (4 on 486sx)
+ cmp ebx,0FFFFH ;Upper bits zero?
+ ja @F
+ shld ebx,esi,16
+ shl esi,16
+ sub eax,16 ;May borrow from the high half; the add below (ecx >= 16 here) undoes it
+@@:
+ bsr ecx,ebx ;Find MSB
+ add eax,ecx ;Compute exponent
+ not cl ;Convert bit number to shift count
+ shld ebx,esi,cl ;Normalize
+ shl esi,cl
+ mov ecx,eax ;Move sign and exponent to ecx
+ rol ecx,16 ;Swap sign and exponent halves
+ or esi,esi ;Any bits in low half?
+.erre bTAG_VALID eq 1
+.erre bTAG_SNGL eq 0
+ setnz cl ;if low half==0 then cl=0 else cl=1
+ jmp FldCont
+
+ZeroQuad:
+ mov cl,bTAG_ZERO ;ecx, ebx and esi are all zero when we get here
+ jmp FldCont
+
+
+;****************
+;Load Temp Real
+;****************
+
+ PrevStackWrap edi,Ld80 ;Tied to PrevStackElem below
+
+EM_ENTRY eFLD80
+eFLD80:
+;This is not considered an "arithmetic" operation (like all the others are),
+;so SNANs do NOT cause an exception. However, unsupported formats do.
+ mov EMSEG:[PrevDataOff],esi ;Save operand pointer
+ PrevStackElem edi,Ld80 ;Point to receiving location
+ cmp EMSEG:[edi].bTag,bTAG_EMPTY ;Is it empty?
+ jnz FldErr
+LoadTempReal:
+;dseg:esi = 10-byte temp real, edi = slot to fill. Also called by eFRSTOR.
+ mov ebx,dseg:[esi+4] ;Get high half of mantissa
+ mov cx,dseg:[esi+8] ;Get exponent and sign
+ mov esi,dseg:[esi] ;Get low half of mantissa
+ mov eax,ecx ;Keep a copy so the sign can be restored from ah
+ and ch,7FH ;Mask off sign bit
+ shl ecx,16 ;Move exponent to high end
+ mov ch,ah ;Restore sign (mov leaves the flags from the SHL intact)
+ jz ZeroOrDenorm80
+;Check for unsupported format: unnormals (MSB not set)
+ or ebx,ebx
+ jns Unsupported
+ sub ecx,(IexpBias-TexpBias) shl 16 ;Correct the bias
+ cmp ecx,TexpMax shl 16
+ jge NANorInf80
+SetupTag:
+ or esi,esi ;Any bits in low half?
+.erre bTAG_VALID eq 1
+.erre bTAG_SNGL eq 0
+ setnz cl ;if low half==0 then cl=0 else cl=1
+ jmp SaveStack
+
+NANorInf80:
+ mov cl,bTAG_NAN
+ cmp ebx,1 shl 31 ;Only 1 bit set means infinity
+ jnz SaveStack
+ or esi,esi
+ jnz SaveStack
+ mov cl,bTAG_INF
+ jmp SaveStack
+
+ZeroOrDenorm80:
+;Exponent is zero. Number is either zero or denormalized
+ or ebx,ebx
+ jnz ShortNorm80 ;Are top 32 bits zero?
+ or esi,esi ;Are low 32 bits zero too?
+ jnz LongNorm80
+ mov cl,bTAG_ZERO
+ jmp SaveStack
+
+;This code accepts and works correctly with pseudo-denormals (MSB already set)
+LongNorm80:
+ xchg ebx,esi ;Shift up 32 bits
+ sub ecx,32 shl 16 ;Correct exponent
+ShortNorm80:
+ add ecx,(TexpBias-IexpBias+1-31) shl 16 ;Fix up bias
+ bsr edx,ebx ;Scan for MSB
+;Bit number in edx ranges from 0 to 31
+ mov cl,dl
+ not cl ;Convert bit number to shift count
+ shld ebx,esi,cl
+ shl esi,cl
+ shl edx,16 ;Move exp. adjustment to high end
+ add ecx,edx ;Adjust exponent
+ jmp SetupTag ;Spelled exactly as the label is defined
diff --git a/private/ntos/dll/i386/emlsbcd.asm b/private/ntos/dll/i386/emlsbcd.asm
new file mode 100644
index 000000000..f07d35b1e
--- /dev/null
+++ b/private/ntos/dll/i386/emlsbcd.asm
@@ -0,0 +1,279 @@
+ subttl emlsbcd.asm - FBSTP and FBLD instructions
+ page
+;*******************************************************************************
+;emlsbcd.asm - FBSTP and FBLD instructions
+;
+; Microsoft Confidential
+;
+; Copyright (c) Microsoft Corporation 1991
+; All Rights Reserved
+;
+;Purpose:
+; FBSTP and FBLD instructions.
+;
+; These routines convert between 64-bit integer and 18-digit packed BCD
+; format. They work by splitting the number being converted in half
+; and converting the two halves separately. This works well because
+; 9 decimal digits fit nicely within 30 binary bits, so conversion of
+; each half is strictly a 32-bit operation.
+;
+;Inputs:
+; edi = [CURstk]
+; dseg:esi = pointer to memory operand
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+;****** FBLD: load 18-digit packed BCD at dseg:esi; edi = [CURstk]
+eFBLD:
+ mov EMSEG:[PrevDataOff],esi ;Save operand pointer (as the other memory loads do)
+ mov eax,dseg:[esi+5] ;Get high 8 digits
+ or eax,eax ;Anything there?
+ jz HighDigitsZero
+ mov ecx,8
+ call ReadDigits ;Convert first 8 digits to binary
+ mov eax,dseg:[esi+1] ;Get next 8 digits
+ xor edi,edi
+ shld edi,eax,4 ;Shift ninth digit into edi
+ imul ebx,10
+ add edi,ebx ;Accumulate ninth digit
+SecondNineDigits:
+ xor ebx,ebx ;In case eax==0
+ shl eax,4 ;Keep digits left justified
+ jz LastTwoDigits
+ mov ecx,7
+ call ReadDigits ;Convert next 7 digits to binary
+LastTwoDigits:
+ mov al,dseg:[esi] ;Get last two digits
+ shl eax,24 ;Left justify
+ mov ecx,2
+ call InDigitLoop ;Accumulate last two digits
+;edi = binary value of high 9 digits
+;ebx = binary value of low 9 digits
+ mov eax,1000000000 ;One billion: shift nine digits left
+ mul edi ;Left shift 9 digits. 9 cl. if edi==0
+ add ebx,eax ;Add in low digits
+ adc edx,0
+BcdReadyToNorm:
+;edx:ebx = integer converted to binary
+ mov eax,dseg:[esi+6] ;Get sign to high bit of eax
+ mov esi,ebx
+ mov ebx,edx
+ mov edi,EMSEG:[CURstk] ;edi was used as scratch above; reload it
+;mantissa in ebx:esi, sign in high bit of eax
+;edi = [CURstk]
+ jmp NormQuadInt ;in emload.asm
+
+HighDigitsZero:
+ mov eax,dseg:[esi+1] ;Get next 8 digits
+ or eax,eax ;Anything there?
+ jz CheckLastTwo
+ xor edi,edi
+ shld edi,eax,4 ;Shift ninth digit into edi
+ jmp SecondNineDigits
+
+CheckLastTwo:
+;eax is zero here; edi still holds [CURstk]
+ mov bl,dseg:[esi] ;Get last two digits
+ or bl,bl
+ jz ZeroBCD
+ mov al,bl
+ shr al,4 ;Bring down upper digit
+ imul eax,10
+ and ebx,0FH ;Keep lowest digit only
+ add ebx,eax
+ xor edx,edx
+ jmp BcdReadyToNorm
+
+ZeroBCD:
+ mov ecx,bTAG_ZERO ;Exponent is zero
+ mov ch,dseg:[esi+9] ;Get sign byte to ch
+ xor ebx,ebx
+ mov esi,ebx
+;mantissa in ebx:esi, exp/sign in ecx
+;edi = [CURstk]
+ jmp FldCont ;in emload.asm
+
+
+;*** ReadDigits - convert left-justified packed BCD digits to binary
+;
+;Inputs:
+; eax = packed BCD digits, left justified, non-zero
+; ecx = no. of digits, 7 or 8
+;Outputs:
+; ebx = number
+
+SkipZeroDigits:
+ sub ecx,3 ;Three leading zero digits dropped
+ shl eax,12
+ReadDigits:
+;We start by scanning off leading zeros. This costs 16 cl./nybble in
+;the ScanZero loop. To reduce this cost for many leading zeros, we
+;check for three leading zeros at a time. Adding this test saves
+;26 cl. for 3 leading zeros, 57 cl. for 6 leading zeros, at a cost
+;of only 5 cl. if less than 3 zeros. We choose 3 at a time so we
+;can repeat it once (there are never more than 7 zeros).
+ test eax,0FFF00000H ;Check first 3 nybbles for zero
+ jz SkipZeroDigits
+ xor ebx,ebx
+ScanZero:
+;Note that bsr is 3 cl/bit, or 12 cl/nybble. Add in the overhead and
+;this loop of 16 cl/nybble is cheaper for the 1 - 3 digits it does.
+ dec ecx
+ shld ebx,eax,4 ;Shift digit into ebx
+ rol eax,4 ;Left justify **Doesn't affect ZF!**
+ jz ScanZero ;Skip to next digit if zero
+ jecxz ReadDigitsX ;That was the last digit
+InDigitLoop:
+;eax = digits to convert, left justified
+;ebx = result accumulation
+;ecx = number of digits to convert
+ xor edx,edx
+ shld edx,eax,4 ;Shift digit into edx
+ shl eax,4 ;Keep digits left justified
+ imul ebx,10 ;Only 10 clocks on 386!
+ add ebx,edx ;Accumulate number
+ dec ecx
+ jnz InDigitLoop
+ReadDigitsX:
+ ret
+
+;*******************************************************************************
+
+ChkInvalidBCD:
+ ja SetInvalidBCD ;Flags from the cmp ebx in eFBSTP: high half too big
+ cmp edi,0A7640000H ;(1000000000*1000000000) and 0ffffffffh
+ jb ValidBCD
+SetInvalidBCD:
+ mov EMSEG:[CURerr],Invalid
+InvalidBCD:
+ test EMSEG:[CWmask],Invalid ;Is it masked?
+ jz ReadDigitsX ;No--leave memory unchanged
+;Store Indefinite
+ mov dword ptr dseg:[esi],0
+ mov dword ptr dseg:[esi+4],0
+ mov word ptr dseg:[esi+8],-1 ;0FF00000000H for packed BCD indefinite
+ jmp PopStack ;in emstore.asm
+
+;******
+eFBSTP:
+;****** FBSTP: store top of stack as packed BCD at dseg:esi, then pop
+ call RoundToInteger ;Get integer in ebx:edi, sign in ch
+ jc InvalidBCD
+ cmp ebx,0DE0B6B3H ;(1000000000*1000000000) shr 32
+ jae ChkInvalidBCD
+ValidBCD:
+ and ch,bSign
+ mov dseg:[esi+9],ch ;Fill in sign byte
+ mov edx,ebx
+ mov eax,edi ;Get number to edx:eax for division
+ mov ebx,1000000000
+ div ebx ;Break into two 9-digit halves
+ xor ecx,ecx ;Initial digits
+ mov edi,eax ;Save quotient
+ mov eax,edx ;Remainder = low 9 digits
+ or eax,eax
+ jz SaveLowBCD
+ call WriteDigits
+ shrd ecx,eax,4 ;Pack 8th digit
+ xor al,al
+ shl eax,20 ;Move digit in ah to high end
+SaveLowBCD:
+ mov dseg:[esi],ecx ;Save low 8 digits
+ mov ecx,eax ;Get ready for next 8 digits
+ mov eax,edi ;Quotient = high 9 digits
+ or eax,eax
+ jz ZeroHighBCD
+ call WriteDigits
+ shl ah,4 ;Move digit to upper nybble
+ or al,ah ;Combine last two digits
+SaveHighBCD:
+ mov dseg:[esi+4],ecx ;Save lower 8 digits
+ mov dseg:[esi+8],al
+ jmp PopStack
+
+ZeroHighBCD:
+ shr ecx,28 ;Position 9th digit
+ jmp SaveHighBCD ;al is zero here (eax was zero)
+
+
+;*** WriteDigits
+;
+;Inputs:
+; eax = binary number < 1,000,000,000 and > 0
+; ecx = Zero or had one BCD digit left justified
+;Purpose:
+; Convert binary integer to BCD.
+;
+; The time required for the DIV instruction is dependent on operand
+; size, at 6 + (no. of bits) clocks for 386. (In contrast, multiply
+; by 10 as used in FBLD/ReadDigits above takes the same amount of
+; time regardless of operand size--only 10 clocks.)
+;
+; The easy way to do this conversion would be to repeatedly do a
+; 32-bit division by 10 (at 38 clocks/divide). Instead, the number
+; is broken down so that mostly 8-bit division is used (only 14 clocks).
+; AAM (17 clocks) is also used to save us from having to load the
+; constant 10 and zero ah. AAM is faster than DIV on the 486sx.
+;
+;Outputs:
+; ecx has seven more digits packed into it (from left)
+; ah:al = most significant two digits (unpacked)
+;esi,edi preserved
+
+WriteDigits:
+;eax = binary number < 1,000,000,000
+ cdq ;Zero edx
+ mov ebx,10000
+ div ebx ;Break into 4-digit and 5-digit pieces
+ mov bl,100
+ or edx,edx
+ jz ZeroLowDigits
+ xchg edx,eax ;Get 4-digit remainder to eax
+;Compute low 4 digits
+; 0 < eax < 10000
+ div bl ;Get two 2-digit pieces. 14cl on 386
+ mov bh,al ;Save high 2 digits
+ mov al,ah ;Get low digits
+ aam
+ shl ah,4 ;Move digit to upper nybble
+ or al,ah
+ shrd ecx,eax,8 ;Pack two digits into ecx from the top
+ mov al,bh ;Get high 2 digits
+ aam
+ shl ah,4 ;Move digit to upper nybble
+ or al,ah
+ shrd ecx,eax,8
+;Compute high 5 digits
+ mov eax,edx ;5-digit quotient to eax
+ or eax,eax
+ jz ZeroHighDigits
+ConvHigh5:
+ cdq ;Zero edx
+ shld edx,eax,16 ;Put quotient in dx:ax
+ xor bh,bh ;bx = 100
+ div bx ;Get 2- and 3-digit pieces. 22cl on 386
+ xchg edx,eax ;Save high 3 digits, get low 2 digits
+ aam
+ shl ah,4 ;Move digit to upper nybble
+ or al,ah
+ shrd ecx,eax,8
+ mov eax,edx ;Get high 3 digits
+ mov bl,10
+ div bl
+ mov bl,ah ;Remainder is next digit
+ shrd ecx,ebx,4
+ aam ;Get last two digits
+;Last two digits in ah:al
+ ret
+
+ZeroLowDigits:
+ shr ecx,16 ;Low four digits are all zero
+ jmp ConvHigh5
+
+ZeroHighDigits:
+ shr ecx,12 ;Three more zero digits; eax (so ah:al) is zero
+ ret
diff --git a/private/ntos/dll/i386/emlsenv.asm b/private/ntos/dll/i386/emlsenv.asm
new file mode 100644
index 000000000..a3b725d9d
--- /dev/null
+++ b/private/ntos/dll/i386/emlsenv.asm
@@ -0,0 +1,457 @@
+ subttl emlsenv.asm - Emulator Save/Restore
+ page
+;***
+;emlsenv.asm - Emulator Save/Restore
+;
+;
+; Copyright (c) Microsoft Corporation 1991
+;
+; All Rights Reserved
+;
+;Purpose:
+; FLDCW, FSTCW, FSTSW, FSTENV, FLDENV, FSAVE, FRSTOR instructions
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+;
+;
+;*******************************************************************************
+
+
+;When setting the control word, the [RoundMode] vector must be set
+;according to the rounding and precision modes. Entries are grouped by RC, then PC.
+
+tRoundMode label dword
+ irp RC,<near,down,up,chop>
+ irp PC,<24,24,53,64>
+ dd Round&&PC&&RC
+ endm
+ endm
+
+
+EM_ENTRY eFLDCW
+eFLDCW:
+;Uses only eax and ebx. SetControlWord is also called by eFLDENV with ax = control word.
+ mov ax, dseg:[esi] ; Fetch control word from user memory
+SetControlWord:
+ and ax,0F3FH ; Limit to valid values
+ mov EMSEG:[ControlWord], ax ; Store in the emulated control word
+ not al ;Flip mask bits for fast compare
+ and al,3FH ;Limit to valid mask bits
+ mov EMSEG:[ErrMask],al
+ and eax,(RoundControl + PrecisionControl) shl 8
+.erre RoundControl eq 1100B
+.erre PrecisionControl eq 0011B
+ shr eax,6 ;Put PC and RC in bits 2-5
+ mov ebx,tRoundMode[eax] ;Get correct RoundMode vector
+ mov EMSEG:[RoundMode],ebx
+ mov EMSEG:[SavedRoundMode],ebx
+ and eax,RoundControl shl (8-6) ;Mask off precision control
+ mov ebx,tRoundMode[eax+PC64 shl (8-6)];Get correct RoundMode vector
+ mov EMSEG:[TransRound],ebx ;Round mode w/o precision
+ ret
+
+
+EM_ENTRY eFSTCW
+eFSTCW:
+;Uses only eax. Copies [ControlWord] to the word at dseg:esi.
+ mov ax, EMSEG:[ControlWord] ; Fetch user control word
+ mov dseg:[esi], ax ; Store into user memory
+ ret
+
+
+EM_ENTRY eFSTSW
+eFSTSW:
+;Uses only eax and ebx. Stores the status word to the word at dseg:esi.
+ call GetStatusWord ; Fetch emulated Status word
+ mov dseg:[esi], ax ; Store into user memory
+ ret
+
+
+eFSTSWax:
+;Uses only eax and ebx. Writes the status word into the regAX field at [esp+4].
+ call GetStatusWord ; Fetch emulated Status word
+ mov [esp+4].regAX,ax
+ ret
+
+
+EM_ENTRY eFDECSTP
+eFDECSTP:
+;edi = [CURstk]. Moves [CURstk] down one register, wrapping from BEGstk to INITstk.
+ cmp edi,BEGstk
+ jbe DecWrap ;Already at the lowest slot
+ sub EMSEG:[CURstk],Reg87Len
+ ret
+
+DecWrap:
+ mov EMSEG:[CURstk],INITstk
+ ret
+
+
+EM_ENTRY eFINCSTP
+eFINCSTP:
+;edi = [CURstk]. Moves [CURstk] up one register, wrapping from INITstk to BEGstk.
+ cmp edi,INITstk
+ jae IncWrap ;Already at the highest slot
+ add EMSEG:[CURstk],Reg87Len
+ ret
+
+IncWrap:
+ mov EMSEG:[CURstk],BEGstk
+ ret
+
+
+eFCLEX:
+ mov EMSEG:[SWerr],0 ;Clear the exception flags byte
+ and [esp+4].OldLongStatus,0FFFF00FFH ; clear saved SWerr (bits 8-15 of the saved dword)
+ ret
+
+
+;*** eFSTENV - emulate FSTENV [address]
+;
+; ARGUMENTS
+; dseg:esi = where to store environment
+;
+;
+; DESCRIPTION
+; This routine emulates an 80387 FSTENV (store environment)
+;
+
+EM_ENTRY eFSTENV
+eFSTENV:
+ mov ax,[esp+4].OldStatus ;Copy the saved status into [StatusWord] for SaveEnv
+ mov EMSEG:[StatusWord],ax
+SaveEnv:
+ xor ax,ax
+ mov dseg:[esi.reserved1],ax ;Zero the reserved fields
+ mov dseg:[esi.reserved2],ax
+ mov dseg:[esi.reserved3],ax
+ mov dseg:[esi.reserved4],ax
+ mov dseg:[esi.reserved5],ax
+ mov ax,EMSEG:[ControlWord]
+ mov dseg:[esi.E32_ControlWord],ax
+ call GetEMSEGStatusWord
+ mov dseg:[esi.E32_StatusWord],ax
+ call GetTagWord
+ mov dseg:[esi.E32_TagWord],ax
+ mov ax,cs
+ mov dseg:[esi.E32_CodeSeg],ax
+ mov ax,ss
+ mov dseg:[esi.E32_DataSeg],ax
+ mov eax,EMSEG:[PrevCodeOff]
+ mov dseg:[esi.E32_CodeOff],eax
+ mov eax,EMSEG:[PrevDataOff]
+ mov dseg:[esi.E32_DataOff],eax
+ mov EMSEG:[CWmask],03FH ;Set all mask bits
+ mov EMSEG:[ErrMask],0
+ ret
+
+
+;*** eFSAVE - emulate FSAVE [address]
+;
+; ARGUMENTS
+; dseg:esi = where to store environment
+;
+;
+; DESCRIPTION
+; This routine emulates an 80387 FSAVE (store environment and registers)
+; Once the data is stored an finit is executed.
+;
+; REGISTERS
+; destroys ALL.
+
+EM_ENTRY eFSAVE
+eFSAVE:
+ mov ax,[esp+4].OldStatus
+ mov EMSEG:[StatusWord],ax
+ mov eax,[esp+4].OldCodeOff
+ mov EMSEG:[PrevCodeOff],eax
+ push offset eFINIT ; After fsave we must do a finit
+SaveState: ; Enter here for debugger save state
+ call SaveEnv
+ add esi,size Env80x87_32 ;Skip over environment
+ mov ebp,NumLev ;Save entire stack
+ mov edi,EMSEG:[CURstk]
+FsaveStoreLoop:
+ mov eax,EMSEG:[edi].ExpSgn
+ call StoreTempReal ;in emstore.asm
+ add esi,10 ;Each temp real takes 10 bytes
+
+ mov edi,EMSEG:[CURstk]
+ NextStackElem edi,FSave
+ mov EMSEG:[CURstk],edi ;Step [CURstk] to the next register
+
+ dec ebp
+ jnz FsaveStoreLoop
+ ret
+
+WrapFSave: ; tied to NextStackElem above
+ mov edi, BEGstk
+ mov EMSEG:[CURstk],edi
+ dec ebp
+ jnz FsaveStoreLoop
+ ret
+
+
+;*** eFRSTOR - emulate FRSTOR [address]
+;
+; ARGUMENTS
+; dseg:esi = where to get the environment
+;
+; DESCRIPTION
+; This routine emulates an 80387 FRSTOR (restore state)
+
+ NextStackWrap edi,Frstor
+
+EM_ENTRY eFRSTOR
+eFRSTOR:
+;First we set up the status word so that [CURstk] is initialized.
+;The floating-point registers are stored in logical ST(0) - ST(7) order,
+;not physical register order. We don't do a full load of the environment
+;because we're not ready to use the tag word yet.
+
+ and [esp+4].[OldLongStatus], NOT(LongSavedFlags) ;clear saved codes, errs
+ mov ax, dseg:[esi.E32_StatusWord]
+ call SetEmStatusWord ;Initialize [CURstk]
+ add esi,size Env80x87_32 ;Skip over environment
+
+;Load of temp real has one difference from real math chip: it is an invalid
+;operation to load an unsupported format. By ensuring the exception is
+;masked, we will convert unsupported format to Indefinite. Note that the
+;mask and [CURerr] will be completely restored by the FLDENV at the end.
+
+ mov EMSEG:[CWmask],3FH ;Mask off invalid operation exception
+ mov edi,EMSEG:[CURstk]
+ mov ebp,NumLev ;ebp counts the registers left to load
+FrstorLoadLoop:
+ push esi ;LoadTempReal overwrites esi
+ call LoadTempReal ;In emload.asm
+ pop esi
+ add esi,10 ;Point to next temp real
+ NextStackElem edi,Frstor
+ dec ebp
+ jnz FrstorLoadLoop
+ sub esi,NumLev*10+size Env80x87_32 ;Point to start of env.
+ jmp eFLDENV ;Fall into eFLDENV
+
+
+;*** eFLDENV - emulate FLDENV [address]
+;
+; ARGUMENTS
+; dseg:esi = where to load the environment from
+;
+; This routine emulates an 80387 FLDENV (load environment)
+
+EM_ENTRY eFLDENV
+eFLDENV:
+ and [esp+4].[OldLongStatus], NOT(LongSavedFlags) ;clear saved codes, errs
+ mov ax, dseg:[esi.E32_StatusWord]
+ call SetEmStatusWord ; set up status word
+ mov ax, dseg:[esi.E32_ControlWord]
+ call SetControlWord
+ mov ax, dseg:[esi.E32_TagWord]
+ call UseTagWord
+ mov eax, dseg:[esi.E32_CodeOff]
+ mov EMSEG:[PrevCodeOff], eax
+ mov eax, dseg:[esi.E32_DataOff]
+ mov EMSEG:[PrevDataOff], eax
+ ret
+
+
+;*** GetTagWord - figures out what the tag word is from the numeric stack
+; and returns the value of the tag word in ax.
+; Preserves esi; changes bh and ecx.
+
+GetTagWord:
+ push esi
+ xor eax, eax
+ mov ecx, NumLev ; get tags for regs. 0, 7 - 1
+ mov esi,INITstk
+GetTagLoop:
+ mov bh, EMSEG:[esi.bTag] ; The top 2 bits of Tag are the X87 tag bits.
+ shld ax, bx, 2
+ sub esi, Reg87Len
+ loop GetTagLoop
+ rol ax, 2 ; This moves Tag(0) into the low 2 bits
+ pop esi
+ ret
+
+
+;*** UseTagWord - Set up tags using tag word from environment
+;
+; ARGUMENTS
+; ax - should contain the tag word
+;
+; Destroys ax,bx,cx,dx,di
+
+UseTagWord:
+ ror ax, 2 ; mov Tag(0) into top bits of ax
+ mov edi,INITstk
+ mov ecx, NumLev
+UseTagLoop:
+ mov dl,bTAG_EMPTY
+ cmp ah, 0c0h ;Is register to be tagged Empty?
+ jae SetTag ;Yes, go mark it
+ mov dl,EMSEG:[edi].bTag ;Get current tag
+ cmp dl,bTAG_EMPTY ;Is register currently Empty?
+ je SetTagNotEmpty ;If so, go figure out tag for it
+SetTag:
+ mov EMSEG:[edi].bTag,dl
+UseTagLoopCheck:
+ sub edi, Reg87Len
+ shl eax, 2 ;Bring the next 2-bit tag to the top of ah
+ loop UseTagLoop
+ ret
+
+SetTagEmpty:
+ mov EMSEG:[edi.bTag], bTAG_EMPTY ;NOTE(review): no reference to this label is visible in this section
+ jmp UseTagLoopCheck
+
+SetTagNotEmpty:
+;Register is currently tagged empty, but new tag word says it is not empty.
+;Figure out a new tag for it. The rules are:
+;
+;1. Everything is either normalized or zero--unnormalized formats cannot
+;get in. So if the high half mantissa is zero, the number is zero.
+;
+;2. Although the exponent bias is different, NANs and Infinities are in
+;standard IEEE format - exponent is TexpMax, mantissa indicates NAN vs.
+;infinity (mantissa for infinity is 800..000H).
+;
+;3. Denormals have an exponent less than TexpMin.
+;
+;4. If the low half of the mantissa is zero, it is tagged bTAG_SNGL
+;
+;5. Everything else is bTAG_VALID
+
+ mov ebx,EMSEG:[edi].lManHi
+ mov dl,bTAG_ZERO ;Try zero first
+ or ebx,ebx ;Is mantissa zero?
+ jz SetTag
+ mov edx,EMSEG:[edi].ExpSgn
+ mov dl,bTAG_DEN
+ cmp edx,TexpMin shl 16 ;Is it denormal?
+ jl SetTag
+ cmp EMSEG:[edi].lManLo,0 ;Is low half zero?
+.erre bTAG_VALID eq 1
+.erre bTAG_SNGL eq 0
+ setnz dl ;if low half==0 then dl=0 else dl=1
+ cmp edx,TexpMax shl 16 ;Is it NAN or Infinity?
+ jl SetTag ;If not, it's valid
+.erre (bTAG_VALID - bTAG_SNGL) shl TAG_SHIFT eq (bTAG_NAN - bTAG_INF)
+ shl dl,TAG_SHIFT
+ add dl,bTAG_INF - bTAG_SNGL
+;If the low bits were zero we have just changed bTAG_SNGL to bTAG_INF
+;If the low bits weren't zero, we changed bTAG_VALID to bTAG_NAN
+;See if infinity is really possible: is high half 80..00H?
+ cmp ebx,1 shl 31 ;Is it infinity?
+ jz SetTag ;Store tag for infinity or NAN
+ mov dl,bTAG_NAN ;High half has other bits set: must be a NAN
+ jmp SetTag
+
+
+;*** GetStatusWord -
+;
+; User status word returned in ax (stack-top field from [CURstk], rest from OldStatus).
+; Destroys ebx only.
+
+GetStatusWord:
+ mov eax, EMSEG:[CURstk]
+ sub eax, BEGstk
+ mov bl,Reg87Len
+ div bl ;al = register index (byte offset / Reg87Len)
+ inc eax ; adjust for emulator's stack layout
+ and eax, 7 ; eax is now the stack number
+ shl ax, 11
+ or ax,[esp+8].OldStatus ; or in the rest of the status word.
+ ret
+
+
+;*** GetEMSEGStatusWord -
+;
+; User status word returned in ax.
+; Destroys ebx only.
+; Uses status word in per-thread data area, otherwise
+; identical to GetStatusWord
+
+EM_ENTRY eGetStatusWord
+GetEMSEGStatusWord:
+ mov eax, EMSEG:[CURstk]
+ sub eax, BEGstk
+ mov bl,Reg87Len
+ div bl ;al = register index (byte offset / Reg87Len)
+ inc eax ; adjust for emulator's stack layout
+ and eax, 7 ; eax is now the stack number
+ shl ax, 11
+ or ax, EMSEG:[StatusWord] ; or in the rest of the status word.
+ ret
+
+
+;*** SetEmStatusWord -
+;
+; Given user status word in ax, set into emulator ([CURerr], [StatusWord], [CURstk]).
+; Destroys eax and ebx.
+
+
+SetEmStatusWord:
+ and ax,7F7FH
+ mov bx,ax
+ and bx,3FH ; set up CURerr in case user
+ mov EMSEG:[CURerr],bl ; wants to force an exception
+ mov ebx, eax
+ and ebx, not (7 shl 11) ; remove stack field.
+ mov EMSEG:[StatusWord], bx
+
+ sub ah, 8 ; adjust for emulator's stack layout
+ and ah, 7 shl 3 ; ah = 8 * stack number
+ mov al, ah
+ shr ah, 1 ; ah = 4 * stack number
+ add al, ah ; stack field * 3 * 4
+.erre Reg87Len eq 12
+ and eax, 255 ; eax is now 12*stack number
+ add eax, BEGstk
+ mov EMSEG:[CURstk], eax
+ ret
+
+
+public _SaveEm87Context
+_SaveEm87Context PROC
+
+ push ebp
+ mov ebp, esp
+ push ebx
+ push edi
+ push esi
+ mov esi, [ebp+8] ;First stack argument: where SaveState writes
+ call SaveState ;Uses ebp as a loop counter; restored by the pop below
+ test EMSEG:[CURErr], Summary
+ jne RetSaveEmIdle ;Summary set: return Em87Idle
+ mov eax, Em87Busy
+ jmp RetSaveEm
+RetSaveEmIdle:
+ mov eax, Em87Idle
+RetSaveEm:
+ pop esi
+ pop edi
+ pop ebx
+ pop ebp
+ ret
+_SaveEm87Context ENDP
+
+
+public _RestoreEm87Context
+_RestoreEm87Context PROC
+ push ebp
+ mov ebp, esp
+ push ebx
+ push edi
+ push esi
+ mov esi, [ebp+8] ;First stack argument: saved state to restore from
+ call eFRSTOR ;NOTE(review): eFRSTOR writes [esp+4].OldLongStatus; confirm what that hits on this path
+ pop esi
+ pop edi
+ pop ebx
+ pop ebp
+ ret
+_RestoreEm87Context ENDP
diff --git a/private/ntos/dll/i386/emround.asm b/private/ntos/dll/i386/emround.asm
new file mode 100644
index 000000000..34704c5bd
--- /dev/null
+++ b/private/ntos/dll/i386/emround.asm
@@ -0,0 +1,712 @@
+ subttl emround.asm - Rounding and Precision Control and FRNDINT
+ page
+;*******************************************************************************
+;emround.asm - Rounding and Precision Control
+;
+; Microsoft Confidential
+;
+; Copyright (c) Microsoft Corporation 1991
+; All Rights Reserved
+;
+;Purpose:
+; Rounding and precision control. The correct routine is jumped
+; to through the [RoundMode] vector.
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+; 02/28/92 JWM Minor bug fix in NotNearLow
+;
+;*******************************************************************************
+
+
+RndIntSpcl: ;FRNDINT operand whose tag in cl is above bTAG_ZERO
+ cmp cl,bTAG_INF
+ jz RndIntX ;Leave infinity unchanged
+ cmp cl,bTAG_DEN
+ jnz SpclDestNotDen ;Handle NAN & empty - in emarith.asm
+;Handle denormal
+ mov EMSEG:[CURerr],Denormal ;Note: stores (does not OR) the Denormal flag
+ test EMSEG:[CWmask],Denormal ;Is it masked?
+ jnz NormRndInt ;If so, ignore denormalization
+RndIntX: ;Shared exit: operand is left as it is
+ ret
+
+;********
+EM_ENTRY eFRNDINT
+eFRNDINT:
+;********
+;edi points to top of stack; the value is rounded to an integer in place
+ mov ecx,EMSEG:[edi].ExpSgn
+ cmp cl,bTAG_ZERO
+.erre bTAG_VALID lt bTAG_ZERO
+.erre bTAG_SNGL lt bTAG_ZERO
+ jz RndIntX ;Zero is already an integer
+ ja RndIntSpcl ;Tags above bTAG_ZERO are the special cases
+ cmp ecx,63 shl 16 ;Is it already integer?
+ jge RndIntX
+NormRndInt:
+ mov ebx,EMSEG:[edi].lManHi
+ mov esi,EMSEG:[edi].lManLo
+ mov EMSEG:[Result],edi ;Save result pointer
+ xor eax,eax ;Extend mantissa
+ push offset SaveResult ;RoundToBit's ret will continue at SaveResult
+ jmp RoundToBit
+
+;*******************************************************************************
+
+ResultOverflow:
+;mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl.
+;We were all ready to save the rounded result, but the exponent turned out
+;to be too large.
+ or EMSEG:[CURerr],Overflow
+ sub ecx,UnderBias shl 16 ;Unmasked response
+ test EMSEG:[CWmask],Overflow ;Is exception unmasked?
+ jz SaveResult ;If so, we're ready
+;Produce masked overflow response
+ mov ebx,1 shl 31 ;Assume infinity
+ xor esi,esi
+ mov cl,bTAG_INF
+ mov al,EMSEG:[CWcntl] ;Get rounding control
+ mov ah,al ;al keeps the full control byte for the precision test below
+ and ah,RCchop ;Rounding control only
+;Return max value if RCup bit = 1 and -, or RCdown bit = 1 and +
+;i.e., RCup & sign OR RCdown & not sign
+.erre RCchop eq RCup + RCdown ;Always return max value
+.erre RCnear eq 0 ;Never return max value
+ sar ch,7 ;Expand sign through whole byte
+.erre (RCdown and bSign) eq 0 ;Don't want to change real sign
+ xor ch,RCdown ;Flip sign for RCdown bit
+ and ah,ch ;RCup & sign OR RCdown & not sign
+ jnz SaveMax
+ and ecx,0FFFFH ;Drop the exponent, keep sign byte and tag
+ or ecx,TexpMax shl 16
+ jmp SaveResult ;Save Infinity
+SaveMax:
+;Get max value for current precision
+ mov ebx,0FFFFFF00H ;Max value for 24 bits
+ and ecx,bSign shl 8 ;Preserve only sign
+ or ecx,(IexpMax-IexpBias-1) shl 16 + bTAG_VALID ;Set up max value
+ and al,PrecisionControl
+.erre PC24 eq 0
+ jz SaveResult ;Save 24-bit max value
+ dec esi ;esi == -1
+ mov ebx,esi
+ cmp al,PC53
+ jnz SaveResult ;Save 64-bit max value
+ mov esi,0FFFFF800H
+ jmp SaveResult ;Save 53-bit max value
+
+;*******************************************************************************
+;
+;64-bit rounding routines
+;
+
+;***********
+Round64down: ;Round toward -infinity at 64-bit precision
+;***********
+ cmp ecx,(IexpMin-IexpBias+1) shl 16 ;Test for Underflow
+ jl RndDenorm64
+ or eax,eax ;Exact result?
+ jz SaveValidResult
+ or EMSEG:[CURerr],Precision ;Set flag on inexact result
+;Chop if positive, increase mantissa if negative
+ test ch,bSign
+ jz SaveValidResult ;Positive, so chop
+ jmp RoundUp64 ;Round up if negative
+
+RndDenorm64:
+ test EMSEG:[CWmask],Underflow ;Is exception unmasked?
+ jz RndSetUnder
+Denormalize:
+;We don't really store in denormalized format, but we need the number
+;to be rounded as if we do. If the exponent were -IexpBias, we would
+;lose 1 bit of precision; as it gets more negative, we lose more bits.
+;We'll do this by adjusting the exponent so that the bits we want to
+;keep look like integer bits, and performing round-to-integer.
+ add ecx,(IexpBias+62) shl 16 ;Adjust exponent so we're integer
+ call RoundToBit
+;Set underflow exception if precision exception is set
+ mov al,EMSEG:[CURerr]
+ and al,Precision
+ ror al,Precision-Underflow ;Move Precision bit to Underflow pos. NOTE(review): count is the difference of the flag values - confirm it equals the bit distance
+ or EMSEG:[CURerr],al ;Signal Underflow if inexact
+ cmp cl,bTAG_ZERO ;RoundToBit returns the tag in cl
+ jz SaveResult
+ sub ecx,(IexpBias+62) shl 16;Restore unbiased exponent
+ cmp ecx,TexpMin shl 16 ;Did we round out of denorm?
+ jae SaveResult
+ mov cl,bTAG_DEN
+ jmp SaveResult
+
+RndSetUnder:
+;Underflow exception not masked. Adjust exponent and try again.
+ or EMSEG:[CURerr],Underflow
+ add ecx,UnderBias shl 16
+ jmp EMSEG:[RoundMode] ;Try again with revised exponent
+
+;***********
+Round64near: ;Round to nearest (even) at 64-bit precision
+;***********
+;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+ cmp ecx,TexpMin shl 16 ;Test for Underflow
+ jl RndDenorm64
+ or eax,eax ;Exact result?
+ jz short SaveValidResult
+ or EMSEG:[CURerr],Precision ;Set flag on inexact result
+
+;To perform "round even" when the round bit is set and the sticky bits
+;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB
+;is set, that will always force a round up (to even) if the round bit is
+;set. If the LSB is zero, then the sticky bits remain zero and we always
+;round down. This rounding rule is implemented by adding RoundBit-1
+;(7F..FFH), setting CY if round up.
+
+ bt esi,0 ;Is mantissa even or odd? (set CY)
+ adc eax,(1 shl 31)-1 ;Sum LSB & sticky bits--CY if round up
+ jnc SaveValidResult
+RoundUp64: ;Increment the 64-bit mantissa in ebx:esi
+ mov EMSEG:[SWcc],RoundUp
+ add esi,1
+ adc ebx,0
+ jc BumpExponent ;Overflowed, increment exponent
+
+SaveValidResult: ;A jump to here requires 9 clocks
+ or esi,esi ;Any bits in low half?
+.erre bTAG_VALID eq 1
+.erre bTAG_SNGL eq 0
+ setnz cl ;if low half==0 then cl=0 else cl=1
+ cmp ecx,TexpMax shl 16 ;Test for overflow
+ jge ResultOverflow
+
+SaveResult: ;A jump to here requires 10 clocks
+;mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl
+ mov edi,EMSEG:[Result] ;Reload the destination pointer
+SaveResultEdi: ;Entry when edi already points at the destination
+ mov EMSEG:[edi].lManLo,esi
+ mov EMSEG:[edi].lManHi,ebx
+SaveExpSgn: ;Entry that stores only exponent/sign/tag
+ mov EMSEG:[edi].ExpSgn,ecx
+ ret
+
+;***********
+Round64up: ;Round toward +infinity at 64-bit precision
+;***********
+ cmp ecx,TexpMin shl 16 ;Test for Underflow
+ jl RndDenorm64
+ or eax,eax ;Exact result?
+ jz short SaveValidResult
+ or EMSEG:[CURerr],Precision;Set flag on inexact result
+;Chop if negative, increase mantissa if positive
+ cmp ch,bSign ;No CY iff sign bit is set
+ jc RoundUp64 ;Round up if positive
+ jmp short SaveValidResult
+
+;***********
+Round64chop: ;Truncate at 64-bit precision; only the Precision flag is affected
+;***********
+ cmp ecx,TexpMin shl 16 ;Test for Underflow
+ jl RndDenorm64
+ or eax,eax ;Exact result?
+ jz short SaveValidResult
+ or EMSEG:[CURerr],Precision;Set flag on inexact result
+ jmp short SaveValidResult
+
+;*******************************************************************************
+;
+;53-bit rounding routines
+;
+
+;***********
+Round53down: ;Round toward -infinity at 53-bit precision
+;***********
+ cmp ecx,TexpMin shl 16 ;Test for Underflow
+ jl RndDenorm53
+ mov edx,esi ;Get low bits
+ and edx,(1 shl 11) - 1 ;Mask to last 11 bits
+ or edx,eax ;Throwing away any bits?
+ jz SaveValidResult
+ or EMSEG:[CURerr],Precision;Set flag on inexact result
+;Chop if positive, increase mantissa if negative
+ and esi,not ((1 shl 11)-1) ;Mask off low 11 bits
+ test ch,bSign
+ jz SaveValidResult ;Positive, go chop
+ jmp RoundUp53
+
+RndDenorm53:
+ test EMSEG:[CWmask],Underflow;Is exception unmasked?
+ jz RndSetUnder
+;We don't really store in denormalized format, but we need the number
+;to be rounded as if we do. If the exponent were -IexpBias, we would
+;lose 1 bit of precision; as it gets more negative, we lose more bits.
+;We'll do this by adjusting the exponent so that the bits we want to
+;keep look like integer bits, and performing round-to-integer.
+ add ecx,(IexpBias+51) shl 16 ;Adjust exponent so we're integer
+ call RoundToBit
+;Set underflow exception if precision exception is set
+ mov al,EMSEG:[CURerr]
+ and al,Precision
+ ror al,Precision-Underflow ;Move Precision bit to Underflow pos.
+ or EMSEG:[CURerr],al ;Signal Underflow if inexact
+ cmp cl,bTAG_ZERO ;RoundToBit returns the tag in cl
+ jz SaveResult
+ sub ecx,(IexpBias+51) shl 16;Restore unbiased exponent
+ cmp ecx,(IexpMin-IexpBias+1) shl 16 ;Did we round out of denorm?
+ jae SaveResult
+ mov cl,bTAG_DEN
+ jmp SaveResult
+
+;***********
+Round53near: ;Round to nearest (even) at 53-bit precision
+;***********
+;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+ cmp ecx,TexpMin shl 16 ;Test for Underflow
+ jl RndDenorm53
+ mov edx,esi ;Get low bits
+ and edx,(1 shl 11) - 1 ;Mask to last 11 bits
+ or edx,eax ;Throwing away any bits?
+ jz SaveValidResult
+ or EMSEG:[CURerr],Precision;Set flag on inexact result
+
+;To perform "round even" when the round bit is set and the sticky bits
+;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB
+;is set, that will always force a round up (to even) if the round bit is
+;set. If the LSB is zero, then the sticky bits remain zero and we always
+;round down.
+
+ mov edx,esi
+ and esi,not ((1 shl 11)-1) ;Mask off low 11 bits
+ test edx,1 shl 10 ;Is round bit set?
+ jz SaveValidResult
+ and edx,(3 shl 10)-1 ;Keep only sticky bits and LSB (bits 0-9 and bit 11)
+ or eax,edx ;Combine with other sticky bits
+ jz SaveValidResult
+RoundUp53: ;Add one unit at bit 11 of esi, carrying into ebx
+ mov EMSEG:[SWcc],RoundUp
+ add esi,1 shl 11 ;Round
+ adc ebx,0
+ jnc SaveValidResult
+BumpExponent: ;Shared by the 64/53/24-bit round-up paths
+ add ecx,1 shl 16 ;Mantissa overflowed, bump exponent
+ or ebx,1 shl 31 ;Set MSB
+ jmp SaveValidResult
+
+;***********
+Round53up: ;Round toward +infinity at 53-bit precision
+;***********
+ cmp ecx,TexpMin shl 16 ;Test for Underflow
+ jl RndDenorm53
+ mov edx,esi ;Get low bits
+ and edx,(1 shl 11) - 1 ;Mask to last 11 bits
+ or edx,eax ;Throwing away any bits?
+ jz SaveValidResult
+ or EMSEG:[CURerr],Precision;Set flag on inexact result
+;Chop if negative, increase mantissa if positive
+ and esi,not ((1 shl 11)-1) ;Mask off low 11 bits
+ test ch,bSign
+ jz RoundUp53 ;Round up if positive
+ jmp SaveValidResult
+
+;***********
+Round53chop: ;Truncate at 53-bit precision
+;***********
+ cmp ecx,TexpMin shl 16 ;Test for Underflow
+ jl RndDenorm53
+ mov edx,esi ;Get low bits
+ and edx,(1 shl 11) - 1 ;Mask to last 11 bits
+ or edx,eax ;Throwing away any bits?
+ jz SaveValidResult
+ or EMSEG:[CURerr],Precision;Set flag on inexact result
+ and esi,not ((1 shl 11)-1) ;Mask off low 11 bits
+ jmp SaveValidResult
+
+;*******************************************************************************
+;
+;24-bit rounding routines
+;
+
+;***********
+Round24down: ;Round toward -infinity at 24-bit precision
+;***********
+ cmp ecx,TexpMin shl 16 ;Test for Underflow
+ jl RndDenorm24
+ or eax,esi ;Low dword is just sticky bits
+ mov edx,ebx ;Get low bits
+ and edx,(1 shl 8) - 1 ;Mask to last 8 bits
+ or edx,eax ;Throwing away any bits?
+ jz SaveValidResult
+ or EMSEG:[CURerr],Precision;Set flag on inexact result
+;Chop if positive, increase mantissa if negative
+ xor esi,esi ;Result keeps only the top 24 bits of ebx
+ and ebx,not ((1 shl 8)-1) ;Mask off low 8 bits
+ test ch,bSign
+ jz SaveValidResult ;Chop if positive
+ jmp RoundUp24
+
+RndDenorm24:
+ test EMSEG:[CWmask],Underflow;Is exception unmasked?
+ jz RndSetUnder
+;We don't really store in denormalized format, but we need the number
+;to be rounded as if we do. If the exponent were -IexpBias, we would
+;lose 1 bit of precision; as it gets more negative, we lose more bits.
+;We'll do this by adjusting the exponent so that the bits we want to
+;keep look like integer bits, and performing round-to-integer.
+ add ecx,(IexpBias+22) shl 16 ;Adjust exponent so we're integer
+ call RoundToBit
+;Set underflow exception if precision exception is set
+ mov al,EMSEG:[CURerr]
+ and al,Precision
+ ror al,Precision-Underflow ;Move Precision bit to Underflow pos.
+ or EMSEG:[CURerr],al ;Signal Underflow if inexact
+ cmp cl,bTAG_ZERO ;RoundToBit returns the tag in cl
+ jz SaveResult
+ sub ecx,(IexpBias+22) shl 16;Restore unbiased exponent
+ cmp ecx,(IexpMin-IexpBias+1) shl 16 ;Did we round out of denorm?
+ jae SaveResult
+ mov cl,bTAG_DEN
+ jmp SaveResult
+
+;***********
+Round24near: ;Round to nearest (even) at 24-bit precision
+;***********
+;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+ cmp ecx,TexpMin shl 16 ;Test for Underflow
+ jl RndDenorm24
+ or eax,esi ;Low dword is just sticky bits
+ mov edx,ebx ;Get low bits
+ and edx,(1 shl 8) - 1 ;Mask to last 8 bits
+ or edx,eax ;Throwing away any bits?
+ jz SaveValidResult
+ or EMSEG:[CURerr],Precision;Set flag on inexact result
+ xor esi,esi
+
+;To perform "round even" when the round bit is set and the sticky bits
+;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB
+;is set, that will always force a round up (to even) if the round bit is
+;set. If the LSB is zero, then the sticky bits remain zero and we always
+;round down.
+
+ mov edx,ebx
+ and ebx,not ((1 shl 8)-1) ;Mask off low 8 bits
+ test dl,1 shl 7 ;Round bit set?
+ jz SaveValidResult
+ and edx,(3 shl 7)-1 ;Mask to LSB and sticky bits (bit 8 and bits 0-6)
+ or eax,edx ;Combine all sticky bits
+ jz SaveValidResult
+RoundUp24: ;Add one unit at bit 8 of ebx
+ mov EMSEG:[SWcc],RoundUp
+ add ebx,1 shl 8
+ jnc SaveValidResult
+ jmp BumpExponent ;Overflowed, increment exponent
+
+;***********
+Round24up: ;Round toward +infinity at 24-bit precision
+;***********
+ cmp ecx,TexpMin shl 16 ;Test for Underflow
+ jl RndDenorm24
+ or eax,esi ;Low dword is just sticky bits
+ mov edx,ebx ;Get low bits
+ and edx,(1 shl 8) - 1 ;Mask to last 8 bits
+ or edx,eax ;Throwing away any bits?
+ jz SaveValidResult
+ or EMSEG:[CURerr],Precision;Set flag on inexact result
+;Chop if negative, increase mantissa if positive
+ xor esi,esi
+ and ebx,not ((1 shl 8)-1) ;Mask off low 8 bits
+ test ch,bSign
+ jz RoundUp24 ;Round up if positive
+ jmp SaveValidResult
+
+;***********
+Round24chop: ;Truncate at 24-bit precision
+;***********
+ cmp ecx,TexpMin shl 16 ;Test for Underflow
+ jl RndDenorm24
+ or eax,esi ;Low dword is just sticky bits
+ mov edx,ebx ;Get low bits
+ and edx,(1 shl 8) - 1 ;Mask to last 8 bits
+ or edx,eax ;Throwing away any bits?
+ jz SaveValidResult
+ or EMSEG:[CURerr],Precision;Set flag on inexact result
+ xor esi,esi
+ and ebx,not ((1 shl 8)-1) ;Mask off low 8 bits
+ jmp SaveValidResult
+
+;*******************************************************************************
+
+;*** RoundToInteger
+;
+;This routine is used by FISTP Int64 and BSTP. Unlike RoundToBit, this
+;unnormalizes the number into a 64-bit integer.
+;
+;Inputs:
+; edi = pointer to number to round in stack
+;Outputs:
+; CY set if invalid operation
+; ebx:edi = rounded integer if CY clear
+; ch = sign if CY clear
+;Note:
+; FIST/FISTP/BSTP exception rules are used: If the number is too big,
+; Invalid Operation occurs. Denormals are ignored.
+;
+;esi preserved
+
+RoundSpcl64Int: ;Operand tag in cl is above bTAG_ZERO
+ cmp cl,bTAG_DEN
+ jz NormRound64Int ;Ignore denormal
+ cmp cl,bTAG_EMPTY
+ jnz RoundInvalid ;All other specials are invalid
+ mov EMSEG:[CURerr],StackFlag+Invalid ;Empty register: invalid plus stack flag
+ stc ;Flag exception to caller
+ ret
+
+RoundInvalid:
+;Overflow on integer store is invalid according to IEEE
+ mov EMSEG:[CURerr],Invalid ;Stores (does not OR) the Invalid flag
+ stc ;Flag exception to caller
+ ret
+
+RoundToInteger:
+ mov ebx,EMSEG:[edi].lManHi
+ mov ecx,EMSEG:[edi].ExpSgn
+ mov edi,EMSEG:[edi].lManLo ;edi is reused: from here on it holds the low mantissa
+;mantissa in ebx:edi, exponent in high ecx, sign in ch bit 7, tag in cl
+ mov al,ch ;Save sign bit
+ cmp cl,bTAG_ZERO
+.erre bTAG_VALID lt bTAG_ZERO
+.erre bTAG_SNGL lt bTAG_ZERO
+ jz RoundIntX ;Just return zero
+ ja RoundSpcl64Int
+NormRound64Int:
+ xor edx,edx ;edx will collect the round and sticky bits
+ sar ecx,16 ;Bring exponent down
+ cmp ecx,-1 ;Is it less than 1?
+ jle Under64Int ;Under64Int relies on ZF from this compare
+ cmp ecx,63
+ jg RoundInvalid
+ sub ecx,63
+ neg ecx ;cl = amount to shift right
+ mov ch,al ;Get sign out of al
+ xor eax,eax
+ cmp cl,32 ;Too big for one shift?
+ jl ShortShft64
+;32-bit shift right
+ xchg edx,edi
+ xchg ebx,edi ;ebx=0 now
+ shrd eax,edx,cl
+;Max total shift is 63 bits, so we know that the LSB of eax is still zero.
+;We can rotate this zero to the MSB so the sticky bits in eax can be combined
+;with those in edx without affecting the rounding bit in the MSB of edx.
+ ror eax,1 ;MSB is now zero
+ShortShft64:
+;Shift count in cl is modulo-32
+ shrd edx,edi,cl
+ shrd edi,ebx,cl
+ shr ebx,cl
+ or edx,eax ;Collapse sticky bits into one dword
+ jz RoundIntX ;No sticky or round bits, so don't round
+;Result will not be exact--check rounding mode
+Round64Int:
+ mov EMSEG:[CURerr],Precision;Set flag on inexact result
+ test EMSEG:[CWcntl],RoundControl ;Check rounding control bits
+.erre RCnear eq 0
+ jnz NotNearest64Int ;Not just round-to-nearest
+
+;To perform "round even" when the round bit is set and the sticky bits
+;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB
+;is set, that will always force a round up (to even) if the round bit is
+;set. If the LSB is zero, then the sticky bits remain zero and we always
+;round down.
+
+ bt edi,0 ;Look at LSB (for round even)
+ adc edx,(1 shl 31)-1 ;CY set if round up
+ jnc RoundIntX
+ mov EMSEG:[SWcc],RoundUp
+ add edi,1 ;Round
+ adc ebx,0
+ jc RoundInvalid ;Carry out of 64 bits: result does not fit
+RoundIntX:
+ ret ;CY clear, no Invalid exception
+
+Shift64Round: ;Exponent -1: edx holds the old high mantissa, result so far is 0
+ or edi,edi
+ setnz dl ;Set sticky bit in edx
+ xor edi,edi ;Mantissa is all zero
+ jmp Round64Int
+
+Under64Int:
+;ZF set if exponent is -1
+ xchg ebx,edx ;64-bit right shift
+ mov ch,al ;Restore sign to ch
+ jz Shift64Round ;Exp. is -1, could need to round up
+ xor edi,edi ;Mantissa is all zero
+ mov EMSEG:[CURerr],Precision;Set flag on inexact result
+NotNearest64Int:
+;We want to increase the magnitude if RCup and +, or RCdown and -
+ mov al,EMSEG:[CWcntl] ;Get rounding control
+.erre (not RCup and RoundControl) eq RCdown
+ sar ch,7 ;Expand sign through whole byte
+ xor al,ch ;Flip round mode if -
+ and al,RoundControl
+ cmp al,RCup ;Rounding up?
+ jnz RoundIntOk ;No, chop it
+ mov EMSEG:[SWcc],RoundUp
+ add edi,1
+ adc ebx,0
+ jc RoundInvalid ;Carry out of 64 bits: result does not fit
+RoundIntOk:
+ clc ;Report success to the caller
+ ret
+
+;*******************************************************************************
+
+;*** RoundToBit
+;
+;This is a relatively low performance routine used by FRNDINT and to
+;generate internal-format denormals. It can round to any bit position.
+;
+;Inputs:
+; mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+;Purpose:
+; Round number to integer. Zero exponent means number is in the
+; range [1,2), so only the MSB will survive (MSB-1 is round bit).
+; Larger exponents keep more bits; 63 would mean no rounding.
+;Outputs:
+; mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl
+;
+;Does NOT detect overflow.
+
+NoSigBits:
+;Exponent was negative: no integer part
+ and ecx,bSign shl 8 ;Zero exponent, preserve sign
+ mov cl,bTAG_ZERO
+ or EMSEG:[CURerr],Precision;Set flag on inexact result
+ test EMSEG:[CWcntl],RoundControl ;Check rounding control bits
+.erre RCnear eq 0
+ jnz NotNearNoSig ;Not just round-to-nearest
+ cmp edx,-1 ;Exponent of -1 ==> range [.5,1)
+ je HalfBitRound
+RndIntToZero: ;Result is zero; tag in cl is already bTAG_ZERO
+ xor ebx,ebx
+ mov esi,ebx ;Just return zero
+ ret
+
+NotNearNoSig:
+;We want to increase the magnitude if RCup and +, or RCdown and -
+ mov al,EMSEG:[CWcntl] ;Get rounding control
+ sar ch,7 ;Expand sign through whole byte
+ xor al,ch ;Flip rounding bits if negative
+ and al,RoundControl
+ cmp al,RCup ;Rounding up?
+ jnz RndIntToZero ;No, chop it
+RndIntToOne: ;Result has magnitude 1: MSB-only mantissa, zero exponent
+ mov ebx,1 shl 31
+ xor esi,esi
+ mov cl,bTAG_SNGL
+ mov EMSEG:[SWcc],RoundUp
+ ret
+
+HalfBitRound: ;Round-to-nearest with value in [.5,1)
+ add ebx,ebx ;Shift off MSB (round bit)
+ or ebx,esi
+ or ebx,eax ;Any bit below the round bit means the value is above .5
+ jnz RndIntToOne
+ ret ;Return zero (exactly .5 rounds to even; ebx and esi are zero here)
+
+;**********
+RoundToBit:
+;**********
+;Entry: mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7.
+;A negative rounding exponent means there is no integer part (NoSigBits).
+;Otherwise cl receives the low byte of the exponent and, when the rounding
+;position is in the high dword, the mantissa is shifted right 32 bits
+;before falling into RoundLow.
+ mov edx,ecx ;Make copy of exponent
+ sar edx,16 ;Bring rounding exponent down
+;OF is undefined after a multi-bit shift, so test the sign flag directly
+;instead of using a signed-less-than jump (which reads SF and OF).
+ js NoSigBits ;Exponent negative: no integer bits
+ mov cl,dl
+ cmp cl,32 ;Rounding in low word?
+ jae RoundLow
+;When cl = 31, the RoundBit is in the low half while the LSB is in the
+;high half. We must preserve the RoundBit when we move it to eax.
+ xchg eax,esi ;Low half becomes sticky bits
+ or ah,al ;Preserve lowest bits in ah
+ add esi,-1 ;Set CY if any original sticky bits
+ sbb al,al ;Put original sticky bits in al
+ mov esi,ebx
+ xor ebx,ebx ;Shift mantissa right 32 bits
+RoundLow: ;Rounding position is in esi; ebx is the part above it
+ mov edx,(1 shl 31) - 1
+ shr edx,cl ;Make mask
+;Note in the case of cl = 31, edx is now zero.
+ mov edi,esi ;edi is used as scratch from here on
+ and edi,edx
+ or edi,eax ;Any bits being lost?
+ jz RndSetTag ;All done
+ inc edx ;Mask for LSB
+ or EMSEG:[CURerr],Precision;Set flag on inexact result
+ test EMSEG:[CWcntl],RoundControl ;Check rounding control bits
+.erre RCnear eq 0
+ jnz NotNearLow ;Not just round-to-nearest
+ mov edi,edx ;Save LSB mask
+ shr edi,1 ;Mask for round bit
+ jc SplitRound ;Round bit in eax?
+ test esi,edi ;Round bit set?
+ jz MaskOffLow
+ dec edi ;Mask for sticky bits
+ or edi,edx ;Sticky bits + LSB
+ and edi,esi
+ or edi,eax ;Any sticky bits set?
+ jz MaskOffLow
+RoundUpThenMask:
+ mov EMSEG:[SWcc],RoundUp
+ add esi,edx ;Round up
+ adc ebx,0
+ jc RoundBumpExp
+MaskOffLow:
+ dec edx ;Mask for round & sticky bits
+ not edx
+ and esi,edx ;Zero out low bits
+RndSetTag:
+ or ebx,ebx ;Is it normalized?
+ jns RoundedHighHalf ;MSB clear: mantissa was shifted down 32 bits (or rolled over)
+ or esi,esi ;Any bits in low half?
+.erre bTAG_VALID eq 1
+.erre bTAG_SNGL eq 0
+ setnz cl ;if low half==0 then cl=0 else cl=1
+ ret
+
+SplitRound:
+;Rounding high half in esi on rounding bit in eax
+ bt esi,0 ;Look at LSB
+ adc eax,(1 shl 31) - 1 ;Set CY if round up
+ jc RoundUpThenMask
+ or ebx,ebx ;Will set ZF for jnz below
+RoundedHighHalf:
+;Rounding occurred in high half, which had been moved low.
+;Move it back up high.
+;
+;ZF set here on content of ebx. If not zero, rounding high half in esi
+;rippled forward into zero in ebx.
+ mov cl,bTAG_SNGL ;mov leaves the flags from "or ebx,ebx" intact
+ jnz RndIntNorm ;Present high half should be zero
+ xchg ebx,esi ;Shift left 32 bits
+ ret
+
+RndIntNorm:
+;Rounded up high half of mantissa, which rolled over to 0.
+ add ecx,1 shl 16 ;Increase exponent
+ mov ebx,1 shl 31 ;Restore MSB
+ ret ;Tag already set to SNGL
+
+RoundBumpExp:
+;Mantissa was FFFFF... and rolled over to 0 when we rounded
+ add ecx,1 shl 16 ;Increase exponent
+ mov ebx,1 shl 31 ;Restore MSB
+ jmp MaskOffLow
+
+NotNearLow:
+;We want to increase the magnitude if RCup and +, or RCdown and -
+ mov al,EMSEG:[CWcntl] ;Get rounding control
+ sar ch,7 ;Expand sign through whole byte
+.erre (not RCup and RoundControl) eq RCdown
+ xor al,ch ;Flip rounding bits if negative
+ and al,RoundControl
+ cmp al,RCup ;Rounding up?
+ jz RoundUpThenMask ;yes
+ jmp MaskOffLow ;No, chop it
diff --git a/private/ntos/dll/i386/emsincos.asm b/private/ntos/dll/i386/emsincos.asm
new file mode 100644
index 000000000..8cd3bc0ac
--- /dev/null
+++ b/private/ntos/dll/i386/emsincos.asm
@@ -0,0 +1,571 @@
+; SCCSID = @(#)emsincos.asm 13.5 90/03/27
+ page ,132
+ subttl emsincos - fsin, fcos and fsincos
+;***
+;emsincos.asm - 80387 emulator
+;
+; IBM/Microsoft Confidential
+;
+; Copyright (c) IBM Corporation 1987, 1989
+; Copyright (c) Microsoft Corporation 1987, 1989
+;
+; All Rights Reserved
+;
+;Purpose:
+; Code for fsin, fcos and fsincos
+;
+;Revision History:
+; See emulator.hst
+;
+;*******************************************************************************
+
+lab eFsincosStackOver ; FSINCOS when st(-1) is not empty (no room to push)
+ or SEL[CURerr], StackFlag or Invalid
+ test SEL[CWmask], Invalid
+ JSZ eFsincosStackOverRet ; mask bit clear: return without changing the stack
+
+ mov SEL[rsi.lMan0], 0 ; st(0) = Ind
+ mov SEL[rsi.lMan1], 0c0000000h
+ mov SEL[rsi.wExp], 7fffh - IexpBias
+ mov SEL[rsi.bTag], bTAG_NAN
+ mov SEL[rsi.bFlags], bSign
+
+ mov SEL[rdi.lMan0], 0 ; st(-1) = Ind
+ mov SEL[rdi.lMan1], 0c0000000h
+ mov SEL[rdi.wExp], 7fffh - IexpBias
+ mov SEL[rdi.bTag], bTAG_NAN
+ mov SEL[rdi.bFlags], bSign
+
+ mov SEL[CURstk], rdi ; push stack
+lab eFsincosStackOverRet
+ ret
+
+
+lab eFSINCOS
+ mov esi, SEL[CURStk] ; esi = st(0)
+ mov edi, esi
+ PrevStackElem edi ; edi = st(-1)
+
+ cmp SEL[edi.bTag], bTAG_EMPTY
+ JSNE eFsincosStackOver ; destination slot in use: stack overflow
+
+ cmp SEL[esi.bTag], bTAG_NAN
+ JSNE eFsincosNotSNaN
+
+ test SEL[esi.bMan7], 40h ; bit 6 of the top mantissa byte set: not a signaling NaN
+ JSNZ eFsincosNotSNaN
+
+ test SEL[CWmask], Invalid
+ JSNZ eFsincosNotSNaN ; mask bit set: continue with the normal path
+
+ or SEL[CURerr], Invalid ; otherwise flag Invalid and leave the stack untouched
+ ret
+
+lab eFsincosNotSNaN
+ifdef NT386
+ push eax ; copy the 12-byte st(0) entry into st(-1), preserving eax
+ mov eax, dword ptr SEL[rsi]
+ mov dword ptr SEL[rdi], eax
+ mov eax, dword ptr SEL[rsi+4]
+ mov dword ptr SEL[rdi+4], eax
+ mov eax, dword ptr SEL[rsi+8]
+ mov dword ptr SEL[rdi+8], eax
+ add rsi, Reg87Len ; advance both pointers as the movsd sequence below would
+ add rdi, Reg87Len
+ pop eax
+else
+ push ds ; Copy current stack into st(-1)
+ pop es
+ movsd
+ movsd
+ movsd
+endif
+
+ call eFSIN ; sine computed in place in the current st(0)
+ PUSHST ; presumably makes the copied entry the new st(0) - confirm macro
+ call eFCOS ; cosine computed in the new st(0)
+
+ ret
+
+
+lab eFcosSpecial ; reached from inside eFCOS when st(0) is not tagged bTAG_VALID
+ mov esp, ebp ; discard eFCOS's frame (presumably set up by cBegin)
+ pop ebp
+
+ mov SEL[RESULT], esi
+
+ mov al, SEL[esi.bTag]
+ cmp al, bTAG_ZERO
+ JSNE eFcosInf
+
+lab eFcosRetOne ; zero argument: store +1.0 in st(0)
+ mov SEL[esi.lMan0], 0
+ mov SEL[esi.lMan1], 080000000h
+ mov SEL[esi.wExp], 3fffh - IexpBias
+ mov SEL[esi.bFlags], 0
+ mov SEL[esi.bTag], bTAG_VALID
+ ret
+
+lab eFcosInf
+ cmp al, bTAG_INF
+ JE RetIndInv ; infinity: handled by RetIndInv (defined elsewhere)
+
+lab eFcosNaN ; every remaining tag is handled by OneArgOpNaNRet
+ jmp OneArgOpNaNRet
+
+
+cProc eFCOS,<PLM,PUBLIC>,<>
+
+ localT temp
+ localB SignFlag
+
+cBegin
+ mov esi, SEL[CURstk] ; esi = st(0), replaced in place by its cosine
+
+ cmp SEL[esi.bTag], bTAG_VALID
+ jne eFcosSpecial
+
+ or SEL[CURerr], Precision
+
+ and SEL[esi].bFlags, not bSign ; st(0) = fabs( st(0) );
+
+ call SinCosReduce ; Set ah to condition code.
+
+ add SEL[esi].wExp, IExpBias ; bias the exponent for the __FASTLD* routines
+
+ push SEL[esi].wExp ; pass the 10-byte value by value (exp, high, low mantissa)
+ push SEL[esi].lMan1
+ push SEL[esi].lMan0
+ lea ecx, [temp]
+ push ecx ; followed by the address of the result buffer
+
+ mov bl, ah ; if octant 2, 3, 4, or 5 then final
+ and bl, bOCT2 or bOCT4 ; result must be negative
+ mov [SignFlag], bl
+
+ test ah, bOCT1 or bOCT2 ; if octant is 1, 2, 5, 6 then must
+ jpo CosCallSin ; do sin()
+
+ call __FASTLDCOS
+ jmp short CosCopyRes
+
+CosCallSin:
+ call __FASTLDSIN
+
+CosCopyRes:
+ mov eax, dword ptr [temp] ; copy the result mantissa back into st(0)
+ mov SEL[esi].lMan0, eax
+ mov eax, dword ptr [temp+4]
+ mov SEL[esi].lMan1, eax
+
+ mov ax, word ptr [temp+8]
+ sub ax, IExpBias ; back to the emulator's unbiased exponent
+ mov SEL[esi].wExp, ax
+
+ cmp [SignFlag], 0 ; PF reflects the parity of SignFlag
+ jpe CosDone ; even parity (neither or both octant bits): keep sign
+
+ or SEL[esi].bFlags, bSign ; Make result negative.
+CosDone:
+
+cEnd
+
+
+
+
+
+lab eFsinSpecial ; reached from inside eFSIN when st(0) is not tagged bTAG_VALID
+ mov esp, ebp ; discard eFSIN's frame (presumably set up by cBegin)
+ pop ebp
+
+ mov al, SEL[esi.bTag]
+ cmp al, bTAG_ZERO
+ JSNE eFsinInf
+
+lab eFsinZero ; zero argument: st(0) is left unchanged
+ ret
+
+lab eFsinInf
+ cmp al, bTAG_INF
+ JE RetIndInv ; infinity: handled by RetIndInv (defined elsewhere)
+
+lab eFsinNaN ; every remaining tag is handled by OneArgOpNaNRet
+ jmp OneArgOpNaNRet
+
+
+cProc eFSIN,<PLM,PUBLIC>,<>
+
+ localT temp
+ localB SignFlag
+
+cBegin
+ mov esi, SEL[CURstk] ; esi = st(0), replaced in place by its sine
+
+ cmp SEL[esi.bTag], bTAG_VALID
+ jne eFsinSpecial
+
+ or SEL[CURerr], Precision
+
+ mov al, SEL[esi].bFlags
+ and SEL[esi].bFlags, not bSign ; work on the magnitude; sign is reapplied below
+
+ shl al, 1 ; shift sign into carry.
+ sbb cl, cl ; set cl to -1 if argument is negative.
+
+ push ecx ; keep the sign indicator across the call
+ call SinCosReduce ; Set ah to condition code.
+ pop ecx
+
+ cmp SEL[esi].bTag, bTAG_ZERO
+ je SinDone ; reduced argument is tagged zero: leave it as the result
+
+ add SEL[esi].wExp, IExpBias ; bias the exponent for the __FASTLD* routines
+
+ push SEL[esi].wExp ; pass the 10-byte value by value (exp, high, low mantissa)
+ push SEL[esi].lMan1
+ push SEL[esi].lMan0
+ lea ebx, [temp]
+ push ebx ; followed by the address of the result buffer
+
+ mov bl, ah ; if octant 4, 5, 6 or 7 then final
+ and bl, bOCT4 ; result must be negative
+
+ neg cl ; set cl to odd parity if arg was < 0.0
+ xor bl, cl ; set bl to odd parity if result must be negative
+
+ mov [SignFlag], bl
+
+ test ah, bOCT1 or bOCT2 ; if octant is 1, 2, 5, 6 then must
+ jpo SinCallCos ; do cos()
+
+ call __FASTLDSIN
+ jmp short SinCopyResult
+
+SinCallCos:
+ call __FASTLDCOS
+
+SinCopyResult:
+ mov eax, dword ptr [temp] ; copy the result mantissa back into st(0)
+ mov SEL[esi].lMan0, eax
+ mov eax, dword ptr [temp+4]
+ mov SEL[esi].lMan1, eax
+
+ mov ax, word ptr [temp+8]
+ sub ax, IExpBias ; back to the emulator's unbiased exponent
+ mov SEL[esi].wExp, ax
+
+ cmp [SignFlag], 0 ; PF reflects the parity of SignFlag
+ jpe SinDone ; even parity: result stays positive
+
+ or SEL[esi].bFlags, bSign ; Make result negative.
+SinDone:
+
+cEnd
+
+
+
+lab SinCosReduce ; in: esi = st(0); out: ah = SWcc (octant bits); esi preserved
+ mov SEL[TEMP1.bFlags], 0 ; TEMP1 = pi/4
+ mov SEL[TEMP1.bTag], bTAG_VALID
+ mov SEL[TEMP1.wExp], 3ffeh-IExpBias
+ mov SEL[TEMP1.wMan3], 0c90fh
+ mov SEL[TEMP1.wMan2], 0daa2h
+ mov SEL[TEMP1.wMan1], 2168h
+ mov SEL[TEMP1.wMan0], 0c235h
+
+ifdef NT386
+ mov edi, TEMP1
+else
+ mov edi, edataOFFSET TEMP1
+endif
+
+ push esi
+ call InternFPREM ; rsi = st(0), rdi = st(0)
+ pop esi
+
+ mov ah, SEL[SWcc] ; condition codes left by InternFPREM
+
+ test ah, bOCT1 ; check for even octant
+ jz EvenOct ; yes
+
+ add SEL[esi.wExp], IExpBias ; convert to true long double
+
+ push ds ; destination: st(0)
+ push esi
+ push cs ; first operand: PIBY4 constant
+ push ecodeOFFSET PIBY4
+ push ds ; second operand: st(0)
+ push esi
+ push -1 ; presumably selects subtraction (the polynomial adds push 0) - confirm
+ call __FASTLDADD ; st(0) = pi/4 - st(0)
+ mov ah, SEL[SWcc] ; reload: ah is not assumed to survive the call
+
+ sub SEL[esi.wExp], IExpBias ; convert to squirly emulator long double
+
+EvenOct:
+ retn
+
+
+
+labelW PIBY4
+ dw 0c235h, 02168h, 0daa2h, 0c90fh, 3ffeh ; pi/4: mantissa C90FDAA22168C235h, biased exponent 3FFEh
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; BUGBUG tedm: NT masm can't handle floating-point constants ;
+; because strtod and _strtold C-runtimes aren't ;
+; there. So the constants below must be pre- ;
+; assembled and defined as a byte stream. ;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ifdef NOTDEF
+
+staticT FourByPI, +0.1273239544735162686151e+01
+
+staticT SinP0, +0.7853981633974483096141845e+00
+staticT SinP1, -0.8074551218828078152025820e-01
+staticT SinP2, +0.2490394570192716275251900e-02
+staticT SinP3, -0.3657620418214640005290000e-04
+staticT SinP4, +0.3133616889173253480000000e-06
+staticT SinP5, -0.1757247417617080600000000e-08
+staticT SinP6, +0.6948152035052200000000000e-11
+staticT SinP7, -0.2022531292930000000000000e-13
+
+staticT CosP0, +0.99999999999999999996415e+00
+staticT CosP1, -0.30842513753404245242414e+00
+staticT CosP2, +0.15854344243815410897540e-01
+staticT CosP3, -0.32599188692668755044000e-03
+staticT CosP4, +0.35908604458858195300000e-05
+staticT CosP5, -0.24611363826370050000000e-07
+staticT CosP6, +0.11500497024263000000000e-09
+staticT CosP7, -0.38577620372000000000000e-12
+
+else
+
+staticB FourByPI, <02Ah,015h,044h,04Eh,06Eh,083h,0F9h,0A2h,0FFh,03Fh> ; 10-byte constants, least significant byte first
+
+staticB SinP0 , <035h,0C2h,068h,021h,0A2h,0DAh,00Fh,0C9h,0FEh,03Fh> ; tables are walked downward in 10-byte steps, P7 to P0
+staticB SinP1 , <0DAh,095h,0F2h,02Dh,031h,0E7h,05Dh,0A5h,0FBh,0BFh>
+staticB SinP2 , <0E9h,0C6h,056h,0ADh,03Bh,0E3h,035h,0A3h,0F6h,03Fh>
+staticB SinP3 , <0D5h,0E7h,05Dh,015h,073h,066h,069h,099h,0F0h,0BFh>
+staticB SinP4 , <0BCh,032h,069h,0E1h,042h,01Ah,03Ch,0A8h,0E9h,03Fh>
+staticB SinP5 , <021h,077h,004h,05Fh,0A1h,0A5h,083h,0F1h,0E1h,0BFh>
+staticB SinP6 , <0FCh,01Ah,0D1h,006h,0CCh,063h,077h,0F4h,0D9h,03Fh>
+staticB SinP7 , <04Ah,003h,086h,040h,07Ch,065h,02Ch,0B6h,0D1h,0BFh>
+
+staticB CosP0 , <0FFh,0FFh,0FFh,0FFh,0FFh,0FFh,0FFh,0FFh,0FEh,03Fh>
+staticB CosP1 , <02Fh,0F2h,02Eh,0F2h,04Dh,0E6h,0E9h,09Dh,0FDh,0BFh>
+staticB CosP2 , <02Fh,04Eh,0D5h,0DAh,040h,0F8h,0E0h,081h,0F9h,03Fh>
+staticB CosP3 , <09Dh,0DEh,06Ah,0E4h,0F1h,0E3h,0E9h,0AAh,0F3h,0BFh>
+staticB CosP4 , <031h,01Eh,0F9h,081h,041h,083h,0FAh,0F0h,0ECh,03Fh>
+staticB CosP5 , <076h,0B1h,000h,0A4h,01Eh,0F6h,068h,0D3h,0E5h,0BFh>
+staticB CosP6 , <0D8h,005h,06Fh,08Ah,0EAh,00Ah,0E6h,0FCh,0DDh,03Fh>
+staticB CosP7 , <003h,0D5h,00Ah,0ACh,0CCh,035h,02Ch,0D9h,0D5h,0BFh>
+
+endif
+
+cProc __FASTLDSIN,<PLM,PUBLIC>,<isi,idi>
+
+ parmT x
+ parmI RetOff
+
+ localT x2
+ localT poly
+ localI count
+
+cBegin
+
+ lea isi, [x] ; x = x * (4/PI)
+ push ss
+ push isi
+
+ push ss
+ push isi
+
+ mov iax, codeOFFSET FourByPI
+ push cs
+ push iax
+
+ call __FASTLDMULT
+
+
+ lea idi, [x2] ; x2 = x * x
+ push ss
+ push idi
+
+ push ss
+ push isi
+
+ push ss
+ push isi
+
+ call __FASTLDMULT
+
+if 0
+ push ss
+ pop es
+ lea idi, [poly]
+ mov isi, codeOFFSET SinP7
+ movsw
+ movsw
+ movsw
+ movsw
+ movsw
+endif
+ mov eax, dword ptr [SinP7] ; poly = SinP7
+ mov dword ptr [poly], eax
+ mov eax, dword ptr [SinP7+4]
+ mov dword ptr [poly+4], eax
+ mov ax, word ptr [SinP7+8]
+ mov word ptr [poly+8], ax
+
+ lea isi, [poly]
+ mov idi, codeOFFSET SinP6 ; next coefficient to add
+
+ mov [count], 7 ; seven steps: SinP6 down to SinP0
+
+SinPolyLoop:
+ push ss
+ push isi ; poly = poly * x2
+
+ push ss
+ push isi
+
+ lea iax, [x2]
+ push ss
+ push iax
+
+ call __FASTLDMULT
+
+
+ push ss
+ push isi ; poly = poly + SinP[n]
+
+ push ss
+ push isi
+
+ push cs
+ push idi
+
+ xor iax, iax
+ push iax ; final argument 0 (SinCosReduce passes -1 for its subtraction)
+ call __FASTLDADD
+
+ sub idi, 10 ; step down to the previous 10-byte coefficient
+
+ dec [count]
+ jnz SinPolyLoop
+
+ push ss
+ push [RetOff] ; return x * poly
+
+ lea iax, [x]
+ push ss
+ push iax
+
+ push ss
+ push isi
+
+ call __FASTLDMULT
+
+ mov iax, [RetOff] ; return value: idx:iax = ss:RetOff
+ mov idx, ss
+cEnd
+
+
+
+
+cProc __FASTLDCOS,<PLM,PUBLIC>,<isi,idi>
+
+ parmT x
+ parmI RetOff
+
+ localT x2
+ localI count
+
+cBegin
+
+ lea isi, [x] ; x = x * (4/PI)
+ push ss
+ push isi
+
+ push ss
+ push isi
+
+ mov iax, codeOFFSET FourByPI
+ push cs
+ push iax
+
+ call __FASTLDMULT
+
+
+ lea idi, [x2] ; x2 = x * x
+ push ss
+ push idi
+
+ push ss
+ push isi
+
+ push ss
+ push isi
+
+ call __FASTLDMULT
+
+if 0
+ push ss ; (return) = CosP7
+ pop es
+ mov idi, [RetOff]
+ mov isi, codeOFFSET CosP7
+ movsw
+ movsw
+ movsw
+ movsw
+ movsw
+endif
+ mov isi, [RetOff] ; accumulate directly in the caller's result buffer
+ mov eax, dword ptr [CosP7] ; (return) = CosP7
+ mov dword ptr ss:[isi], eax
+ mov eax, dword ptr [CosP7+4]
+ mov dword ptr ss:[isi+4], eax
+ mov ax, word ptr [CosP7+8]
+ mov word ptr ss:[isi+8], ax
+
+ mov idi, codeOFFSET CosP6 ; next coefficient to add
+
+ mov [count], 7 ; seven steps: CosP6 down to CosP0
+
+CosPolyLoop:
+ push ss
+ push isi ; (return) = (return) * x2
+
+ push ss
+ push isi
+
+ lea iax, [x2]
+ push ss
+ push iax
+
+ call __FASTLDMULT
+
+
+ push ss
+ push isi ; (return) = (return) + CosP[n]
+
+ push ss
+ push isi
+
+ push cs
+ push idi
+
+ xor iax, iax
+ push iax ; final argument 0 (SinCosReduce passes -1 for its subtraction)
+
+ call __FASTLDADD
+
+
+ sub idi, 10 ; step down to the previous 10-byte coefficient
+
+ dec [count]
+ jnz CosPolyLoop
+
+ mov iax, isi ; return value: idx:iax = ss:RetOff (no final multiply by x)
+ mov idx, ss
+cEnd
diff --git a/private/ntos/dll/i386/emstack.inc b/private/ntos/dll/i386/emstack.inc
new file mode 100644
index 000000000..a60c03aff
--- /dev/null
+++ b/private/ntos/dll/i386/emstack.inc
@@ -0,0 +1,72 @@
+ subttl emstack.asm - Emulator Stack Management Macros
+ page
+;***
+;emstack.asm - Emulator Stack Management Area
+;
+; Microsoft Confidential
+; Copyright (c) Microsoft Corporation 1991
+; All Rights Reserved
+;
+;Purpose:
+; Handles emulator stack.
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+;POPSTret: pops the emulator stack and returns (the expansion ends in RET).
+; reg (optional) = register already holding [CURstk]. If it is omitted,
+; esi is loaded from [CURstk] and used instead.
+;The current element is tagged bTAG_EMPTY, then [CURstk] is advanced to the
+;next element. When NextStackElem takes its wrap exit, [CURstk] is set to
+;BEGstk instead. Both paths finish with RET.
+
+POPSTret macro reg
+ local stackwrap
+IFB <reg>
+ mov esi,EMSEG:[CURstk]
+_popreg equ esi
+ELSE
+_popreg equ reg
+ENDIF
+ mov EMSEG:[_popreg].bTag,bTAG_EMPTY
+ NextStackElem _popreg,stackwrap
+ mov EMSEG:[CURstk],_popreg
+ ret
+
+;Wrap target for the NextStackElem expansion above
+Wrap&stackwrap:
+ mov EMSEG:[CURstk],BEGstk
+ ret
+ endm
+
+;NextStackElem: Given pST(0) = [CURstk] in reg, returns pST(1)
+;Requires NextStackWrap macro with same arguments
+;(the invoker must supply a Wrap&stackwrap label; this macro defines
+;Cont&stackwrap as the point where the wrap code rejoins).
+;If reg is at or above INITstk the wrap label is taken; otherwise reg is
+;advanced by Reg87Len. Flags are modified.
+
+NextStackElem macro reg,stackwrap
+ cmp reg,INITstk ;JWM
+ jae Wrap&stackwrap
+ add reg,Reg87Len
+Cont&stackwrap:
+ endm
+
+;NextStackWrap: out-of-line wrap code for NextStackElem. Loads reg with
+;BEGstk and jumps back to the matching Cont&stackwrap label.
+NextStackWrap macro reg,stackwrap
+Wrap&stackwrap:
+ mov reg,BEGstk ;JWM
+ jmp Cont&stackwrap
+ endm
+
+
+;PrevStackElem: Given pST(0) = [CURstk] in reg, returns new pST(0)
+;after a push onto the stack.
+;Requires PrevStackWrap macro with same arguments
+;If reg is at or below BEGstk the wrap label is taken; otherwise reg is
+;moved back by Reg87Len. Flags are modified.
+
+PrevStackElem macro reg,stackwrap
+ cmp reg,BEGstk ;JWM
+ jbe Wrap&stackwrap
+ sub reg,Reg87Len
+Cont&stackwrap:
+ endm
+
+;PrevStackWrap: out-of-line wrap code for PrevStackElem. Loads reg with
+;INITstk and jumps back to the matching Cont&stackwrap label.
+PrevStackWrap macro reg,stackwrap
+Wrap&stackwrap:
+ mov reg,INITstk ;JWM
+ jmp Cont&stackwrap
+ endm
diff --git a/private/ntos/dll/i386/emstore.asm b/private/ntos/dll/i386/emstore.asm
new file mode 100644
index 000000000..aadeb6520
--- /dev/null
+++ b/private/ntos/dll/i386/emstore.asm
@@ -0,0 +1,803 @@
+ subttl emstore.asm - FST, FSTP, FIST, FISTP instructions
+ page
+;*******************************************************************************
+;emstore.asm - FST, FSTP, FIST, FISTP instructions
+;
+; Microsoft Confidential
+;
+; Copyright (c) Microsoft Corporation 1991
+; All Rights Reserved
+;
+;Purpose:
+; FST, FSTP, FIST, FISTP instructions
+;Inputs:
+; edi = [CURstk]
+; dseg:esi = pointer to memory destination
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+;******
+EM_ENTRY eFSTP
+eFSTP:
+;******
+; edi = [CURstk]
+; esi = pointer to st(i) from instruction field
+;
+;FSTP st(i): copy ST(0) into st(i), then pop the stack.
+;If ST(0) is tagged empty, [CURerr] is set to Invalid+StackFlag and the
+;routine returns without copying or popping.
+
+ cmp EMSEG:[edi].bTag,bTAG_EMPTY
+ jz short efstp_StackError
+;UNDONE: temporary hack to preserve condition codes
+ mov ax,[esp+4].OldStatus
+ mov EMSEG:[StatusWord],ax
+;UNDONE: end of hack
+
+;A common use of this instruction is FSTP st(0) just to pop the stack.
+;We check for this case and optimize it.
+ cmp esi,edi
+ jz short JustPop
+;Copy the register
+ mov eax,EMSEG:[edi].ExpSgn
+ mov EMSEG:[esi].ExpSgn,eax
+ mov eax,EMSEG:[edi].lManHi
+ mov EMSEG:[esi].lManHi,eax
+ mov eax,EMSEG:[edi].lManLo
+ mov EMSEG:[esi].lManLo,eax
+JustPop:
+ POPSTret edi ;Tag ST(0) empty, advance [CURstk], RET
+
+efstp_StackError:
+ mov EMSEG:[CURerr],Invalid+StackFlag
+ ret
+
+
+;******
+EM_ENTRY eFST
+eFST:
+;******
+; edi = [CURstk]
+; esi = pointer to st(i) from instruction field
+;
+;FST st(i): copy ST(0) into st(i) without popping.
+;An empty ST(0) is handed to StackError.
+;The DontPop label at the end is also the shared "just return" target used
+;by PopStackChk.
+
+ cmp EMSEG:[edi].bTag,bTAG_EMPTY
+ jz StackError ;In emarith.asm
+;Copy the register
+ mov eax,EMSEG:[edi].ExpSgn
+ mov EMSEG:[esi].ExpSgn,eax
+ mov eax,EMSEG:[edi].lManHi
+ mov EMSEG:[esi].lManHi,eax
+ mov eax,EMSEG:[edi].lManLo
+ mov EMSEG:[esi].lManLo,eax
+DontPop:
+ ret
+
+
+;Come here if the instruction wants to pop the stack
+;PopStackChk is the address pushed by the eFSTPxx/eFISTPxx entries, so the
+;store routine's RET lands here. CY set on arrival means nothing was
+;stored, and the pop is skipped.
+;PopStack pops unconditionally: it reloads edi from [CURstk] and pops it.
+
+PopStackChk:
+ jc DontPop ;Get unmasked error?
+PopStack:
+ mov edi,EMSEG:[CURstk]
+ POPSTret edi
+
+
+;StoreSpcl64: double-real store of an operand whose tag is above bTAG_ZERO.
+;Reached from eFST64 with cl = tag, ebx:edi = mantissa, al = sign byte,
+;dseg:esi = destination.
+;Returns CY set when nothing was stored (unmasked exception), else CY clear.
+StoreSpcl64:
+ cmp cl,bTAG_DEN
+ jz Denorm64
+.erre bTAG_NAN lt bTAG_EMPTY
+.erre bTAG_NAN gt bTAG_INF
+ cmp cl,bTAG_NAN
+ mov ecx,DexpMax shl 16 ;Insert special exponent for NAN/Inf.
+ jb StoreIEEE64 ;Go handle infinity
+ ja Empty64
+;Have a NAN.
+ test ebx,1 shl 30 ;Check for SNAN
+ jnz StoreIEEE64 ;Go store QNAN
+ or ebx,1 shl 30 ;Make SNAN into a QNAN
+ mov EMSEG:[CURerr],Invalid ;Flag the exception
+ test EMSEG:[CWmask],Invalid ;Is it masked?
+ jnz StoreIEEE64 ;If so, update with masked response
+ stc ;Don't pop stack
+ ret
+
+Empty64:
+;It's empty--signal invalid operation
+ mov EMSEG:[CURerr],StackFlag+Invalid
+ test EMSEG:[CWmask],Invalid ;Is it masked?
+ jz DoNothing64 ;No--leave memory unchanged
+;Store Indefinite
+;For FSTP, we go ahead and do the pop even though it's empty
+ mov dword ptr dseg:[esi],0
+ mov dword ptr dseg:[esi+4],0FFF80000H ;64-bit IEEE indefinite
+ ret ;CY clear
+
+Denorm64:
+;Denormal operand: flag it, and if the exception is masked store it through
+;the normal path.
+ mov EMSEG:[CURerr],Denormal
+ test EMSEG:[CWmask],Denormal ;Is it masked?
+ jnz NormStore64 ;If so, ignore denormalization
+DoNothing64:
+ stc ;Don't pop stack
+ ret
+
+;*****************
+;Store Double Real
+;*****************
+;FST/FSTP m64real.
+;Inputs: edi = [CURstk], dseg:esi = memory destination.
+;eFSTP64 pushes PopStackChk so that the RET ending the store lands there;
+;the store returns CY set when nothing was written (so no pop is done) and
+;CY clear when memory was updated.
+
+EM_ENTRY eFSTP64
+eFSTP64:
+ push offset PopStackChk ;Return here after store
+
+EM_ENTRY eFST64
+eFST64:
+ mov EMSEG:[PrevDataOff],esi ;Save operand pointer
+ mov ebx,EMSEG:[edi].lManHi
+ mov ecx,EMSEG:[edi].ExpSgn
+ mov edi,EMSEG:[edi].lManLo
+;mantissa in ebx:edi, exponent in high ecx, sign in ch bit 7, tag in cl
+;memory destination is dseg:esi
+ mov al,ch ;Save sign bit
+ cmp cl,bTAG_ZERO
+.erre bTAG_VALID lt bTAG_ZERO
+.erre bTAG_SNGL lt bTAG_ZERO
+ jz short SignAndStore64 ;Just set sign and exit
+ ja StoreSpcl64
+NormStore64:
+;Note that we could have a denormal exception at this point.
+;Thus any additional exceptions must OR into [CURerr], not MOV.
+ xor cx,cx
+ add ecx,(DexpBias-TexpBias) shl 16 ;Correct bias
+ jl short Under64
+ cmp ecx,DexpMax shl 16 ;Exponent too big?
+ jge Over64
+ test edi,(1 shl 11) - 1 ;Any bits to round?
+ jz short StoreIEEE64
+
+Round64:
+ or EMSEG:[CURerr],Precision ;Set flag on inexact result
+ test EMSEG:[CWcntl],RoundControl ;Check rounding control bits
+.erre RCnear eq 0
+ jnz NotNearest64 ;Not just round-to-nearest
+ test edi,1 shl 10 ;Check rounding bit
+ jz short StoreIEEE64 ;If zero, don't round up
+ test edi,(3 shl 10)-1 ;Test LSB and sticky bits
+ jnz RoundUp64b
+
+StoreIEEE64:
+ or ecx, ecx ;now that value is rounded,
+ je short Under64 ;check exponent for underflow
+
+StoreIEEE64Continue:
+;Only the high word of ecx is exponent. The masked-underflow code leaves
+;ecx = 1 as an "already denormalized" marker; it must be removed here or
+;the shift below would move it into bit 4 of the stored mantissa.
+ xor cx,cx ;Discard marker, keep exponent
+ and ebx,not (1 shl 31) ;Clear MSB--it's implied in IEEE64
+ shrd edi,ebx,11
+ shr ebx,11 ;Move mantissa down
+ shl ecx,4 ;Exponent up to position
+ or ebx,ecx ;Combine exponent
+SignAndStore64:
+ and al,bSign ;Just sign bit
+ shl eax,24 ;Sign to MSB
+ or ebx,eax ;Combine sign
+ mov dseg:[esi],edi
+ mov dseg:[esi+4],ebx
+;CY clear indicate no error
+ ret
+
+;SetUnderflow: unmasked underflow exit shared by Under64 and Under32.
+;Records Underflow in [CURerr] and returns CY set so that nothing is stored
+;and PopStackChk does not pop.
+SetUnderflow:
+ or EMSEG:[CURerr],Underflow ;Unmasked underflow--do nothing
+DoNothing:
+ stc ;Indicate nothing was done
+ ret
+
+;Under64: double-real exponent is too small for a normal number.
+;Entered with the biased exponent in the high word of ecx (negative from
+;NormStore64, zero from StoreIEEE64), mantissa in ebx:edi, sign in al.
+;If underflow is unmasked, exits through SetUnderflow (CY set).
+;Otherwise the mantissa is shifted right into denormal position, with the
+;bits shifted out collapsed into a sticky bit, and is then stored or rounded.
+;NOTE(review): both jumps to Under64 visible in this file arrive with
+;cl == 0, so the "dec cl / jz" test below does not appear to be taken.
+Under64:
+ dec cl ; Is cx == 1?
+ jz short StoreIEEE64Continue ; Yes, we've already been here
+
+ test EMSEG:[CWmask],Underflow ;Is underflow masked?
+ jz SetUnderflow ;No, do nothing more
+;Produce masked underflow response
+;Note that the underflow exception does not occur if the number can be
+;represented exactly as a denormal.
+
+ sar ecx,16 ;Bring exponent down
+ cmp ecx,DexpMin-52 ;Allow for shift down to rounding bit
+ jl BigUnder64 ;Too small, just make it zero
+.erre DexpMin eq 0
+ neg ecx ;Use as shift count
+ inc ecx ;Shift by at least one
+ xor edx,edx ;Place for sticky bits
+ cmp cl,32 ;Long shift?
+ jb ShortDenorm
+ neg edi ;CY set if non-zero
+ sbb edx,edx ;-1 if bits shifted off, else zero
+ mov edi,ebx
+ xor ebx,ebx ;32-bit right shift
+ShortDenorm:
+;Shift count is modulo-32
+ shrd edx,edi,cl
+ shrd edi,ebx,cl
+ shr ebx,cl
+ cmp edx,1 ;CY set if zero, else clear
+ sbb edx,edx ;Zero if bits shifted off, else -1
+ inc edx ;1 if bits shifted off, else zero
+ or edi,edx ;Collapse sticky bits into edi
+
+;ecx = 1 is non-zero, so the zero test at StoreIEEE64 will not branch back
+;to Under64 for this value.
+ mov ecx, 1 ;Biased exponent is zero, put 1 into CL (noticed by Under64)
+ test edi,(1 shl 11) - 1 ;Any bits to round?
+ jz StoreIEEE64 ;If not, no exception
+ or EMSEG:[CURerr],Underflow
+ jmp Round64
+
+;Over64: double-real exponent is too large.
+;al = sign byte of the operand. If overflow is unmasked, exits through
+;SetOverflow (CY set, nothing stored). Otherwise builds the masked response
+;in ebx:edi--infinity, or the largest finite value when the rounding
+;control and sign select it--and stores it through SignAndStore64.
+Over64:
+ test EMSEG:[CWmask],Overflow ;Is overflow masked?
+ jz SetOverflow ;No, do nothing more
+;Produce masked overflow response
+ or EMSEG:[CURerr],Overflow+Precision
+ mov ebx,DexpMax shl 20
+ xor edi,edi ;ebx:edi = positive infinity
+ mov ah,EMSEG:[CWcntl] ;Get rounding control
+;Return max value if RCup bit = 1 and -, or RCdown bit = 1 and +
+;i.e., RCup & sign OR RCdown & not sign
+.erre RCchop eq RCup + RCdown ;Always return max value
+.erre RCnear eq 0 ;Never return max value
+ sar al,7 ;Expand sign through whole byte
+.erre (RCdown and bSign) eq 0 ;Don't want to change real sign
+ xor al,RCdown ;Flip sign for RCdown bit
+ and ah,al ;RCup & sign OR RCdown & not sign
+ test ah,RoundControl ;Look only at RC bits
+ jz SignAndStore64 ;Return infinity
+ dec ebx ;Borrow from the exponent field...
+ dec edi ;Max value == infinity-1
+ jmp SignAndStore64
+
+;Unmasked overflow exit, shared with Over32
+SetOverflow:
+ or EMSEG:[CURerr],Overflow
+ stc ;Indicate nothing was done
+ ret
+
+;BigUnder64: masked underflow where the value is too small even for the
+;denormal shift. The mantissa and exponent are zeroed and control falls
+;into the directed-rounding code, which may still round the zero upward.
+BigUnder64:
+ or EMSEG:[CURerr],Underflow+Precision
+ xor ebx,ebx
+ mov edi,ebx ;Set it to zero
+ mov ecx,ebx ;Including exponent
+;NotNearest64: inexact result with a rounding mode other than nearest.
+;al = sign byte, ebx:edi = mantissa, ecx = exponent.
+NotNearest64:
+;We want to increase the magnitude if RCup and +, or RCdown and -
+ mov ah,EMSEG:[CWcntl] ;Get rounding control
+ sar al,7 ;Expand sign through whole byte
+.erre (not RCup and RoundControl) eq RCdown
+ xor ah,al ;Flip rounding bits if negative
+ and ah,RoundControl
+ cmp ah,RCup
+ jnz StoreIEEE64 ;No, chop it
+
+;RoundUp64b: add one unit at the double-real LSB (bit 11 of edi).
+RoundUp64b:
+ mov EMSEG:[SWcc],RoundUp
+ add edi,1 shl 11 ;Round up
+ adc ebx,0
+ jnc StoreIEEE64
+
+ add ecx,1 shl 16 ;Mantissa overflowed, bump exponent
+ cmp ecx,DexpMax shl 16 ;Exponent too big?
+ jge Over64
+ jmp StoreIEEE64
+
+;*******************************************************************************
+
+;StoreSpcl32: single-real store of an operand whose tag is above bTAG_ZERO.
+;Reached from eFST32 with cl = tag, ebx:edi = mantissa, al = sign byte,
+;dseg:esi = destination (4 bytes).
+;Returns CY set when nothing was stored (unmasked exception), else CY clear.
+StoreSpcl32:
+ cmp cl,bTAG_DEN
+ jz Denorm32
+.erre bTAG_NAN lt bTAG_EMPTY
+.erre bTAG_NAN gt bTAG_INF
+ cmp cl,bTAG_NAN
+ mov ecx,SexpMax shl 16 ;Insert special exponent
+ jb StoreIEEE32
+;An empty register must use the 32-bit handler: Empty64 would write an
+;8-byte indefinite through a 4-byte destination.
+ ja Empty32
+;Have a NAN.
+ test ebx,1 shl 30 ;Check for SNAN
+ jnz StoreIEEE32 ;Go store QNAN
+ or ebx,1 shl 30 ;Make SNAN into a QNAN
+ mov EMSEG:[CURerr],Invalid ;Flag the exception
+ test EMSEG:[CWmask],Invalid ;Is it masked?
+ jnz StoreIEEE32 ;If so, update with masked response
+ stc ;Don't pop stack
+ ret
+
+;Empty32: single-real store from an empty register.
+;Masked: stores the 32-bit indefinite and returns CY clear.
+;Unmasked: returns CY set with memory untouched.
+Empty32:
+;It's empty--signal invalid operation
+ mov EMSEG:[CURerr],StackFlag+Invalid
+ test EMSEG:[CWmask],Invalid ;Is it masked?
+ jz DoNothing32 ;No--leave memory unchanged
+;Store Indefinite
+;For FSTP, we go ahead and do the pop even though it's empty
+ mov dword ptr dseg:[esi],0FFC00000H ;32-bit IEEE indefinite
+ ret ;CY clear
+
+Denorm32:
+;Denormal operand: flag it, and if the exception is masked store it through
+;the normal path.
+ mov EMSEG:[CURerr],Denormal
+ test EMSEG:[CWmask],Denormal ;Is it masked?
+ jnz NormStore32 ;If so, ignore denormalization
+DoNothing32:
+ stc ;Don't pop stack
+ ret
+
+;*****************
+;Store Single Real
+;*****************
+;FST/FSTP m32real.
+;Inputs: edi = [CURstk], dseg:esi = memory destination.
+;eFSTP32 pushes PopStackChk so that the RET ending the store lands there;
+;the store returns CY set when nothing was written and CY clear otherwise.
+;The single-real mantissa is the top 24 bits of ebx; the low 8 bits of ebx
+;together with all of edi are the bits that get rounded away.
+
+EM_ENTRY eFSTP32
+eFSTP32:
+ push offset PopStackChk ;Return here after store
+
+EM_ENTRY eFST32
+eFST32:
+ mov EMSEG:[PrevDataOff],esi ;Save operand pointer
+ mov ebx,EMSEG:[edi].lManHi
+ mov ecx,EMSEG:[edi].ExpSgn
+ mov edi,EMSEG:[edi].lManLo
+;mantissa in ebx:edi, exponent in high ecx, sign in ch bit 7, tag in cl
+;memory destination is dseg:esi
+ mov al,ch ;Save sign bit
+ cmp cl,bTAG_ZERO
+.erre bTAG_VALID lt bTAG_ZERO
+.erre bTAG_SNGL lt bTAG_ZERO
+ jz SignAndStore32 ;Just set sign and exit
+ ja StoreSpcl32
+NormStore32:
+;Note that we could have a denormal exception at this point.
+;Thus any additional exceptions must OR into [CURerr], not MOV.
+ xor cx,cx
+ add ecx,(SexpBias-TexpBias) shl 16 ;Correct bias
+ jle Under32
+ cmp ecx,SexpMax shl 16 ;Exponent too big?
+ jge Over32
+;See if we need to round
+ mov edx,ebx ;Get low bits
+ and edx,(1 shl 8) - 1 ;Mask to last 8 bits
+ or edx,edi ;Throwing away any bits?
+ jz StoreIEEE32
+;Result will not be exact--check rounding mode
+Round32:
+ or EMSEG:[CURerr],Precision ;Set flag on inexact result
+ test EMSEG:[CWcntl],RoundControl ;Check rounding control bits
+.erre RCnear eq 0
+ jnz NotNearest32 ;Not just round-to-nearest
+ test bl,1 shl 7 ;Round bit set?
+ jz StoreIEEE32
+ mov edx,ebx
+ and edx,(3 shl 7)-1 ;Mask to LSB and sticky bits
+ or edx,edi ;Combine with remaining sticky bits
+ jz StoreIEEE32 ;Exact tie with even LSB--round down
+ mov EMSEG:[SWcc],RoundUp
+ add ebx,1 shl 8 ;Round up
+ jc AddOneExp32
+StoreIEEE32:
+ and ebx,not (1 shl 31) ;Clear MSB--it's implied in IEEE32
+ shr ebx,8 ;Move mantissa down
+ shl ecx,7 ;Exponent up to position
+ or ebx,ecx ;Combine exponent
+SignAndStore32:
+ and al,bSign ;Just sign bit
+ shl eax,24 ;Sign to MSB
+ or ebx,eax ;Combine sign
+ mov dseg:[esi],ebx
+;CY clear indicate no error
+ ret
+
+;Under32: single-real biased exponent is zero or negative.
+;Entered from NormStore32 with the biased exponent in the high word of ecx,
+;mantissa in ebx:edi, sign in al.
+;If underflow is unmasked, exits through SetUnderflow (CY set).
+;Otherwise ebx is shifted right into denormal position; the bits shifted
+;out are ORed into edi, which only serves as sticky bits from here on.
+Under32:
+ test EMSEG:[CWmask],Underflow ;Is underflow masked?
+ jz SetUnderflow ;No, do nothing more
+;Produce masked underflow response
+;Note that the underflow exception does not occur if the number can be
+;represented exactly as a denormal.
+ sar ecx,16 ;Bring exponent down
+ cmp ecx,SexpMin-23 ;Allow for shift down to rounding bit
+ jl BigUnder32 ;Too small, just make it zero
+.erre SexpMin eq 0
+ neg ecx ;Use as shift count
+ inc ecx ;Shift by at least one
+ xor edx,edx ;Place for sticky bits
+ shrd edx,ebx,cl
+ shr ebx,cl
+ xor ecx,ecx ;Biased exponent is zero
+ or edi,edx ;Combine sticky bits
+ mov edx,ebx ;Get low bits
+ and edx,(1 shl 8) - 1 ;Mask to last 8 bits
+ or edx,edi ;Throwing away any bits?
+ jz StoreIEEE32
+ or EMSEG:[CURerr],Underflow
+ jmp Round32
+
+;AddOneExp32: rounding carried out of the mantissa. Bump the exponent and
+;fall into the overflow handler if it no longer fits.
+AddOneExp32:
+ add ecx,1 shl 16 ;Mantissa overflowed, bump exponent
+ cmp ecx,SexpMax shl 16 ;Exponent too big?
+ jl StoreIEEE32
+;Over32: single-real exponent is too large. al = sign byte.
+;Unmasked: exits through SetOverflow (CY set, nothing stored).
+;Masked: stores infinity, or the largest finite value when the rounding
+;control and sign select it.
+Over32:
+ test EMSEG:[CWmask],Overflow ;Is overflow masked?
+ jz SetOverflow ;No, do nothing more
+;Produce masked overflow response
+ or EMSEG:[CURerr],Overflow+Precision
+ mov ebx,SexpMax shl 23
+ mov ah,EMSEG:[CWcntl] ;Get rounding control
+;Return max value if RCup bit = 1 and -, or RCdown bit = 1 and +
+;i.e., RCup & sign OR RCdown & not sign
+.erre RCchop eq RCup + RCdown ;Always return max value
+.erre RCnear eq 0 ;Never return max value
+ sar al,7 ;Expand sign through whole byte
+.erre (RCdown and bSign) eq 0 ;Don't want to change real sign
+ xor al,RCdown ;Flip sign for RCdown bit
+ and ah,al ;RCup & sign OR RCdown & not sign
+ test ah,RoundControl ;Look only at RC bits
+ jz SignAndStore32 ;Return infinity
+ dec ebx ;Max value == infinity-1
+ jmp SignAndStore32
+
+;BigUnder32: masked underflow where the value is too small even for the
+;denormal shift. ebx and the exponent are zeroed and control falls into
+;the directed-rounding code, which may still round the zero upward.
+BigUnder32:
+ or EMSEG:[CURerr],Underflow+Precision
+ xor ebx,ebx ;Set it to zero
+ xor ecx,ecx ;Exponent too
+;NotNearest32: inexact result with a rounding mode other than nearest.
+;al = sign byte, ebx = mantissa, ecx = exponent.
+NotNearest32:
+;We want to increase the magnitude if RCup and +, or RCdown and -
+ mov ah,EMSEG:[CWcntl] ;Get rounding control
+ sar al,7 ;Expand sign through whole byte
+.erre (not RCup and RoundControl) eq RCdown
+ xor ah,al ;Flip rounding bits if negative
+ and ah,RoundControl
+ cmp ah,RCup
+ jnz StoreIEEE32 ;No, chop it
+ mov EMSEG:[SWcc],RoundUp
+ add ebx,1 shl 8 ;Round up
+ jnc StoreIEEE32 ;ADD sets CY on mantissa overflow
+ jmp AddOneExp32
+
+;*******************************************************************************
+
+;StoreSpcl32Int: 32-bit integer store of an operand whose tag is above
+;bTAG_ZERO. Denormals are converted like normal numbers, an empty register
+;raises Invalid+StackFlag, and every other special is treated as overflow.
+StoreSpcl32Int:
+ cmp cl,bTAG_DEN
+ jz NormStore32Int ;Ignore denormal
+ cmp cl,bTAG_EMPTY
+ jnz Over32Int ;All other specials are invalid
+ mov EMSEG:[CURerr],StackFlag+Invalid
+ jmp Invalid32Int
+
+DoNothing32Int:
+ stc ;Don't pop stack
+ ret
+
+;CheckMax32: reached from StoreIEEE32Int with flags from "cmp ebx,1 shl 31"
+;and ebx >= 80000000H. Only exactly 80000000H with a negative sign is
+;representable; it is stored as is.
+CheckMax32:
+ ja Over32Int
+ test al,bSign ;Is it negative?
+ jnz Store32Int ;If so, answer is OK
+Over32Int:
+;Overflow on integer store is invalid according to IEEE
+ mov EMSEG:[CURerr],Invalid ;Must remove precision exception
+Invalid32Int:
+ test EMSEG:[CWmask],Invalid ;Is it masked?
+ jz DoNothing32Int ;No--leave memory unchanged
+;Store Indefinite
+;For FSTP, we go ahead and do the pop even though it's empty
+ mov dword ptr dseg:[esi],80000000H ;32-bit integer indefinite
+ ret ;CY clear
+
+;******************
+;Store Long Integer
+;******************
+;FIST/FISTP m32int.
+;Inputs: edi = [CURstk], dseg:esi = memory destination.
+;eFISTP32 pushes PopStackChk so that the RET ending the store lands there;
+;the store returns CY set when nothing was written and CY clear otherwise.
+;After alignment, ebx holds the integer magnitude and edi:edx hold the
+;fraction (round bit = MSB of edi, everything below it is sticky).
+
+EM_ENTRY eFISTP32
+eFISTP32:
+ push offset PopStackChk ;Return here after store
+
+EM_ENTRY eFIST32
+eFIST32:
+ mov EMSEG:[PrevDataOff],esi ;Save operand pointer
+ mov ebx,EMSEG:[edi].lManHi
+ mov ecx,EMSEG:[edi].ExpSgn
+ mov edi,EMSEG:[edi].lManLo
+;mantissa in ebx:edi, exponent in high ecx, sign in ch bit 7, tag in cl
+;memory destination is dseg:esi
+ mov al,ch ;Save sign bit
+ cmp cl,bTAG_ZERO
+.erre bTAG_VALID lt bTAG_ZERO
+.erre bTAG_SNGL lt bTAG_ZERO
+ jz Store32Int ;Just store zero and exit
+ ja StoreSpcl32Int
+NormStore32Int:
+ xor edx,edx
+ sar ecx,16 ;Bring exponent down
+ cmp ecx,-1 ;Is it less than 1?
+ jle Under32Int ;Under32Int relies on ZF from this compare
+ cmp ecx,31
+ jg Over32Int
+ sub ecx,31
+ neg ecx ;cl = amount to shift right
+ shrd edx,edi,cl
+ shrd edi,ebx,cl ;Collect round and sticky bits
+ shr ebx,cl ;Align integer
+;See if we need to round
+ mov ecx,edi
+ or ecx,edx ;Throwing away any bits?
+ jz StoreIEEE32Int
+;Result will not be exact--check rounding mode
+Round32Int:
+ mov EMSEG:[CURerr],Precision ;Set flag on inexact result
+ test EMSEG:[CWcntl],RoundControl ;Check rounding control bits
+.erre RCnear eq 0
+ jnz NotNearest32Int ;Not just round-to-nearest
+
+;To perform "round even" when the round bit is set and the sticky bits
+;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB
+;is set, that will always force a round up (to even) if the round bit is
+;set. If the LSB is zero, then the sticky bits remain zero and we always
+;round down.
+
+ bt ebx,0 ;Look at LSB (for round even)
+ adc edx,-1 ;CY set if sticky bits <>0
+ adc edi,(1 shl 31)-1 ;CY set if round up
+ jnc StoreIEEE32Int
+ mov EMSEG:[SWcc],RoundUp
+ inc ebx
+ jz Over32Int ;Wrapped past 0FFFFFFFFH
+StoreIEEE32Int:
+ cmp ebx,1 shl 31 ;Check for max value
+ jae CheckMax32
+SignAndStore32Int:
+ shl eax,24 ;Sign to MSB
+ cdq ;Extend sign through edx
+ xor ebx,edx ;Complement
+ sub ebx,edx ; and increment if negative
+ clc
+Store32Int:
+ mov dseg:[esi],ebx
+;CY clear indicates no error
+ ret
+
+;Under32Int: magnitude is below 1, so the integer part is zero.
+;Entered from NormStore32Int with flags from "cmp ecx,-1" and edx = 0.
+;The two exchanges shift ebx:edi right by 32 into edi:edx and leave ebx = 0;
+;XCHG does not alter flags, so ZF still reflects the compare.
+Under32Int:
+;ZF set if exponent is -1
+ xchg edx,edi ;32-bit right shift
+ xchg edi,ebx ;ebx = 0 now
+ jz Round32Int ;If exponent was -1, ready to round
+ mov EMSEG:[CURerr],Precision ;Set flag on inexact result
+;NotNearest32Int: inexact result with a rounding mode other than nearest
+;(also used for round-to-nearest when the magnitude is below 1/2, where it
+;always chops). al = sign byte, ebx = integer magnitude.
+NotNearest32Int:
+;We want to increase the magnitude if RCup and +, or RCdown and -
+ mov ah,EMSEG:[CWcntl] ;Get rounding control
+ sar al,7 ;Expand sign through whole byte
+.erre (not RCup and RoundControl) eq RCdown
+ xor ah,al ;Flip rounding bits if negative
+ and ah,RoundControl
+ cmp ah,RCup ;Rounding up?
+ jnz StoreIEEE32Int ;No, chop it
+ mov EMSEG:[SWcc],RoundUp
+ inc ebx
+;INC does not change CY, so wrap-around from 0FFFFFFFFH to zero has to be
+;detected with ZF (as Round32Int does).
+ jnz StoreIEEE32Int
+ jmp Over32Int
+
+;*******************************************************************************
+
+;StoreSpcl16Int: 16-bit integer store of an operand whose tag is above
+;bTAG_ZERO. Denormals are converted like normal numbers, an empty register
+;raises Invalid+StackFlag, and every other special is treated as overflow.
+StoreSpcl16Int:
+ cmp cl,bTAG_DEN
+ jz NormStore16Int ;Ignore denormal
+ cmp cl,bTAG_EMPTY
+ jnz Over16Int ;All other specials are invalid
+ mov EMSEG:[CURerr],StackFlag+Invalid
+ jmp Invalid16Int
+
+DoNothing16Int:
+ stc ;Don't pop stack
+ ret
+
+;CheckMax16: reached from StoreIEEE16Int with flags from "cmp ebx,1 shl 15"
+;and ebx >= 8000H. Only exactly 8000H with a negative sign is
+;representable; it is stored as is.
+CheckMax16:
+ ja Over16Int
+ test al,bSign ;Is it negative?
+ jnz Store16Int ;If so, answer is OK
+Over16Int:
+;Overflow on integer store is invalid according to IEEE
+ mov EMSEG:[CURerr],Invalid
+Invalid16Int:
+ test EMSEG:[CWmask],Invalid ;Is it masked?
+ jz DoNothing16Int ;No--leave memory unchanged
+;Store Indefinite
+;For FSTP, we go ahead and do the pop even though it's empty
+ mov word ptr dseg:[esi],8000H ;16-bit integer indefinite
+ ret ;CY clear
+
+;*******************
+;Store Short Integer
+;*******************
+;FIST/FISTP m16int.
+;Inputs: edi = [CURstk], dseg:esi = memory destination.
+;eFISTP16 pushes PopStackChk so that the RET ending the store lands there;
+;the store returns CY set when nothing was written and CY clear otherwise.
+;After alignment, ebx holds the integer magnitude and edi:edx hold the
+;fraction. Because the exponent is limited to 15 the shift count is at
+;least 16, so the aligned magnitude fits in 16 bits before rounding; any
+;value that rounding pushes to 8000H or above is caught at StoreIEEE16Int.
+
+EM_ENTRY eFISTP16
+eFISTP16:
+ push offset PopStackChk ;Return here after store
+
+EM_ENTRY eFIST16
+eFIST16:
+ mov EMSEG:[PrevDataOff],esi ;Save operand pointer
+ mov ebx,EMSEG:[edi].lManHi
+ mov ecx,EMSEG:[edi].ExpSgn
+ mov edi,EMSEG:[edi].lManLo
+;mantissa in ebx:edi, exponent in high ecx, sign in ch bit 7, tag in cl
+;memory destination is dseg:esi
+ mov al,ch ;Save sign bit
+ cmp cl,bTAG_ZERO
+.erre bTAG_VALID lt bTAG_ZERO
+.erre bTAG_SNGL lt bTAG_ZERO
+ jz Store16Int ;Just store zero and exit
+ ja StoreSpcl16Int
+NormStore16Int:
+ xor edx,edx
+ sar ecx,16 ;Bring exponent down
+ cmp ecx,-1 ;Is it less than 1?
+ jle Under16Int ;Under16Int relies on ZF from this compare
+ cmp ecx,15
+ jg Over16Int
+ sub ecx,31
+ neg ecx ;cl = amount to shift right
+ shrd edx,edi,cl
+ shrd edi,ebx,cl ;Collect round and sticky bits
+ shr ebx,cl ;Align integer
+;See if we need to round
+ mov ecx,edi
+ or ecx,edx ;Throwing away any bits?
+ jz StoreIEEE16Int
+;Result will not be exact--check rounding mode
+Round16Int:
+ mov EMSEG:[CURerr],Precision ;Set flag on inexact result
+ test EMSEG:[CWcntl],RoundControl ;Check rounding control bits
+.erre RCnear eq 0
+ jnz NotNearest16Int ;Not just round-to-nearest
+
+;To perform "round even" when the round bit is set and the sticky bits
+;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB
+;is set, that will always force a round up (to even) if the round bit is
+;set. If the LSB is zero, then the sticky bits remain zero and we always
+;round down.
+
+ bt ebx,0 ;Look at LSB (for round even)
+ adc edx,-1 ;CY set if sticky bits <>0
+ adc edi,(1 shl 31)-1 ;CY set if round up
+ jnc StoreIEEE16Int
+ mov EMSEG:[SWcc],RoundUp
+ inc ebx
+StoreIEEE16Int:
+ cmp ebx,1 shl 15 ;Check for max value
+ jae CheckMax16
+SignAndStore16Int:
+ shl eax,24 ;Sign to MSB
+ cdq ;Extend sign through edx
+ xor ebx,edx ;Complement
+ sub ebx,edx ; and increment if negative
+ clc
+Store16Int:
+ mov dseg:[esi],bx
+;CY clear indicates no error
+ ret
+
+;Under16Int: magnitude is below 1, so the integer part is zero.
+;Entered from NormStore16Int with flags from "cmp ecx,-1" and edx = 0.
+;The two exchanges shift ebx:edi right by 32 into edi:edx and leave ebx = 0;
+;XCHG does not alter flags, so ZF still reflects the compare.
+Under16Int:
+;ZF set if exponent is -1
+ xchg edx,edi ;32-bit right shift
+ xchg edi,ebx ;ebx = 0 now
+ jz Round16Int ;If exponent was -1, ready to round
+ mov EMSEG:[CURerr],Precision ;Set flag on inexact result
+;NotNearest16Int: inexact result with a rounding mode other than nearest.
+;al = sign byte, ebx = integer magnitude.
+NotNearest16Int:
+;We want to increase the magnitude if RCup and +, or RCdown and -
+ mov ah,EMSEG:[CWcntl] ;Get rounding control
+ sar al,7 ;Expand sign through whole byte
+.erre (not RCup and RoundControl) eq RCdown
+ xor ah,al ;Flip rounding bits if negative
+ and ah,RoundControl
+ cmp ah,RCup ;Rounding up?
+ jnz StoreIEEE16Int ;No, chop it
+ mov EMSEG:[SWcc],RoundUp
+ inc ebx
+;INC leaves CY unchanged, and CY is clear here (the compare above was
+;equal), so the jump below is always taken. Range checking is done by the
+;"cmp ebx,1 shl 15" at StoreIEEE16Int.
+ jnc StoreIEEE16Int
+ jmp Over16Int
+
+;*******************************************************************************
+
+;******************
+;Store Quad Integer
+;******************
+;FISTP m64int.
+;Inputs: edi = [CURstk], dseg:esi = memory destination.
+;RoundToInteger returns the integer magnitude in ebx:edi and the sign in ch.
+;NOTE(review): CY from RoundToInteger is treated here as "invalid"; confirm
+;the exact conditions in RoundToInteger.
+;On success, and for the masked invalid response, the value is stored and
+;the stack is popped. With an unmasked invalid exception the routine
+;returns without storing or popping.
+
+EM_ENTRY eFISTP64
+eFISTP64:
+ mov EMSEG:[PrevDataOff],esi ;Save operand pointer
+ call RoundToInteger
+ jc Invalid64Int
+;Have integer in ebx:edi
+;Sign in ch
+ cmp ebx,1 shl 31 ;Check for max value
+ jae CheckMax64
+ or ch,ch ;Check sign
+ jns Store64Int
+;64-bit negation
+ not ebx
+ neg edi
+ sbb ebx,-1
+Store64Int:
+ mov dseg:[esi],edi
+ mov dseg:[esi+4],ebx
+ jmp PopStack
+
+;CheckMax64: magnitude is at least 2^63. The only representable value is
+;-2^63, i.e. exactly 80000000:00000000 with a negative sign; it is stored
+;without negation.
+CheckMax64:
+ ja Over64Int
+ or edi,edi ;Any low bits make it larger than 2^63
+ jnz Over64Int
+ test ch,bSign ;Is it negative? (sign is in ch here)
+ jnz Store64Int ;If so, answer is OK
+Over64Int:
+;Overflow on integer store is invalid according to IEEE
+ mov EMSEG:[CURerr],Invalid
+Invalid64Int:
+ test EMSEG:[CWmask],Invalid ;Is it masked?
+ jz DoNothing80 ;No--leave memory unchanged
+;Store Indefinite
+;For FSTP, we go ahead and do the pop even though it's empty
+ mov dword ptr dseg:[esi],0
+ mov dword ptr dseg:[esi+4],80000000H ;64-bit integer indefinite
+ jmp PopStack
+
+;*******************************************************************************
+
+;Empty80: temp-real store from an empty register.
+;Masked: stores the 80-bit indefinite and pops the stack.
+;Unmasked: returns with memory and the stack untouched.
+Empty80:
+;It's empty--signal invalid operation
+ mov EMSEG:[CURerr],StackFlag+Invalid
+ test EMSEG:[CWmask],Invalid ;Is it masked?
+ jz DoNothing80 ;No--leave memory unchanged
+;Store Indefinite
+;For FSTP, we go ahead and do the pop even though it's empty
+ mov dword ptr dseg:[esi],0
+ mov dword ptr dseg:[esi+4],0C0000000H
+ mov word ptr dseg:[esi+8],0FFFFH ;80-bit IEEE indefinite
+ jmp PopStack
+
+;Plain return; also used by the quad-integer store for unmasked invalid
+DoNothing80:
+ ret
+
+;***************
+;Store Temp Real
+;***************
+;FSTP m80real.
+;Inputs: edi = [CURstk], dseg:esi = memory destination (10 bytes).
+;No rounding is needed; the mantissa is written as held. An empty register
+;goes to Empty80. Otherwise PopStack is pushed as the return address, so
+;the RET at the end of StoreTempReal pops the stack.
+
+EM_ENTRY eFSTP80
+eFSTP80:
+ mov EMSEG:[PrevDataOff],esi ;Save operand pointer
+ mov eax,EMSEG:[edi].ExpSgn
+ cmp al,bTAG_EMPTY
+ jz Empty80
+
+ push offset PopStack
+
+;StoreTempReal: write the register at edi to dseg:esi in 80-bit format.
+;Expects eax = [edi].ExpSgn. Returns with RET (no pop of its own).
+StoreTempReal:
+ mov ebx,EMSEG:[edi].lManHi
+ mov edi,EMSEG:[edi].lManLo
+;mantissa in ebx:edi, exponent in high eax, sign in ah bit 7, tag in al
+;memory destination is dseg:esi
+ mov ecx,eax ;get copy of sign and tag
+ shr ecx,16 ;Bring exponent down
+ cmp al,bTAG_ZERO
+ jz StoreIEEE80 ;Skip bias if zero
+ add ecx,IexpBias-TexpBias ;Correct bias
+ cmp al,bTAG_DEN
+ jz Denorm80
+StoreIEEE80:
+ and eax,bSign shl 8 ;Sign lands in bit 15 of the exponent word
+ or ecx,eax ;Combine sign with exponent
+ mov dseg:[esi],edi
+ mov dseg:[esi+4],ebx
+ mov dseg:[esi+8],cx
+
+; jmp PopStack
+ ret
+
+;Denorm80: operand is tagged denormal. Shift the mantissa right by
+;(1 - biased exponent), using only cl as the count, and store it with a
+;zero exponent.
+Denorm80:
+;Must change it to a denormal
+ dec ecx
+ neg ecx ;Use as shift count
+ cmp cl,32 ;Long shift?
+ jae LongDenorm
+ shrd edi,ebx,cl
+ shr ebx,cl
+ xor ecx,ecx ;Exponent is zero
+ jmp StoreIEEE80
+
+LongDenorm:
+;edi must be zero if we have 32 bits to shift
+ xchg ebx,edi ;32-bit right shift
+ shr edi,cl ;shift count is modulo-32
+ xor ecx,ecx ;Exponent is zero
+ jmp StoreIEEE80
diff --git a/private/ntos/dll/i386/emtrig.asm b/private/ntos/dll/i386/emtrig.asm
new file mode 100644
index 000000000..53d981aca
--- /dev/null
+++ b/private/ntos/dll/i386/emtrig.asm
@@ -0,0 +1,863 @@
+ subttl emtrig.asm - Trig functions sine, cosine, tangent
+ page
+;*******************************************************************************
+; Copyright (c) Microsoft Corporation 1991
+; All Rights Reserved
+;
+;emtrig.asm - Trig functions sine, cosine, tangent
+; by Tim Paterson
+;
+;Purpose:
+; FCOS, FPTAN, FSIN, FSINCOS instructions
+;Inputs:
+; edi = [CURstk]
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+;XPi is the 66-bit value of Pi from the Intel manual
+XPiHi equ 0C90FDAA2H
+XPiMid equ 02168C234H
+XPiLo equ 0C0000000H ;Extension of pi
+PiOver4exp equ -1 ;Pi/4 ~= 3/4, so exponent is -1
+
+TinyAngleExp equ -32 ;Smallest angle we bother with
+MaxAngleExp equ 63 ;Angle that's too big
+
+Trig1Result:
+;Trig function reduction routine used by functions returning 1 value
+;(FSIN and FCOS)
+;edi = [CURstk] = argument pointer
+;Argument has already been checked for zero.
+;ZF = (tag == bTAG_ZERO)
+;Normal operands go on to TrigPrem. For special operands the caller's
+;return address is discarded, so the eventual RET goes to the caller's
+;caller. ESP is adjusted as a full 32-bit register: adjusting only SP
+;would not carry into the upper half of ESP.
+ jb TrigPrem
+;Tagged special
+ mov al,EMSEG:[edi].bTAG
+ cmp al,bTAG_DEN
+ jz TrigDenorm
+ add esp,4 ;Don't return to caller
+ cmp al,bTAG_INF
+ jnz SpclDestNotDen ;Check for Empty or NAN
+ mov EMSEG:[SWcc],C2 ;Can't reduce infinity
+ jmp ReturnIndefinite
+
+TrigDenorm:
+ mov EMSEG:[CURerr],Denormal
+ test EMSEG:[CWmask],Denormal ;Is denormal exception masked?
+ jnz TrigPrem ;Yes, continue
+ add esp,4 ;Don't return to caller
+TrigRet:
+ ret
+
+
+;Error exits for the two-result trig functions (see Trig2Result).
+;edi = argument pointer, esi = slot that a push would occupy.
+;The caller's return address is discarded with a full 32-bit adjust of ESP;
+;adjusting only SP would not carry into the upper half of ESP.
+Trig2Inf:
+ mov EMSEG:[SWcc],C2 ;Can't reduce infinity
+ jmp Trig2Indefinite
+
+Trig2StackOver:
+ mov EMSEG:[SWcc],C1 ;Signal overflow
+Trig2StackUnder:
+ mov EMSEG:[CURerr],Invalid+StackFlag
+Trig2Indefinite:
+ add esp,4 ;Don't return to caller
+ call ReturnIndefinite
+ jz TrigRet ;Unmasked, don't change registers
+;Produce masked response
+ mov EMSEG:[CURstk],esi ;Push stack
+ mov edi,esi
+ jmp ReturnIndefinite
+
+;Trig2Special: argument of a two-result trig function has a special tag.
+;al = tag, edi = argument pointer, esi = slot that a push would occupy.
+;For a NAN the caller's return address is discarded (full 32-bit adjust of
+;ESP; adjusting only SP would not carry into the upper half of ESP), and in
+;the masked case the stack is pushed with a copy of the register at edi.
+Trig2Special:
+ cmp al,bTAG_DEN
+ jz TrigDenorm
+ cmp al,bTAG_INF
+ jz Trig2Inf
+;Must be a NAN
+ add esp,4 ;Don't return to caller
+ call DestNAN
+ jz TrigRet ;Unmasked, don't change registers
+;Produce masked response
+ mov EMSEG:[CURstk],esi ;Push stack
+ mov eax,EMSEG:[edi].ExpSgn
+ mov EMSEG:[esi].ExpSgn,eax
+ mov eax,EMSEG:[edi].lManHi
+ mov EMSEG:[esi].lManHi,eax
+ mov eax,EMSEG:[edi].lManLo
+ mov EMSEG:[esi].lManLo,eax
+ ret
+
+;Trig2Zero: argument of a two-result trig function is zero.
+;esi = slot that a push would occupy. The caller's return address is
+;discarded (full 32-bit adjust of ESP; adjusting only SP would not carry
+;into the upper half of ESP), the stack is pushed and the new top is
+;handed to ReturnOne.
+Trig2Zero:
+ add esp,4 ;Don't return to caller
+ mov EMSEG:[CURstk],esi
+ mov edi,esi
+;Amazing coincidence: both FSINCOS and FPTAN return the same result for
+;a zero argument:
+; FSINCOS returns ST(0) = cos(0) = 1, ST(1) = sin(0) = 0.
+; FPTAN returns ST(0) = 1 always, ST(1) = tan(0) = 0.
+;Return zero has same sign as argument zero, so we don't need to touch
+;it -- just push +1.0.
+ jmp ReturnOne
+
+;TrigOutOfRange: exponent is MaxAngleExp or more, so no reduction is done.
+;Sets C2 and returns past the caller. ESP is adjusted as a full 32-bit
+;register; adjusting only SP would not carry into the upper half of ESP.
+TrigOutOfRange:
+ mov EMSEG:[SWcc],C2 ;Signal argument not reduced
+ add esp,4 ;Don't return to caller
+ ret
+
+PrevStackWrap esi,Trig2 ;Tied to PrevStackElem below
+
+Trig2Result:
+;Trig function reduction routine used by functions returning 2 values
+;(FSINCOS and FPTAN)
+;edi = [CURstk] = argument pointer
+;esi is set to the slot a push would occupy. The argument must not be
+;empty (else stack underflow) and that slot must be empty (else stack
+;overflow). Special tags and zero branch away; every other tag falls
+;through into TrigPrem.
+ mov esi,edi
+ PrevStackElem esi,Trig2 ;esi points to second result location
+ mov al,EMSEG:[edi].bTAG ;Get tag
+ cmp al,bTAG_EMPTY ;Stack underflow if empty
+ jz Trig2StackUnder
+ cmp EMSEG:[esi].bTAG,bTAG_EMPTY ;Stack overflow if not empty
+ jnz Trig2StackOver
+ cmp al,bTAG_ZERO ;Is it Special?
+ ja Trig2Special
+ jz Trig2Zero
+;Fall into TrigPrem
+
+;****
+;TrigPrem
+;
+;This routine reduces an angle in radians to the range [0, pi/4].
+;Angles in odd-numbered octants have been subtracted from pi/4.
+;It uses a 66-bit value for pi, as required by the 387.
+;TrigPrem uses the same two-stage algorithm as FPREM (see
+;emfprem.asm). However, it is limited to an argument < 2^63.
+;
+;Inputs:
+; edi = [CURstk]
+;Outputs:
+; ebx:esi = remainder, normalized
+; high ecx = exponent, cl = tag
+; al = octant
+; edi = [CURstk]
+
+TrigPrem:
+ mov EMSEG:[Result],edi
+ mov eax,EMSEG:[edi].lManLo
+ mov edx,EMSEG:[edi].lManHi
+ movsx ebx,EMSEG:[edi].wExp
+ cmp ebx,MaxAngleExp
+ jge TrigOutOfRange
+ xor edi,edi ;Extend dividend
+ xor esi,esi ;Quotient, in case we skip stage 1
+.erre PiOver4exp eq -1
+ inc ebx ;Subtract exponent of pi/4
+ jl ExitTrigPrem ;If dividend is smaller, return it.
+;We now know that 0 <= ExpDif < 64, so it fits in bl.
+ cmp bl,31 ;Do we need to do stage 1?
+ jl FitPi ;No, start stage 2
+
+;FPREM stage 1
+;
+;Exponent difference is at least 31. Use 32-bit division to compute
+;quotient and exact remainder, reducing exponent difference by 31.
+;
+;edx:eax = dividend
+;ebx = exponent difference
+
+;Shift dividend right one bit to be sure DIV instruction won't overflow
+;This means we'll be reducing the exponent difference by 31, not 32
+ xor ebp,ebp ;Dividend extension
+ shrd ebp,eax,1
+ shrd eax,edx,1
+ shr edx,1
+
+ sub bl,31 ;Exponent reduced
+ mov ecx,XPiHi
+ div ecx ;Guess a quotient "digit"
+
+;Check out our guess.
+;Currently, remainder in edx = (high dividend) - (quotient * high pi).
+;(High dividend is the upper 64 bits--ebp has 1 bit.) The definition
+;of remainder is (all dividend) - (quotient * all pi). So if we
+;subtract (quotient * low pi) from edx:ebp, we'll get the true
+;remainder. If it's negative, our guess was too big.
+
+ mov esi,eax ;Save quotient
+ mov ecx,edx ;Save remainder
+
+;The pi/4 we use has two bits set below the first 64 bits. This means
+;we must add another 3/4 of the quotient into the amount to subtract,
+;which we'll compute by rounding the low 32 bits up 1, then subtracting
+;1/4 of quotient. But since we're computing the amount to subtract from
+;the remainder, we'll add the 1/4 of the quotient to the remainder instead
+;of subtracting it from the amount to subtract.
+
+.erre XPiLo eq (3 shl 30)
+ mov eax,XPiMid+1
+ mul esi ;Quotient * low pi
+;Note that ebp is either 0 or 800...00H
+ shr ebp,30 ;Move down to low end
+ shld ebp,esi,30 ;Move back up, adding 1/4 of quotient
+ mov edi,esi ;Another copy of quotient
+ shl edi,30 ;Keep last two bits
+;edx:eax has amount to subtract to get correct remainder from ecx:ebp:edi
+ sub ebp,eax
+ sbb ecx,edx ;Subtract from remainder
+ mov eax,ebp
+ mov edx,ecx ;Remainder back to edx:eax:edi
+ jnc TrigPremNorm ;Was quotient OK?
+TrigCorrect:
+ dec esi ;Quotient was too big
+ add edi,XPiLo
+ adc eax,XPiMid ;Add divisor back into remainder
+ adc edx,XPiHi
+ jnc TrigCorrect ;Repeat if quotient is still too big
+ jmp TrigPremNorm
+
+;FPREM stage 2
+;
+;Exponent difference is less than 32. Use restoring long division to
+;compute quotient bits until exponent difference is zero. Note that we
+;often get more than one bit/loop: BSR is used to scan off leading
+;zeros each time around. Since the divisor is normalized, we can
+;instantly compute a zero quotient bit for each leading zero bit.
+
+TrigPremLoop:
+;edx:eax:edi = dividend (remainder) minus pi/4
+;esi = quotient
+;ebx = exponent difference
+;
+;If D is current dividend and p is pi/4, then we have edx:eax:edi = D - p,
+;which is negative. We want 2*D - p, which is positive.
+;2*D - p = 2*(D - p) + p.
+;
+;Each pass through the loop develops at least one more quotient bit in esi
+;and reduces the exponent difference in ebx. The loop is entered at
+;TrigPremNorm from the stage 1 code above.
+ add edi,edi ;2*(D - p)
+ adc eax,eax
+ adc edx,edx
+
+ add edi,XPiLo ;2*(D-p) + p = 2*D - p
+ adc eax,XPiMid
+ adc edx,XPiHi
+
+ add esi,esi ;Double quotient too
+ dec ebx ;Decrement exponent difference
+PiFit:
+;The divisor was successfully subtracted from the dividend (either by the
+;doubling step above or by FitPi below), so the new quotient bit is a one.
+ inc esi
+TrigPremNorm:
+;Normalize the 96-bit remainder edx:eax:edi. Every leading zero bit shifted
+;out is a zero quotient bit, so the quotient is shifted by the same count.
+ bsr ecx,edx ;Find first 1 bit
+ jz TrigPremZero ;ZF set by BSR means edx is all zero
+ not cl
+ and cl,1FH ;Convert bit no. to shift count
+;The explicit mask is needed because all of ecx is used in the subtract.
+ sub ebx,ecx ;Reduce exponent difference
+ jl TrigTooFar ;Shift count exceeds remaining exponent difference
+ shld edx,eax,cl
+ shld eax,edi,cl
+ shl edi,cl ;Finish normalize shift
+ shl esi,cl ;Shift quotient
+FitPi:
+;Dividend could be larger or smaller than divisor
+ sub edi,XPiLo
+ sbb eax,XPiMid
+ sbb edx,XPiHi
+ jnc PiFit ;No borrow: divisor fit, record a one bit
+;Couldn't subtract pi/4 from dividend.
+;edx:eax:edi = dividend - pi/4, which is negative
+ or ebx,ebx ;Is exponent difference zero?
+ jg TrigPremLoop
+;If quotient (octant number) is odd, we have subtracted an odd number of
+;pi/4's. However, simple angle reductions work in multiples of pi/2.
+;We will keep the extra pi/4 we just subtracted if the octant was odd.
+;This will give a result range of [-pi/4, pi/4].
+ test esi,1 ;Is octant odd?
+ jz EvenOctant ;Even: add the pi/4 back (restore dividend)
+;Odd octant: fall through to NegPremResult with the negative remainder.
+NegPremResult:
+;-pi/4 < dividend < 0. Negate this since we use sign-magnitude representation.
+;Input: edx:eax:edi = negative 96-bit remainder, ebx = exponent difference.
+;The negation is done as complement-and-add-one, with the +1 rippling up
+;through the borrow flag: NEG edi sets CY unless edi was zero, and
+;"sbb x,-1" computes x + 1 - CY.
+ not edx ;96-bit negation
+ not eax
+ neg edi
+ sbb eax,-1
+ sbb edx,-1
+;May need to normalize
+ bsr ecx,edx
+ jz TrigNorm32 ;High dword is zero: need to shift 32 bits or more
+ lea ebx,[ebx+ecx-31] ;Fix up exponent for normalization
+ not cl ;Convert bit no. to shift count
+;Only the low 5 bits of cl are used by the shifts below, so the upper bits
+;left set by NOT do not matter here.
+TrigShortNorm:
+;Shift edx:eax:edi left by cl bits (cl < 32).
+ shld edx,eax,cl
+ shld eax,edi,cl
+ shl edi,cl ;Finish normalize shift
+RoundPrem:
+;Must round 66-bit result to 64 bits.
+;To perform "round even" when the round bit is set and the sticky bits
+;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB
+;is set, that will always force a round up (to even) if the round bit is
+;set. If the LSB is zero, then the sticky bits remain zero and we always
+;round down. This rounding rule is implemented by adding RoundBit-1
+;(7F..FFH), setting CY if round up.
+ bt eax,0 ;Is mantissa even or odd? (set CY)
+ adc edi,(1 shl 31)-1 ;Sum LSB & sticky bits--CY if round up
+ adc eax,0
+ adc edx,0
+;NOTE(review): a carry out of edx is not checked here; presumably the
+;mantissa cannot be all ones at this point -- confirm.
+;Falls through to ExitTrigPrem with the rounded mantissa in edx:eax.
+ExitTrigPrem:
+;edx:eax = remainder, normalized
+;esi = quotient
+;ebx = exponent difference, zero or less
+;
+;Repackages the result for the caller. On return:
+; ebx:esi = mantissa of reduced argument
+; high ecx = exponent, cl = tag (bTAG_SNGL or bTAG_VALID)
+; eax = quotient; low 3 bits of al are the octant
+; edi = [CURstk]
+;The exponent difference is relative to pi/4, whose exponent is asserted
+;to be -1, hence the decrement to get the true exponent.
+.erre PiOver4exp eq -1
+ dec ebx ;True exponent
+.erre bTAG_SNGL eq 0
+;SHRD moves the low 16 bits of ebx into the high half of ecx; the old high
+;half of ecx drops into the low half.
+ shrd ecx,ebx,16 ;Exponent to high ecx
+ mov ebx,edx ;High mant. to ebx
+ xchg esi,eax ;Low mant. to esi, octant to eax
+ or esi,esi ;Any bits in low half?
+.erre bTAG_VALID eq 1
+.erre bTAG_SNGL eq 0
+;The two assertions above let the SETNZ result be used directly as the tag.
+ setnz cl ;if low half==0 then cl=0 else cl=1
+ mov edi,EMSEG:[CURstk]
+ test EMSEG:[edi].bSgn,bSign ;Was angle negative?
+ jnz FlipOct ;Yes, flip octant over
+ ret
+
+FlipOct:
+;Angle was negative. Subtract octant from 7.
+;Computes al = 7 - al; only al is changed, the rest of eax is left as is.
+ neg al
+ add al,7
+ ret
+
+EvenOctant:
+;Restore dividend
+;The last trial subtraction of pi/4 failed (result negative) and the octant
+;is even, so the extra pi/4 is not wanted: add the divisor back to get the
+;non-negative remainder in edx:eax:edi, then round it.
+ add edi,XPiLo
+ adc eax,XPiMid
+ adc edx,XPiHi
+ jmp RoundPrem
+
+TrigTooFar:
+;Exponent difference in ebx went negative when reduced by shift count in ecx.
+;We need a quotient corresponding to exponent difference of zero.
+;On entry: ecx = normalize shift count, ebx = (old exponent difference) - ecx,
+;edx:eax:edi = remainder, not yet shifted.
+ add ecx,ebx ;Compute previous exponent difference
+ shl esi,cl ;Fix up quotient
+ sub ecx,ebx ;Restore shift count
+ test esi,1 ;Is octant odd?
+;Even octant: ebx keeps its negative value, which is the exponent
+;adjustment for the normalize shift performed at TrigShortNorm.
+ jz TrigShortNorm ;No, go normalize
+ xor ebx,ebx ;Restore old exponent difference (zero)
+SubPiOver4:
+;We are here if exponent difference was zero and octant is odd.
+;As noted above, we need to reduce the angle by a multiple of pi/2,
+;not pi/4. We will subtract one more pi/4, which will make the
+;result range [-pi/4, pi/4].
+;The remainder here is unnormalized; NegPremResult negates and normalizes it.
+ sub edi,XPiLo
+ sbb eax,XPiMid
+ sbb edx,XPiHi
+ jmp NegPremResult
+
+TrigPremZero:
+;High dword of remainder is all zero, so we've reduced exponent difference
+;by 32 bits and overshot. We need a quotient corresponding to exponent
+;difference of zero, so we just shift it by the original difference. Then
+;we need to normalize the rest of the remainder.
+ mov ecx,ebx ;Get exponent difference
+ shl esi,cl ;Fix up quotient
+ test esi,1 ;Is octant odd?
+;The low bit of esi can only survive the shift when the shift count was
+;zero, so the branch below is taken only with ebx = 0, as SubPiOver4 expects.
+;NOTE(review): relies on ebx < 32 here (per the stage 2 description) -- confirm.
+ jnz SubPiOver4 ;Yes, go subtract another pi/4
+TrigNorm32:
+;edx = 0. Normalize using eax:edi as the 64-bit mantissa; the exponent
+;adjustment includes the extra 32-bit shift.
+ bsr ecx,eax
+ jz TinyTrig ;eax is zero too: only edi has bits
+ lea ebx,[ebx+ecx-31-32] ;Fix up exponent for normalization
+ mov edx,eax
+ mov eax,edi ;Shift left by 32 bits
+ not cl ;Convert bit no. to shift count
+ shld edx,eax,cl ;Normalize remainder
+ shl eax,cl
+;No rounding is done on this path; go straight to the exit code.
+ jmp ExitTrigPrem
+
+TinyTrig:
+;Upper 64 bits of remainder are all zero. We are assured that the extended
+;remainder is never zero, though.
+;eax is already zero here (that is how we got here from TrigNorm32), so
+;after the move edx:eax holds the whole mantissa with a zero low dword.
+ mov edx,edi ;Shift left 64 bits
+ bsr ecx,edi ;edi is nonzero per the note above
+ lea ebx,[ebx+ecx-31-64] ;Fix up exponent for normalization
+ not cl ;Convert bit no. to shift count
+ shl edx,cl ;Normalize
+ jmp ExitTrigPrem
+
+;*******************************************************************************
+
+EM_ENTRY eFCOS
+eFCOS:
+;FCOS instruction. On entry edi points to the operand (ST), as used by the
+;tag check below. A zero operand returns 1.0 directly; otherwise the
+;argument is reduced and either the cosine or the sine polynomial is
+;evaluated, depending on the octant.
+ and [esp].[OldLongStatus+4],NOT(C2 SHL 16) ;clear C2
+ cmp EMSEG:[edi].bTAG,bTAG_ZERO
+ jz ReturnOne ;cos(0) = 1
+ call Trig1Result
+;ebx:esi,ecx = reduced argument
+;eax = octant
+;TEST sets the parity flag from the masked bits: odd parity means exactly
+;one of the two tested bits is set.
+ mov ch,80H ;Assume negative
+ test al,110B ;Negative in octants 2 - 5
+ jpo @F ;Occurs when 1 of these bits are set
+ xor ch,ch ;Actually positive
+@@:
+ test al,011B ;Look for octants 0,3,4,7
+ jpo TakeSine ;Use sine if not
+TakeCosine:
+;Also entered from ReducedSine when the sine must be computed as a cosine.
+;ch = sign of result
+ cmp ecx,TinyAngleExp shl 16 ;Is angle really small?
+ jl CosReturnOne ;cos(x) = 1 for tiny x
+CosNotTiny:
+ mov edi,offset tCosPoly
+;Note that argument needs to be saved in ArgTemp (by EvalPolySetup) in case
+;we were called from eFSINCOS and we'll need the arg for the sine. Argument
+;is not needed for cosine, however (just its square).
+ call EvalPolySetup ;In emftran.asm
+ mov ch,EMSEG:[ArgTemp].bSgn ;Get sign we already figured out
+;Falls through to TransUnround to apply the user's rounding.
+TransUnround:
+;The last operation performed a simple round nearest, without setting the
+;C1 status bit if round up occurred. We reverse this last rounding now
+;so we can do the user's selected rounding mode. We also ensure that
+;the answer is never exact.
+;
+;On entry: ebx:esi = mantissa, eax = rounding bits, high ecx = exponent,
+;ch = sign.
+ sub eax,(1 shl 31)-1 ;Take the 7F..FFH back out--CY if we had rounded up
+;JZ tests the result of the SUB; when taken, the mantissa is left untouched.
+ jz UnroundExact ;Answer looks exact, but it's not
+ sbb esi,0 ;Propagate borrow to undo the round up
+ sbb ebx,0
+ jns PolyDropExponent ;We had rounded up exponent too
+FinalTransRound:
+;A jump through [TransRound] is only valid if the number is known not to
+;underflow. Unmasked underflow requires [RoundMode] be set.
+ mov edx,EMSEG:[TransRound]
+ mov EMSEG:[RoundMode],edx
+ call edx ;Perform user's rounding
+RestoreRound:
+;Restore rounding vectors
+ mov EMSEG:[ZeroVector],offset SaveResult
+ mov eax,EMSEG:[SavedRoundMode]
+ mov EMSEG:[RoundMode],eax
+ ret
+
+UnroundExact:
+;eax is zero here; making it 1 leaves nonzero rounding bits so the result
+;is treated as inexact.
+ inc eax ;Let's say our answer is a bit small
+ jmp FinalTransRound
+
+PolyDropExponent:
+;Undoing the round up cleared the mantissa MSB: put the MSB back and
+;lower the exponent by one to compensate.
+ sub ecx,1 shl 16 ;Decrement exponent
+ or ebx,1 shl 31 ;Set MSB
+ jmp FinalTransRound
+
+
+SinRet:
+;Shared return for eFSIN with a zero operand: the operand is left as is.
+ ret
+
+SaveTinySin:
+;Argument in ebx:esi,ecx is small enough so that sin(x) = x, which happens
+;when x - x^3/6 = x [or 1 - x^2/6 = 1]. Note that the infinitely precise
+;result is slightly less than the argument. To get the correct answer for
+;any rounding mode, we decrement the argument and set up for rounding.
+;Also used for tan(x) = x with tiny x (see TinyTan).
+ mov eax,-1 ;Set up rounding bits
+ sub esi,1
+ sbb ebx,0 ;Drop mantissa by one
+;SF reflects the mantissa MSB after the decrement.
+ js FinalTransRound ;Still normalized?
+;mantissa must have been 800..000H, set it to 0FFF...FFFH and drop exponent
+;(esi is already 0FFFFFFFFH from the decrement; only ebx needs fixing)
+ mov ebx,eax ;ebx = -1
+ sub ecx,1 shl 16 ;Drop exponent by one
+ jmp FinalTransRound
+
+
+EM_ENTRY eFSIN
+eFSIN:
+;FSIN instruction. On entry edi points to the operand (ST), as used by the
+;tag check below. A zero operand is returned unchanged; otherwise the
+;argument is reduced and either the sine or the cosine polynomial is
+;evaluated, depending on the octant.
+ and [esp].[OldLongStatus+4],NOT(C2 SHL 16) ;clear C2
+ cmp EMSEG:[edi].bTAG,bTAG_ZERO
+ jz SinRet ;Return zero for zero argument
+ call Trig1Result
+;Sine is negative in octants 4 - 7, i.e. when octant bit 2 is set.
+ mov ch,al
+ shl ch,7-2 ;Move bit 2 to bit 7 as sign bit
+ReducedSine:
+;Also called from eFSINCOS.
+;ebx:esi,ecx = reduced argument
+;ch = correct sign
+;eax = octant
+ test al,011B ;Look for octants 0,3,4,7
+ jpo TakeCosine ;Use cosine if not
+TakeSine:
+;Also entered from eFCOS when the cosine must be computed as a sine.
+ cmp ecx,TinyAngleExp shl 16 ;Is angle really small?
+ jl SaveTinySin ;sin(x) = x for tiny x
+
+;The polynomial for sine is sin(x) = x * P(x^2). However, the degree zero
+;coefficient of P() is 1, so P() = R() + 1, where R() has no degree zero
+;term. Thus sin(x) = x * [R(x^2) + 1] = x * R(x^2) + x.
+;
+;What's important here is that adding 1 to R(x^2) can blow away a lot of
+;precision just before we do that last multiply by x. Note that x < pi/4 < 1,
+;so that x^2 is often << 1. The precision is lost when R(x^2) is shifted
+;right to align its binary point with 1.0. This can cause a loss of at
+;least 1 bit of precision after the final multiply by x in addition to
+;rounding errors.
+;
+;To avoid this precision loss, we use the alternate form given above,
+;sin(x) = x * R(x^2) + x. Instead of adding 1.0 and multiplying by x,
+;we multiply by x and add x--exactly the same level of difficulty. But
+;the multiply has all of R(x^2)'s precision available.
+;
+;Because the polynomial R() has no zero-degree term, we give EvalPoly
+;one degree less (so we don't have to add zero as the last term).
+;Then we have to multiply once more by x^2 since we left the loop early.
+
+SineNotTiny:
+;Evaluate sin(x) = x * R(x^2) + x using the tSinPoly coefficient table.
+ mov edi,offset tSinPoly
+ call EvalPolySetup ;In emftran.asm
+SineFinish:
+;Also reached by RET from EvalPoly on the FastSine path of eFSINCOS.
+;FloatTemp is multiplied in first; eFSINCOS reloads x^2 from FloatTemp, so
+;this is the extra multiply by x^2 owed for evaluating one degree short.
+
+ifdef NT386
+ mov edi,YFloatTemp
+else
+ mov edi,offset edata:FloatTemp
+endif
+ call PolyMulDouble ;Last coefficient in R(x^2)
+
+ifdef NT386
+ mov edi,YArgTemp ;Point to original x
+else
+ mov edi,offset edata:ArgTemp ;Point to original x
+endif
+
+ call PolyMulDouble ;Compute x * R(x^2)
+
+ifdef NT386
+ mov edi,YArgTemp ;Point to original x
+else
+ mov edi,offset edata:ArgTemp ;Point to original x
+endif
+
+;Pushing the address makes PolyAddDouble "return" to TransUnround, which
+;then applies the user's rounding mode.
+ push offset TransUnround
+ jmp PolyAddDouble ;Compute x * R(x^2) + x
+
+
+EM_ENTRY eFPTAN
+eFPTAN:
+;FPTAN instruction: replaces the operand with its tangent and then pushes
+;1.0 (done by TanPushOne, whose address is placed on the stack below so
+;that the final RET of the computation lands there).
+ and [esp].[OldLongStatus+4],NOT(C2 SHL 16) ;clear C2
+ call Trig2Result
+ push offset TanPushOne ; Push 1.0 when we're all done
+;ebx:esi,ecx = reduced argument
+;eax = octant
+ mov ch,al
+ shl ch,7-1 ;Move bit 1 to bit 7 as sign bit
+;Note that ch bit 6 now has even/odd octant, which we'll need when we're
+;done to see if we should take reciprocal.
+ cmp ecx,TinyAngleExp shl 16 ;Is angle really small?
+ jl TinyTan
+ mov edi,offset tTanPoly
+ call Eval2Poly ;In emftran.asm
+ mov edi,EMSEG:[CURstk] ;Point to first result
+ push offset TransUnround ;Return address of divide
+;Bits 7 and 6 of the saved sign byte hold octant bits 1 and 0 (see above).
+ test EMSEG:[ArgTemp].bSgn,0C0H ;Check low 2 bits of octant
+;Given the reduced input range, the result can never overflow or underflow.
+;It must then be safe to assume neither operand is zero.
+ jpe DivDouble ;Tan() octants 0,3,4,7
+ jmp DivrDouble ;CoTan()
+
+TinyTan:
+;Reduced argument is tiny, so tan(x) = x. Depending on the octant the
+;result is either x itself or its reciprocal.
+;ebx:esi,ecx = reduced argument; ch bit 7 = sign, ch bit 6 = octant bit 0
+ test ch,0C0H ;Check low 2 bits of octant
+ jpe SaveTinySin ;Octants 0,3,4,7: tan(x) = x for tiny x
+;Need reciprocal of reduced argument
+;Move the argument into the divisor registers and load 1.0 as the dividend.
+ mov edi,esi
+ mov esi,ebx ;Mantissa in esi:edi
+ mov ebx,ecx ;ExpSgn to ebx
+ mov edx,1 shl 31 ;Load 1.0
+ xor eax,eax
+.erre TexpBias eq 0
+ xor ecx,ecx ;Sign and exponent are zero
+;dividend mantissa in edx:eax, exponent in high ecx, sign in ch bit 7
+;divisor mantissa in esi:edi, exponent in high ebx, sign in bh bit 7
+ push offset TransUnround ;Return address of divide
+;Note that this can never overflow, because the reduced argument is never
+;smaller than about 2^-65.
+ jmp DivDoubleReg
+
+
+PrevStackWrap edi,Tan ;Companion to the PrevStackElem below
+
+TanPushOne:
+;Entered by RET through the address that eFPTAN pushed. Advance to the
+;slot for the second result and make it the new top of stack.
+ PrevStackElem edi,Tan ;edi -> slot that receives the 1.0
+ mov EMSEG:[CURstk],edi
+ReturnOne:
+;Write the constant 1.0 at [edi]: unbiased exponent zero with tag
+;bTAG_SNGL, mantissa 8000_0000:0000_0000H. Also the exit for cos(0).
+;The three fields are independent, so the store order is immaterial.
+ mov EMSEG:[edi].ExpSgn,(0-TexpBias) shl 16 + bTAG_SNGL
+ mov EMSEG:[edi].lManHi,1 shl 31
+ mov EMSEG:[edi].lManLo,0
+ ret
+
+
+PrevStackWrap edi,SinCos ;Tied to PrevStackElem below
+
+eFSINCOS:
+;FSINCOS instruction: the sine is computed first and stored in the current
+;stack slot, then the stack pointer is moved to the previous element and
+;the cosine is stored there as the new top of stack.
+ and [esp].[OldLongStatus+4],NOT(C2 SHL 16) ;clear C2
+ call Trig2Result
+;Figure out signs
+ mov ch,al ;Start with sign of sine
+ shl ch,7-2 ;Move bit 2 to bit 7 as sign bit
+ mov ah,80H ;Assume sign of cosine is negative
+ test al,110B ;Negative in octants 2 - 5
+ jpo @F ;Occurs when 1 of these bits are set
+ xor ah,ah ;Actually positive
+@@:
+;ch = sign of sine
+;ah = sign of cosine
+ cmp ecx,TinyAngleExp shl 16 ;Is angle really small?
+ jl TinySinCos
+ push eax ;Save octant and sign of cosine
+ call ReducedSine ;On exit, edi = [CURstk]
+ pop eax
+;The Sin() function restored the rounding vectors to normal. Set them back.
+ mov EMSEG:[RoundMode],offset PolyRound
+ mov EMSEG:[ZeroVector],offset PolyZero
+ PrevStackElem edi,SinCos ;edi points to second result location
+ mov EMSEG:[CURstk],edi
+ mov EMSEG:[Result],edi
+;Load x^2 back into registers
+;(left in FloatTemp by the first evaluation, so EvalPoly can be used
+;instead of EvalPolySetup)
+ mov ecx,EMSEG:[FloatTemp].ExpSgn
+ mov ebx,EMSEG:[FloatTemp].lManHi
+ mov esi,EMSEG:[FloatTemp].lManLo
+ mov EMSEG:[ArgTemp].bSgn,ah ;Save sign
+ test al,011B ;Look for octants 0,3,4,7
+ jpo FastSine ;Use sine if not
+ mov edi,offset tCosPoly
+ call EvalPoly ;In emftran.asm
+ mov ch,EMSEG:[ArgTemp].bSgn ;Get sign we already figured out
+ jmp TransUnround
+
+FastSine:
+;The cosine result must be computed with the sine polynomial. The pushed
+;address makes EvalPoly return into SineFinish for the final steps.
+ mov edi,offset tSinPoly
+ push offset SineFinish
+ jmp EvalPoly ;In emftran.asm
+
+TinySinCos:
+;ch = sign of sine
+;ah = sign of cosine
+;ebx:esi,high ecx = reduced argument
+;edi = [CURstk]
+ test al,011B ;Look for octants 0,3,4,7
+ jpo TinyCosSin ;Take cosine first if not
+ push eax
+ call SaveTinySin ;For sine, arg is result
+;The saved eax is popped into ecx so that ch receives what was in ah.
+ pop ecx
+;edi = [CURstk]
+;ch = sign of cosine
+;Set cosine to 1.0
+ PrevStackElem edi,TinySinCos ;edi points to second result location
+ mov EMSEG:[CURstk],edi
+ mov EMSEG:[Result],edi
+CosReturnOne:
+;Cosine is nearly equal to 1.0. Put in next smaller value and round it.
+;Also entered from TakeCosine and TinyCosSin with ch = sign of result.
+;The value built here is mantissa 0FFFF...FFFFH with exponent -1 and all
+;rounding bits set, i.e. just below 1.0 and inexact.
+ mov ebx,-1
+ mov esi,ebx ;Set mantissa to -1
+ mov eax,ebx ;Set up rounding bits
+.erre TexpBias eq 0
+ and ecx,bSign shl 8 ;Keep only sign
+ sub ecx,1 shl 16 ;Exponent of -1
+;A jump through [TransRound] is only valid if the number is known not to
+;underflow. Unmasked underflow requires [RoundMode] be set.
+ jmp EMSEG:[TransRound]
+
+ PrevStackWrap edi,TinySinCos
+
+ PrevStackWrap edi,TinyCosSin
+
+TinyCosSin:
+;Sine is nearly 1.0, cosine is argument
+;
+;ch = sign of sine
+;ah = sign of cosine
+;ebx:esi,high ecx = reduced argument
+;edi = [CURstk]
+;
+;The cosine (the argument itself) is stored first, in the second result
+;location; the sine (nearly 1.0) is then stored in the original location.
+ xchg ah,ch ;Cosine sign to ch, sine sign to ah
+ push edi ;Save place for sine
+ PrevStackElem edi,TinyCosSin ;edi points to second result location
+ mov EMSEG:[CURstk],edi
+ mov EMSEG:[Result],edi
+ push eax
+ call SaveTinySin ;For cosine, arg is result
+;The saved eax is popped into ecx so that ch receives what was in ah.
+ pop ecx
+;ch = sign of sine
+ pop EMSEG:[Result] ;Set up location for sine
+ jmp CosReturnOne
+
+;*******************************************************************************
+
+;********************* Polynomial Coefficients *********************
+
+;These polynomial coefficients were all taken from "Computer Approximations"
+;by J.F. Hart (reprinted 1978 w/corrections). All calculations and
+;conversions to hexadecimal were done with a character-string calculator
+;written in Visual Basic with precision set to 30 digits. Once the constants
+;were typed into this file, all transfers were done with cut-and-paste
+;operations to and from the calculator to help eliminate any typographical
+;errors.
+
+
+tCosPoly label word
+
+;These constants are derived from Hart #3824: cos(x) = P(x^2),
+;accurate to 19.45 digits over interval [0, pi/4]. The original
+;constants in Hart required that the argument x be divided by pi/4.
+;These constants have been scaled so this is no longer required.
+;Scaling is done by multiplying the constant by a power of 4/pi.
+;The power is given in the table.
+
+ dd 7 ;Degree seven
+
+; Original Hart constant power Scaled constant
+;
+;-0.38577 62037 2 E-12 14 -0.113521232057839395845871741043E-10
+;Hex value: 0.C7B56AF786699CF1BD13FD290 HFFDC
+ dq 0C7B56AF786699CF2H
+ dw (bSign shl 8)+bTAG_VALID,0FFDCH-1
+
+;+0.11500 49702 4263 E-9 12 +0.208755551456778828747793797596E-8
+;Hex value: 0.8F74AA3CCE49E68D6F5444A18 HFFE4
+ dq 08F74AA3CCE49E68DH
+ dw bTAG_VALID,0FFE4H-1
+
+;-0.24611 36382 63700 5 E-7 10 -0.275573128656960822243472872247E-6
+;Hex value: 0.93F27B7F10CC8A1703EFC8A04 HFFEB
+ dq 093F27B7F10CC8A17H
+ dw (bSign shl 8)+bTAG_VALID,0FFEBH-1
+
+;+0.35908 60445 88581 953 E-5 8 +0.248015872828994630247806807317E-4
+;Hex value: 0.D00D00CD6BB3ECD17E10D5830 HFFF1
+ dq 0D00D00CD6BB3ECD1H
+ dw bTAG_VALID,0FFF1H-1
+
+;-0.32599 18869 26687 55044 E-3 6 -0.138888888888589604343951947246E-2
+;Hex value: 0.B60B60B609B165894CFE522AC HFFF7
+ dq 0B60B60B609B16589H
+ dw (bSign shl 8)+bTAG_VALID,0FFF7H-1
+
+;+0.15854 34424 38154 10897 54 E-1 4 +0.416666666666664302573692446873E-1
+;Hex value: 0.AAAAAAAAAAA99A1AF53042B08 HFFFC
+ dq 0AAAAAAAAAAA99A1BH
+ dw bTAG_VALID,0FFFCH-1
+
+;-0.30842 51375 34042 45242 414 E0 2 -0.499999999999999992843582920899E0
+;Hex value: 0.FFFFFFFFFFFFFEF7F98D3BFA8 HFFFF
+ dq 0FFFFFFFFFFFFFEF8H
+ dw (bSign shl 8)+bTAG_VALID,0FFFFH-1
+
+;+0.99999 99999 99999 99996 415 E0 0 (no change)
+;Hex value 0.FFFFFFFFFFFFFFFF56B402618 H0
+ dq 0FFFFFFFFFFFFFFFFH
+ dw bTAG_VALID,00H-1
+
+
+tSinPoly label word
+
+;These constants are derived from Hart #3044: sin(x) = x * P(x^2),
+;accurate to 20.73 digits over interval [0, pi/4]. The original
+;constants in Hart required that the argument x be divided by pi/4.
+;These constants have been scaled so this is no longer required.
+;Scaling is done by multiplying the constant by a power of 4/pi.
+;The power is given in the table.
+
+ dd 7-1 ;Degree seven, but the last coefficient
+ ;is 1.0 and is not listed here.
+
+; Original Hart constant power Scaled constant
+;
+;-0.20225 31292 93 E-13 15 -0.757786788401271156262125540409E-12
+;Hex value: 0.D54C4AF2B524F0F2D6411C90A HFFD8
+ dq 0D54C4AF2B524F0F3H
+ dw (bSign shl 8)+bTAG_VALID,0FFD8H-1
+
+;+0.69481 52035 0522 E-11 13 +0.160583476232246065559545749398E-9
+;Hex value: 0.B0903AF085DA66030F16E43BC HFFE0
+ dq 0B0903AF085DA6603H
+ dw bTAG_VALID,0FFE0H-1
+
+;-0.17572 47417 61708 06 E-8 11 -0.250521047382673309542092418731E-7
+;Hex value: 0.D73229320D2AF05971AC96FF4 HFFE7
+ dq 0D73229320D2AF059H
+ dw (bSign shl 8)+bTAG_VALID,0FFE7H-1
+
+;+0.31336 16889 17325 348 E-6 9 +0.275573192133901687156480447942E-5
+;Hex value: 0.B8EF1D2984D2FBA28A9CC9DEE HFFEE
+ dq 0B8EF1D2984D2FBA3H
+ dw bTAG_VALID,0FFEEH-1
+
+;-0.36576 20418 21464 00052 9 E-4 7 -0.198412698412531058609618529749E-3
+;Hex value: 0.D00D00D00C3FDDD7916E5CB28 HFFF4
+ dq 0D00D00D00C3FDDD8H
+ dw (bSign shl 8)+bTAG_VALID,0FFF4H-1
+
+;+0.24903 94570 19271 62752 519 E-2 5 +0.83333333333333203341753387264E-2
+;Hex value: 0.8888888888884C95D619A0343 HFFFA
+ dq 08888888888884C96H
+ dw bTAG_VALID,0FFFAH-1
+
+;-0.80745 51218 82807 81520 2582 E-1 3 -0.166666666666666666281276062229E0
+;Hex value: 0.AAAAAAAAAAAAAA8E3AD80EAB8 HFFFE
+ dq 0AAAAAAAAAAAAAA8EH
+ dw (bSign shl 8)+bTAG_VALID,0FFFEH-1
+
+;+0.78539 81633 97448 30961 41845 E0 1 +0.99999999999999999999812025812E0
+;Hex value: 0.FFFFFFFFFFFFFFFFF71F88110 H0
+; dq 8000000000000000H ;This constant of 1.0 omitted here.
+; dw bTAG_VALID,0 ; It is handled in code.
+
+
+tTanPoly label word
+
+;These constants are derived from Hart #4286: tan(x) = x * P(x^2) / Q(x^2),
+;accurate to 19.94 digits over interval [0, pi/4]. The original
+;constants in Hart required that the argument x be divided by pi/4.
+;These constants have been scaled so this is no longer required.
+;Scaling is done by multiplying the constant by the same power of 4/pi
+;as the power of x the constant is used on. However, the highest
+;degree coefficient of Q() is 1, and after scaling this way it would
+;become (4/pi)^8. In order to keep this coefficient equal to one,
+;we scale everything again by (pi/4)^8. This scaling is partially
+;canceled by the original scaling by powers of 4/pi, and the net
+;resulting power of pi/4 is given in the table.
+
+
+ dd 3 ;First poly is degree 3
+
+; Original Hart constant power Scaled constant
+;
+;-.45649 31943 86656 31873 96113 7 E2 1 -35.8528916474714232910463077546
+;Hex value: 0.8F695C6D93AF6F97B6E022AB3 H6
+ dq 08F695C6D93AF6F98H
+ dw (bSign shl 8)+bTAG_VALID,06H-1
+
+;+.14189 85425 27617 78388 00394 831 E5 3 +6874.60229709782436592720603503
+;Hex value: 0.D6D4D181240D0D08C88DF4AA6 HD
+ dq 0D6D4D181240D0D09H
+ dw bTAG_VALID,0DH-1
+
+;-.89588 84400 67680 41087 29639 541 E6 5 -267733.884797157298951145495276
+;Hex value: 0.82BABC504220C62B1D0722684 H13
+ dq 082BABC504220C62BH
+ dw (bSign shl 8)+bTAG_VALID,013H-1
+
+;+.10888 60043 72816 87521 38857 983 E8 7 +2007248.9111748838841548144685
+;Hex value: 0.F506874A160EB9C0994AADD6A H15
+ dq 0F506874A160EB9C1H
+ dw bTAG_VALID,015H-1
+
+
+
+ dd 4 ;Second poly is degree 4
+;NOTE: Eval2Poly assumes the first coefficient is 1.0, so it is omitted
+
+; Original Hart constant power Scaled constant
+;
+;-.10146 56190 25288 53387 54401 947 E4 2 -625.890950057027419879480354834
+;Hex value: 0.9C790553635355A95241A5324 HA
+ dq 09C790553635355A9H
+ dw (bSign shl 8)+bTAG_VALID,0AH-1
+
+;+.13538 27128 05119 09382 89294 872 E6 4 +51513.6992033752080924797647367
+;Hex value: 0.C939B2FEFE0DC585E649870FE H10
+ dq 0C939B2FEFE0DC586H
+ dw bTAG_VALID,010H-1
+
+;-.39913 09518 03516 51504 43427 94 E7 6 -936816.855188785264866481436899
+;Hex value: 0.E4B70DAEDA6F89E5A7CE626FA H14
+ dq 0E4B70DAEDA6F89E6H
+ dw (bSign shl 8)+bTAG_VALID,014H-1
+
+;+.13863 79666 35676 29165 33913 361 E8 8 +2007248.91117488388417770850458
+;Hex value: 0.F506874A160EB9C0CCD8313BC H15
+ dq 0F506874A160EB9C1H
+ dw bTAG_VALID,015H-1
diff --git a/private/ntos/dll/i386/emulator.asm b/private/ntos/dll/i386/emulator.asm
new file mode 100644
index 000000000..73a2c1c36
--- /dev/null
+++ b/private/ntos/dll/i386/emulator.asm
@@ -0,0 +1,267 @@
+ page 78,132
+ title emulator - 80387 emulator for flat 32-bit OS
+;*******************************************************************************
+; Copyright (c) Microsoft Corporation 1991
+; All Rights Reserved
+;
+;emulator.asm - 80387 emulator
+; by Tim Paterson
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+; [] 11/13/92 JWM Bug fixes for esp-indexed addressing, handling of denormals.
+; [] 01/18/93 JWM Bug fixes for preservation of condition & error codes.
+;
+;*******************************************************************************
+
+ .386p
+ .387
+ .model flat,Pascal
+ option oldstructs ;JWM
+
+;*******************************************************************************
+;
+; Define segments.
+;
+;*******************************************************************************
+
+
+;These equates give access to the program that's using floating point.
+dseg equ ss ;Segment of program's data
+cseg equ es ;Segment of program's code
+
+edata segment dword public 'FAR_DATA'
+edata ends
+
+ecode segment dword public 'CODE'
+ecode ends
+
+
+assume cs:ecode
+
+ifdef NT386
+assume ds:nothing
+assume fs:edata
+else
+assume ds:edata
+assume fs:nothing
+endif
+
+assume es:nothing
+assume gs:nothing
+assume ss:nothing
+
+ifdef NT386
+ include ks386.inc
+ include nt386npx.inc
+ include callconv.inc
+ include ..\..\vdm\i386\vdmtb.inc
+endif ; NT386
+
+;*******************************************************************************
+;
+; List external functions.
+;
+;*******************************************************************************
+
+ifdef NT386
+ EXTRNP _NtRaiseException,3
+ EXTRNP _RtlRaiseStatus,1
+ EXTRNP _ZwRaiseException,3
+ EXTRNP _NpxNpSkipInstruction,1
+endif ; NT386
+
+ifdef _DOS32EXT
+ extern _SelKrnGetEmulData:NEAR
+ extern DOS32RAISEEXCEPTION:NEAR
+endif ; _DOS32EXT
+
+ifdef _CRUISER
+ extern DOS32IRAISEEXCEPTION:near
+endif ; CRUISER
+
+
+;*******************************************************************************
+;
+; Segment override macro (for NT)
+;
+;*******************************************************************************
+
+ifdef NT386
+ EMSEG EQU FS
+else
+ EMSEG EQU DS
+endif
+
+;;*******************************************************************************
+;;
+;; Include some more macros and constants.
+;;
+;;*******************************************************************************
+;
+ include em387.inc
+ include emstack.inc ; stack management macros
+;**************************************************************************
+;**************************************************************************
+;**************************************************************************
+subttl emulator.asm - Emulator Task DATA Segment
+page
+;*********************************************************************;
+; ;
+; Emulator Task DATA Segment ;
+; ;
+;*********************************************************************;
+
+edata segment
+
+;Emulator data. For NT386 only a block of the size of EmulatorTebData is
+;reserved here; otherwise the individual emulator variables are defined
+;in this segment.
+ifdef NT386
+ db size EmulatorTebData dup (?) ; Make space for variables
+else ; ifdef NT386
+
+Numlev equ 8 ; Number of stack registers
+
+InitControlWord equ 37FH ; Default - Round near,
+ ; 64 bits, all exceptions masked
+
+RoundMode dd ? ;Address of rounding routine
+SavedRoundMode dd ? ;For restoring RoundMode
+ZeroVector dd ? ;Address of sum-to-zero routine
+TransRound dd ? ;Round mode w/o precision
+Result dd ? ;Result pointer
+
+PrevCodeOff dd ?
+PrevDataOff dd ?
+
+;(See note below on 'Emulator stack area')
+CURstk dd ?
+
+XBEGstk db (Numlev-1)*Reg87Len dup(?) ;Allocate register 1 - 7
+
+;NOTE(review): XINITstk and XENDstk are referenced by the equates below but
+;are not defined anywhere in this part of the file -- confirm where the
+;storage for register 0 and the end-of-stack label are declared.
+BEGstk EQU offset edata:XBEGstk
+INITstk EQU offset edata:XINITstk
+ENDstk EQU offset edata:XENDstk
+
+FloatTemp db Reg87Len dup(?) ;Scratch register-format temporary
+ArgTemp db Reg87Len dup(?) ;Scratch register-format temporary
+
+public Trap7Handler
+Trap7Handler dd 0
+
+;We're DWORD aligned at this point
+
+LongStatusWord label dword ;Combined Einstall, CURerr, StatusWord
+.erre Einstall eq $
+.erre StatusWord eq $+1
+.erre CURerr eq $+3
+
+Einstall db 0 ; Emulator installed flag
+
+StatusWord label word
+ SWerr db ? ; Initially no exceptions (sticky flags)
+CurErrCond label word ; Combined error and condition codes
+ SWcc db ? ; Condition codes from various operations
+
+ CURerr db ? ; initially 8087 exception flags clear
+ ; this is the internal flag reset after
+ ; each operation to detect per instruction
+ ; errors
+
+LongControlWord label dword ;Combined ControlWord and ErrMask
+.erre ControlWord eq $
+.erre ErrMask eq $+2
+
+ControlWord label word
+ CWmask db ? ; exception masks
+ CWcntl db ? ; arithmetic control flags
+
+ ErrMask db ?
+ dummy db ?
+
+endif ; ifdef NT386 else
+
+;*******************************************************************************
+;
+; Emulator stack area
+;
+;The top of stack pointer CURstk is initialized to the last register
+;in the list; on a real 8087, this corresponds to hardware register 0.
+;The stack grows toward lower addresses, so the first push (which is
+;hardware register 7) is stored into the second-to-last slot. This gives
+;the following relationship between hardware registers and memory
+;locations:
+;
+; BEGstk --> | reg 1 | (lowest memory address)
+; | reg 2 |
+; | reg 3 |
+; | reg 4 |
+; | reg 5 |
+; | reg 6 |
+; | reg 7 |
+; | reg 0 | <-- Initial top of stack (empty)
+; ENDstk -->
+;
+;This means that the wrap-around case on decrementing CURstk will not
+;occur until the last (8th) item is pushed.
+;
+;Note that the physical register numbers are only used in regard to
+;the tag word. All other operations are relative the current top.
+
+
+edata ends
+
+subttl emulator.asm
+page
+;*********************************************************************;
+; ;
+; Start of Code Segment ;
+; ;
+;*********************************************************************;
+
+
+ecode segment
+
+ public __fpemulatorbegin
+__fpemulatorbegin equ $ ; emulator really starts here
+
+ include emfinit.asm
+ include emerror.asm ; error handler
+ include emdisp.asm ; dispatch tables
+
+ include emf386.asm ; Flat 386 emulation entry
+ include emdecode.asm ; instruction decoder
+
+ include emarith.asm ; arithmetic dispatcher
+ include emfadd.asm ; add and subtract
+ include emfmul.asm ; multiply
+ include emfdiv.asm ; division
+ include emround.asm ; rounding
+ include emload.asm ; load memory operands
+ include emstore.asm ; store memory operands
+ include emfmisc.asm ; miscellaneous instructions
+ include emfcom.asm ; compare
+ include emfconst.asm ; constant loading
+ include emlsbcd.asm ; packed BCD conversion
+ include emxtract.asm ; xtract and scale
+ include emfprem.asm ; partial remainder
+ include emtrig.asm ; trig instructions
+ include emftran.asm ; transcendentals
+ include emlsenv.asm
+ include emfsqrt.asm ; square root
+ifndef NT386
+ include emccall.asm
+endif
+
+;Shared do-nothing handler: all of the labels below resolve to the same
+;single RET.
+UNUSED:
+eFSETPM:
+eFNOP:
+eFENI:
+eFDISI:
+ ret ;Return to EMLFINISH
+
+
+ public __fpemulatorend
+__fpemulatorend equ $ ; emulator ends here
+
+ecode ends
+END
diff --git a/private/ntos/dll/i386/emxtract.asm b/private/ntos/dll/i386/emxtract.asm
new file mode 100644
index 000000000..ac8f7a6d1
--- /dev/null
+++ b/private/ntos/dll/i386/emxtract.asm
@@ -0,0 +1,309 @@
+ subttl emxtract - FXTRACT and FSCALE instructions
+ page
+;*******************************************************************************
+;emxtract - FXTRACT and FSCALE instructions
+;
+; Microsoft Confidential
+;
+; Copyright (c) Microsoft Corporation 1991
+; All Rights Reserved
+;
+;Inputs:
+; edi = [CURstk]
+;
+;Revision History:
+;
+; [] 09/05/91 TP Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+XtractStackOver: ;Entered from eFXTRACT when the slot for the pushed result is not empty
+ mov EMSEG:[SWcc],C1 ;Flag stack overflow
+XtractEmpty: ;Entered directly from eFXTRACT when the operand itself is empty
+;Result is two Indefinites (if exception masked)
+ call StackError ;Put first indefinite at [edi] = ST(0)
+ jz XtractExit ;Error was unmasked--just exit
+ mov EMSEG:[CURstk],edi ;Commit the push: edi becomes the current top of stack
+ mov eax,EMSEG:[edi].ExpSgn ;Copy the value now at [edi] into [esi],
+ mov EMSEG:[esi].ExpSgn,eax ; the slot that was the top on entry
+ mov eax,EMSEG:[edi].lManHi
+ mov EMSEG:[esi].lManHi,eax
+ mov eax,EMSEG:[edi].lManLo
+ mov EMSEG:[esi].lManLo,eax
+ ret
+
+ PrevStackWrap edi,Xtract ;Companion to the PrevStackElem below (macro bodies are not in this file)
+;FXTRACT: the new top of stack gets the operand's mantissa with a zero exponent; the old top gets the exponent as a number.
+EM_ENTRY eFXTRACT
+eFXTRACT:
+;edi = [CURstk]
+ mov esi,edi ;Save current ST
+ PrevStackElem edi,Xtract ;Step edi to the slot a push would use
+;edi = ST(0) (as it will be after the push)
+;esi = ST(1) (operand; this is ST(0) on entry)
+ mov eax,EMSEG:[esi].ExpSgn ;eax = operand exponent (high half), sign and tag (al)
+;Exception priority requires reporting stack underflow (i.e., using an EMPTY)
+;before stack overflow (i.e., no place for result). Yes, both can happen
+;together if the program has altered the stack (ST empty when ST(-1) isn't).
+ cmp al,bTAG_EMPTY ;Is operand empty?
+ jz XtractEmpty
+ cmp EMSEG:[edi].bTag,bTAG_EMPTY ;Is there an empty spot?
+ jnz XtractStackOver
+ cmp al,bTAG_ZERO ;Is it special?
+ jae XtractSpclOrZero ;Flags from this compare are re-tested at XtractSpclOrZero
+XtractNormal: ;Also entered from XtractSpcl for a masked denormal (al = new tag)
+ mov EMSEG:[CURstk],edi ;Commit the push
+.erre TexpBias eq 0
+ movzx ebx,ax ;Zero exponent
+;Save mantissa in ST(0)
+ mov EMSEG:[edi].ExpSgn,ebx
+ mov ebx,EMSEG:[esi].lManHi
+ mov EMSEG:[edi].lManHi,ebx
+ mov ebx,EMSEG:[esi].lManLo
+ mov EMSEG:[edi].lManLo,ebx
+ mov edi,esi ;Save ST(1) pointer in edi
+ shr eax,16 ;Move exponent down
+ call NormInt16 ;in emload.asm
+;mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl
+ mov EMSEG:[edi].lManLo,esi ;Store the converted exponent into ST(1)
+ mov EMSEG:[edi].lManHi,ebx
+ mov EMSEG:[edi].ExpSgn,ecx
+XtractExit: ;Shared plain return used by the other Xtract paths
+ ret
+
+XtractSpcl: ;al = tag of a special operand (infinity, NAN or denormal)
+ cmp al,bTAG_INF
+ jz XtractInf
+ cmp al,bTAG_NAN
+ jz XtractNAN
+;Must be denormal. Change tag to VALID or SNGL.
+ cmp EMSEG:[esi].lManLo,0 ;Any bits in low half?
+.erre bTAG_VALID eq 1
+.erre bTAG_SNGL eq 0
+ setnz al ;if low half==0 then al=0 else al=1
+ mov EMSEG:[CURerr],Denormal ;mov/test below leave al (the new tag) intact for XtractNormal
+ test EMSEG:[CWmask],Denormal ;Is it masked?
+ jnz XtractNormal ;If so, ignore denormalization
+ ret ;Unmasked: return without storing anything to the stack
+
+XtractSpclOrZero: ;Flags are still those of "cmp al,bTAG_ZERO" in eFXTRACT
+ ja XtractSpcl ;Above bTAG_ZERO means one of the special tags
+;Operand is zero. Result is ST(0) = 0 (same sign), ST(1) = -infinity
+ mov EMSEG:[CURerr],ZeroDivide
+ test EMSEG:[CWmask],ZeroDivide ;Exception masked?
+ jz XtractExit ;Unmasked: return without storing anything to the stack
+ mov EMSEG:[CURstk],edi ;Commit the push
+ mov EMSEG:[edi].ExpSgn,eax ;Copy the zero operand (eax = its ExpSgn) to the new top
+ mov eax,EMSEG:[esi].lManHi
+ mov EMSEG:[edi].lManHi,eax
+ mov eax,EMSEG:[esi].lManLo
+ mov EMSEG:[edi].lManLo,eax
+ mov EMSEG:[esi].ExpSgn,(IexpMax-IexpBias+TexpBias) shl 16 + bSign shl 8 + bTAG_INF ;Negative infinity: exponent, sign and tag in one store
+ mov EMSEG:[esi].bMan7,80H ;Change zero to infinity
+ ret
+
+XtractInf:
+;Result is ST(0) = infinity (same sign), ST(1) = +infinity
+ mov EMSEG:[esi].bSgn,0 ;Ensure ST(1) is positive
+XtractQNAN: ;Shared tail: push a copy of the operand; eax still holds its original ExpSgn
+ mov EMSEG:[CURstk],edi ;Commit the push
+ mov EMSEG:[edi].ExpSgn,eax ;New top gets the ExpSgn read at entry (sign as it was before any change above)
+ mov eax,EMSEG:[esi].lManHi ;Mantissa is copied from [esi] as it is now
+ mov EMSEG:[edi].lManHi,eax
+ mov eax,EMSEG:[esi].lManLo
+ mov EMSEG:[edi].lManLo,eax
+ ret
+
+XtractNAN:
+;Result is two QNANs, signal Invalid Operation if SNAN
+ test EMSEG:[esi].bMan7,40H ;Is it SNAN?
+ jnz XtractQNAN ;Bit 40H already set: quiet NAN, just duplicate it
+ mov EMSEG:[CURerr],Invalid
+ test EMSEG:[CWmask],Invalid ;Is Invalid masked?
+ jz XtractExit ;Unmasked: return without storing anything to the stack
+ or EMSEG:[esi].bMan7,40H ;Change to QNAN
+ jmp XtractQNAN ;Duplicate the (now quiet) NAN onto the new top
+
+;*******************************************************************************
+;
+;FSCALE instruction
+
+;Actual instruction entry point is in emarith.asm
+
+;Dispatch table for scale
+;
+;One operand has been loaded into ecx:ebx:esi ("source"), the other is
+;pointed to by edi ("dest").
+;
+;Tag of source is shifted. Tag values are as follows:
+
+.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero
+.erre TAG_VALID eq 1
+.erre TAG_ZERO eq 2
+.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty
+
+;Any special case routines not found in this file are in emarith.asm
+
+tFscaleDisp label dword ;Source (reg) Dest (*[di] = ST)
+ dd ScaleDouble ;single single
+ dd ScaleDouble ;single double
+ dd ScaleX ;single zero
+ dd ScaleSpclDest ;single special
+ dd ScaleDouble ;double single
+ dd ScaleDouble ;double double
+ dd ScaleX ;double zero
+ dd ScaleSpclDest ;double special
+ dd ScaleX ;zero single
+ dd ScaleX ;zero double
+ dd ScaleX ;zero zero
+ dd ScaleSpclDest ;zero special
+ dd ScaleSpclSource ;special single
+ dd ScaleSpclSource ;special double
+ dd ScaleSpclSource ;special zero
+ dd TwoOpBothSpcl ;special special
+ dd ScaleTwoInf ;Two infinities (17th entry, past the 4x4 tag grid; how it is selected is not shown in this file)
+
+
+;The unmasked response to overflow and underflow with FSCALE is complicated
+;by the extreme range it can generate. Normally, the exponent is biased
+;by 24,576 in the appropriate direction to bring it back into range.
+;This may not be enough, however. If it isn't, a result of infinity
+;(with the correct sign) is returned for overflow, regardless of the
+;rounding mode. For underflow, zero (with the correct sign) is returned,
+;even if it could be represented as a denormal. This may be the only
+;operation in which the unmasked response destroys the operands beyond
+;recovery.
+
+BigScale:
+;Scale factor is much too big. Just shift mantissa right two bits to get
+;MSB out of sign bit and ensure no overflow when we add.
+ mov cl,2 ;Always shift 2 bits if it's big
+ jmp ScaleCont
+;FSCALE for two ordinary operands: add the integer part of ST(1) to the exponent of ST(0).
+ScaleDouble:
+;ebx:esi = ST(1) mantissa
+;ecx = ST(1) sign in bit 15, exponent in high half
+;edi = pointer to ST(0)
+ rol ecx,16 ;Bring exponent down, sign to top
+ or ch,ch ;Check sign of exponent
+ js ScaleX ;No work if less than zero
+ cmp cx,30 ;Scale factor exceed 30 bits?
+ jge BigScale
+ not cl ;cl = amount to shift right (mod 32)
+ScaleCont:
+ shr ebx,cl ;ebx = exponent adjustment for ST(0)
+;Use two's complement if negative (complement and increment)
+ mov eax,ecx ;Bit 31 of ecx is ST(1)'s sign after the rol above
+ cdq ;Extend sign through edx
+ xor ebx,edx ;Complement if negative
+ sub ebx,edx ;Increment if negative
+;Scale exponent
+ movsx eax,EMSEG:[edi].wExp ;Get exponent to adjust
+ add eax,ebx ;Can't overflow
+ cmp eax,IexpMax-IexpBias ;Within normal range?
+ jge ScaleOverflow
+ cmp eax,IexpMin-IexpBias
+ jle ScaleUnderflow
+SaveScaledExp: ;Also entered from the unmasked overflow/underflow paths with a re-biased exponent in eax
+;Result fits within normal range
+ mov EMSEG:[edi].wExp,ax ;Update exponent of ST(0)
+ScaleX: ;Plain return, also used as the "nothing to do" dispatch target
+ ret
+
+ScaleOverflow:
+;eax = exponent that's too big
+ mov EMSEG:[CURerr],Overflow
+ test EMSEG:[CWmask],Overflow ;Is exception unmasked?
+ jz UnmaskedScaleOvfl
+;Produce masked overflow response
+ mov al,EMSEG:[CWcntl] ;Get rounding control
+ mov ah,al
+;Return max value if RCup bit = 1 and -, or RCdown bit = 1 and +
+;i.e., RCup & sign OR RCdown & not sign
+.erre RCchop eq RCup + RCdown ;Always return max value
+.erre RCnear eq 0 ;Never return max value
+ sar ch,7 ;Expand sign through whole byte -- NOTE(review): the only visible entry is from ScaleDouble, where ch was last set by "rol ecx,16" (high byte of ST(1)'s exponent); confirm ch holds the result's sign here
+.erre (RCdown and bSign) eq 0 ;Don't want to change real sign
+ xor ch,RCdown ;Flip sign for RCdown bit
+ and ah,ch ;RCup & sign OR RCdown & not sign
+ jz ScaleToInfinity ;Save Infinity
+;Get max value -- NOTE(review): ecx is stored as ExpSgn below; confirm it holds a valid exponent/sign/tag on this path
+ sub ecx,1 shl 16 ;Drop exponent by 1
+ xor esi,esi
+ dec esi ;esi == -1
+ mov ebx,esi ;Mantissa = all ones
+SaveScaleMax: ;Common store: ebx:esi = mantissa, ecx = ExpSgn, edi -> ST(0)
+ mov EMSEG:[edi].lManLo,esi
+ mov EMSEG:[edi].lManHi,ebx
+ mov EMSEG:[edi].ExpSgn,ecx
+ ret
+;Unmasked overflow: re-bias the exponent; if it is still too big, return infinity.
+UnmaskedScaleOvfl:
+ sub eax,UnderBias ;Unmasked response
+ cmp eax,IexpMax-IexpBias ;Within normal range now?
+ jl SaveScaledExp ;Use exponent biased by 24K
+ScaleToInfinity: ;Also entered from ScaleSpclSource (scaling by +infinity)
+ mov ebx,1 shl 31 ;Mantissa = 80000000:00000000
+ xor esi,esi
+ mov ecx,(IexpMax-IexpBias+TexpBias) shl 16 + bTAG_INF
+ mov ch,EMSEG:[edi].bSgn ;Give it same sign
+ jmp SaveScaleMax ;Use infinity
+
+ScaleUnderflow:
+;eax = exponent that's too small
+ test EMSEG:[CWmask],Underflow ;Is exception unmasked?
+ jz ScaleSetUnder
+ cmp eax,-32768 ;Does exponent fit in 16 bits?
+ jg @F
+ mov ax,-32768 ;Clamp to the most negative 16-bit value
+@@:
+;Set up for denormalizer
+ mov ebx,EMSEG:[edi].lManHi ;ebx:esi = mantissa of ST(0)
+ mov esi,EMSEG:[edi].lManLo
+ shrd ecx,eax,16 ;Move exponent to high end of ecx
+ mov ch,EMSEG:[edi].bSgn ;Keep sign
+ xor eax,eax ;No sticky bits
+ mov EMSEG:[Result],edi ;Tell the denormalizer where the result goes (ST(0))
+ jmp Denormalize ;In emround.asm
+
+ScaleSetUnder:
+;Underflow exception not masked. Adjust exponent and try again.
+;eax = exponent that's too small, edi = pointer to ST(0)
+ mov EMSEG:[CURerr],Underflow ;Flag underflow once; covers both outcomes below
+ add eax,UnderBias ;Unmasked response
+ cmp eax,IexpMin-IexpBias ;Within normal range now?
+ jg SaveScaledExp ;Use exponent biased by 24K
+;Still too small even after biasing: fall through and return a signed zero.
+;(The original repeated the CURerr store here; nothing in between changes
+;CURerr, so the second store was redundant and has been dropped.)
+ScaleToZero: ;Also entered from ScaleSpclSource (scaling by -infinity)
+ mov ecx,bTAG_ZERO ;ExpSgn = zero exponent, zero tag
+ mov ch,EMSEG:[edi].bSgn ;Give it same sign
+ xor ebx,ebx ;Mantissa = 0
+ mov esi,ebx
+ jmp SaveScaleMax ;Set to zero
+
+;***
+ScaleSpclDest: ;Dispatch target when the destination (ST(0)) has a special tag
+ mov al,EMSEG:[edi].bTag ;Pick up tag
+ cmp al,bTAG_INF ;Scaling infinity?
+ jz ScaleRet ;No change if so
+ jmp SpclDest ;In emarith.asm
+;Shared plain return for the special-case scale paths.
+ScaleRet:
+ ret
+
+;***
+ScaleSpclSource: ;Dispatch target when the source has a special tag; cl = source tag, ch = source sign
+ cmp cl,bTAG_INF ;Scaling by infinity?
+ jnz SpclSource ;in emarith.asm
+ or ch,ch ;Scaling by -infinity?
+ js ScaleToZero ;Anything scaled by -infinity becomes a signed zero
+ cmp EMSEG:[edi].bTag,bTAG_ZERO ;Zero scaled by +infinity?
+ jnz ScaleToInfinity ;Non-zero scaled by +infinity becomes infinity
+ jmp ReturnIndefinite ;Invalid operation
+
+;***
+ScaleTwoInf: ;Both operands are infinity; ch = sign of the source (scale factor)
+ or ch,ch ;Scaling by +infinity?
+ jns ScaleRet ;All done then
+;Scaling infinity by -infinity
+ jmp ReturnIndefinite ;Invalid operation
diff --git a/private/ntos/dll/i386/ldrthunk.asm b/private/ntos/dll/i386/ldrthunk.asm
new file mode 100644
index 000000000..617c0ef00
--- /dev/null
+++ b/private/ntos/dll/i386/ldrthunk.asm
@@ -0,0 +1,89 @@
+ title "LdrInitializeThunk"
+;++
+;
+; Copyright (c) 1989 Microsoft Corporation
+;
+; Module Name:
+;
+; ldrthunk.s
+;
+; Abstract:
+;
+; This module implements the thunk for the LdrpInitialize APC routine.
+;
+; Author:
+;
+; Steven R. Wood (stevewo) 27-Apr-1990
+;
+; Environment:
+;
+; Any mode.
+;
+; Revision History:
+;
+;--
+
+.386p
+ .xlist
+include ks386.inc
+include callconv.inc ; calling convention macros
+ .list
+
+ EXTRNP _LdrpInitialize,3
+
+_TEXT SEGMENT DWORD PUBLIC 'CODE'
+ ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
+
+ page , 132
+
+;++
+;
+; VOID
+; LdrInitializeThunk(
+; IN PVOID NormalContext,
+; IN PVOID SystemArgument1,
+; IN PVOID SystemArgument2
+; )
+;
+; Routine Description:
+;
+; This function computes a pointer to the context record on the stack
+; and jumps to the LdrpInitialize function with that pointer as its
+; parameter.
+;
+; Arguments:
+;
+; NormalContext - User Mode APC context parameter (ignored).
+;
+; SystemArgument1 - User Mode APC system argument 1 (ignored).
+;
+; SystemArgument2 - User Mode APC system argument 2 (ignored).
+;
+; Return Value:
+;
+; None.
+;
+;--
+
+cPublicProc _LdrInitializeThunk , 4
+; Stack operands relative to esp at entry ([esp] is the return address):
+NormalContext equ [esp + 4]
+SystemArgument1 equ [esp + 8]
+SystemArgument2 equ [esp + 12]
+Context equ [esp + 16] ; context record starts right above the three arguments
+
+ lea eax,Context ; Calculate address of context record
+ mov NormalContext,eax ; Overwrite the first argument slot with that address
+if DEVL
+ xor ebp,ebp ; Mark end of frame pointer list
+endif
+IFDEF STD_CALL
+ jmp _LdrpInitialize@12 ; Jump (not call) to LdrpInitialize: the stack is handed over as is
+ELSE
+ jmp _LdrpInitialize ; Jump (not call) to LdrpInitialize: the stack is handed over as is
+ENDIF
+
+stdENDP _LdrInitializeThunk
+
+_TEXT ends
+ end
diff --git a/private/ntos/dll/i386/npxemltr.def b/private/ntos/dll/i386/npxemltr.def
new file mode 100644
index 000000000..0ddc11dbc
--- /dev/null
+++ b/private/ntos/dll/i386/npxemltr.def
@@ -0,0 +1,17 @@
+; SCCSID = @(#)npxemltr.def 13.2 89/11/15
+;
+; IBM/Microsoft Confidential
+;
+; Copyright (c) IBM Corporation 1987, 1989
+; Copyright (c) Microsoft Corporation 1987, 1989
+;
+; All Rights Reserved
+LIBRARY npxemltr
+
+DESCRIPTION 'OS/2 V2.0 80387 emulator. Version 6.00.001'
+
+DATA NONE
+
+EXPORTS
+ NPXNPHandler
+ NPXFPDataSize
diff --git a/private/ntos/dll/i386/npxnp.c b/private/ntos/dll/i386/npxnp.c
new file mode 100644
index 000000000..745709033
--- /dev/null
+++ b/private/ntos/dll/i386/npxnp.c
@@ -0,0 +1,204 @@
+/*++
+
+Copyright (c) 1989 Microsoft Corporation
+
+Module Name:
+
+ npxnp.c
+
+Abstract:
+
+ This module contains support for non-Flat mode NPX faults when
+ the application has its CR0_EM bit clear.
+
+Author:
+
+ Ken Reneris (kenr) 8-Dec-1994
+
+Environment:
+
+ User Mode only
+
+Revision History:
+
+--*/
+
+
+#include "csrdll.h"
+
+static UCHAR MOD16[] = { 0, 1, 2, 0 }; // displacement bytes added to Eip per ModR/M mod field, 16-bit addressing
+static UCHAR MOD32[] = { 0, 1, 4, 0 }; // displacement bytes added to Eip per ModR/M mod field, 32-bit addressing
+
+UCHAR // the byte read from SegCs:Eip
+NpxNpReadCSEip ( // fetch one code byte at Context's SegCs:Eip and advance Context->Eip by one
+ IN PCONTEXT Context // context record; its Eip field is incremented in place
+ )
+#pragma warning(disable:4035)
+{ // no return statement: the result is whatever the asm leaves in AL (hence the 4035 suppression)
+ _asm {
+ push es
+ mov ecx, Context
+ mov eax, [ecx] CONTEXT.SegCs
+ mov es, ax
+ mov eax, [ecx] CONTEXT.Eip
+ inc dword ptr [ecx] CONTEXT.Eip ; Advance EIP
+ mov al, es:[eax]
+ pop es
+ } // ES was borrowed for the read and is restored before leaving; Eip is bumped before the byte is read
+}
+#pragma warning(default:4035)
+
+
+VOID
+NpxNpSkipInstruction (
+    IN PCONTEXT Context
+    )
+/*++
+
+Routine Description:
+
+    Per the original header, this function gains control when the
+    system has no installed NPX support but the thread has cleared
+    its EM bit in CR0.
+
+    It advances Context->Eip over the NPX instruction found at
+    Context->SegCs:Eip.  Segment, operand-size and address-size
+    prefixes are consumed first.  What follows is then handled as:
+
+      - 9Bh (FWAIT): single byte, nothing more to skip.
+      - D8h..DFh (ESC): the ModR/M byte is consumed, then any SIB
+        byte and displacement implied by it.
+      - anything else: no further bytes are skipped.
+
+Environment:
+
+    16:16 mode
+
+Arguments:
+
+    Context - Context record of the faulting thread.  SegCs selects
+              the code segment to read from; Eip is updated in place.
+
+Return Value:
+
+    None.
+
+--*/
+{
+    UCHAR   Opcode, ModRm, Sib;
+    UCHAR   Mod, Rm;
+    UCHAR   Address32Bits;
+    ULONG   CallerCs;
+
+    //
+    // Start with the default address size of the caller's code
+    // segment: assume 16:16 unless bit 22 (400000h) of the access
+    // rights returned by LAR is set.
+    //
+
+    Address32Bits = 0;
+    CallerCs = Context->SegCs;
+    _asm {
+        mov     eax, CallerCs
+        lar     eax, eax
+        test    eax, 400000h
+        jz      short IsDefault16Bit
+
+        mov     Address32Bits, 1
+
+IsDefault16Bit:
+    }
+
+    //
+    // No try-except is used here because we are not on the correct
+    // stack.  A fault is possible when the instruction straddles the
+    // end of its selector, but that would kill the app anyway.
+    //
+
+    //
+    // Consume prefixes.  Every byte read advances Context->Eip, so
+    // when the loop exits Opcode holds the first non-prefix byte and
+    // Eip already points past it.
+    //
+
+    for (;;) {
+        Opcode = NpxNpReadCSEip(Context);
+
+        if (Opcode == 0x67) {
+
+            //
+            // Address size override: flip the addressing mode
+            //
+
+            Address32Bits ^= 1;
+            continue;
+        }
+
+        if (Opcode == 0x2e ||       // cs override
+            Opcode == 0x36 ||       // ss override
+            Opcode == 0x3e ||       // ds override
+            Opcode == 0x26 ||       // es override
+            Opcode == 0x64 ||       // fs override
+            Opcode == 0x65 ||       // gs override
+            Opcode == 0x66) {       // operand size override
+            continue;
+        }
+
+        break;
+    }
+
+    //
+    // FWait is a single byte opcode - all done
+    //
+
+    if (Opcode == 0x9b) {
+        return;
+    }
+
+    //
+    // Anything outside D8h..DFh is not an ESC instruction
+    //
+
+    if (Opcode < 0xD8 || Opcode > 0xDF) {
+#if DBG
+        DbgPrint ("P5_FPU_PATCH: 16: Not NPX ESC instruction\n");
+#endif
+        return;
+    }
+
+    //
+    // ModR/M byte of the NPX opcode.  mod == 3 (byte above BFh) is a
+    // register form with no memory operand bytes to skip.
+    //
+
+    ModRm = NpxNpReadCSEip(Context);
+    Mod = ModRm >> 6;
+    if (Mod == 3) {
+        return;
+    }
+
+    Rm = ModRm & 0x7;
+
+    if (!Address32Bits) {
+
+        //
+        // 16-bit addressing: displacement by mod, plus disp16 for
+        // the direct-address form (mod 0, r/m 6)
+        //
+
+        Context->Eip += MOD16 [Mod];
+        if (Mod == 0 && Rm == 6) {
+            Context->Eip += 2;
+        }
+        return;
+    }
+
+    //
+    // 32-bit addressing: displacement by mod, plus disp32 for the
+    // direct-address form (mod 0, r/m 5)
+    //
+
+    Context->Eip += MOD32 [Mod];
+    if (Mod == 0 && Rm == 5) {
+        Context->Eip += 4;
+    }
+
+    //
+    // r/m 4 means a SIB byte is present; reading it skips it.  With
+    // mod 0, a SIB base field of 5 carries a disp32.
+    //
+
+    if (Rm == 4) {
+        Sib = NpxNpReadCSEip(Context);
+
+        if (Mod == 0 && (Sib & 7) == 5) {
+            Context->Eip += 4;
+        }
+    }
+}
diff --git a/private/ntos/dll/i386/nt386npx.inc b/private/ntos/dll/i386/nt386npx.inc
new file mode 100644
index 000000000..b1095bc75
--- /dev/null
+++ b/private/ntos/dll/i386/nt386npx.inc
@@ -0,0 +1,40 @@
+NPX_CONTEXT_FULL EQU 00001000Fh ;/ Full context
+;Emulator names for context-record fields, each aliased to a Cs* symbol (the Cs* definitions are not in this file).
+ContextFlags equ CsContextFlags
+ctx_env equ CsFloatSave
+ctx_stack equ CsFloatSave+FpRegisterArea ;need to change this puppy
+ctx_Cr0NpxState equ CsFloatSave+FpCr0NpxState
+ctx_SegGs equ CsSegGs
+ctx_SegFs equ CsSegFs
+ctx_SegEs equ CsSegEs
+ctx_SegDs equ CsSegDs
+ctx_RegEdi equ CsEdi
+ctx_RegEsi equ CsEsi
+ctx_RegEbp equ CsEbp
+ctx_RegEbx equ CsEbx
+ctx_RegEdx equ CsEdx
+ctx_RegEcx equ CsEcx
+ctx_RegEax equ CsEax
+ctx_RegEip equ CsEip
+ctx_SegCs equ CsSegCs
+ctx_EFlags equ CsEflags
+ctx_RegEsp equ CsEsp
+ctx_SegSs equ CsSegSs
+;XCPT_FLOAT_* exception names aliased to the corresponding STATUS_FLOAT_* codes.
+XCPT_FLOAT_INVALID_OPERATION EQU STATUS_FLOAT_INVALID_OPERATION
+XCPT_FLOAT_DENORMAL_OPERAND EQU STATUS_FLOAT_DENORMAL_OPERAND
+XCPT_FLOAT_DIVIDE_BY_ZERO EQU STATUS_FLOAT_DIVIDE_BY_ZERO
+XCPT_FLOAT_OVERFLOW EQU STATUS_FLOAT_OVERFLOW
+XCPT_FLOAT_UNDERFLOW EQU STATUS_FLOAT_UNDERFLOW
+XCPT_FLOAT_INEXACT_RESULT EQU STATUS_FLOAT_INEXACT_RESULT
+XCPT_FLOAT_STACK_CHECK EQU STATUS_FLOAT_STACK_CHECK
+;Emulator names for exception-record fields, each aliased to an Er* symbol (defined outside this file).
+ExceptionNum equ ErExceptionCode
+FHandlerFlags equ ErExceptionFlags
+NestedExceptionReportRecord equ ErExceptionRecord
+ExceptionAddress equ ErExceptionAddress
+CParameters equ ErNumberParameters
+
+;Busy/idle flag values (where they are tested is not shown in this file).
+Em87Busy equ 1
+Em87Idle equ 0
diff --git a/private/ntos/dll/i386/ntnapntr.asm b/private/ntos/dll/i386/ntnapntr.asm
new file mode 100644
index 000000000..3c1c425f3
--- /dev/null
+++ b/private/ntos/dll/i386/ntnapntr.asm
@@ -0,0 +1,449 @@
+;++
+;
+; Copyright (c) 1991 Microsoft Corporation
+;
+; Module Name:
+;
+; ntnap.asm
+;
+; Abstract:
+;
+; This module implements the system service dispatch procedure.
+; It also creates a "profile" of each service by counting and
+; timing calls.
+;
+; Author:
+;
+; Russ Blake (russbl) 22-Apr-1991
+;
+; Environment:
+;
+; User or kernel mode.
+;
+; Revision History:
+;
+;--
+
+include ks386.inc
+include callconv.inc ; calling convention macros
+include mac386.inc
+include ntnap.inc
+
+.386
+
+EXTRN _NapDllInit:near
+EXTRN _NapRecordInfo:near
+
+NapStart equ [ebp - 08h] ; local: counter value taken before the call (slot runs up to ebp)
+NapEnd equ [ebp - 010h] ; local: counter value taken after the call (8 bytes below NapStart)
+NapServiceNum equ [ebp - 014h] ; local: service number being profiled
+
+NapLocalSize equ 4 * 5 ; bytes reserved below ebp for the three locals above (14h)
+
+NapCalSrvNum equ 0FFFFFFFFh ; service number -1, loaded by _NapCalibrate for calibration calls
+
+;++
+;
+; Routine Description:
+;
+; This routine is called to save registers during API profiling.
+; The objective is to preserve the caller's environment
+; while timing takes place and, once, while dll initialization
+; takes place. This routine saves registers on the stack to
+; permit recursive calls.
+;
+; There should be a matching call to NapRestoreRegs to restore
+; the registers.
+;
+; Arguments:
+;
+; All registers.
+;
+; Return Value:
+;
+; None. All registers are preserved on the stack.
+;
+;--
+
+
+.386p
+
+_TEXT SEGMENT DWORD USE32 PUBLIC 'CODE'
+ ASSUME CS:FLAT, DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
+
+cPublicProc _NapSaveRegs
+
+ ;
+ ; This is how the stack looks like upon entering this routine:
+ ;
+ ; ---+----+----+----+----+----
+ ; | Return Address |
+ ; ---+----+----+----+----+----
+ ; esp+ esp+
+ ; 0 4
+ ;
+ ;
+ ; -> popping makes esp go ->
+ ; <- pushing makes esp go <-
+ ;
+
+ push ebp ; This becomes the "original ebp" slot in the diagram below
+ mov ebp,esp ; Remember where we are during this stuff
+ ; ebp = Original esp - 4
+ push eax ; eax is saved here before being reused as scratch below
+ push ebx
+ push ecx
+ push edx
+ push esi
+ push edi
+ pushfd
+ push ds
+ push es
+ push ss
+ push fs
+ push gs
+
+ mov eax,[ebp+4] ; Grab Return Address
+ push eax ; Put Return Address on Stack
+ mov ebp,[ebp+0] ; Restore original ebp
+
+ ;
+ ; This is how the stack looks like just before executing RET:
+ ; (the old return-address slot at the far end is left in place;
+ ; _NapRestoreRegs later overwrites it with its own return address)
+ ;
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+
+ ; | Return Address | g s | f s |
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+
+ ; esp+
+ ; 0
+ ;
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+
+ ; | s s | e s | d s |
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+
+ ; esp+
+ ; c
+ ;
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+
+ ; | eflags | edi | esi |
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+
+ ; | edx | ecx | ebx |
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+----
+ ; | eax | original ebp | Return Address |
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+----
+ ; was
+ ; ebp+
+ ; 0
+ ;
+
+ stdRET _NapSaveRegs ; eax now holds the return address; the saved registers stay on the stack for _NapRestoreRegs
+
+stdENDP _NapSaveRegs
+
+cPublicProc _NapRestoreRegs,,near
+
+ ;
+ ; Undoes _NapSaveRegs: pops the saved registers and returns.
+ ; This is how the stack looks like upon entering this routine:
+ ;
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+
+ ; | Return Address | g s | f s |
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+
+ ; esp+
+ ; 0
+ ;
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+
+ ; | s s | e s | d s |
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+
+ ; esp+
+ ; c
+ ;
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+
+ ; | eflags | edi | esi |
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+
+ ; esp+
+ ; 18
+ ;
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+
+ ; | edx | ecx | ebx |
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+
+ ; esp+
+ ; 24
+ ;
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+----
+ ; | eax | original ebp | Return Address |
+ ; +----+----+----+----+----+----+----+----+----+----+----+----+----
+ ; esp+ esp+ esp+
+ ; 30 34 38
+ ;
+ pop eax ; Get Return Address
+ push ebp ; Save a temporary copy of the current ebp
+ mov ebp,esp ; ebp = esp as it was on entry, so the offsets above apply
+
+ mov [ebp+038h],eax ; Put Return Address on Stack
+ pop eax ; Get back the ebp just pushed
+ mov [ebp+034h],eax ; Put it in the original BP place
+
+ pop gs ; Pops mirror the pushes in _NapSaveRegs, in reverse order
+ pop fs
+ pop ss
+ pop es
+ pop ds
+ popfd
+ pop edi
+ pop esi
+ pop edx
+ pop ecx
+ pop ebx
+ pop eax
+ pop ebp ; Takes the value stored at +34h above
+
+ stdRET _NapRestoreRegs ; Returns through the address stored at +38h above
+
+stdENDP _NapRestoreRegs
+
+
+;++
+;
+; Routine Description:
+;
+; This routine is called by the initialization code in the
+; Nt Api Profiler to calibrate the cost of profiling.
+; It simulates the overhead of a profiled call to a system
+; service, but carefully avoids doing any of the normal
+; work associated with such a call.
+;
+; NOTE: This routine's code should exactly parallel that of
+; _NapProfileDispatch, except for any operation normally
+; (i.e., when not profiling) executed to call a system service.
+; This amounts to an "int 2Eh" in the middle of the routine.
+;
+; Arguments:
+;
+; eax - Service Number of the routine being called. Must be -1
+; for all calls to this routine. The routine
+; _NapRecordInfo notes this value and discards
+; the call.
+;
+; edx - Pointer to the parameters to the Service; ignored by
+; this routine.
+;
+; Return Value:
+;
+; None.
+;
+;--
+
+
+cPublicProc _NapCalibrate , ,near
+
+
+ push ebp ; Locals: the value of
+ mov ebp, esp ; the perf counter before and
+ sub esp, NapLocalSize ; after the API call
+
+ mov eax, NapCalSrvNum ; special routine number
+ mov NapServicenum, eax ; is used for calibration
+ ; can't be passed in eax from
+ ; C routine, so load it here
+ ; save the service routine number
+
+
+ stdCall _NapSaveRegs ; save register state so call to
+ ; get counter does not destroy them
+
+ stdCall _NapDllInit ; initialize dll if necessary
+
+; Now call NtQueryPerformanceCounter to get the starting count;
+; Store this locally
+
+ push 0 ; don't need frequency: pass 0
+ lea eax, NapStart ; (eax) = pointer to counter
+ push eax ; pass pointer to counter
+ mov eax, NapCounterServiceNumber ; service number of the counter query (defined outside this file)
+ lea edx, [esp] ; (edx) -> arguments
+ int 2Eh ; get the current counter value
+ add esp, 08h ; remove counter parameters
+
+; Restore caller's registers
+
+ stdCall _NapRestoreRegs
+
+; We're just calibrating the overhead, so we don't call the system
+; service here.
+
+; Save registers so we can complete the profile accounting.
+
+ stdCall _NapSaveRegs
+
+; Now get the ending counter.
+
+ push 0 ; don't need frequency: pass 0
+ lea eax, NapEnd ; (eax) = pointer to counter
+ push eax ; pass pointer to counter
+ mov eax, NapCounterServiceNumber
+ lea edx, [esp] ; (edx) -> arguments
+ int 2Eh ; get the current counter value
+ add esp, 08h ; remove counter parameters
+
+; Compute the time for this call and increment the number of calls.
+
+ lea eax, NapEnd ; pointer to start/end counters
+ ; ID of this routine
+ stdCall _NapRecordInfo, <NapServiceNum, eax>
+
+ stdCall _NapRestoreRegs
+ ; restore caller's registers
+ leave ; we needed this for pseudo locals
+ stdRET _NapCalibrate
+stdENDP _NapCalibrate
+
+
+;++
+;
+; Routine Description:
+;
+; This routine is called by the USRSTUBS_ENTRY1 MACRO in the
+; services.prf to carry out profiling on an Nt system api call.
+;
+; Arguments:
+;
+; eax - Service Number of the routine being called. This number
+; is assigned by genprof.c from the table in services.tab.
+;
+; edx - Pointer to the parameters to the Service.
+;
+; Return Value:
+;
+; Whatever the system service returns.
+;
+;--
+
+
+
+cPublicProc _NapProfileDispatch , ,near
+
+ push ebp ; Locals: the value of
+ mov ebp, esp ; the perf counter before and
+ sub esp, NapLocalSize ; after the API call
+
+ mov NapServicenum, eax
+ ; save the service routine number
+
+ stdCall _NapSaveRegs ; save register state so call to
+ ; get counter does not destroy them
+
+ stdCall _NapDllInit ; initialize dll if necessary
+
+; Now call NtQueryPerformanceCounter to get the starting count;
+; Store this locally
+
+ push 0 ; don't need frequency: pass 0
+ lea eax, NapStart ; (eax) = pointer to counter
+ push eax ; pass pointer to counter
+ mov eax, NapCounterServiceNumber ; service number of the counter query (defined outside this file)
+ lea edx, [esp] ; (edx) -> arguments
+ int 2Eh ; get the current counter value
+ add esp, 08h ; remove counter parameters
+
+; Restore caller's registers
+
+ stdCall _NapRestoreRegs
+
+ INT 2Eh ; invoke system service (eax and edx were just restored by _NapRestoreRegs)
+
+; Save registers so we can complete the profile accounting.
+
+ stdCall _NapSaveRegs
+
+; Now get the ending counter.
+
+ push 0 ; don't need frequency: pass 0
+ lea eax, NapEnd ; (eax) = pointer to counter
+ push eax ; pass pointer to counter
+ mov eax, NapCounterServiceNumber
+ lea edx, [esp] ; (edx) -> arguments
+ int 2Eh ; get the current counter value
+ add esp, 08h ; remove counter parameters
+
+; Compute the time for this call and increment the number of calls.
+
+ lea eax, NapEnd ; pointer to start/end counters
+ ; ID of this routine
+ stdCall _NapRecordInfo, <NapServiceNum, eax>
+
+ stdCall _NapRestoreRegs
+ ; restore caller's registers
+ leave ; we needed this for pseudo locals
+ stdRET _NapProfileDispatch
+stdENDP _NapProfileDispatch
+
+;++
+;
+; Routine Description:
+;
+; This routine is called to get the spin lock associated with
+; a particular api. It prevents the simultaneous update
+; from multiple threads in this or other processors of the
+; profiling data for the api.
+;
+; Arguments:
+;
+; SpinLockAddr - address of the spin lock within the data
+; for the api being updated.
+;
+; Return Value:
+;
+; None.
+;
+;--
+
+
+cPublicProc _NapAcquireSpinLock , ,near
+
+ push eax ; eax is used as scratch and restored before returning
+ mov eax, [esp+8] ; get address of lock
+WaitForLock:
+ lock bts dword ptr [eax], 0 ; test and set the spinlock
+ jc SHORT WaitForLock ; bit 0 was already set (lock is held): try again
+ pop eax
+
+ stdRET _NapAcquireSpinLock
+
+stdENDP _NapAcquireSpinLock
+
+
+;++
+;
+; Routine Description:
+;
+; This routine is called to release the spin lock associated with
+; a particular api.
+;
+; Arguments:
+;
+; SpinLockAddr - address of the spin lock within the data
+; for the api being updated.
+;
+; Return Value:
+;
+; None.
+;
+;--
+
+
+cPublicProc _NapReleaseSpinLock , ,near
+
+ push eax ; eax is used as scratch and restored before returning
+ mov eax, [esp+8] ; get address of lock
+ lock btr dword ptr [eax], 0 ; release spinlock (clears bit 0 of the lock dword)
+ pop eax
+ stdRET _NapReleaseSpinLock
+
+stdENDP _NapReleaseSpinLock
+
+
+_TEXT ends
+
+ end
diff --git a/private/ntos/dll/i386/sources b/private/ntos/dll/i386/sources
new file mode 100644
index 000000000..5e5cd357a
--- /dev/null
+++ b/private/ntos/dll/i386/sources
@@ -0,0 +1,14 @@
+386_DLLLIBOBJECTS=$(BASEDIR)\public\sdk\lib\i386\exsup.lib
+
+i386_SOURCES=..\i386\critsect.asm \
+ ..\i386\ldrthunk.asm \
+ ..\i386\emulator.asm \
+ ..\i386\npxnp.c \
+ i386\usrstubs.asm
+
+ASM_DEFINES=-DNT386
+
+CONDITIONAL_INCLUDES=\
+ $(CONDITIONAL_INCLUDES) \
+ emccall.asm\
+ except32.inc