32 files changed, 11182 insertions, 0 deletions
diff --git a/private/ntos/dll/i386/critsect.asm b/private/ntos/dll/i386/critsect.asm
new file mode 100644
index 000000000..4b1f87cb4
--- /dev/null
+++ b/private/ntos/dll/i386/critsect.asm
@@ -0,0 +1,285 @@
+        title   "Critical Section Support"
+;++
+;
+;  Copyright (c) 1991  Microsoft Corporation
+;
+;  Module Name:
+;
+;     critsect.asm
+;
+;  Abstract:
+;
+;     This module implements functions to support user mode critical sections.
+;
+;  Author:
+;
+;     Bryan M. Willman (bryanwi) 2-Oct-91
+;
+;  Environment:
+;
+;     Any mode.
+;
+;  Revision History:
+;
+;
+;
+;   WARNING!!!!!!!!!! This code is duplicated in
+;   windows\base\client\i386\critsect.asm
+;
+;   Some day we should put it in a .inc file that both include.
+;
+;--
+
+.486p
+        .xlist
+include ks386.inc
+include callconv.inc                    ; calling convention macros
+        .list
+
+_DATA   SEGMENT DWORD PUBLIC 'DATA'
+    public _LdrpLockPrefixTable             ; NOTE(review): presumably consumed outside this file (name suggests the loader) - confirm
+_LdrpLockPrefixTable    label dword         ; zero-terminated list of the addresses of labels Lock1..Lock5
+        dd offset FLAT:Lock1                ; label on the lock inc in RtlEnterCriticalSection
+        dd offset FLAT:Lock2                ; label on the lock dec in RtlLeaveCriticalSection (final release)
+        dd offset FLAT:Lock3                ; label on the lock dec in RtlLeaveCriticalSection (recursive release)
+        dd offset FLAT:Lock4                ; label on the lock cmpxchg in RtlTryEnterCriticalSection
+        dd offset FLAT:Lock5                ; label on the lock inc in RtlTryEnterCriticalSection (recursive entry)
+        dd 0                                ; end-of-table marker
+_DATA   ENDS
+
+_TEXT   SEGMENT PARA PUBLIC 'CODE'
+        ASSUME  DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
+; External helpers, one argument each; all three are invoked through stdCall below.
+        EXTRNP  _RtlpWaitForCriticalSection,1   ; called from RtlEnterCriticalSection
+        EXTRNP  _RtlpUnWaitCriticalSection,1    ; called from RtlLeaveCriticalSection
+if DEVL
+        EXTRNP  _RtlpNotOwnerCriticalSection,1  ; NOTE(review): declared under DEVL but called under "if DBG" - confirm DBG implies DEVL
+endif
+; CriticalSection names the first stack argument; it is only valid while esp equals its value at procedure entry.
+CriticalSection equ     [esp + 4]
+
+        page , 132
+        subttl  "RtlEnterCriticalSection"
+
+;++
+;
+; NTSTATUS
+; RtlEnterCriticalSection(
+;    IN PRTL_CRITICAL_SECTION CriticalSection
+;    )
+;
+; Routine Description:
+;
+;    This function enters a critical section.
+;
+; Arguments:
+;
+;    CriticalSection - supplies a pointer to a critical section.
+;
+; Return Value:
+;
+;   STATUS_SUCCESS or raises an exception if an error occurred.
+;
+;--
+
+        align   16
+cPublicProc _RtlEnterCriticalSection,1
+cPublicFpo 1,0
+; Enter the critical section for the calling thread; returns with eax = 0 on every path in this routine.
+        mov     ecx,fs:PcTeb                ; (ecx) == NtCurrentTeb()
+        mov     edx,CriticalSection         ; (edx) -> critical section argument
+        mov     eax,TbClientId+4[ecx]       ; (eax) == NtCurrentTeb()->ClientId.UniqueThread
+
+if DBG
+        cmp     dword ptr TbSpare1[ecx],0   ; checked build only: breakpoint when the TEB Spare1 field is nonzero
+        jz      @f
+        int     3
+@@:
+endif ; DBG
+Lock1:
+   lock inc     dword ptr CsLockCount[edx]  ; interlocked inc of CriticalSection->LockCount
+        jnz     @F                          ; nonzero result: not the first entrant, go check the owner
+; Result was zero (LockCount was -1): the section was free and now belongs to this thread.
+setowner:
+        mov     CsOwningThread[edx],eax     ; owner = current thread id
+        mov     dword ptr CsRecursionCount[edx],1
+
+if DBG
+        inc     dword ptr TbCountOfOwnedCriticalSections[ecx]
+        push    edi                         ; edi is borrowed as a scratch pointer and restored below
+        mov     edi,CsDebugInfo[edx]
+        inc     dword ptr CsEntryCount[edi]
+        pop     edi
+endif ; DBG
+
+        xor     eax,eax                     ; return 0
+        stdRET  _RtlEnterCriticalSection
+
+        align   16
+@@:
+        cmp     CsOwningThread[edx],eax     ; already owned by the calling thread?
+        jne     @F                          ; no - must wait
+        inc     dword ptr CsRecursionCount[edx] ; recursive entry by the owner
+if DBG
+        mov     eax,CsDebugInfo[edx]        ; eax is free here; it is zeroed before returning
+        inc     dword ptr CsEntryCount[eax]
+endif ; DBG
+        xor     eax,eax                     ; return 0
+        stdRET  _RtlEnterCriticalSection
+
+@@:
+        stdCall _RtlpWaitForCriticalSection, <edx>
+        mov     ecx,fs:PcTeb                ; (ecx) == NtCurrentTeb()
+        mov     eax,TbClientId+4[ecx]       ; (eax) == NtCurrentTeb()->ClientId.UniqueThread
+        mov     edx,CriticalSection         ; registers are reloaded after the call, then ownership is recorded
+        jmp     setowner
+
+stdENDP _RtlEnterCriticalSection
+
+        page , 132
+        subttl  "RtlLeaveCriticalSection"
+;++
+;
+; NTSTATUS
+; RtlLeaveCriticalSection(
+;    IN PRTL_CRITICAL_SECTION CriticalSection
+;    )
+;
+; Routine Description:
+;
+;    This function leaves a critical section.
+;
+; Arguments:
+;
+;    CriticalSection - supplies a pointer to a critical section.
+;
+; Return Value:
+;
+;   STATUS_SUCCESS or raises an exception if an error occurred.
+;
+;--
+
+        align   16
+cPublicProc _RtlLeaveCriticalSection,1
+cPublicFpo 1,0
+; Release one level of ownership; on the final release LockCount decides whether RtlpUnWaitCriticalSection is called.
+        mov     edx,CriticalSection         ; (edx) -> critical section argument
+if DBG
+        mov     ecx,fs:PcTeb                ; (ecx) == NtCurrentTeb()
+        mov     eax,TbClientId+4[ecx]       ; (eax) == NtCurrentTeb()->ClientId.UniqueThread
+        cmp     eax,CsOwningThread[edx]     ; checked build only: caller must be the owner
+        je      @F
+        stdCall _RtlpNotOwnerCriticalSection, <edx>
+        mov     eax,STATUS_INVALID_OWNER
+        stdRET  _RtlLeaveCriticalSection
+@@:
+endif ; DBG
+        xor     eax,eax                     ; Assume STATUS_SUCCESS
+        dec     dword ptr CsRecursionCount[edx]
+        jnz     leave_recurs                ; skip if only leaving recursion
+
+        mov     CsOwningThread[edx],eax     ; clear owning thread id (eax is zero)
+
+if DBG
+        mov     ecx,fs:PcTeb                ; (ecx) == NtCurrentTeb()
+        dec     dword ptr TbCountOfOwnedCriticalSections[ecx]
+endif ; DBG
+
+Lock2:
+   lock dec     dword ptr CsLockCount[edx]  ; interlocked dec of
+                                            ; ... CriticalSection->LockCount
+        jge     @F                          ; result >= 0: go call RtlpUnWaitCriticalSection
+        stdRET  _RtlLeaveCriticalSection    ; result negative: return, eax still zero
+
+@@:
+        stdCall _RtlpUnWaitCriticalSection, <edx>
+        xor     eax,eax                     ; return STATUS_SUCCESS
+        stdRET  _RtlLeaveCriticalSection
+
+        align   16
+leave_recurs:
+Lock3:
+   lock dec     dword ptr CsLockCount[edx]  ; interlocked dec of
+                                            ; ... CriticalSection->LockCount
+        stdRET  _RtlLeaveCriticalSection    ; still owned by this thread; eax is zero
+
+stdENDP _RtlLeaveCriticalSection
+
+        page    ,132
+        subttl  "RtlTryEnterCriticalSection"
+;++
+;
+; BOOL
+; RtlTryEnterCriticalSection(
+;    IN PRTL_CRITICAL_SECTION CriticalSection
+;    )
+;
+; Routine Description:
+;
+;    This function attempts to enter a critical section without blocking.
+;
+; Arguments:
+;
+;    CriticalSection (a0) - Supplies a pointer to a critical section.
+;
+; Return Value:
+;
+;    If the critical section was successfully entered, then a value of TRUE
+;    is returned as the function value. Otherwise, a value of FALSE is returned.
+;
+;--
+
+CriticalSection equ     [esp + 4]
+
+cPublicProc _RtlTryEnterCriticalSection,1
+cPublicFpo 1,0
+
+        xor     edx,edx                     ; (edx) = 0, LockCount to store when acquiring
+        mov     ecx,CriticalSection         ; (ecx) -> critical section argument
+        mov     eax,-1                      ; (eax) = -1, LockCount expected when unowned
+Lock4:
+   lock cmpxchg dword ptr CsLockCount[ecx],edx  ; LockCount: -1 -> 0 if unowned
+        jnz     short tecOwned              ; exchange failed, section has an owner
+;
+; The section was free and now belongs to this thread. Record ownership
+; with a recursion count of one and report success.
+;
+        mov     eax,fs:TbClientId+4         ; (eax) == NtCurrentTeb()->ClientId.UniqueThread
+        mov     CsOwningThread[ecx],eax
+        mov     dword ptr CsRecursionCount[ecx],1
+
+if DBG
+        mov     eax,fs:PcTeb                ; (eax) == NtCurrentTeb()
+        inc     dword ptr TbCountOfOwnedCriticalSections[eax]
+endif ; DBG
+
+        mov     eax,1                       ; return TRUE
+        stdRET  _RtlTryEnterCriticalSection
+
+tecOwned:
+;
+; Somebody owns the section. Only the owning thread may proceed; it takes
+; the recursive path below. Any other thread gets FALSE without waiting.
+;
+        mov     eax,fs:TbClientId+4         ; (eax) == NtCurrentTeb()->ClientId.UniqueThread
+        cmp     eax,CsOwningThread[ecx]
+        jne     tecBusy                     ; owned by a different thread
+;
+; Recursive entry by the owner: atomic increment of LockCount, plain
+; increment of RecursionCount.
+;
+Lock5:
+   lock inc     dword ptr CsLockCount[ecx]
+        inc     dword ptr CsRecursionCount[ecx]
+        mov     eax,1                       ; return TRUE
+        stdRET  _RtlTryEnterCriticalSection
+
+tecBusy:
+        xor     eax,eax                     ; return FALSE
+        stdRET  _RtlTryEnterCriticalSection
+
+stdENDP _RtlTryEnterCriticalSection
+
+
+_TEXT   ends
+        end
diff --git a/private/ntos/dll/i386/emarith.asm b/private/ntos/dll/i386/emarith.asm
new file mode 100644
index 000000000..3b09de0b6
--- /dev/null
+++ b/private/ntos/dll/i386/emarith.asm
@@ -0,0 +1,335 @@
+	subttl  emarith.asm - Arithmetic Operations
+	page
+;*******************************************************************************
+;emarith.asm - Arithmetic Operations
+;
+;        Microsoft Confidential
+;
+;        Copyright (c) Microsoft Corporation 1991
+;        All Rights Reserved
+;
+;Purpose:
+;       Arithmetic Operations
+;
+;Revision History:
+;
+; []	09/05/91  TP	Initial 32-bit version.
+;
+;*******************************************************************************
+
+	NextStackWrap   esi,TwoOp       ;Tied to NextStackElem below
+;eFPREM entry. The labels from PremPointTopTwo onward are shared with the other two-operand entry points.
+EM_ENTRY eFPREM
+eFPREM:
+    and		[esp].[OldLongStatus+4],NOT(ConditionCode SHL 16)	;clear C0,C1,C2,C3
+	push	offset PremCont		;Return address if normal
+PremPointTopTwo:
+	push	offset PremSpclDone	;Return address if special
+	mov	ebp,offset tFpremDisp	;ebp = dispatch table used by the jump at the end
+PointTopTwo:
+	mov	esi,edi			;esi starts at edi, then is advanced by the NextStackElem macro
+	NextStackElem   esi,TwoOp
+TwoOpSiDi:
+	mov	ecx,EMSEG:[esi].ExpSgn	;Source: cl = tag, ch = sign byte
+	mov	ebx,EMSEG:[esi].lManHi	;Source mantissa, high dword
+	mov	esi,EMSEG:[esi].lManLo	;Source mantissa, low dword (pointer in esi is consumed)
+TwoOpSetResult:
+	mov	EMSEG:[Result],edi		;Save result pointer
+TwoOpResultSet:
+	mov     ah,EMSEG:[edi].bTag	;ah = tag of the operand at edi
+TwoOpDispAh:
+	mov	al,cl			;al = source tag
+TwoOpDispatch:
+	and     eax,TAG_MASK + 100H*TAG_MASK	;Look at internal tags only
+	shl     al,TAG_SHIFT		;Table index = (source tag shl TAG_SHIFT) or edi operand tag
+	or      al,ah
+	xor	ah,ah			;Zero ah
+;UNDONE:  masm bug!  ebp + scaled index requires a displacement.
+;UNDONE:  No displacement is needed here, so masm should generate a
+;UNDONE:  zero.  It doesn't!  dec eax so we can add 4*1 back.
+	dec	eax			;UNDONE
+	jmp     dword ptr cs:[ebp+4*eax+4];UNDONE Go to appropriate routine.
+
+EM_ENTRY eFPREM1
+eFPREM1:
+    and		[esp].[OldLongStatus+4],NOT(ConditionCode SHL 16)	;clear C0,C1,C2,C3
+	push	offset Prem1Cont	;Return address if normal
+	jmp	PremPointTopTwo		;Shares the special-case return address and tFpremDisp with eFPREM
+;eFSCALE pushes no return addresses of its own; it only selects its table and joins the shared path.
+EM_ENTRY eFSCALE
+eFSCALE:
+	mov	ebp,offset tFscaleDisp	;ebp = dispatch table for FSCALE
+	jmp	PointTopTwo
+
+EM_ENTRY eFPATAN
+eFPATAN:
+	mov	ebp,offset tFpatanDisp	;ebp = dispatch table for FPATAN
+TopTwoPop:
+	push	offset PopWhenDone	;Return address pushed for the dispatched routine
+	mov	esi,edi			;esi = old edi, loaded as the source operand at TwoOpSiDi
+	add	edi,Reg87Len		;edi = ST(1)
+        cmp     edi,ENDstk		;Stepped past the end of the register area?
+	jb	TwoOpSiDi		;No - edi is usable as is
+        mov     edi,BEGstk		;Yes - wrap to the start
+	jmp	TwoOpSiDi
+;eFYL2X and eFYL2XP1 differ from eFPATAN only in the dispatch table they select.
+EM_ENTRY eFYL2X
+eFYL2X:
+	mov	ebp,offset tFyl2xDisp
+	jmp	TopTwoPop
+
+EM_ENTRY eFYL2XP1
+eFYL2XP1:
+	mov	ebp,offset tFyl2xp1Disp
+	jmp	TopTwoPop
+
+;*******************************************************************************
+
+page
+;-----------------------------------------------------------;
+;                                                           ;
+;       Special Case Routines for Arithmetic Functions      ;
+;                                                           ;
+;-----------------------------------------------------------;
+
+;There are four kinds of "specials", encoded in the tag:
+;
+;	Empty
+; 	Infinity
+;	NAN (which can be QNAN or SNAN)
+;	Denormal
+;
+;Empty always results in an Invalid Operation exception with Stack Flag set
+;and C1 (O/U#) bit clear, and returns Indefinite (a specific QNAN).
+;
+;Operations on NAN return the same NAN except it is always modified to a 
+;QNAN.  If both  operands are NAN, the one with the larger mantissa is
+;returned.  An SNAN causes an Invalid Operation exception except for
+;internal FP stack operations, FCHS, and FABS.  A QNAN does not cause
+;an exception.
+;
+;Operations on Infinity return a result depending on the operation.
+;
+;UNDONE: Old code plays with sign of NAN when two NANs with equal
+;mantissas are used.  Why?
+
+;"***" means entry point from dispatch tables
+
+;*** Source-operand specials. As loaded at TwoOpSiDi: cl = source tag, ch = source sign, ebx:esi = source mantissa; edi -> other operand
+DivSpclSource:
+	cmp	cl,bTAG_INF
+	jnz	SpclSource
+;Division by infinity always returns zero
+	xor	ch,EMSEG:[edi].bSgn	;Result sign = xor of the two signs
+	jmp	SignedZero		;in emfmul.asm
+
+;***
+MulSpclSource:
+	cmp	cl,bTAG_INF
+	jnz	SpclSource
+MulByInf:
+	cmp	EMSEG:[edi].bTag,bTAG_ZERO	;Infinity * zero?
+	jz	ReturnIndefinite
+XorSourceSign:
+	xor	ch,EMSEG:[edi].bSgn	;Result sign = xor of the two signs
+	jmp	SaveResultEdi
+
+;***
+AddSpclSource:
+	cmp	cl,bTAG_INF
+	jnz	SpclSource
+	xor	ch,dl			;Flip sign of infinity if subtracting
+	jmp	SaveResultEdi
+
+DenormalSource:
+	mov	cl,bTAG_VALID		;Change denormal to DOUBLE
+	mov	EMSEG:[CURerr],Denormal	;NOTE(review): plain store, not OR - overwrites whatever CURerr held; confirm intended
+	test	EMSEG:[CWmask],Denormal	;Is denormal exception masked?
+	jnz	TwoOpResultSet		;Masked - dispatch again with the source tagged valid
+AbortOp:
+	mov	cl,bTAG_NOPOP		;Unmasked, don't pop stack
+	ret
+
+DenormalDisp:
+;Repeat dispatch, but for normal ops
+	jmp     dword ptr cs:[ebp+4*(TAG_VALID + TAG_VALID shl TAG_SHIFT)]
+
+;***
+DivrSpclSource:
+	cmp	cl,bTAG_INF
+	jz	XorSourceSign		;Return infinity
+SpclSource:
+	cmp	cl,bTAG_DEN
+	jz	DenormalSource
+	cmp	cl,bTAG_EMPTY
+	jz	StackError
+;Must be a NAN
+SourceNAN:
+	test	ebx,1 shl 30		;Check for SNAN
+	jnz	SaveResultEdi		;If QNAN, just use it as result
+SourceSNAN:
+	or	EMSEG:[CURerr],Invalid	;Flag the error
+	or	ebx,1 shl 30		;Make it into a QNAN
+	test	EMSEG:[CWmask],Invalid	;Is it masked?
+	jnz	SaveResultEdi		;If so, update with masked response
+	mov	cl,bTAG_NOPOP		;Unmasked, don't pop stack
+	ret
+
+
+;*** Destination-operand specials. edi -> destination; cl/ch = source tag/sign. These routines modify the destination in place and ret.
+DivrSpclDest:
+	mov	eax,EMSEG:[edi].ExpSgn	;Pick up tag (al = tag, ah = sign)
+	cmp	al,bTAG_INF
+	jnz	SpclDest
+;Division by infinity always returns zero
+	xor	ch,ah			;Result sign = xor of the two signs
+	jmp	SignedZero		;in emfmul.asm
+
+;***
+MulSpclDest:
+	mov	al,EMSEG:[edi].bTag	;Pick up tag
+	cmp	al,bTAG_INF
+	jnz	SpclDest
+	cmp	cl,bTAG_ZERO		;Infinity * zero?
+	jz	ReturnIndefinite
+XorDestSign:
+	xor	EMSEG:[edi].bSgn,ch	;Xor signs
+	ret
+
+;***
+AddSpclDest:
+	mov	al,EMSEG:[edi].bTag	;Pick up tag
+	cmp	al,bTAG_INF
+	jnz	SpclDest
+	xor	EMSEG:[edi].bSgn,dh	;Flip sign of infinity if subtracting
+	ret
+
+DenormalDest:
+	mov	ah,bTAG_VALID		;Change denormal to DOUBLE
+	mov	EMSEG:[CURerr],Denormal	;NOTE(review): plain store, not OR - overwrites whatever CURerr held; confirm intended
+	test	EMSEG:[CWmask],Denormal	;Is denormal exception masked?
+	jnz	TwoOpDispAh		;Masked - dispatch again with ah = valid tag
+	mov	cl,bTAG_NOPOP		;Unmasked, don't pop stack
+	ret
+
+;***
+DivSpclDest:
+	mov	al,EMSEG:[edi].bTag	;Pick up tag
+	cmp	al,bTAG_INF
+	jz	XorDestSign		;Return infinity
+SpclDest:
+	cmp	al,bTAG_DEN
+	jz	DenormalDest
+SpclDestNotDen:
+	cmp	al,bTAG_EMPTY
+	jz	StackError
+;Must be a NAN
+DestNAN:
+	test	EMSEG:[edi].bMan7,40H	;Check for SNAN
+	jnz	ReturnDest		;If QNAN, just use it as result
+DestSNAN:
+	or	EMSEG:[CURerr],Invalid	;Flag the error
+	test	EMSEG:[CWmask],Invalid	;Is it masked?
+	jz	AbortOp			;No - preserve value
+	or	EMSEG:[edi].bMan7,40H	;Make it into a QNAN
+	ret
+
+StackError:
+	mov	EMSEG:[CURerr],Invalid+StackFlag	;Falls into ReturnIndefinite
+ReturnIndefinite:
+	or 	EMSEG:[CURerr],Invalid
+	test	EMSEG:[CWmask],Invalid	;Is it masked?
+	jz	AbortOp			;No - preserve value
+	mov	EMSEG:[edi].lManLo,0	;Masked - store the Indefinite NAN at edi
+	mov	EMSEG:[edi].lManHi,0C0000000H
+	mov	EMSEG:[edi].ExpSgn,TexpMax shl 16 + bSign shl 8 + bTAG_NAN
+ReturnDest:
+	ret
+
+
+AddTwoInf:
+;Adding two infinities.  ch = source sign, ah = dest sign, dl/dh = sign flips applied when subtracting.
+;If signs are the same, return that infinity.  Otherwise, Invalid Operation.
+	xor	ch,dl			;Possibly subtracting source
+	xor	ah,dh			;Possibly subtracting dest
+	xor	ch,ah			;Compare signs
+	js	ReturnIndefinite	;Signs differ
+	mov	EMSEG:[edi].bSgn,ah	;Correct the sign if subtracting
+	ret
+
+;*** Both operands special. cl = source tag, ebx:esi = source mantissa, edi -> destination.
+TwoOpBothSpcl:
+;ebp = dispatch table address
+	mov	al,EMSEG:[edi].bTag	;al = destination tag
+	mov	ah,cl			;ah = source tag
+	cmp	ax,(bTAG_NAN shl 8) + bTAG_NAN	;Are both NAN?
+	jz	TwoNANs
+	cmp	cl,bTAG_EMPTY
+	jz	StackError
+	cmp	al,bTAG_EMPTY
+	jz	StackError
+	cmp	cl,bTAG_NAN
+	jz	SourceNAN
+	cmp	al,bTAG_NAN
+	jz	DestNAN
+	cmp	ax,(bTAG_INF shl 8) + bTAG_INF	;Are both infinity?
+	jz	TwoInfs
+;At least one of the operands is a denormal
+	mov	EMSEG:[CURerr],Denormal
+	test	EMSEG:[CWmask],Denormal	;Is denormal exception masked?
+	jz	AbortOp			;If not, don't do operation
+;Denormal exception is masked, treat denormals as VALID
+;Dispatch through operation table in ebp again
+	cmp	ax,(bTAG_DEN shl 8) + bTAG_DEN	;Are both denormal?
+	jz	DenormalDisp
+;Have an infinity and a denormal
+	cmp	al,bTAG_INF		;al is the destination tag
+	jz	DestInf
+;NOTE(review): reached when dest is NOT infinity, so the comment below looks swapped with DestInf's - confirm
+	jmp	dword ptr [ebp+4*(TAG_SPCL + TAG_VALID shl TAG_SHIFT)]
+;NOTE(review): the three jumps through [ebp] in this block omit the cs: override that TwoOpDispatch and DenormalDisp use
+DestInf:
+;Source is infinity, Dest is denormal
+	jmp	dword ptr [ebp+4*(TAG_VALID + TAG_SPCL shl TAG_SHIFT)]
+
+TwoNANs:
+;Two NANs. Use largest mantissa
+	cmp	ebx,EMSEG:[edi].lManHi
+	ja	BiggerNAN
+	jb	DestBigger
+;Now we know they're both the same type, SNAN or QNAN
+	cmp	esi,EMSEG:[edi].lManLo
+	ja	SourceNAN
+;UNDONE: Old code did funny business with signs when mantissas were equal
+	jmp	DestNAN
+
+BiggerNAN:
+	test	EMSEG:[edi].bMan7,40H		;Is smaller one SNAN?
+	jz	SourceSNAN
+	jmp	SourceNAN
+
+DestBigger:
+	test	ebx,1 shl 30		;Is smaller one SNAN?  Quiet bit of lManHi is bit 30 (was 40H, i.e. bit 6)
+	jz	DestSNAN
+	jmp	DestNAN
+
+TwoInfs:
+        mov     ah,EMSEG:[edi].bSgn	;ah = destination sign for the two-infinity routine
+	jmp	dword ptr [ebp+4*16]	;Go do code for two infinities
+
+
+;*** Divide-by-zero handling. ch = sign that is combined with the sign of the operand at edi.
+DivideByMinusZero:
+	mov	ch,bSign		;Force the sign byte to negative, then fall into DivideByZero
+;***
+DivideByZero:
+	or	EMSEG:[CURerr],ZeroDivide
+	test	EMSEG:[CWmask],ZeroDivide	;Is exception masked?
+	jz	AbortOp			;No - preserve value
+;Set up a signed infinity
+	xor	ch,EMSEG:[edi].bSgn		;Get result sign
+	and	ecx,1 shl 15		;Keep only sign bit
+	or	ecx,(4000H+TexpBias) shl 16 + bTAG_INF	;ExpSgn of infinity
+	mov	ebx,1 shl 31		;Mantissa high dword: top bit only
+	xor	esi,esi			;Mantissa low dword = 0
+	jmp	SaveResultEdi
diff --git a/private/ntos/dll/i386/emdecode.asm b/private/ntos/dll/i386/emdecode.asm
new file mode 100644
index 000000000..242a8aa69
--- /dev/null
+++ b/private/ntos/dll/i386/emdecode.asm
@@ -0,0 +1,39 @@
+	subttl	emdecode.asm - Instruction decoding
+	page
+;***
+;emdecode.asm - Instruction decoding
+;
+;	 Microsoft Confidential
+;
+;	 Copyright (c) Microsoft Corporation 1991
+;
+;	 All Rights Reserved
+;
+;Purpose:
+;	Further decoding of instructions done here.
+;
+;Revision History:
+;
+;    8/23/91  TP    Rewritten for 32 bits
+;
+;*******************************************************************************
+
+;On entry, eax = r/m bits * 4.  This is used to jump directly to the
+;correct instruction within the group.
+
+GroupFCHS:				;eax = r/m * 4 selects the entry within the group table
+	jmp	dword ptr [tGroupFCHSdisp+eax]
+
+GroupFLD1:				;constant-load group
+	jmp	dword ptr [tGroupFLD1disp+eax]
+
+GroupF2XM1:				;group whose table starts with eF2XM1
+	jmp	dword ptr [tGroupF2XM1disp+eax]
+
+GroupFPREM:				;group whose table starts with eFPREM
+	jmp	dword ptr [tGroupFPREMdisp+eax]
+
+GroupFENI:				;group whose table starts with eFENI
+	jmp	dword ptr [tGroupFENIdisp+eax]
+
+
diff --git a/private/ntos/dll/i386/emdisp.asm b/private/ntos/dll/i386/emdisp.asm
new file mode 100644
index 000000000..7e7402f66
--- /dev/null
+++ b/private/ntos/dll/i386/emdisp.asm
@@ -0,0 +1,298 @@
+	subttl	emdisp.asm - Emulator Dispatch Tables
+	page
+;
+;	 IBM/Microsoft Confidential
+;
+;	 Copyright (c) IBM Corporation 1987, 1989
+;	 Copyright (c) Microsoft Corporation 1987, 1989
+;
+;	 All Rights Reserved
+;
+;Revision History:  (also see emulator.hst)
+;
+;    1/21/92  JWM   Minor modifications for DOSX32 emulator
+;    8/23/91  TP    Direct dispatch off of 6-bit opcode
+;   10/30/89  WAJ   Added this header.
+;
+;*******************************************************************************
+
+;*********************************************************************;
+;								      ;
+;		Dispatch Tables 				      ;
+;								      ;
+;*********************************************************************;
+
+
+;   These tables are based upon the layout of the 8087 instructions
+;
+;      8087 instruction fields:   |escape|MF|Arith|MOD|Op|r/m|disp1|disp2|
+;	  field length in bits:       5    2   1    2	3   3	8     8
+;
+;   Disp1 and Disp2  are optional address bytes present only if MOD <> 11.
+;   When (MOD <> 11) r/m describes which regs (SI,DI,BX,BP) are added to
+;	Disp1 and Disp2 to calculate the effective address. This form
+;	(memory format) is used for Loads, Stores, Compares, and Arithmetic
+;   When using memory format MF determines the Type of the Memory operand
+;	i.e. Single Real, Double real, Single Integer, or Double Integer
+;   Arith is 0 for Arithmetic operations (and compares), set to 1 otherwise
+;   Op mostly determines which type of operation to do though when not in
+;	memory format some of that is coded into MF and r/m
+;   All of the tables are set up to do a jump based upon one or more of the
+;	above fields. The outline for decoding instructions is:
+;
+;	    IF (memory format) THEN
+;	       Assemble Effective Address (using MOD and r/m and EffectiveAddressTab)
+;	       Jump through table to operation, using MF, Arith and Op bits
+;	    ELSE (Register format)
+;	       Jump through table to operation, using MF, Arith and Op bits
+
+	ALIGN	4
+
+;*********************************************************************;
+;
+; Memory address calculation tables
+
+EA386Tab	label	dword			; Uses |r/m|MOD+1| for indexing: four entries per r/m value, first of each four is NoEffectiveAddress
+	dd	NoEffectiveAddress
+	dd	Exx00			; eax
+	dd	Exx01
+	dd	Exx10
+	dd	NoEffectiveAddress
+	dd	Exx00			; ecx
+	dd	Exx01
+	dd	Exx10
+	dd	NoEffectiveAddress
+	dd	Exx00			; edx
+	dd	Exx01
+	dd	Exx10
+	dd	NoEffectiveAddress
+	dd	Exx00			; ebx
+	dd	Exx01
+	dd	Exx10
+	dd	NoEffectiveAddress
+	dd	SIB00			; esp (S-I-B follows)
+	dd	SIB01
+	dd	SIB10
+	dd	NoEffectiveAddress
+	dd	Direct386		; ebp (00 = direct addressing)
+	dd	Exx01
+	dd	Exx10
+	dd	NoEffectiveAddress
+	dd	Exx00			; esi
+	dd	Exx01
+	dd	Exx10
+	dd	NoEffectiveAddress
+	dd	Exx00			; edi
+	dd	Exx01
+	dd	Exx10
+
+;*********************************************************************;
+;
+;Opcode dispatching tables
+;Indexed by  | op1 | op2 |0 0|  (op1 = MF|Arith)
+
+	public	tOpRegDisp
+tOpRegDisp	label	dword		;Register-format dispatch: eight entries (op2 = 0..7) per op1 value, op1 = 0 first
+	dd	eFADDtop
+	dd	eFMULtop
+	dd	eFCOM
+	dd	eFCOMP
+	dd	eFSUBtop
+	dd	eFSUBRtop
+	dd	eFDIVtop
+	dd	eFDIVRtop
+;op1 = 1
+	dd	eFLDreg
+	dd	eFXCH
+	dd	eFNOP		;UNDONE: also reserved on 387
+	dd	eFSTP		;Special form 1
+	dd	GroupFCHS	;FCHS,FABS,FTST,FXAM
+	dd	GroupFLD1	;FLD1,FLDL2T,FLDL2E,FLDPI,FLDLG2,FLDLN2,FLDZ
+	dd	GroupF2XM1	;F2XM1,FYL2X,FPTAN,FPATAN,FXTRACT,FPREM1,FDECSTP,FINCSTP
+	dd	GroupFPREM	;FPREM,FYL2XP1,FSQRT,FSINCOS,FRNDINT,FSCALE,FSIN,FCOS
+;op1 = 2
+	dd	UNUSED
+	dd	UNUSED
+	dd	UNUSED
+	dd	UNUSED
+	dd	UNUSED
+	dd	eFUCOMPP	;UNDONE: also reserved on 387
+	dd	UNUSED
+	dd	UNUSED
+;op1 = 3
+	dd	UNUSED
+	dd	UNUSED
+	dd	UNUSED
+	dd	UNUSED
+	dd	GroupFENI	;FENI,FDISI,FCLEX,FINIT (the group table also lists eFSETPM)
+	dd	UNUSED
+	dd	UNUSED
+	dd	UNUSED
+;op1 = 4
+	dd	eFADDreg
+	dd	eFMULreg
+	dd	eFCOM		;Special form  2
+	dd	eFCOMP		;Special form  3
+	dd	eFSUBRreg
+	dd	eFSUBreg
+	dd	eFDIVRreg
+	dd	eFDIVreg
+;op1 = 5
+	dd	eFFREE
+	dd	eFXCH		;Special form 4
+	dd	eFST
+	dd	eFSTP
+	dd	eFUCOM
+	dd	eFUCOMP
+	dd	UNUSED
+	dd	UNUSED
+;op1 = 6
+	dd	eFADDPreg
+	dd	eFMULPreg
+	dd	eFCOMP		;Special form 5
+	dd	eFCOMPP		;UNDONE: also reserved on 387
+	dd	eFSUBRPreg
+	dd	eFSUBPreg
+	dd	eFDIVRPreg
+	dd	eFDIVPreg
+;op1 = 7
+	dd	eFFREE		;Special form 6 UNDONE: "and pop stack"?
+	dd	eFXCH		;Special form 7
+	dd	eFSTP		;Special form 8
+	dd	eFSTP		;Special form 9
+	dd	eFSTSWax	;UNDONE: also reserved on 387
+	dd	UNUSED
+	dd	UNUSED
+	dd	UNUSED
+
+
+tOpMemDisp	label	dword		;Memory-format dispatch: eight entries per (MF,Arith) pair, in the order listed below
+;MF = 00 (32-bit Real), Arith = 0
+	dd	eFADD32
+	dd	eFMUL32
+	dd	eFCOM32
+	dd	eFCOMP32
+	dd	eFSUB32
+	dd	eFSUBR32
+	dd	eFDIV32
+	dd	eFDIVR32
+;MF = 00 (32-bit Real), Arith = 1
+	dd	eFLD32
+	dd	UNUSED
+	dd	eFST32
+	dd	eFSTP32
+	dd	eFLDENV
+	dd	eFLDCW
+	dd	eFSTENV
+	dd	eFSTCW
+;MF = 01 (32-bit Int), Arith = 0
+	dd	eFIADD32
+	dd	eFIMUL32
+	dd	eFICOM32
+	dd	eFICOMP32
+	dd	eFISUB32
+	dd	eFISUBR32
+	dd	eFIDIV32
+	dd	eFIDIVR32
+;MF = 01 (32-bit Int), Arith = 1
+	dd	eFILD32
+	dd	UNUSED
+	dd	eFIST32
+	dd	eFISTP32
+	dd	UNUSED
+	dd	eFLD80		;80-bit load shares this group with the 32-bit integer forms
+	dd	UNUSED
+	dd	eFSTP80		;80-bit store-and-pop
+;MF = 10 (64-bit Real), Arith = 0
+	dd	eFADD64
+	dd	eFMUL64
+	dd	eFCOM64
+	dd	eFCOMP64
+	dd	eFSUB64
+	dd	eFSUBR64
+	dd	eFDIV64
+	dd	eFDIVR64
+;MF = 10 (64-bit Real), Arith = 1
+	dd	eFLD64
+	dd	UNUSED
+	dd	eFST64
+	dd	eFSTP64
+	dd	eFRSTOR
+	dd	UNUSED
+	dd	eFSAVE
+	dd	eFSTSW
+;MF = 11 (16-bit Int), Arith = 0
+	dd	eFIADD16
+	dd	eFIMUL16
+	dd	eFICOM16
+	dd	eFICOMP16
+	dd	eFISUB16
+	dd	eFISUBR16
+	dd	eFIDIV16
+	dd	eFIDIVR16
+;MF = 11 (16-bit Int), Arith = 1
+	dd	eFILD16
+	dd	UNUSED
+	dd	eFIST16
+	dd	eFISTP16
+	dd	eFBLD
+	dd	eFILD64		;64-bit integer load shares this group with the 16-bit forms
+	dd	eFBSTP
+	dd	eFISTP64	;64-bit integer store-and-pop
+
+
+tGroupFLD1disp	label	dword		;Jumped through by GroupFLD1 in emdecode.asm with eax = r/m bits * 4
+	dd	eFLD1
+	dd	eFLDL2T
+	dd	eFLDL2E
+	dd	eFLDPI
+	dd	eFLDLG2
+	dd	eFLDLN2
+	dd	eFLDZ
+	dd	UNUSED
+
+
+tGroupF2XM1disp	label	dword		;Jumped through by GroupF2XM1 in emdecode.asm with eax = r/m bits * 4
+	dd	eF2XM1
+	dd	eFYL2X
+	dd	eFPTAN
+	dd	eFPATAN
+	dd	eFXTRACT
+	dd	eFPREM1
+	dd	eFDECSTP
+	dd	eFINCSTP
+
+
+tGroupFCHSdisp	label	dword		;Jumped through by GroupFCHS in emdecode.asm with eax = r/m bits * 4
+	dd	eFCHS
+	dd	eFABS
+	dd	UNUSED
+	dd	UNUSED
+	dd	eFTST
+	dd	eFXAM
+	dd	UNUSED
+	dd	UNUSED
+
+
+tGroupFPREMdisp	label	dword		;Jumped through by GroupFPREM in emdecode.asm with eax = r/m bits * 4
+	dd	eFPREM
+	dd	eFYL2XP1
+	dd	eFSQRT
+	dd	eFSINCOS
+	dd	eFRNDINT
+	dd	eFSCALE
+	dd	eFSIN
+	dd	eFCOS
+
+
+tGroupFENIdisp	label	dword		;Jumped through by GroupFENI in emdecode.asm with eax = r/m bits * 4
+	dd	eFENI
+	dd	eFDISI
+	dd	eFCLEX
+	dd	eFINIT
+	dd	eFSETPM		;Fifth entry; not named in the GroupFENI comment in tOpRegDisp
+	dd	UNUSED
+	dd	UNUSED
+	dd	UNUSED
+
+
diff --git a/private/ntos/dll/i386/emerror.asm b/private/ntos/dll/i386/emerror.asm
new file mode 100644
index 000000000..3fc135a06
--- /dev/null
+++ b/private/ntos/dll/i386/emerror.asm
@@ -0,0 +1,469 @@
+        page    ,132
+        subttl  emerror.asm - Emulator error handler
+;***
+;emerror.asm - Emulator error handler
+;
+;        Microsoft Confidential
+;
+;        Copyright (c) Microsoft Corporation 1987, 1991
+;
+;        All Rights Reserved
+;
+;Purpose:
+;       Emulator error handler
+;
+;Revision History:  (also see emulator.hst)
+;
+;   10/30/89  WAJ   Added this header.
+;   11/15/89  WAJ   Major changes for Dos32RaiseExcpetion().
+;   12/01/89  WAJ   Now set cbExceptionInfo correctly.
+;   02/08/90  WAJ   Fixed GP fault in 32 bit exception handler.
+;   09/03/91  JWM   Modified entry/exit sequence for DOSX32.
+;   02/15/92  JWM   Adapted for NT.
+;
+;*******************************************************************************
+
+ifdef   _DOS32EXT
+include except32.inc
+endif
+
+;***    error_return - return to user code (regardless of error)
+;
+;       This macro returns to user code.  It goes to some lengths
+;       to restore the flags on the instruction immediately before
+;       the return so that any pending trace trap will be
+;       acknowledged immediately after the retfd (and before the
+;       next user instruction) instead of after the instruction
+;       following the return as would be the case if we returned
+;       using iretd.
+;
+;       ENTRY   ((SS:ESP)) = user's EAX
+;               ((SS:ESP)+4) = return EIP
+;               ((SS:ESP)+8) = return CS
+;               ((SS:ESP)+12) = user's EFLAGS
+;       EXIT    return to user program, above arguments
+;               popped off stack, user's EAX and EFLAGS
+;               restored.
+
+error_return    macro   noerror                 ; 'noerror' is accepted but not referenced in the macro body
+ifdef   _DOS32EXT
+        sti                                     ; JWM, 9/3/91
+        push    dword ptr [esp+8]               ; JWM, 9/6/91 - copy the dword at esp+8 (the flags slot of the frame iretd pops)
+        popfd                                   ; JWM, 9/6/91 - ... into EFLAGS before returning
+endif                                           ; DOS32EXT
+; NOTE(review): the header comment talks about retfd and a saved EAX at (SS:ESP); the body ends in iretd - confirm which is current.
+ifdef NT386
+if DBG
+        push    dword ptr [esp+8]               ; On checked build, allow
+        popfd                                   ; single step to work
+endif
+endif
+        iretd                                   ; return through the frame at esp
+        endm
+
+
+TESTif  macro   nam                             ; test status bit 'nam' in ax; bl receives the matching err<nam> number
+        mov     bl,err&nam      ; default error number
+   if (nam ge 100h)                             ; bit lives in the high byte of ax
+        test    ah,nam/256
+   else ;not (nam ge 100h)
+        test    al,nam
+   endif ;(nam ge 100h)
+        JSNZ    signalerror                     ; JSNZ is defined elsewhere; presumably a jump-if-nonzero to signalerror - confirm
+        endm
+
+EM_ENTRY eCommonExceptions
+CommonExceptions:                               ; merge saved status bits, unwind the saved-register frame, call Emexcept, return to caller
+        mov     ebx,[esp].[OldLongStatus]
+        and		ebx,LongSavedFlags		;preserve condition codes, error flags
+        or		EMSEG:[LongStatusWord],ebx					;merge saved status word, condition codes
+        pop     eax                     ; restore saved registers in the order they sit on the stack
+        pop     ecx
+        pop     edx
+        pop     ebx
+        add     esp,4                   ; toss esp value
+        pop     ebp
+        pop     esi
+        pop     edi
+        add     esp,8                   ;toss old PrevCodeOff and StatusWord
+        pop     ds
+        call    Emexcept                ; EmExcept variant is selected by _DOS32EXT / NT386 below
+        error_return    noerror
+
+ifdef _DOS32EXT
+; EmExcept (DOS32EXT): build a _DX32_CONTEXT from the trapped state, call DOS32RAISEEXCEPTION, resume from the context if handled.
+EmExcept PROC C, OldEIP:DWORD, OldCS:DWORD, OldFlags:DWORD
+
+LOCAL   SSAR:DWORD
+LOCAL   ec:_DX32_CONTEXT
+
+    ;*
+    ;*  Set up SS access rights.
+    ;*
+
+        push    ds                      ; caller's ds; popped into eax further down to fill SegDs
+        mov     [ec.R_Eax], eax         ; save eax first, it is used as scratch from here on
+        GetEmData   ds,ax
+
+        mov     eax, ss
+        lar     eax, eax
+        mov     [SSAR], eax
+
+    ;*
+    ;*  Fill in ExceptionContext structure.
+    ;*
+
+
+        mov     [ec.NPXContextFlags], NPX_CONTEXT_FULL
+        mov     [ec.R_Edi], edi
+        mov     [ec.R_Esi], esi
+
+        mov     eax, [ebp]              ; dword at [ebp] is recorded as the trapped EBP
+        mov     [ec.R_Ebp], eax
+
+        lea     eax, [OldFlags+4]       ; address just above the OldFlags argument is recorded as ESP
+        mov     [ec.R_Esp], eax
+
+        mov     [ec.R_Ebx], ebx
+        mov     [ec.R_Edx], edx
+        mov     [ec.R_Ecx], ecx
+
+        mov     eax, EMSEG:[PrevCodeOff]
+
+        mov     [ec.R_Eip], eax         ; EIP comes from PrevCodeOff, not from the OldEIP argument
+        mov     eax, [OldFlags]
+        mov     [ec.EFlags], eax
+
+        mov     eax, [OldCS]
+        movzx   eax,ax
+        mov     [ec.SegCs], eax
+        mov     ax,ss
+        movzx   eax,ax
+        mov     [ec.SegSs], eax
+
+        pop     eax
+        movzx   eax,ax
+        mov     [ec.SegDs], eax         ; ds was pushed on entry.
+
+        mov     ax,es
+        movzx   eax,ax
+        mov     [ec.SegEs], eax
+
+        mov     ax,fs
+        movzx   eax,ax
+        mov     [ec.SegFs], eax
+
+        mov     ax,gs
+        movzx   eax,ax
+        mov     [ec.SegGs], eax
+
+        lea     esi, [ec]
+        add     esi, 4                  ; esi = ec + 4, passed to SaveState
+
+        push    ebp                     ; ebp is preserved around SaveState
+        call    SaveState
+        pop     ebp
+
+        lea     eax, [ec]
+        push    ds
+        push    es
+
+        mov     bx, seg FLAT:CURstk
+        mov     ds, ebx
+        mov     es, ebx
+        push    eax                     ; argument: address of the context record
+
+        call    DOS32RAISEEXCEPTION
+
+        add     esp, 4                  ; discard the argument
+
+        pop     es
+        pop     ds
+
+RaiseExceptRet:
+        or      eax, eax
+        JZ      ExceptNotHandled        ; zero return: not handled
+
+    ;*
+    ;* Copy new flags, cs, eip to new stack.
+    ;*
+
+        mov     ds, [ec.SegSs]
+        mov     esi, [ec.R_Esp]     ; ds:esi == new ss:esp
+
+        mov     eax, [ec.Eflags]            ; set up iretd frame
+        mov     [esi-4], eax
+
+        mov     eax, [ec.SegCs]
+        mov     [esi-8], eax
+
+        mov     eax, [ec.R_Eip]
+        mov     [esi-12], eax
+
+    ;*
+    ;*  Put new stack pointer on stack.
+    ;*
+
+        push    ds
+        sub     esi, 12
+        push    esi
+
+    ;*
+    ;*  Reset other registers.
+    ;*
+
+        mov     edi, [ec.R_Edi]
+        mov     esi, [ec.R_Esi]
+        mov     ebx, [ec.R_Ebx]
+        mov     edx, [ec.R_Edx]
+        mov     ecx, [ec.R_Ecx]
+        mov     eax, [ec.R_Eax]
+        mov     ds, [ec.SegDs]
+        mov     es, [ec.SegEs]
+        mov     fs, [ec.SegFs]
+        mov     gs, [ec.SegGs]
+
+        mov     ebp, [ec.R_Ebp]    ; must do this last.
+
+        lss     esp, fword ptr [esp] ; reset ss:esp
+
+        sti                             ; JWM, 9/3/91
+        push    dword ptr [esp+8]       ; JWM, 9/6/91 - explicit dword size, same as the sequence in error_return
+        popfd                           ; JWM, 9/6/91
+
+        iretd                       ; reset flags, cs, eip
+
+ExceptNotHandled:                   ; NOTE(review): no ret before ENDP; execution continues into whatever follows - confirm intended
+EmExcept        ENDP
+
+endif                   ; ifdef _DOS32EXT
+
+ifdef NT386
+
+ISIZE                   equ     4               ; rounding granularity for the frame sizes below
+ISizeEC                 equ     (ContextFrameLength + ISIZE - 1) and (not (ISIZE - 1))      ; context frame size rounded up to ISIZE
+ISizeExceptStruct       equ     (ExceptionRecordLength + ISIZE - 1) and (not (ISIZE - 1))   ; exception record size rounded up to ISIZE
+; Frame layout below ebp: SSAR at -4, then the context record, then the exception record.
+ec_off          EQU     4+ISizeEc               ; distance from ebp down to the start of the context record
+estruct_off     EQU     ec_off+ISizeExceptStruct ; distance from ebp down to the exception record; also the size reserved by sub esp
+
+SSAR            EQU     <[ebp][-4]>
+ec              EQU     <[ebp][-ec_off]>
+eStruct         EQU     <[ebp][-estruct_off]>
+; Items above the saved ebp: return address at ebp+4, then the three dwords named below.
+OldEIP          EQU     <ebp+8>
+OldCS           EQU     <ebp+12>
+OldFlags        EQU     <ebp+16>
+
+
+EmExcept PROC   NEAR
+; Builds a context frame and an exception record on the stack, raises the NPX exception, then resumes from the saved context.
+        push    ebp
+        mov     ebp,esp                         ; ebp anchors SSAR/ec/eStruct and the Old* slots
+        sub     esp,estruct_off                 ; reserve SSAR + context frame + exception record
+
+
+    ;*
+    ;*  Set up SS access rights.
+    ;*
+
+        push    ds                              ; popped into ctx_SegDs further down
+        mov     [ec.ctx_RegEax], eax            ; capture eax before it becomes scratch
+        GetEmData   ds,ax
+
+        mov     eax, ss
+        lar     eax, eax                        ; eax = access rights of the SS selector
+        mov     [SSAR], eax
+
+    ;*
+    ;*  Fill in ExceptionContext structure.
+    ;*
+
+
+        mov     dword ptr [ec.ContextFlags], NPX_CONTEXT_FULL
+        mov     dword ptr [ec.ctx_Cr0NpxState], CR0_EM
+        mov     [ec.ctx_RegEdi], edi
+        mov     [ec.ctx_RegEsi], esi
+
+        mov     eax, [ebp]                      ; ebp value pushed on entry
+        mov     [ec.ctx_RegEbp], eax
+
+        lea     eax, [OldFlags+4]               ; address just above the saved EFLAGS slot
+        mov     [ec.ctx_RegEsp], eax
+
+        mov     [ec.ctx_RegEbx], ebx
+        mov     [ec.ctx_RegEdx], edx
+        mov     [ec.ctx_RegEcx], ecx
+
+        mov     eax, [OldEIP]
+
+        mov     [ec.ctx_RegEip], eax
+        mov     eax, [OldFlags]
+        mov     [ec.ctx_EFlags], eax
+
+        mov     eax, [OldCS]
+        movzx   eax,ax                          ; keep only the 16-bit selector
+        mov     [ec.ctx_SegCs], eax
+        mov     ax,ss
+        movzx   eax,ax
+        mov     [ec.ctx_SegSs], eax
+
+        pop     eax                             ; the ds value pushed after the frame was reserved
+        movzx   eax,ax
+        mov     [ec.ctx_SegDs], eax             ; ds was pushed on entry.
+
+        mov     ax,es
+        movzx   eax,ax
+        mov     [ec.ctx_SegEs], eax
+
+        mov     ax,fs
+        movzx   eax,ax
+        mov     [ec.ctx_SegFs], eax
+
+        mov     ax,gs
+        movzx   eax,ax
+        mov     [ec.ctx_SegGs], eax
+
+        lea     esi, [ec]
+        add     esi, ctx_env                    ; esi -> ctx_env part of the context frame (presumably the SaveState target - confirm)
+
+        or      EMSEG:[StatusWord], 8000H		; set 'busy' bit
+        or      EMSEG:[SWerr], Summary                  ; set Summary bit
+        or      EMSEG:[CURerr], Summary
+
+        mov     cl, EMSEG:[ErrMask]             ; cl = ErrMask, used by the test after SaveState
+        push    ecx                             ; ecx and ebp are kept across SaveState
+        push    ebp
+        call    SaveState
+        pop     ebp
+        pop     ecx
+
+        call    GetEMSEGStatusWord                      ; EAX = status word
+        test    al, cl                          ; test status word against mask
+        jne     short Err00                     ; a bit is set in both: classify it
+
+ifdef TRACENPX
+        mov     edx, 0C1020304h                 ; Raise bogus exception code, to trace with
+        jmp     short Err50
+endif
+        mov     al, Invalid                     ; nothing matched: report an invalid operation
+
+;
+; According to the floating error priority, we test what is the cause of
+; the NPX error and raise an appropriate exception.
+;
+
+Err00:
+        test    al, Invalid                     ; Invalid Op?
+        jz      short Err10                     ; No, go check next
+
+        mov     edx, XCPT_FLOAT_INVALID_OPERATION
+        test    al, StackFlag                   ; Stack fault?
+        jz      short Err50                     ; No, go raise invalid op
+        mov     edx, XCPT_FLOAT_STACK_CHECK
+        jmp     short Err50                     ; Go raise stack fault
+
+Err10:  mov     edx, XCPT_FLOAT_DIVIDE_BY_ZERO
+        test    al, ZeroDivide
+        jnz     short Err50
+        mov     edx, XCPT_FLOAT_DENORMAL_OPERAND
+        test    al, Denormal
+        jnz     short Err50
+        mov     edx, XCPT_FLOAT_OVERFLOW
+        test    al, Overflow
+        jnz     short Err50
+        mov     edx, XCPT_FLOAT_UNDERFLOW
+        test    al, Underflow
+        jnz     short Err50
+        mov     edx, XCPT_FLOAT_INEXACT_RESULT  ; none of the above: fall back to inexact
+
+Err50:  mov     [eStruct.ExceptionNum], edx     ; edx = exception code selected above
+
+        xor     eax,eax
+        mov     [eStruct.fHandlerFlags], eax
+        mov     [eStruct.NestedExceptionReportRecord], eax
+        mov     dword ptr [eStruct.CParameters], 1      ; GeorgioP convention
+        mov     [eStruct.ErExceptionInformation], eax   ; GeorgioP convention
+
+        mov     eax, EMSEG:[PrevCodeOff]
+        mov     [eStruct.ExceptionAddress], eax ; exception address = PrevCodeOff
+
+        lea     edx, [eStruct]                  ; edx -> exception record
+
+        lea     eax, [ec]                       ; eax -> context frame
+        push    ds                              ; ds/es are restored after the calls below
+        push    es
+
+
+;TRUE, this is a first-chance exception
+
+        stdCall _NtRaiseException,<edx, eax, 1>
+        stdCall _RtlRaiseStatus, <eax>          ; reached only if NtRaiseException returns; passes its eax on
+
+        pop     es
+        pop     ds
+
+RaiseExceptRet:
+        or      eax, eax
+        JZ      ExceptNotHandled                ; eax == 0: skip the context restore
+
+    ;*
+    ;* Copy new flags, cs, eip to new stack.
+    ;*
+
+        mov     ds, [ec.ctx_SegSs]
+        mov     esi, [ec.ctx_RegEsp]        ; ds:esi == new ss:esp
+
+        mov     eax, [ec.ctx_Eflags]        ; set up iretd frame
+        mov     [esi-4], eax
+
+        mov     eax, [ec.ctx_SegCs]
+        mov     [esi-8], eax
+
+        mov     eax, [ec.ctx_RegEip]
+        mov     [esi-12], eax
+
+    ;*
+    ;*  Put new stack pointer on stack.
+    ;*
+
+        push    ds                              ; selector half of the ss:esp pair read by lss
+        sub     esi, 12                         ; esi = base of the iretd frame just written
+        push    esi                             ; offset half of the pair
+
+    ;*
+    ;*  Reset other registers.
+    ;*
+
+        mov     edi, [ec.ctx_RegEdi]
+        mov     esi, [ec.ctx_RegEsi]
+        mov     ebx, [ec.ctx_RegEbx]
+        mov     edx, [ec.ctx_RegEdx]
+        mov     ecx, [ec.ctx_RegEcx]
+        mov     eax, [ec.ctx_RegEax]
+        mov     ds, [ec.ctx_SegDs]
+        mov     es, [ec.ctx_SegEs]
+        mov     fs, [ec.ctx_SegFs]
+        mov     gs, [ec.ctx_SegGs]
+
+        mov     ebp, [ec.ctx_RegEbp]    ; must do this last.
+
+        lss     esp, fword ptr [esp] ; reset ss:esp
+
+        sti                             ; JWM, 9/3/91
+        push    [esp+8]                 ; JWM, 9/6/91 - copy of the EFLAGS slot of the iretd frame
+        popfd                           ; JWM, 9/6/91
+
+        iretd                       ; reset flags, cs, eip
+
+ExceptNotHandled:                       ; NOTE(review): no instruction between this label and ENDP - confirm what is meant to execute here
+EmExcept        ENDP
+
+endif                   ; ifdef NT386
+
+ifdef  DEBUG
+; Debug-only helper: loads the byte at cs:[iax] into al and returns (presumably used to touch a code page - confirm).
+lab PageFault
+        mov     al, byte ptr cs:[iax]
+        ret
+endif
diff --git a/private/ntos/dll/i386/emf386.asm b/private/ntos/dll/i386/emf386.asm
new file mode 100644
index 000000000..40e92abda
--- /dev/null
+++ b/private/ntos/dll/i386/emf386.asm
@@ -0,0 +1,552 @@
+        subttl  emf386.asm - 32 bit Emulator Interrupt Handler
+        page
+;***
+;emf386.asm - 32 bit Emulator Interrupt Handler
+;
+;        IBM/Microsoft Confidential
+;
+;        Copyright (c) IBM Corporation 1987, 1989
+;        Copyright (c) Microsoft Corporation 1987, 1989
+;
+;        All Rights Reserved
+;
+;Purpose:
+;       32 bit Emulator Interrupt Handler
+;
+;Revision History:  (also see emulator.hst)
+;
+;    1/21/92  JWM   Minor modifications for DOSX32 emulator
+;    8/23/91  TP    Reduce to only two decoding steps
+;
+;*******************************************************************************
+
+
+;*********************************************************************;
+;                                                                     ;
+;         Main Entry Point and Address Calculation Procedure          ;
+;                                                                     ;
+;               80386 version                                         ;
+;                                                                     ;
+;*********************************************************************;
+;
+; This routine fetches the 8087 instruction, calculates memory address
+; if necessary into ES:ESI and calls a routine to emulate the instruction.
+; Most of the dispatching is done through tables. (see comments in CONST)
+;
+; The instruction dispatching is designed to favor the 386 addressing modes
+
+
+ifdef _DOS32EXT                 ; JWM
+public __astart
+__astart:                               ; stub entry: returns 1 in eax
+        mov     eax, 1
+        ret
+
+public _Ms32KrnlHandler
+_Ms32KrnlHandler:                       ; has no code of its own; execution continues with whatever follows the endif
+endif
+
+ifdef   NT386
+
+;
+; _NPXEMULATORTABLE is a two-dword table read by the Windows/NT kernel in
+; order to support the R3 emulator
+;
+public _NPXEMULATORTABLE
+_NPXEMULATORTABLE   label   dword
+        dd      offset NpxNpHandler     ; Address of Ring3 Trap7 handler
+        dd      offset tRoundMode       ; Address of rounding vector table
+endif
+
+public NPXNPHandler
+NPXNPHandler:
+
+ifdef  DEBUG
+        int     3
+endif
+        cld                             ; clear direction flag forever
+
+ifdef NT386
+
+
+;-- BUGBUG - bryanwi - 16Oct91 - Hack FP fix, not pointing IDT:7 at this
+;   routine for 16bit code is the right thing to do.
+;
+;   Check to see if we are running on flat SS.  If so, assume things
+;   are OK and proceed.  (If a 16bit app loads the flat SS and then
+;   does an FP instruction, they're hosed, no skin off our nose.)
+;
+;   If SS is not what we expect, then either (a) a flat app is *very*
+;   confused, or (b) a 16 bit app has hit an FP instruction.  In either
+;   case, this emulator is not going to work.  Therefore, raise an exception.
+;
+
+        push    ax                      ; use form that will work with any SS
+        mov     ax,ss
+        or      ax,RPL_MASK             ; force the RPL bits on so the compare ignores them
+        cmp     ax,(KGDT_R3_DATA OR RPL_MASK)
+        pop     ax                      ; pop leaves the flags from cmp intact
+        jz      OK_Segment              ; Segments are OK, proceed normally.
+
+        jmp     Around                  ; hop over the data segment declared inline below
+
+_DATA   SEGMENT  DWORD USE32 PUBLIC 'DATA'
+
+    align 4
+
+EmerStk         db      1024 dup (?)                    ; *** SaveContext is assumed to be
+SaveContext     db  size ContextFrameLength  dup (?)    ; *** at the top of the EmerStk by
+SaveException   db  size ExceptionRecordLength dup (?)  ; *** the function @ 13f:0
+
+_DATA   ENDS
+
+Around:
+;
+;   Trap occurred in 16bit code, get to flat environment and raise exception
+;
+
+        push    eax                             ; save EAX on old stack
+        mov     ax, ds                          ; only the low word of eax is replaced
+        push    eax                             ; Save DS on old stack (high word is still the old eax high word)
+
+        mov     ax,(KGDT_R3_DATA OR RPL_MASK)
+        mov     ds,ax
+    ASSUME  DS:FLAT
+
+        pop     dword ptr [SaveContext.CsSegDs] ; remove ds  from old stack
+        pop     dword ptr [SaveContext.CsEax]   ; remove eax from old stack
+        pop     dword ptr [SaveContext.CsEip]   ; copy eip   from old stack
+        pop     dword ptr [SaveContext.CsSegCs] ; copy cs    from old stack
+        pop     dword ptr [SaveContext.CsEflags] ; copy eflag from old stack
+
+        push    dword ptr [SaveContext.CsEFlags] ; restore eflag to old stack
+        push    dword ptr [SaveContext.CsSegCs] ; restore cs    to old stack
+        push    dword ptr [SaveContext.CsEip]   ; restore eip   to old stack
+        mov     dword ptr [SaveContext.CsEsp], esp      ; CsEsp -> the eip/cs/eflags frame on the old stack
+
+;
+; Build rest of context frame
+;
+
+        mov     dword ptr [SaveContext.CsContextFlags],CONTEXT_CONTROL OR CONTEXT_SEGMENTS OR CONTEXT_INTEGER
+        mov     dword ptr [SaveContext.CsEbx], ebx
+        mov     dword ptr [SaveContext.CsEcx], ecx
+        mov     dword ptr [SaveContext.CsEdx], edx
+        mov     dword ptr [SaveContext.CsEsi], esi
+        mov     dword ptr [SaveContext.CsEdi], edi
+        mov     dword ptr [SaveContext.CsEbp], ebp
+        mov     dword ptr [SaveContext.CsSegEs], es
+        mov     dword ptr [SaveContext.CsSegFs], fs
+        mov     dword ptr [SaveContext.CsSegGs], gs
+        mov     dword ptr [SaveContext.CsSegSs], ss
+
+        mov     ss,ax                   ; Switch to new stack (ax still holds the flat data selector loaded into ds above)
+        mov     esp,(OFFSET FLAT:EmerStk) + 1024
+    ASSUME  SS:FLAT
+
+;
+;   ss: flat, esp -> EmerStk
+;
+
+        mov     ax,KGDT_R3_TEB OR RPL_MASK
+        mov     fs, ax
+        mov     ecx, fs:[TbVdm]
+        or      ecx, ecx
+        jne     short DoVdmFault                ; nonzero TbVdm: take the VDM path with ecx = that value
+
+        mov     ecx, offset SaveContext         ; (ecx) -> context record
+        mov     edx, offset SaveException       ; (edx) -> exception record
+
+        mov     dword ptr [edx.ErExceptionCode],STATUS_ILLEGAL_FLOAT_CONTEXT
+        mov     dword ptr [edx.ErExceptionFlags],0
+        mov     dword ptr [edx.ErExceptionRecord],0
+        mov     ebx, [ecx.CsEip]
+        mov     [edx.ErExceptionAddress],ebx    ; exception address = interrupted eip
+        mov     [edx.ErNumberParameters],0
+
+;
+;   ZwRaiseException(edx=ExceptionRecord, ecx=ContextRecord, TRUE=FirstChance)
+;
+
+        stdCall _ZwRaiseException, <edx, ecx, 1>
+
+;
+;   If we come back HERE, things are hosed.  We cannot bugcheck because
+;   we are in user space, so int-3 and loop forever instead.
+;
+
+Forever:
+        int     3
+        jmp     short Forever
+
+DoVdmFault:
+;
+; Does the VDM want the fault, or should the instruction be skipped
+; (entered with ecx = the nonzero value read from fs:[TbVdm])
+        test    ds:[ecx].VtVdmContext.CsFloatSave.FpCr0NpxState, CR0_EM
+        jz      short SkipNpxInstruction        ; CR0_EM clear: skip the instruction instead
+
+        add     dword ptr [SaveContext.CsEsp], 12   ; remove from old stack
+
+; jump to the dos extender NPX exception handler
+
+;       jmp     far ptr 013fh:0
+        db      0eah                    ; hand-assembled form of the far jmp shown above: opcode,
+        dd      0                       ; 32-bit offset,
+        dw      013fh                   ; selector
+
+SkipNpxInstruction:
+        mov     ax,(KGDT_R3_DATA OR RPL_MASK)
+        mov     es,ax                   ; es = flat data selector for the call below
+
+        stdCall _NpxNpSkipInstruction, <offset SaveContext>
+
+        mov     ebx, dword ptr [SaveContext.CsEbx]     ; reload registers from the saved context
+        mov     ecx, dword ptr [SaveContext.CsEcx]
+        mov     edx, dword ptr [SaveContext.CsEdx]
+        mov     edi, dword ptr [SaveContext.CsEdi]
+        mov     esi, dword ptr [SaveContext.CsEsi]
+        mov     ebp, dword ptr [SaveContext.CsEbp]
+        mov     gs,  dword ptr [SaveContext.CsSegGs]
+        mov     fs,  dword ptr [SaveContext.CsSegFs]
+        mov     es,  dword ptr [SaveContext.CsSegEs]
+
+        mov     eax, dword ptr [SaveContext.CsEsp]
+        mov     ss,  dword ptr [SaveContext.CsSegSs]  ; switch to original stack
+        mov     esp, eax
+
+        add     esp, 12                     ; remove eflag, cs, eip
+        push    dword ptr [SaveContext.CsEflags]       ; rebuild the iretd frame from the saved context
+        push    dword ptr [SaveContext.CsSegCs]
+        push    dword ptr [SaveContext.CsEip]
+        mov     eax, dword ptr [SaveContext.CsEax]
+        mov     ds,  dword ptr [SaveContext.CsSegDs]   ; ds goes last: SaveContext is addressed through it
+
+        iretd                               ; restore eflag, cs, eip
+
+OK_Segment:
+endif
+
+
+        push    ds                      ; save segment registers
+
+        GetEmData   ds
+
+        push    EMSEG:[LongStatusWord]  ;In case we're saving status
+        push    EMSEG:[PrevCodeOff]     ;In case we save environment
+;Save registers in order of their index number
+        push    edi
+        push    esi
+        push    ebp
+        push    esp
+        add     dword ptr [esp],regFlg-regESP   ; adjust to original esp
+        push    ebx
+        push    edx
+        push    ecx
+        push    eax                     ; eax (index 0) ends up at [esp]
+
+        cmp     EMSEG:[Einstall], 0     ; Make sure emulator is initialized.
+        je      InstalEm
+
+EmInstalled:
+        mov     edi,[esp].regEIP            ;edi = 387 instruction address
+        movzx   edx, word ptr cseg:[edi]    ;dx = esc and opcode
+
+; Check for unmasked errors
+        mov     al, EMSEG:[CURerr]      ; fetch errors
+        and     al, EMSEG:[ErrMask]
+        jnz     short PossibleException ; a bit survives the mask: see whether this instruction may still run
+
+; UNDONE: rip test for FWAIT in final version
+        cmp     dl, 9bh                 ;FWAIT?
+        je      sawFWAIT
+
+NoException:
+Execute387inst:
+;Enter here if look-ahead found another 387 instruction
+        mov     EMSEG:[PrevCodeOff],edi
+        mov     EMSEG:[CurErrCond],0    ;clear error and cond. codes, show busy
+        add     edi, 2                  ; point past opcode
+        
+;CONSIDER:  remove the two instruction below and muck with EA386Tab
+;CONSIDER:  to optimize for mem ops instead of reg ops.
+        add     dh,40h                  ; No effective address? (carry out only when the top two bits, mod, are 11b)
+        jc      NoEffectiveAddress0     ;  yes, go do instruction
+        rol     dh,2                    ; rotate MOD field next to r/m field
+        mov     bl,dh
+        and     ebx,1FH                 ; Mask to MOD and r/m fields
+MemModeDispatch:                        ;Label for debugging
+        jmp     EA386Tab[4*ebx]
+
+
+InstalEm:                               ; reached from the Einstall == 0 check; rejoins at EmInstalled
+        call    EmulatorInit
+        mov     edi,DefaultControlWord  ; Default mode to start in
+        mov     eax, edi                ; same value in eax for SetControlWord
+        call    SetControlWord          ; Set it
+        mov     EMSEG:[LongControlWord], edi    ; reset reserved bits
+        jmp     EmInstalled
+
+; ************************
+
+;
+; We are about to execute a new FP instruction and there is an
+; unmasked exception.  Check to see if the new FP instruction is
+; a "no wait" instruction.   If so, let it proceed; otherwise, raise
+; the exception.
+;
+
+PossibleException:                      ; in: dl = escape byte, dh = mod r/m byte, upper edx = 0
+        cmp     edx, 0E3DBh             ; if fninit, no exception
+        je      short NoException
+        cmp     edx, 0E2DBh             ; if fnclex, no exception
+        je      short NoException
+        cmp     edx, 0E0DFh             ; if "fnstsw ax", no exception
+        je      short NoException
+
+        cmp     dl, 0D9h                ; possible encoding for fnstenv or fnstcw?
+        je      short pe20              ; yes, check mod r/m
+        cmp     dl, 0DDh                ; possible encoding for fnsave or fnstsw?
+        jne     short pe30
+; Only the memory forms (mod <> 11b) of /6 and /7 are no-wait stores; D9 F0..FF are F2XM1, FSQRT, FSIN etc.
+pe20:   cmp     dh, 0C0h                ; mod = 11b (register form)?
+        jae     short pe30              ; yes, ordinary instruction: raise the pending exception
+        mov     bl, dh                  ; bl = op2
+        shr     bl, 3
+        and     bl, 7                   ; bl = reg/opcode field of mod r/m
+        cmp     bl, 6                   ; is it a 6 or 7?
+        jnc     short NoException       ; yes, no exception
+
+pe30:
+        jmp     CommonExceptions        ; unmasked exception is pending, raise it
+
+; ************************
+
+
+
+;       386 address modes
+
+;       SIB does not handle SS overrides for ebp
+
+SIB     macro   modval
+        local   SIBindex,SIBbase
+
+        movzx   ebx,byte ptr cseg:[edi] ; ebx = SIB field
+        inc     edi                     ; bump past SIB field
+        mov     eax,ebx
+        and     al,7                    ; mask down to base register
+
+if      modval eq 0
+        cmp     al,5                    ; base = ebp?
+        jne     short SIBbase           ;   no - get base register value
+        mov     eax,cseg:[edi]          ; eax = disp32
+        add     edi,4                   ; bump past displacement
+        SKIP    3,SIBindex
+endif
+
+SIBbase:
+        mov     eax,[esp+4*eax]         ; eax = base register value
+
+SIBindex:
+        mov     [esp].regESP,0          ; no esp indexing allowed
+        mov     cl,bl
+        shr     cl,6                    ; cl = scale factor
+        and     bl,7 shl 3              ; ebx = 8 * index register
+        shr     bl,1                    ; ebx = 4 * index register = offset of its saved value
+        mov     esi,[esp+1*ebx]         ; esi = index register value
+        shl     esi,cl                  ; esi = scaled index register value
+        add     esi,eax                 ; esi = SIB address value
+        endm
+
+
+        ALIGN   4
+
+SIB00:                                  ; SIB byte, no displacement after it
+        SIB     00                      ; decode SIB field
+        jmp     CommonMemory
+
+        ALIGN   4
+
+SIB01:                                  ; SIB byte followed by a signed 8-bit displacement
+        SIB     01                      ; decode SIB field
+        movsx   eax,byte ptr cseg:[edi]
+        inc     edi
+        add     esi,eax
+        jmp     short CommonMemory
+
+        ALIGN   4
+
+SIB10:                                  ; SIB byte followed by a 32-bit displacement
+        SIB     10                      ; decode SIB field
+        mov     eax,cseg:[edi]
+        add     edi,4
+        add     esi,eax
+        jmp     short CommonMemory
+
+
+;       386 single register addressing
+
+        ALIGN   4
+
+Exx00:                                  ; [reg], no displacement
+        and     bl,7 shl 2              ; mask off mod bits
+        mov     esi,[esp+1*ebx]         ; ebx = 4 * r/m: fetch that register from the saved-register frame
+        jmp     short CommonMemory
+
+        ALIGN   4
+
+Exx01:                                  ; [reg + signed 8-bit displacement]
+        and     bl,7 shl 2              ; mask off mod bits
+        mov     esi,[esp+1*ebx]
+        movsx   eax,byte ptr cseg:[edi]
+        inc     edi
+        add     esi,eax
+        jmp     short CommonMemory
+
+        ALIGN   4
+
+Exx10:                                  ; [reg + 32-bit displacement]
+        and     bl,7 shl 2              ; mask off mod bits
+        mov     esi,[esp+1*ebx]
+        add     esi,cseg:[edi]
+        add     edi,4
+        jmp     short CommonMemory
+
+
+;       386 direct addressing
+
+        ALIGN   4
+
+Direct386:                              ; address is the 32-bit value following the opcode bytes
+        mov     esi,cseg:[edi]
+        add     edi,4
+
+CommonMemory:
+        MOV     [esp].regEIP,edi        ; final return offset
+
+
+; At this point ESI = memory address, dx = |Op|r/m|MOD|escape|MF|Arith|
+; Current format of opcode and address mode bytes (after rol dh,2)
+;
+;  7 6 5 4 3 2 1 0
+; |1 1 0 1 1| op1 |   dl
+;
+;  7 6 5 4 3 2 1 0
+; | op2 | r/m |mod|   dh
+;
+;op1 and op2 fields together make the FP opcode
+
+        rol     dx,5                    ; dl = | op1 | op2 |? ?|
+        and     edx,0FCH                ;Keep only op1 & op2 bits
+        push    offset EMLFINISH        ;Handler returns to EMLFINISH
+        mov     edi,EMSEG:[CURstk]
+MemOpDisp:                              ;Debugging label
+;edi = [CURstk]
+        jmp     tOpMemDisp[edx]
+
+
+        ALIGN   4
+
+
+NoEffectiveAddress0:
+        rol     dh,2                    ; same rotation the memory path applies before dispatch
+NoEffectiveAddress:                     ; Either Register op or Miscellaneous
+        mov     [esp].regEIP,edi        ; final return offset
+
+;Current format of opcode and address mode bytes (after rol dh,2)
+;
+;  7 6 5 4 3 2 1 0
+; |1 1 0 1 1| op1 |   dl
+;
+;  7 6 5 4 3 2 1 0
+; | op2 | r/m |mod|   dh
+;
+;op1 and op2 fields together make the FP opcode
+
+        mov     al,dh                   ;Save r/m bits (contains reg. no.)
+        rol     dx,5                    ; dl = | op1 | op2 |? ?|
+        and     edx,0FCH                ;Keep only op1 & op2 bits
+        push    offset EMLFINISH        ;Handler returns to EMLFINISH
+        and     eax,7 shl 2             ;Mask to register number * 4
+        mov     edi,EMSEG:[CURstk]
+        lea     esi,[2*eax+eax]         ;Register no. * 12
+        add     esi,edi                 ;esi = CURstk + 12 * register no.
+        cmp     esi,ENDstk              ;Run past end?
+        jae     RegWrap
+RegOpDisp:                              ;Debugging label
+;eax = r/m bits * 4
+;esi = FP register address
+;edi = [CURstk]
+        jmp     tOpRegDisp[edx]
+
+        ALIGN   4
+RegWrap:
+        sub     esi,ENDstk - BEGstk     ;Wrap around    JWM
+RegOpDispWrap:                          ;Debugging label
+        jmp     tOpRegDisp[edx]
+
+
+SawFwait:
+        inc     edi                     ; bump past FWAIT
+        mov     [esp].regEIP,edi        ; final return offset
+        mov     EMSEG:[CURErr],0        ; clear current error and cond. codes
+
+; return from routine;  restore registers and return
+
+        align   4
+EMLFINISH:
+; check for errors
+        mov     al, EMSEG:[CURerr]      ; fetch errors
+        or      al, EMSEG:[SWerr]
+        mov     EMSEG:[SWerr],al        ; set errors in sticky error flag
+        and     al,EMSEG:[ErrMask]
+        jnz     CommonExceptions        ; a bit survives the mask: raise it
+
+ifdef TRACENPX
+        jmp     CommonExceptions
+endif
+
+if DBG eq 0
+
+;
+; On a free build, look ahead to next instruction
+;
+
+;09BH is FWAIT - just skip it
+;0D8H - 0DFH is 387 instruction, emulate it
+        mov     edi,[esp].regEIP        ;edi = 387 instruction address
+        mov     dx,cseg:[edi]
+        cmp     dl,09BH                 ;FWAIT?
+        jz      short SawFwait
+        sub     dl,0D8H                 ;dl = 0..7 for opcodes 0D8H..0DFH
+        cmp     dl,8
+        jb      ReExecute               ;another 387 instruction: emulate it without returning
+endif
+        mov     ebx,[esp].[OldLongStatus]
+        and		ebx,LongSavedFlags		;preserve condition codes, error flags
+        or		EMSEG:[LongStatusWord],ebx	;merge saved status word, condition codes
+        
+        pop     eax
+        pop     ecx
+        pop     edx
+        pop     ebx
+        add     esp,4                   ; toss esp value
+        pop     ebp
+        pop     esi
+        pop     edi
+        add     esp,8                   ;toss old PrevCodeOff and StatusWord
+        mov     EMSEG:[CURerr],Summary  ;Indicate we are not busy
+        pop     ds
+        error_return                    ; common exit sequence
+
+ReExecute:
+        mov     eax,EMSEG:[LongStatusWord]
+        mov     ebx,[esp].[OldLongStatus]
+        and		ebx,LongSavedFlags		;preserve condition codes, error flags
+        or		eax,ebx					;merge saved status word, condition codes
+        mov     [esp].OldLongStatus,eax ;refresh the saved status slot with the merged value
+        mov     eax,EMSEG:[PrevCodeOff]
+        mov     [esp].OldCodeOff,eax    ;refresh the saved PrevCodeOff slot
+        lea		eax,[esp+regFlg+4]		;must restore "saved" esp
+        mov		[esp].RegEsp,eax
+        jmp     Execute387inst
diff --git a/private/ntos/dll/i386/emfadd.asm b/private/ntos/dll/i386/emfadd.asm
new file mode 100644
index 000000000..cdd0a8f58
--- /dev/null
+++ b/private/ntos/dll/i386/emfadd.asm
@@ -0,0 +1,396 @@
+	subttl  emfadd.asm - Addition and Subtraction
+	page
+;*******************************************************************************
+;	 Copyright (c) Microsoft Corporation 1991
+;	 All Rights Reserved
+;
+;emfadd.asm - long double add and subtract
+;	by Tim Paterson
+;
+;Purpose:
+;	Long double add/subtract.
+;Outputs:
+;	Jumps to [RoundMode] to round and store result.
+;
+;Revision History:
+;
+; []	09/05/91  TP	Initial 32-bit version.
+;
+;*******************************************************************************
+
+;*******************************************************************************
+; Dispatch for Add/Sub/Subr
+;
+; Signs are passed in dx:
+;       xor source sign with dl
+;       xor dest sign with dh
+;
+;One operand has been loaded into ecx:ebx:esi ("source"), the other is
+;pointed to by edi ("dest").  
+;
+;Tag of source is shifted.  Tag values are as follows:
+.erre   TAG_SNGL        eq      0       ;SINGLE: low 32 bits are zero
+.erre   TAG_VALID       eq      1
+.erre   TAG_ZERO        eq      2
+.erre   TAG_SPCL        eq      3       ;NAN, Infinity, Denormal, Empty
+;Any special case routines not found in this file are in emarith.asm
+tFaddDisp	label	dword		;Source (reg)	Dest (*[edi])
+	dd	AddDouble		;single		single
+	dd	AddDouble		;single		double
+	dd	AddSourceSign		;single		zero
+	dd	AddSpclDest		;single		special
+	dd	AddDouble		;double		single
+	dd	AddDouble		;double		double
+	dd	AddSourceSign		;double		zero
+	dd	AddSpclDest		;double		special
+	dd	AddDestSign		;zero		single
+	dd	AddDestSign		;zero		double
+	dd	AddZeroZero		;zero		zero
+	dd	AddSpclDest		;zero		special
+	dd	AddSpclSource		;special	single
+	dd	AddSpclSource		;special	double
+	dd	AddSpclSource		;special	zero
+	dd	TwoOpBothSpcl		;special	special
+	dd	AddTwoInf		;Two infinities
+
+EM_ENTRY eFISUB16
+eFISUB16:
+        call    Load16Int
+        mov     dx,bSign                ;Change sign of source (dl = bSign, dh = 0)
+        jmp     AddSetResult
+
+EM_ENTRY eFISUBR16
+eFISUBR16:
+        call    Load16Int
+        mov     dx,bSign shl 8          ;Change sign of dest (dh = bSign, dl = 0)
+        jmp     AddSetResult
+
+EM_ENTRY eFIADD16
+eFIADD16:
+        call    Load16Int
+        xor     edx,edx                 ;Both signs positive
+        jmp     AddSetResult
+
+EM_ENTRY eFISUB32
+eFISUB32:
+        call    Load32Int
+        mov     dx,bSign                ;Change sign of source
+        jmp     AddSetResult
+
+EM_ENTRY eFISUBR32
+eFISUBR32:
+        call    Load32Int
+        mov     dx,bSign shl 8          ;Change sign of dest
+        jmp     AddSetResult
+
+EM_ENTRY eFIADD32
+eFIADD32:
+        call    Load32Int
+        xor     edx,edx                 ;Both signs positive
+        jmp     AddSetResult
+
+EM_ENTRY eFSUB32
+eFSUB32:
+        call    Load32Real
+        mov     dx,bSign                ;Change sign of source
+        jmp     AddSetResult
+
+EM_ENTRY eFSUBR32
+eFSUBR32:
+        call    Load32Real
+        mov     dx,bSign shl 8          ;Change sign of dest
+        jmp     AddSetResult
+
+EM_ENTRY eFADD32
+eFADD32:
+        call    Load32Real
+        xor     edx,edx                 ;Both signs positive
+        jmp     AddSetResult
+
+EM_ENTRY eFSUB64
+eFSUB64:
+        call    Load64Real
+        mov     dx,bSign                ;Change sign of source
+        jmp     AddSetResult
+
+EM_ENTRY eFSUBR64
+eFSUBR64:
+        call    Load64Real
+        mov     dx,bSign shl 8          ;Change sign of dest
+        jmp     AddSetResult
+
+EM_ENTRY eFADD64
+eFADD64:
+        call    Load64Real
+        xor     edx,edx                 ;Both signs positive
+        jmp     AddSetResult
+
+
+PolyAddDouble:
+;This entry point is used by polynomial evaluator.
+;It checks the operand in registers for zero, and doesn't require
+;signs to be set up in dx.
+;
+;op1 mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl
+;edi = pointer to op2 in ds
+	xor	edx,edx			;Addition
+	cmp	cl,bTAG_ZERO		;Adding to zero?
+        jnz     AddDouble               ;No, do the full add
+;Number in registers is zero, so just return value from memory.
+        mov     ecx,EMSEG:[edi].ExpSgn
+        mov     ebx,EMSEG:[edi].lManHi
+        mov     esi,EMSEG:[edi].lManLo
+        ret
+
+EM_ENTRY eFSUBPreg
+eFSUBPreg:
+        push    offset PopWhenDone      ;Extra return address, then fall into eFSUBreg
+
+EM_ENTRY eFSUBreg
+eFSUBreg:
+        xchg    esi,edi                 ;Swap the two operand pointers, then fall into eFSUBtop
+
+EM_ENTRY eFSUBtop
+eFSUBtop:
+        mov     dx,bSign                ;Change sign of source
+        jmp     AddHaveSgn
+
+EM_ENTRY eFSUBRPreg
+eFSUBRPreg:
+        push    offset PopWhenDone      ;Extra return address, then fall into eFSUBRreg
+
+EM_ENTRY eFSUBRreg
+eFSUBRreg:
+        xchg    esi,edi                 ;Swap the two operand pointers, then fall into eFSUBRtop
+
+EM_ENTRY eFSUBRtop
+eFSUBRtop:
+        mov     dx,bSign shl 8          ;Change sign of dest
+        jmp     AddHaveSgn
+
+
+InsignifAdd:				;Reached when the exponent difference exceeds 64+1
+	mov	eax,1			;Set sticky bit
+	shl	ch,1			;Get sign, CY set IFF subtracting mant.
+	jnc	ReturnOp1
+	sub	esi,eax			;Subtract 1 from mantissa
+	sbb	ebx,0
+	neg	eax			;eax = -1
+ReturnOp1:
+;ebx:esi:eax = normalized unrounded mantissa
+;high half of ecx = exponent
+;high bit of ch = sign
+	jmp	EMSEG:[RoundMode]
+
+EM_ENTRY eFADDPreg
+eFADDPreg:
+        push    offset PopWhenDone      ;Extra return address, then fall into eFADDreg
+
+EM_ENTRY eFADDreg
+eFADDreg:
+        xchg    esi,edi                 ;Swap the two operand pointers, then fall into eFADDtop
+
+EM_ENTRY eFADDtop
+eFADDtop:
+        xor     edx,edx                 ;Both signs positive
+AddHaveSgn:
+        mov     ecx,EMSEG:[esi].ExpSgn
+        mov     ebx,EMSEG:[esi].lManHi
+        mov     esi,EMSEG:[esi].lManLo  ;Loaded last because esi is the pointer
+AddSetResult:
+        mov     ebp,offset tFaddDisp    ;Dispatch table for TwoOpDispatch
+        mov     EMSEG:[Result],edi            ;Save result pointer
+        mov     al,cl                   ;al = source tag
+        mov     ah,EMSEG:[edi].bTag     ;ah = dest tag
+        test    ax,ZEROorSPCL * 100H + ZEROorSPCL
+        jnz     TwoOpDispatch           ;Either tag has a ZEROorSPCL bit: dispatch through the table
+
+;.erre   AddDouble eq $                  ;Fall into AddDouble
+
+;*********
+AddDouble:
+;*********
+;
+;op1 mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
+;dl = sign change for op1
+;dh = sign change for op2
+;edi = pointer to op2
+
+	xor	ch,dl			;Flip sign if subtracting
+	mov	eax,EMSEG:[edi].ExpSgn
+	xor	ah,dh			;Flip sign if subtracting
+	mov	edx,EMSEG:[edi].lManHi
+	mov	edi,EMSEG:[edi].lManLo	;Loaded last because edi is the pointer
+
+AddDoubleReg:
+;op1 mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
+;op2 mantissa in edx:edi, exponent in high eax, sign in ah bit 7
+
+	cmp	eax,ecx			;Compare exponents
+.erre	TexpBias eq 0			;Not biased, use signed jump
+	jle	short HavLg		;op1 is larger, we have the right order
+	xchg	esi,edi
+	xchg	ebx,edx
+	xchg	eax,ecx
+HavLg:
+;Larger in ebx:esi.  Note that if the exponents were equal, things like
+;the sign bit or tag may have determined which is "larger".  It doesn't
+;matter which is which if the exponents are equal, however.
+	and	ah,80H			;Keep sign bit
+	sar	ch,1			;Extend sign into bit 6 of byte
+	xor	ch,ah			;See if signs are the same
+	xor	ax,ax			;Clear out sign and tag
+	neg	eax			;ax still 0
+	add	eax,ecx			;Get exponent difference
+	shr	eax,16			;Bring exp. difference down to low end
+	jz	short Aligned
+	cmp	eax,64+1		;Is difference in range?
+;CONSIDER: tell me again why 1/4 LSB could have effect.  It seems like
+;CONSIDER: 1/2 LSB is the limit.
+	ja	short InsignifAdd	;  (Even 1/4 LSB could have effect)
+	mov	cl,al			;Shift count to cl
+;High half ecx = exponent
+;ch bit 7 = sign difference
+;ch bit 6 = sign
+;cl = shift count
+	xor	eax,eax			;Prepare to take bits shifted out
+	cmp	cl,32			;More than a whole word?
+	jb	short ShortShift
+	xchg	eax,edx			;Save bits shifted out in eax
+	xchg	edi,eax			;Net effect: edi = old edx, eax = old edi, edx = 0
+	sub	cl,32
+	cmp	cl,8			;Safe to shift this much
+	jb	short ShortSticky
+;Collapse all (sticky) bits of eax into LSB of edi
+	neg	eax			;Sets CY if eax was not zero
+	sbb	eax,eax			;-1 if CY was set, zero otherwise
+	neg	eax			;Sticky bit in LSB only
+	or	di,ax			;Move sticky bit up
+	cmp	cl,32			;Less than another Dword?
+	jb	short ShortShift
+	mov	eax,edi
+	xor	edi,edi			;edx = edi = 0
+ShortSticky:
+;Shift will not be more than 8 bits
+	or	ah,al			;Move up sticky bits
+ShortShift:
+	shrd	eax,edi,cl		;Save bits shifted out in eax
+	shrd	edi,edx,cl
+	shr	edx,cl
+Aligned:
+	shl	ch,1			;Were signs the same?
+	jc	short SubMant		;No--go subtract mantissas
+;Add mantissas
+	add	esi,edi
+	adc	ebx,edx
+	jnc	short AddExit
+;Addition of mantissas overflowed. Bump exponent and shift right
+	shrd	eax,esi,1
+	shrd	esi,ebx,1		;Faster than RCR
+	sar	ebx,1
+	or	ebx,1 shl 31		;Set MSB
+	add	ecx,1 shl 16		;Exponent lives in the high half of ecx
+AddExit:
+;ebx:esi:eax = normalized unrounded mantissa
+;high half of ecx = exponent
+;high bit of ch = sign
+	jmp	EMSEG:[RoundMode]
+
+NegMant:
+;To get here, exponents must have been equal and op2 was bigger than op1.
+;Note that this means nothing ever got shifted into eax.
+	not	ch			;Change sign of result
+	not	ebx			;not/neg/sbb -1 negates the 64-bit value ebx:esi
+	neg	esi
+	sbb	ebx,-1
+	js	short AddExit		;Already normalized?
+	test	ebx,40000000H		;Only one bit out of normal?
+	jz	short NormalizeAdd
+	jmp	short NormOneBit
+
+SubMant:
+;Subtract mantissas
+	neg	eax			;Pretend minuend is zero extended
+	sbb	esi,edi
+	sbb	ebx,edx
+	jc	short NegMant		;Borrow out: result went negative
+	js	short AddExit		;Already normalized?
+NormChk:
+	test	ebx,40000000H		;Only one bit out of normal?
+	jz	short NormalizeAdd
+;One bit normalization
+NormOneBit:
+	sub	ecx,1 shl 16		;Adjust exponent
+ShiftOneBit:				;Entry point from emfmul.asm
+	shld	ebx,esi,1
+	shld	esi,eax,1
+	shl	eax,1
+	jmp	EMSEG:[RoundMode]
+
+;***********
+AddZeroZero:				;Entry point for adding two zeros
+;***********
+	mov	ah,EMSEG:[edi].bSgn	;Get sign of op
+	xor	ch,dl			;Possibly subtracting source
+	xor	ah,dh			;Possibly subtracting dest
+	xor	ch,ah			;Do signs match?
+	js	FindZeroSign		;No - use rounding mode to set sign
+	mov	EMSEG:[edi].bSgn,ah	;Correct the sign if subtracting
+	ret				;Result at [edi] is now correct
+
+ZeroChk:
+;Upper 64 bits were all zero, but there could be 1 bit in the MSB
+;of eax.
+	or	eax,eax
+	jnz	short OneBitLeft
+	mov	ebx,eax			;eax is zero here
+	mov	esi,eax			;Zero mantissa
+FindZeroSign:
+;Round to -0 if "round down" mode, round to +0 otherwise
+	xor	ecx,ecx			;Zero exponent, positive sign
+	mov	dl,EMSEG:[CWcntl]	;Get control word
+	and	dl,RoundControl
+        cmp	dl,RCdown		;Rounding down?
+	jnz	ZeroJmp
+	mov	ch,80H			;Set sign bit
+ZeroJmp:
+	mov	cl,bTAG_ZERO
+	jmp	EMSEG:[ZeroVector]
+
+OneBitLeft:
+	xchg	ebx,eax			;Bit now normalized
+	sub	ecx,64 shl 16		;Adjust exponent
+	jmp	EMSEG:[RoundMode]
+
+NormalizeAdd:
+;Inputs:
+;	ebx:esi:eax = 65-bit number
+;	ecx high half = exponent
+;
+;Since we are more than 1 bit out of normalization, exponents must have
+;differed by 0 or 1.  Thus rounding will not be necessary for 64 bits.
+	bsr	edx,ebx			;Scan for MSB
+	jnz	short ShortNorm
+	bsr	edx,esi			;ebx was zero, try the next dword
+	jz	short ZeroChk		;esi is zero as well
+	sub	ecx,32 shl 16		;Adjust exponent
+	mov	ebx,esi			;Push it up 32 bits
+	mov	esi,eax
+ShortNorm:
+;Bit number in edx ranges from 0 to 31
+	mov	cl,dl
+	not	cl			;Convert bit number to shift count
+	shld	ebx,esi,cl
+	shld	esi,eax,cl
+	shl	edx,16			;Move exp. adjustment to high end
+	lea	ecx,[ecx+edx-(31 shl 16)] ;Adjust exponent
+	xor	eax,eax			;No extra bits
+	jmp	EMSEG:[RoundMode]
+
+AddDestSign:				;Apply the dest sign change (dh) to the value at [edi] and return
+	xor	EMSEG:[edi].bSgn,dh
+	ret
+
+AddSourceSign:				;Apply the source sign change (dl) to the value in registers, then SaveResult
+	xor	ch,dl
+	jmp	SaveResult
diff --git a/private/ntos/dll/i386/emfcom.asm b/private/ntos/dll/i386/emfcom.asm
new file mode 100644
index 000000000..43b5672c6
--- /dev/null
+++ b/private/ntos/dll/i386/emfcom.asm
@@ -0,0 +1,402 @@
+	subttl  emfcom.asm - Comparison Instructions
+	page
+;*******************************************************************************
+;emfcom.asm - Comparison Instructions
+;
+;        Microsoft Confidential
+;
+;        Copyright (c) Microsoft Corporation 1991
+;        All Rights Reserved
+;
+;Purpose:
+;       FCOM,FCOMP,FCOMPP,FUCOM,FUCOMP,FUCOMPP,FTST,FXAM instructions
+;
+;Revision History:
+;
+; []	09/05/91  TP	Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+;*******************************************************************************
+;Dispatch table for compare
+;
+;One operand has been loaded into ecx:ebx:esi ("source"), the other is
+;pointed to by edi ("dest").
+;
+;The table holds one entry per (source class, dest class) pair: four
+;dest entries for each of the four source classes, in tag order.
+;UComOpLoaded passes its address in ebp to TwoOpDispatch (defined
+;elsewhere), which presumably selects the entry from the two tags.
+;
+;Tag of source is shifted.  Tag values are as follows:
+.erre   TAG_SNGL        eq      0       ;SINGLE: low 32 bits are zero
+.erre   TAG_VALID       eq      1
+.erre   TAG_ZERO        eq      2
+.erre   TAG_SPCL        eq      3       ;NAN, Infinity, Denormal, Empty
+;Any special case routines not found in this file are in emarith.asm
+tFcomDisp       label   dword           ;Source (reg)   Dest (*[di] = ST)
+        dd      ComDouble               ;single         single
+        dd      ComDouble               ;single         double
+	dd	ComDestZero		;single		zero
+        dd      ComSpclDest             ;single         special
+        dd      ComDouble               ;double         single
+        dd      ComDouble               ;double         double
+        dd      ComDestZero             ;double         zero
+        dd      ComSpclDest             ;double         special
+        dd      ComSrcZero              ;zero           single
+        dd      ComSrcZero              ;zero           double
+        dd      ComEqual                ;zero           zero
+	dd	ComSpclDest		;zero		special
+	dd	ComSpclSource		;special	single
+	dd	ComSpclSource		;special	double
+	dd	ComSpclSource		;special	zero
+	dd	ComBothSpcl		;special	special
+
+
+;Memory-operand compare entry points (FICOM/FICOMP 16- and 32-bit
+;integer, FCOM/FCOMP 32- and 64-bit real).  Each one:
+;  1. clears C0-C3 in the status saved at [esp].[OldLongStatus+4];
+;  2. pushes ComOpLoaded as the address the operand loader returns to;
+;  3. jumps to the loader for the operand type.
+;The popping forms push PopWhenDone first, so the final RET of the
+;compare routine lands in PopWhenDone.
+EM_ENTRY eFICOMP16
+eFICOMP16:
+    and		[esp].[OldLongStatus+4],NOT(ConditionCode SHL 16)	;clear C0,C1,C2,C3
+	push	offset PopWhenDone
+	push	offset ComOpLoaded
+	jmp	Load16Int		;Returns to ComOpLoaded
+
+EM_ENTRY eFICOM16
+eFICOM16:
+    and		[esp].[OldLongStatus+4],NOT(ConditionCode SHL 16)	;clear C0,C1,C2,C3
+	push	offset ComOpLoaded
+	jmp	Load16Int		;Returns to ComOpLoaded
+
+EM_ENTRY eFICOMP32
+eFICOMP32:
+    and		[esp].[OldLongStatus+4],NOT(ConditionCode SHL 16)	;clear C0,C1,C2,C3
+	push	offset PopWhenDone
+	push	offset ComOpLoaded
+	jmp	Load32Int		;Returns to ComOpLoaded
+
+EM_ENTRY eFICOM32
+eFICOM32:
+    and		[esp].[OldLongStatus+4],NOT(ConditionCode SHL 16)	;clear C0,C1,C2,C3
+	push	offset ComOpLoaded
+	jmp	Load32Int		;Returns to ComOpLoaded
+
+EM_ENTRY eFCOMP32
+eFCOMP32:
+    and		[esp].[OldLongStatus+4],NOT(ConditionCode SHL 16)	;clear C0,C1,C2,C3
+	push	offset PopWhenDone
+	push	offset ComOpLoaded
+	jmp	Load32Real		;Returns to ComOpLoaded
+
+EM_ENTRY eFCOM32
+eFCOM32:
+    and		[esp].[OldLongStatus+4],NOT(ConditionCode SHL 16)	;clear C0,C1,C2,C3
+	push	offset ComOpLoaded
+	jmp	Load32Real		;Returns to ComOpLoaded
+
+EM_ENTRY eFCOMP64
+eFCOMP64:
+    and		[esp].[OldLongStatus+4],NOT(ConditionCode SHL 16)	;clear C0,C1,C2,C3
+	push	offset PopWhenDone
+	push	offset ComOpLoaded
+	jmp	Load64Real		;Returns to ComOpLoaded
+
+EM_ENTRY eFCOM64
+eFCOM64:
+    and		[esp].[OldLongStatus+4],NOT(ConditionCode SHL 16)	;clear C0,C1,C2,C3
+	push	offset ComOpLoaded
+	jmp	Load64Real		;Returns to ComOpLoaded
+
+;Register-operand unordered compares.  The popping forms clear the saved
+;condition codes themselves, push the routine that does the popping
+;(ComPop2 pops twice, PopWhenDone once) and join eFUCOM at eFUCOM0.
+EM_ENTRY eFUCOMPP
+eFUCOMPP:
+    and		[esp].[OldLongStatus+4],NOT(ConditionCode SHL 16)	;clear C0,C1,C2,C3
+	push	offset ComPop2
+	jmp	eFUCOM0
+
+EM_ENTRY eFUCOMP
+eFUCOMP:
+    and		[esp].[OldLongStatus+4],NOT(ConditionCode SHL 16)	;clear C0,C1,C2,C3
+	push	offset PopWhenDone
+	jmp	eFUCOM0
+
+EM_ENTRY eFUCOM
+eFUCOM:
+    and		[esp].[OldLongStatus+4],NOT(ConditionCode SHL 16)	;clear C0,C1,C2,C3
+eFUCOM0:	
+;esi = pointer to st(i) from instruction field
+;edi = [CURstk]
+;Load st(i) as the "source" operand into ecx:ebx:esi, then enter the
+;common compare code with dl = 40H.  ComSrcNAN/ComDestNAN test this mask
+;against the top mantissa byte of a NAN; a NAN with that bit set is
+;reported as unordered without raising Invalid.
+	mov	ecx,EMSEG:[esi].ExpSgn
+	mov	ebx,EMSEG:[esi].lManHi
+	mov	esi,EMSEG:[esi].lManLo
+	mov	dl,40H			;Flag FUCOM - Look for SNAN
+	jmp	UComOpLoaded
+
+;Register-operand ordered compares.  The popping forms clear the saved
+;condition codes themselves, push the routine that does the popping
+;(ComPop2 pops twice, PopWhenDone once) and join eFCOM at eFCOM0.
+EM_ENTRY eFCOMPP
+eFCOMPP:
+    and		[esp].[OldLongStatus+4],NOT(ConditionCode SHL 16)	;clear C0,C1,C2,C3
+	push	offset ComPop2
+	jmp	eFCOM0
+
+EM_ENTRY eFCOMP
+eFCOMP:
+    and		[esp].[OldLongStatus+4],NOT(ConditionCode SHL 16)	;clear C0,C1,C2,C3
+	push	offset PopWhenDone
+	jmp	eFCOM0
+
+EM_ENTRY eFCOM
+eFCOM:
+    and		[esp].[OldLongStatus+4],NOT(ConditionCode SHL 16)	;clear C0,C1,C2,C3
+eFCOM0:
+;esi = pointer to st(i) from instruction field
+;edi = [CURstk]
+;Load st(i) as the "source" operand into ecx:ebx:esi.
+	mov	ecx,EMSEG:[esi].ExpSgn
+	mov	ebx,EMSEG:[esi].lManHi
+	mov	esi,EMSEG:[esi].lManLo
+
+ComOpLoaded:
+;Common entry once the source operand is in ecx:ebx:esi; the memory
+;operand loaders return here.
+;	mov	EMSEG:[UpdateCCodes],1
+	mov	dl,0			;flag FCOM - Look for any NAN
+UComOpLoaded:
+;dl = 0 (FCOM) or 40H (FUCOM).
+;al = tag of source, ah = tag of ST.  If either tag has a ZEROorSPCL bit
+;set, go through TwoOpDispatch (defined elsewhere) with the dispatch
+;table address in ebp; otherwise both operands are single or double and
+;control falls straight into ComDouble.
+	mov     ebp,offset tFcomDisp
+	mov	al,cl
+	mov     ah,EMSEG:[edi].bTag
+	test	ax,ZEROorSPCL * 100H + ZEROorSPCL
+	jnz	TwoOpDispatch
+
+;.erre	ComDouble eq $			;Fall into ComDouble
+
+;*********
+ComDouble:
+;*********
+;Compare ST (at [edi]) with the source operand and store the resulting
+;condition code in [SWcc].
+;
+;ebx:esi = op1 mantissa
+;ecx = op1 sign in bit 15, exponent in high half
+;edi = pointer to op2
+;
+;Order of tests: signs, then exponents (signed compare, since exponent
+;sits in the high half and the low word is reduced to the sign bit,
+;which is equal at that point), then mantissas high to low.
+;Modifies eax and the low word of ecx (cl becomes 0).
+	mov	eax,EMSEG:[edi].ExpSgn
+	and	ax,bSign shl 8		;Keep sign only
+	and	cx,bSign shl 8
+	cmp	ah,ch			;Are signs the same?
+	jnz	StBigger
+	cmp	eax,ecx			;Are exponents the same?
+	jl	StSmaller
+	jg	StBigger
+	cmp	EMSEG:[edi].lManHi,ebx	;Compare mantissas
+	jnz	MantDif
+	cmp	EMSEG:[edi].lManLo,esi	;Set flags for ST - src
+	jz	ComEqual
+MantDif:
+;CY is set if ST's mantissa is below the source's (unsigned compare).
+;al is zero here, so ADC leaves just that carry in bit 0; it is then
+;inverted when both operands are negative.
+	adc	al,al			;Copy CY flag to bit 0
+	rol	ah,1			;Rotate sign to bit 0
+	xor	al,ah			;Flip saved CY bit if negative
+	mov	EMSEG:[SWcc],al		;Set condition code
+	ret
+
+StSmaller:
+;ST has the smaller magnitude (same signs): invert the sign so that the
+;StBigger logic below yields the opposite answer.
+	not	ah
+StBigger:
+;ah = sign of ST
+;ch = sign of other operand
+;ST is bigger if it is positive (smaller if it is negative).
+;Use the sign bit directly as the "less than" bit C0.
+.erre	C0 eq 1
+	shr	ah,7			;Bring sign down to bit 0, clear CY
+	mov	EMSEG:[SWcc],ah		;Bit set if ST smaller (negative)
+	ret
+
+ComEqual:
+;Operands compare equal: store the "equal" condition code and return.
+;Also the dispatch target for zero vs. zero and used by FTST on a zero.
+	mov	EMSEG:[SWcc],CCequal
+	ret
+
+
+
+PopWhenDone:
+;Return address pushed by the popping forms of an instruction; runs
+;after the operation itself has returned.  cl = bTAG_NOPOP (-1) means
+;the operation hit an unmasked Invalid Operation and the stack must be
+;left alone; otherwise the POPSTret macro (defined elsewhere) pops ST
+;and returns.
+.erre	bTAG_NOPOP eq -1
+	inc	cl			;OK to pop?
+	jz	ComPopX			;No - had unmasked Invalid Operation
+
+	POPSTret
+
+ComPop2:
+;Pop the register stack twice (FCOMPP/FUCOMPP) unless cl = bTAG_NOPOP.
+;The current top is tagged empty, then esi = CURstk + 2 registers is
+;compared with ENDstk to detect wrap-around of the register array:
+;  below ENDstk - no wrap: tag the register just below esi empty and
+;		   make esi the new top;
+;  equal	 - PopOneOver: new top wraps to BEGstk;
+;  above	 - PopTwoOver: new top is BEGstk + one register.
+.erre	bTAG_NOPOP eq -1
+	inc	cl			;OK to pop?
+	jz	ComPopX			;No - had unmasked Invalid Operation
+	mov	esi,EMSEG:[CURstk]
+	mov	EMSEG:[esi].bTag,bTAG_EMPTY
+	add	esi,Reg87Len*2
+	cmp	esi,ENDstk			;JWM
+	je	PopOneOver
+	ja	PopTwoOver
+	mov	EMSEG:[esi-Reg87Len].bTag,bTAG_EMPTY
+	mov	EMSEG:[CURstk],esi
+ComPopX:
+	ret
+
+PopOneOver:
+;Second popped register is tagged via INITstk (XINITstk when NT386 is
+;not defined) -- presumably the last register of the array.
+	mov	EMSEG:[CURstk],BEGstk		;JWM
+ifdef NT386
+	mov	EMSEG:[INITstk].bTAG,bTAG_EMPTY
+else
+	mov	EMSEG:[XINITstk].bTAG,bTAG_EMPTY
+endif
+	ret
+
+PopTwoOver:
+;Second popped register is the one at BEGstk; the new top follows it.
+	mov	EMSEG:[CURstk],BEGstk+Reg87Len	;JWM
+ifdef NT386
+	mov	EMSEG:[BEGstk].bTAG,bTAG_EMPTY
+else
+	mov	EMSEG:[XBEGstk].bTAG,bTAG_EMPTY
+endif
+	ret
+
+;*******************************************************************************
+;Special cases for FCOM/FUCOM.
+;These don't share with those in emarith.asm because NANs are treated
+;differently.
+ComDestZero:
+;ST is zero, so Src is bigger if it is positive (smaller if it is negative).
+;Use the sign bit directly as the "less than" bit C0.
+;Also used by ComSpclSource when the source is Infinity, where the sign
+;of the source alone decides the result in the same way.
+;ch = sign of source.
+	not	ch			;C0 is 1 if ST < Src
+.erre	C0 eq 1
+	shr	ch,7			;Bring sign down to bit 0
+	mov	EMSEG:[SWcc],ch		;Bit set if Src positive (ST < Src)
+	ret
+
+ComSrcZero:
+;Source is zero (or, from ComSpclDest, ST is Infinity).
+;ST is bigger if it is positive (smaller if it is negative).
+;Use the sign bit directly as the "less than" bit C0.
+;edi = pointer to ST.
+	mov	al,EMSEG:[edi].bSgn
+.erre	C0 eq 1
+	shr	al,7			;Bring sign down to bit 0
+	mov	EMSEG:[SWcc],al		;Bit set if ST smaller (negative)
+	ret
+
+ComSpclSource:
+;Source is special, ST is single, double or zero.
+;cl = tag of source.  Sort out which kind of special it is.
+	cmp	cl,bTAG_NAN
+	jz	ComSrcNAN
+	cmp	cl,bTAG_INF
+	jz	ComDestZero
+	cmp	cl,bTAG_DEN
+	jz	ComDenormal
+;Must be empty
+ComEmpty:
+;An empty register was used as an operand: flag Invalid together with
+;StackFlag, then let ComChkMask decide whether popping is suppressed.
+	mov	EMSEG:[CURerr],Invalid+StackFlag
+	jmp	ComChkMask
+
+ComSrcNAN:
+;Source operand is a NAN.  dl = 0 for FCOM, 40H for FUCOM.  Moving dl to
+;the top byte of edx lines the mask up with the high mantissa dword in
+;ebx.  The test is non-zero only for FUCOM with that mantissa bit set,
+;in which case no error is raised.
+	shl	edx,24			;Move dl to high byte
+	test	ebx,edx			;See if we report error with this NAN
+ComChkNAN:
+;Entered with ZF from the mask test: NZ = report unordered silently.
+	jnz	Incomp
+ComInvalid:
+	mov	EMSEG:[CURerr],Invalid	;Flag the error
+ComChkMask:
+	test	EMSEG:[CWmask],Invalid	;Is exception masked?
+	jnz	Incomp
+	mov	cl,bTAG_NOPOP		;Unmasked, don't pop stack
+Incomp:
+;Result is "unordered".  cl = bTAG_NOPOP tells PopWhenDone/ComPop2 to
+;leave the stack alone.
+	mov	EMSEG:[SWcc],CCincomprable
+	ret
+
+ComSpclDest:
+;ST is special, source is single, double or zero.
+;edi = pointer to ST.  Sort out which kind of special it is.
+	mov	al,EMSEG:[edi].bTag
+	cmp	al,bTAG_INF
+	jz	ComSrcZero
+	cmp	al,bTAG_Empty
+	jz	ComEmpty
+	cmp	al,bTAG_DEN
+	jz	ComDenormal
+;Must be NAN
+ComDestNAN:
+;dl = 0 for FCOM, 40H for FUCOM; tested against the top mantissa byte of
+;ST.  ComChkNAN raises Invalid when the result is zero.
+	test	EMSEG:[edi].bMan7,dl		;See if we report error with this NAN
+	jmp	ComChkNAN
+
+ComBothSpcl:
+;Both operands are special (NAN, Infinity, Denormal or Empty).
+;cl = tag of source, edi = pointer to ST, dl = NAN reporting mask
+;(0 for FCOM, 40H for FUCOM).
+;Priority: Empty, then source NAN, then ST NAN, then two Infinities;
+;anything left involves at least one denormal.
+	mov	al,EMSEG:[edi].bTag
+	cmp	cl,bTAG_EMPTY
+	jz	ComEmpty
+	cmp	al,bTAG_EMPTY
+	jz	ComEmpty
+	cmp	cl,bTAG_NAN
+	jz	ComSrcNAN
+	cmp	al,bTAG_NAN
+	jz	ComDestNAN
+	mov	ah,cl
+	cmp	ax,(bTAG_INF shl 8) + bTag_INF	;Are both Infinity?
+	jz	ComDouble		;If so, compare their signs
+;Must have at least one denormal
+ComDenormal:
+;Also entered from ComSpclSource and ComSpclDest, where the other
+;operand may be single, double or zero.
+	or	EMSEG:[CURerr],Denormal
+;A zero must not reach ComDouble together with a denormal.  ComDouble
+;orders same-sign operands by exponent, and a zero is stored with
+;exponent 0, which would rank it above the (presumably negative)
+;exponent of a denormal.  Against a zero only the sign of the non-zero
+;operand matters, so reuse the zero-operand routines.
+	cmp	cl,bTAG_ZERO		;Is source a zero?
+	jz	ComSrcZero		;Yes - sign of ST decides
+	cmp	EMSEG:[edi].bTag,bTAG_ZERO	;Is ST a zero?
+	jz	ComDestZero		;Yes - sign of source decides
+        jmp     ComDouble
+
+;*******************************************************************************
+
+;Condition code patterns stored by FXAM for each class of operand.
+XAM_Unsupported	equ	0
+XAM_NAN		equ	C0
+XAM_Norm	equ	C2
+XAM_Inf		equ	C2+C0
+XAM_Zero	equ	C3
+XAM_Empty	equ	C3+C0
+XAM_Den		equ	C3+C2
+
+;Lookup table indexed by the low 4 bits of the tag byte (see eFXAM).
+;The .erre directives verify at assembly time that each entry sits at
+;the offset equal to its tag value; offsets with no tag hold 0.
+tXamTag	label	byte
+.erre	TAG_SNGL	eq	$-tXamTag
+	db	XAM_Norm		;TAG_SNGL
+.erre	TAG_VALID	eq	$-tXamTag
+	db	XAM_Norm		;TAG_VALID
+.erre	TAG_ZERO	eq	$-tXamTag
+	db	XAM_Zero		;TAG_ZERO
+.erre	TAG_EMPTY	eq	$-tXamTag
+	db	XAM_Empty		;TAG_EMPTY
+	db	0
+	db	0
+	db	0
+.erre	TAG_INF 	eq	$-tXamTag
+	db	XAM_Inf 		;TAG_INF
+	db	0
+	db	0
+	db	0
+.erre	TAG_NAN 	eq	$-tXamTag
+	db	XAM_NAN 		;TAG_NAN
+	db	0
+	db	0
+	db	0
+.erre	TAG_DEN 	eq	$-tXamTag
+	db	XAM_Den 		;TAG_DEN
+
+;FXAM - classify ST.  The class bits come from tXamTag indexed by the
+;low 4 bits of the tag; the sign of ST is copied into C1.  The result is
+;stored in [SWcc].  Uses eax and bl.
+EM_ENTRY eFXAM
+eFXAM:
+;edi = [CURstk]
+    and		[esp].[OldLongStatus+4],NOT(ConditionCode SHL 16)	;clear C0,C1,C2,C3
+	mov	eax,EMSEG:[edi].ExpSgn	;Get sign and tag
+	mov	bl,ah			;Save sign
+	and	bl,bSign		;Keep only sign bit
+	and	eax,0FH			;Save low 4 bits of tag
+	mov	al,tXamTag[eax]		;Lookup cond. codes for this tag
+.erre	C1 eq 2		;Bit 1
+.erre	bSign eq 80H	;Bit 7
+	shr	bl,7-1			;Move sign bit to CC C1
+	or	al,bl
+	mov	EMSEG:[SWcc],al
+	ret
+
+;*******************************************************************************
+
+;FTST - compare ST against zero and store the result in [SWcc].
+;al = tag, ah = sign byte of ST.  A zero is "equal"; a single, double,
+;infinity or denormal is classified by its sign alone; empty and NAN go
+;through the error paths shared with FCOM (ComEmpty / ComInvalid).
+EM_ENTRY eFTST
+eFTST:
+;edi = [CURstk]
+    and		[esp].[OldLongStatus+4],NOT(ConditionCode SHL 16)	;clear C0,C1,C2,C3
+	mov	eax,EMSEG:[edi].ExpSgn
+	cmp	al,bTAG_ZERO
+	jz	ComEqual
+	ja	TestSpcl
+;Either single or double, non-zero.  Just check sign.
+TestSign:
+	shr	ah,7			;Bring sign down to bit 0
+	mov	EMSEG:[SWcc],ah		;Bit set if negative
+	ret
+
+TestSpcl:
+	cmp	al,bTAG_INF
+	jz	TestSign		;Normal test for Infinity
+	cmp	al,bTAG_EMPTY
+	jz	ComEmpty
+	cmp	al,bTAG_NAN
+	jz	ComInvalid
+;Must be denormal
+;Flag the denormal operand, then test its sign like any other number
+;(ah still holds the sign byte).
+	mov	EMSEG:[CURerr],Denormal
+	jmp	TestSign
diff --git a/private/ntos/dll/i386/emfconst.asm b/private/ntos/dll/i386/emfconst.asm
new file mode 100644
index 000000000..210cccf65
--- /dev/null
+++ b/private/ntos/dll/i386/emfconst.asm
@@ -0,0 +1,126 @@
+	subttl	emfconst.asm - Loading of 387 on chip constants
+        page
+;*******************************************************************************
+;emfconst.asm - Loading of 387 on chip constants
+;
+;        Microsoft Confidential
+;
+;	 Copyright (c) Microsoft Corporation 1991
+;        All Rights Reserved
+;
+;Purpose:
+;       FLDZ, FLD1, FLDPI, FLDL2T, FLDL2E, FLDLG2, FLDLN2 instructions
+;Inputs:
+;	edi = [CURstk]
+;
+;Revision History:
+;
+; []	09/05/91  TP	Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+	PrevStackWrap	edi,Ld1		;Tied to PrevStackElem below
+
+;FLD1 - push +1.0.
+;The PrevStackElem/PrevStackWrap macro pair (defined elsewhere) moves
+;edi to the register that will become the new top.  If that register is
+;not empty the push fails through FldErr.  Otherwise it becomes the new
+;top and is filled with mantissa 80000000:00000000, exponent 0, positive
+;sign, tag SINGLE.
+EM_ENTRY eFLD1
+eFLD1:
+;edi = [CURstk]
+	PrevStackElem	edi,Ld1		;Point to receiving location
+	cmp	EMSEG:[edi].bTag,bTAG_EMPTY	;Is it empty?
+	jnz	FldErr			;in emload.asm
+	mov	EMSEG:[CURstk],edi
+	mov	EMSEG:[edi].lManLo,0
+	mov	EMSEG:[edi].lManHi,1 shl 31
+	mov	EMSEG:[edi].ExpSgn,bTAG_SNGL	;Exponent and sign are zero
+	ret
+
+
+	PrevStackWrap	edi,Ldz		;Tied to PrevStackElem below
+
+;FLDZ - push +0.0.
+;Same sequence as FLD1: step edi to the register that will become the
+;new top, fail through FldErr if it is not empty, otherwise make it the
+;top and store a zero mantissa with exponent 0, positive sign and tag
+;ZERO.
+EM_ENTRY eFLDZ
+eFLDZ:
+;edi = [CURstk]
+	PrevStackElem	edi,Ldz		;Point to receiving location
+	cmp	EMSEG:[edi].bTag,bTAG_EMPTY	;Is it empty?
+	jnz	FldErr			;in emload.asm
+	mov	EMSEG:[CURstk],edi
+	mov	EMSEG:[edi].lManLo,0
+	mov	EMSEG:[edi].lManHi,0
+	mov	EMSEG:[edi].ExpSgn,bTAG_ZERO	;Exponent and sign are zero
+	ret
+
+;*******************************************************************************
+
+;The 5 irrational constants need to be adjusted according to rounding mode.
+
+;DefConst defines four assembly-time symbols per constant:
+;	c<name>lo, c<name>hi - low and high dwords of the 64-bit mantissa,
+;			       rounded to nearest
+;	c<name>exp	     - unbiased exponent as a 16-bit value
+;	c<name>rnd	     - 1 if round-to-nearest rounded the mantissa
+;			       up, 0 if it rounded down
+DefConst	macro	cName,low,high,expon,round
+c&cName&lo	equ	low
+c&cName&hi	equ	high
+c&cName&exp     equ     expon
+c&cName&rnd     equ     round
+	endm
+
+;log2(10)
+DefConst	FLDL2T,0CD1B8AFEH,0D49A784BH,00001H,0
+
+;log2(e)
+DefConst	FLDL2E,05C17F0BCH,0B8AA3B29H,00000H,1
+
+;log10(2)
+DefConst	FLDLG2,0FBCFF799H,09A209A84H,0FFFEH,1
+
+;ln(2)
+DefConst	FLDLN2,0D1CF79ACH,0B17217F7H,0FFFFH,1
+
+;pi
+DefConst	FLDPI,02168C235H,0C90FDAA2H,00001H,1
+
+
+;LoadConstant generates the entry point e<name> for one constant.  It
+;loads the mantissa into ebx:edx and puts the exponent in the high half
+;of ecx with the rounding flag in cl (ch, the sign, is zero), then jumps
+;to CommonConst.  Passing any second argument suppresses the jump so the
+;entry falls through; that form must be the last expansion, directly in
+;front of CommonConst.
+LoadConstant   macro   cName,nojmp
+EM_ENTRY e&cName
+e&cName:
+	mov	ebx,c&cName&hi
+	mov	edx,c&cName&lo
+        mov     ecx,c&cName&exp shl 16 + c&cName&rnd
+ifb	<nojmp>
+	jmp	CommonConst
+endif
+	endm
+
+LoadConstant	FLDL2T
+
+LoadConstant	FLDL2E
+
+LoadConstant	FLDLG2
+
+LoadConstant	FLDLN2
+
+LoadConstant	FLDPI,nojmp
+
+CommonConst:
+;Common tail for the five irrational constant loads.
+;ebx:edx = mantissa of constant, rounded to nearest
+;high ecx = exponent 
+;ch = sign
+;cl = rounding flag: 1 indicates roundup occurred for round nearest, else 0
+;edi = [CURstk]
+;In round-to-nearest mode the constant is used as is; otherwise
+;NotNearConst adjusts the low mantissa byte first.
+	test	EMSEG:[CWcntl],RoundControl	;Check rounding control bits
+.erre	RCnear eq 0
+	jnz	NotNearConst		;Adjust constant if not round nearest
+StoreConst:
+;Replace the rounding flag in cl with the VALID tag, move the low
+;mantissa dword to esi and continue in the common load code.
+	mov	cl,bTAG_VALID
+	mov	esi,edx
+	jmp	FldCont			;In emload.asm
+
+NotNearConst:
+;It is known that the five constants are positive irrational numbers.
+;This means they are never exact, and chop and round down always
+;produce the same answer.  It is also known that the values are such
+;that rounding only alters bits in the last byte.
+;
+;A flag in cl indicates if the number has been rounded up for round
+;nearest (1 = rounded up, 0 = rounded down).  In chop and round down 
+;modes, this flag can be directly subtracted to reverse the rounding.  
+;In round up mode, we want to add (1-flag) = -(flag-1).
+;
+;Only dl, the lowest mantissa byte, is adjusted; no carry or borrow is
+;propagated into the rest of the mantissa.
+.erre	RCchop eq 0CH			;Two bits set only for chop
+	test	EMSEG:[CWcntl],RCdown	;DOWN bit set?
+	jnz	DirectRoundConst	;If so, it's chop or down
+;Round Up mode
+	dec	cl			;-1 if round up needed, else 0
+DirectRoundConst:
+	sub	dl,cl			;Directed rounding
+	jmp	StoreConst
diff --git a/private/ntos/dll/i386/emfdiv.asm b/private/ntos/dll/i386/emfdiv.asm
new file mode 100644
index 000000000..a06b910f2
--- /dev/null
+++ b/private/ntos/dll/i386/emfdiv.asm
@@ -0,0 +1,473 @@
+	subttl  emfdiv.asm - Division
+	page
+;*******************************************************************************
+;	 Copyright (c) Microsoft Corporation 1991
+;	 All Rights Reserved
+;
+;emfdiv.asm - long double divide
+;	by Tim Paterson
+;
+;Purpose:
+;	Long double division.
+;Inputs:
+;	ebx:esi = op1 mantissa
+;	ecx = op1 sign in bit 15, exponent in high half
+;	edi = pointer to op2 and result location
+;	[Result] = edi
+;
+;	Exponents are unbiased.  Denormals have been normalized using
+;	this expanded exponent range.  Neither operand is allowed to be zero.
+;Outputs:
+;	Jumps to [RoundMode] to round and store result.
+;
+;Revision History:
+;
+; []	09/05/91  TP	Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+;Dispatch tables for division
+;
+;One operand has been loaded into ecx:ebx:esi ("source"), the other is
+;pointed to by edi ("dest").  edi points to dividend for fdiv,
+;to divisor for fdivr.  
+;
+;Each table holds one entry per (source class, dest class) pair, four
+;dest entries for each of the four source classes, followed by a 17th
+;entry for the case of two infinities.  The table address is passed in
+;ebp to TwoOpResultSet (defined elsewhere).
+;
+;Tag of source is shifted.  Tag values are as follows:
+;
+.erre	TAG_SNGL	eq	0	;SINGLE: low 32 bits are zero
+.erre	TAG_VALID	eq	1
+.erre	TAG_ZERO	eq	2
+.erre	TAG_SPCL	eq	3	;NAN, Infinity, Denormal, Empty
+
+;dest = dest / source
+;Source is the divisor: a zero dest yields a zero whose sign is adjusted
+;(XorDestSign), a zero source is a divide by zero, 0/0 is invalid.
+tFdivDisp	label	dword		;Source (reg)	Dest (*[di])
+	dd	DivSingle		;single		single
+	dd	DivSingle		;single		double
+	dd	XorDestSign		;single		zero
+	dd	DivSpclDest		;single		special
+	dd	DivDouble		;double		single
+	dd	DivDouble		;double		double
+	dd	XorDestSign		;double		zero
+	dd	DivSpclDest		;double		special
+	dd	DivideByZero		;zero		single
+	dd	DivideByZero		;zero		double
+	dd	ReturnIndefinite	;zero		zero
+	dd	DivSpclDest		;zero		special
+	dd	DivSpclSource		;special	single
+	dd	DivSpclSource		;special	double
+	dd	DivSpclSource		;special	zero
+	dd	TwoOpBothSpcl		;special	special
+	dd	ReturnIndefinite	;Two infinities
+
+;dest = source / dest
+;Same layout as tFdivDisp with the operand roles swapped: here the dest
+;is the divisor, so a zero dest is a divide by zero and a zero source
+;yields a zero whose sign is adjusted (XorSourceSign).
+tFdivrDisp	label	dword		;Source (reg)	Dest (*[di])
+	dd	DivrSingle		;single		single
+	dd	DivrDouble		;single		double
+	dd	DivideByZero		;single		zero
+	dd	DivrSpclDest		;single		special
+	dd	DivrSingle		;double		single
+	dd	DivrDouble		;double		double
+	dd	DivideByZero		;double		zero
+	dd	DivrSpclDest		;double		special
+	dd	XorSourceSign		;zero		single
+	dd	XorSourceSign		;zero		double
+	dd	ReturnIndefinite	;zero		zero
+	dd	DivrSpclDest		;zero		special
+	dd	DivrSpclSource		;special	single
+	dd	DivrSpclSource		;special	double
+	dd	DivrSpclSource		;special	zero
+	dd	TwoOpBothSpcl		;special	special
+	dd	ReturnIndefinite	;Two infinities
+
+
+;Memory-operand divide entry points.  Each pushes the address the
+;operand loader should return to (DivSetResult for dest = dest/source,
+;DivrSetResult for dest = source/dest) and jumps to the loader for the
+;operand type (16/32-bit integer, 32/64-bit real).
+EM_ENTRY eFIDIV16
+eFIDIV16:
+	push	offset DivSetResult
+	jmp	Load16Int		;Returns to DivSetResult
+
+EM_ENTRY eFIDIVR16
+eFIDIVR16:
+	push	offset DivrSetResult
+	jmp	Load16Int
+
+EM_ENTRY eFIDIV32
+eFIDIV32:
+	push	offset DivSetResult
+	jmp	Load32Int
+
+EM_ENTRY eFIDIVR32
+eFIDIVR32:
+	push	offset DivrSetResult
+	jmp	Load32Int
+
+EM_ENTRY eFDIV32
+eFDIV32:
+	push	offset DivSetResult
+	jmp	Load32Real			;Returns to DivSetResult
+
+EM_ENTRY eFDIVR32
+eFDIVR32:
+	push	offset DivrSetResult		;Returns to DivrSetResult
+	jmp	Load32Real
+
+EM_ENTRY eFDIV64
+eFDIV64:
+	push	offset DivSetResult
+	jmp	Load64Real			;Returns to DivSetResult
+
+EM_ENTRY eFDIVR64
+eFDIVR64:
+	push	offset DivrSetResult
+	jmp	Load64Real			;Returns to DivrSetResult
+
+
+;Register forms of reverse divide.  The three entry points fall through
+;one into the next: eFDIVRPreg pushes PopWhenDone so the stack is popped
+;after the operation, eFDIVRreg swaps esi and edi so the other register
+;becomes the destination, and eFDIVRtop loads the source operand from
+;[esi] into ecx:ebx:esi.
+EM_ENTRY eFDIVRPreg
+eFDIVRPreg:
+	push	offset PopWhenDone
+
+EM_ENTRY eFDIVRreg
+eFDIVRreg:
+	xchg	esi,edi
+
+EM_ENTRY eFDIVRtop
+eFDIVRtop:
+	mov	ecx,EMSEG:[esi].ExpSgn
+	mov	ebx,EMSEG:[esi].lManHi
+	mov	esi,EMSEG:[esi].lManLo
+DivrSetResult:
+;cl has tag of dividend
+;Memory operand loaders return here as well.  ah = tag of dividend with
+;bit 0 cleared, al = tag of divisor, so ax is 0 or 1 exactly when both
+;operands are single/double: 1 means the divisor is double, 0 means it
+;is single.  Any larger value goes through TwoOpResultSet with the
+;dispatch table address in ebp.
+	mov     ebp,offset tFdivrDisp
+	mov	EMSEG:[Result],edi		;Save result pointer
+	mov	ah,cl
+	mov     al,EMSEG:[edi].bTag
+	and	ah,not 1		;Ignore single vs. double on dividend
+	cmp	ax,1
+.erre	bTAG_VALID	eq	1
+.erre	bTAG_SNGL	eq	0
+	jz	DivrDouble		;Divisor was double
+	ja	TwoOpResultSet
+;.erre	DivrSingle eq $			;Fall into DivrSingle
+
+;*********
+DivrSingle:
+;*********
+;Computes op1/op2
+;Op1 is double, op2 is single (low 32 bits are zero)
+;Moves the dividend (op1, in registers) to edx:eax and loads the divisor
+;(op2, at [edi]) into ebx = ExpSgn, edi = high mantissa dword, as
+;DivSingleReg expects.  ecx still holds the dividend's ExpSgn.
+	mov	edx,ebx
+	mov	eax,esi			;Mantissa in edx:eax
+	mov	ebx,EMSEG:[edi].ExpSgn
+	mov	edi,EMSEG:[edi].lManHi
+	jmp	DivSingleReg
+
+
+SDivBigUnderflow:
+;Exponent subtraction in DivSingleReg overflowed.
+;Overflow flag set could only occur with denormals (true exp < -32768)
+;Flag Underflow.  If the exception is masked the result is zero;
+;otherwise the exponent is adjusted by Underbias and the division
+;continues.
+	or	EMSEG:[CURerr],Underflow
+	test	EMSEG:[CWmask],Underflow	;Is exception masked?
+	jnz	UnderflowZero		;Yes, return zero (in emfmul.asm)
+	add	ecx,Underbias shl 16	;Fix up exponent
+	jmp	ContSdiv		;Continue with divide
+
+
+;Register forms of divide.  The three entry points fall through one into
+;the next: eFDIVPreg pushes PopWhenDone so the stack is popped after the
+;operation, eFDIVreg swaps esi and edi so the other register becomes the
+;destination, and eFDIVtop loads the source operand from [esi] into
+;ecx:ebx:esi.
+EM_ENTRY eFDIVPreg
+eFDIVPreg:
+	push	offset PopWhenDone
+
+EM_ENTRY eFDIVreg
+eFDIVreg:
+	xchg	esi,edi
+
+EM_ENTRY eFDIVtop
+eFDIVtop:
+	mov	ecx,EMSEG:[esi].ExpSgn
+	mov	ebx,EMSEG:[esi].lManHi
+	mov	esi,EMSEG:[esi].lManLo
+DivSetResult:
+;cl has tag of divisor
+;Memory operand loaders return here as well.  al = tag of divisor,
+;ah = tag of dividend with bit 0 cleared, so ax is 0 or 1 exactly when
+;both operands are single/double: 1 means the divisor is double, 0 means
+;it is single.  Any larger value goes through TwoOpResultSet with the
+;dispatch table address in ebp.
+	mov     ebp,offset tFdivDisp
+	mov	EMSEG:[Result],edi		;Save result pointer
+	mov	al,cl
+	mov     ah,EMSEG:[edi].bTag
+	and	ah,not 1		;Ignore single vs. double on dividend
+	cmp	ax,1
+.erre	bTAG_VALID	eq	1
+.erre	bTAG_SNGL	eq	0
+	jz	DivDouble		;Divisor was double
+	ja	TwoOpResultSet
+;.erre	DivSingle eq $			;Fall into DivSingle
+
+;*********
+DivSingle:
+;*********
+;Computes op2/op1
+;Op2 is double, op1 is single (low 32 bits are zero)
+;The two XCHGs move the divisor (op1) to edi = high mantissa dword and
+;ebx = ExpSgn while freeing ecx to hold the op2 pointer; the dividend is
+;then loaded into edx:eax with its ExpSgn in ecx.  Falls into
+;DivSingleReg.
+	xchg	edi,ebx			;Mantissa in edi, op2 ptr to ebx
+	xchg	ebx,ecx			;ExpSgn to ebx, op2 ptr to ecx
+	mov	edx,EMSEG:[ecx].lManHi
+	mov	eax,EMSEG:[ecx].lManLo
+	mov	ecx,EMSEG:[ecx].ExpSgn	;Op2 loaded
+
+DivSingleReg:
+;64-bit dividend divided by a 32-bit divisor mantissa.
+;dividend mantissa in edx:eax, exponent in high ecx, sign in ch bit 7
+;divisor mantissa in edi, exponent in high ebx, sign in bh bit 7
+;On exit (jump through [RoundMode]): quotient in ebx:esi, round/sticky
+;information in eax, result sign and exponent in ecx.
+
+	xor	ch,bh			;Compute result sign
+	xor	bx,bx			;Clear out sign and tag
+	sub	ecx,1 shl 16		;Exponent adjustment needed
+	sub	ecx,ebx			;Compute result exponent
+.erre	TexpBias eq 0			;Exponents not biased
+	jo	SDivBigUnderflow	;Dividing denormal by large number
+ContSdiv:
+
+;If dividend >= divisor, the DIV instruction will overflow.  Check for
+;this condition and shift the dividend right one bit if necessary.
+;
+;In previous versions of this algorithm for 24-bit and 53-bit mantissas,
+;this shift was always performed without a test.  This meant that a 1-bit
+;normalization might be required at the end.  This worked fine because
+;32 or 64 bits were calculated, so extra precision was available for
+;normalization.  However, this version needs all 64 bits that are calculated, 
+;so we can't afford a normalization shift at the end.  This test tells us
+;up front how to align so we'll be normalized.
+	xor	ebx,ebx			;Extend dividend
+	cmp	edi,edx			;Will DIV overflow?
+	ja	DoSdiv			;No, we're safe
+	shrd	ebx,eax,1
+	shrd	eax,edx,1
+	shr	edx,1
+	add	ecx,1 shl 16		;Bump exponent to account for shift
+DoSdiv:
+;Two chained 64/32 divides produce the 64-bit quotient: the remainder of
+;the first (in edx) becomes the high half of the second dividend, with
+;the dividend extension from ebx as its low half.
+	div	edi
+	xchg	ebx,eax			;Save quotient in ebx, extend remainder
+	div	edi
+	mov	esi,eax
+;We have a 64-bit quotient in ebx:esi.  Now compare remainder*2 with divisor
+;to compute round and sticky bits.
+	mov	eax,-1			;Set round and sticky bits
+	shl	edx,1			;Double remainder
+	jc	RoundJmp		;If too big, round & sticky set
+	cmp	edx,edi			;Is remainder*2 > divisor?
+	ja	RoundJmp
+
+;Observe, oh wondering one, how you can assume the result of this last
+;compare is not equality.  Use the following notation: n=numerator,
+;d=denominator,q=quotient,r=remainder,b=base(2^64 here).  If
+;initially we had n < d then there was no shift and we will find q and r
+;so that q*d+r=n*b, if initially we had n >= d then there was a shift and
+;we will find q and r so that q*d+r=n*b/2.  If we have equality here
+;then r=d/2  ==>  n={possibly 2*}(2*q+1)*d/(2*b), since this can only
+;be integral if d is a multiple of b, but by definition b/2 <= d < b, we
+;have a contradiction.	Equality is thus impossible at this point.
+
+;No round bit.  eax becomes 1 (sticky only) if the remainder is
+;non-zero, or 0 if the division was exact.
+	cmp	edx,1			;Check for zero remainder
+	sbb	eax,-2			;eax==0 if CY, ==1 if NC (was -1)
+RoundJmp:
+	jmp	EMSEG:[RoundMode]
+
+;*******************************************************************************
+
+DDivBigUnderflow:
+;Exponent subtraction in DivDoubleReg overflowed.
+;Overflow flag set could only occur with denormals (true exp < -32768)
+;Flag Underflow.  If the exception is masked the result is zero;
+;otherwise the exponent is adjusted by Underbias and the division
+;continues.
+	or	EMSEG:[CURerr],Underflow
+	test	EMSEG:[CWmask],Underflow	;Is exception masked?
+	jnz	UnderflowZero		;Yes, return zero (in emfmul.asm)
+	add	ecx,Underbias shl 16	;Fix up exponent
+	jmp	ContDdiv		;Continue with divide
+
+DivrDoubleSetFlag:
+;Special entry point used by FPATAN to set bit 6 of flag dword pushed
+;on stack before call.
+;Falls into DivrDouble.
+	or	byte ptr [esp+4],40H
+;*********
+DivrDouble:
+;*********
+;Computes op1/op2
+;Moves the dividend (op1, in registers) to edx:eax and loads the divisor
+;(op2, at [edi]) into ebx = ExpSgn, esi:edi = mantissa, as DivDoubleReg
+;expects.  ecx still holds the dividend's ExpSgn.
+	mov	edx,ebx
+	mov	eax,esi			;Mantissa in edx:eax
+	mov	ebx,EMSEG:[edi].ExpSgn
+	mov	esi,EMSEG:[edi].lManHi
+	mov	edi,EMSEG:[edi].lManLo
+	jmp	short DivDoubleReg
+
+HighHalfEqual:
+;edx:eax:ebp = dividend
+;esi:edi = divisor
+;ecx = exponent and sign of result
+;
+;High half of dividend is equal to high half of divisor.  This will cause
+;the DIV instruction to overflow.  If whole dividend >= whole divisor, then
+;we just shift the dividend right 1 bit.
+	cmp	eax,edi			;Is dividend >= divisor?
+	jae	ShiftDividend		;Yes, divide it by two
+;DIV instruction would overflow, so skip it and calculate the effective
+;result.  Assume a quotient of 2^32-1 and calculate the remainder.  See
+;detailed comments under MaxQuo below--this is a copy of that code.
+;The push matches the one at DoDdiv, which this path bypasses; DDivEnd
+;pops it.
+	push	ecx			;Save exp. and sign
+	mov	ebx,-1			;Max quotient digit
+	sub	eax,edi			;Calculate correct remainder
+;Currently edx == esi, but the next instruction ensures that is no longer
+;true, since eax != 0.  This will allow us to skip the MaxQuo check at
+;DivFirstDigit.
+	add	edx,eax			;Should set CY if quotient fit
+	mov	eax,edi			;edx:eax has new remainder
+	jc	ComputeSecond		;Remainder was positive
+;Quotient doesn't fit.  Note that we can no longer ensure that edx != esi
+;after making a correction.
+	mov	ecx,edx			;Need remainder in ecx:eax
+	jmp	DivCorrect1
+
+;*********
+DivDouble:
+;*********
+;Computes op2/op1
+;Moves the divisor (op1, in registers) to esi:edi with its ExpSgn in
+;ebx, then loads the dividend (op2, at [edi]) into ecx = ExpSgn,
+;edx:eax = mantissa.  Falls into DivDoubleReg.
+	mov	eax,edi			;Move op2 pointer
+	mov	edi,esi
+	mov	esi,ebx			;Mantissa in esi:edi
+	mov	ebx,ecx			;ExpSgn to ebx
+	mov	ecx,EMSEG:[eax].ExpSgn	;Op2 loaded
+	mov	edx,EMSEG:[eax].lManHi
+	mov	eax,EMSEG:[eax].lManLo
+
+DivDoubleReg:
+;64-bit dividend divided by a 64-bit divisor mantissa.
+;dividend mantissa in edx:eax, exponent in high ecx, sign in ch bit 7
+;divisor mantissa in esi:edi, exponent in high ebx, sign in bh bit 7
+;On exit (jump through [RoundMode]): quotient in ebx:esi, round/sticky
+;information in eax, result sign and exponent in ecx.  ebp and edx are
+;used as scratch, and the result ExpSgn is kept on the stack while ecx
+;is in use.
+
+	xor	ch,bh			;Compute result sign
+	xor	bx,bx			;Clear out sign and tag
+	sub	ecx,1 shl 16		;Exponent adjustment needed
+	sub	ecx,ebx			;Compute result exponent
+.erre	TexpBias eq 0			;Exponents not biased
+	jo	DDivBigUnderflow	;Dividing denormal by large number
+ContDdiv:
+
+;If dividend >= divisor, we must shift the dividend right one bit.
+;This will ensure the result is normalized.
+;
+;In previous versions of this algorithm for 24-bit and 53-bit mantissas,
+;this shift was always performed without a test.  This meant that a 1-bit
+;normalization might be required at the end.  This worked fine because
+;32 or 64 bits were calculated, so extra precision was available for
+;normalization.  However, this version needs all 64 bits that are calculated, 
+;so we can't afford a normalization shift at the end.  This test tells us
+;up front how to align so we'll be normalized.
+	xor	ebp,ebp			;Extend dividend
+	cmp	esi,edx			;Divisor high > dividend high?
+	ja	DoDdiv
+	jz	HighHalfEqual		;Go compare low halves
+ShiftDividend:
+	shrd	ebp,eax,1
+	shrd	eax,edx,1
+	shr	edx,1
+	add	ecx,1 shl 16		;Bump exponent to account for shift
+DoDdiv:
+	push	ecx			;Save exp. and sign
+
+;edx:eax:ebp = dividend
+;esi:edi = divisor
+;
+;Division algorithm from Knuth vol. 2, p. 237, using 32-bit "digits":
+;Guess a quotient digit by dividing two MSDs of dividend by the MSD of
+;divisor.  If divisor is >= 1/2 the radix (radix = 2^32 in this case), then
+;this guess will be no more than 2 larger than the correct value of that
+;quotient digit (and never smaller).  Divisor meets magnitude condition 
+;because it's normalized.
+
+	div	esi			;Guess first quotient "digit"
+
+;Check out our guess.  
+;Currently, remainder in edx = dividend - (quotient * high half divisor).
+;The definition of remainder is dividend - (quotient * all divisor).  So
+;if we subtract (quotient * low half divisor) from edx, we'll get
+;the true remainder.  If it's negative, our guess was too big.
+
+	mov	ebx,eax			;Save quotient
+	mov	ecx,edx			;Save remainder
+	mul	edi			;Quotient * low half divisor
+	sub	ebp,eax			;Subtract from dividend extension
+	sbb	ecx,edx			;Subtract from remainder
+	mov	eax,ebp			;Low remainder to eax
+	jnc	DivFirstDigit		;Was quotient OK?
+DivCorrect1:
+;Also entered from HighHalfEqual with ebx = -1 and the negative
+;remainder in ecx:eax.  A carry out of the add means the remainder has
+;become non-negative again.
+	dec	ebx			;Quotient was too big
+	add	eax,edi			;Add divisor back into remainder
+	adc	ecx,esi
+	jnc	DivCorrect1		;Repeat if quotient is still too big
+DivFirstDigit:
+	cmp	ecx,esi			;Would DIV instruction overflow?
+	jae	short MaxQuo		;Yes, figure alternate quotient
+	mov	edx,ecx			;Remainder back to edx:eax
+
+;Compute 2nd quotient "digit"
+
+ComputeSecond:
+;edx:eax = remainder after the first digit, ebx = first quotient digit.
+	div	esi			;Guess 2nd quotient "digit"
+	mov	ebp,eax			;Save quotient
+	mov	ecx,edx			;Save remainder
+	mul	edi			;Quotient * low half divisor
+	neg	eax			;Subtract from dividend extended with 0
+	sbb	ecx,edx			;Subtract from remainder
+	jnc	DivSecondDigit		;Was quotient OK?
+DivCorrect2:
+;Also entered from MaxQuo with ebp = -1 and the negative remainder in
+;ecx:eax.
+	dec	ebp			;Quotient was too big
+	add	eax,edi			;Add divisor back into remainder
+	adc	ecx,esi
+	jnc	DivCorrect2		;Repeat if quotient is still too big
+DivSecondDigit:
+;ebx:ebp = quotient
+;ecx:eax = remainder
+;esi:edi = divisor
+;Now compare remainder*2 with divisor to compute round and sticky bits.
+	mov	edx,-1			;Set round and sticky bits
+	shld	ecx,eax,1		;Double remainder
+	jc	DDivEnd			;If too big, round & sticky set
+	shl	eax,1
+	sub	edi,eax
+	sbb	esi,ecx			;Subtract remainder*2 from divisor
+	jb	DDivEnd			;If <0, use round & sticky bits set
+
+;Observe, oh wondering one, how you can assume the result of this last
+;compare is not equality.  Use the following notation: n=numerator,
+;d=denominator,q=quotient,r=remainder,b=base(2^64 here).  If
+;initially we had n < d then there was no shift and we will find q and r
+;so that q*d+r=n*b, if initially we had n >= d then there was a shift and
+;we will find q and r so that q*d+r=n*b/2.  If we have equality here
+;then r=d/2  ==>  n={possibly 2*}(2*q+1)*d/(2*b), since this can only
+;be integral if d is a multiple of b, but by definition b/2 <= d < b, we
+;have a contradiction.	Equality is thus impossible at this point.
+
+;No round bit, but set sticky bit if remainder != 0.
+	or	eax,ecx			;Is remainder zero?
+	add	eax,-1			;Set CY if non-zero
+	adc	edx,1			;edx==0 if NC, ==1 if CY (was -1)
+DDivEnd:
+	mov	esi,ebp			;Result in ebx:esi
+	mov	eax,edx			;Round/sticky bits to eax
+	pop	ecx			;Recover sign/exponent
+	jmp	EMSEG:[RoundMode]
+
+
+MaxQuo:
+;ebx = first quotient "digit"
+;ecx:eax = remainder
+;esi:edi = divisor
+;On exit, ebp = second quotient "digit"
+;
+;Come here if divide instruction would overflow. This must mean that ecx == esi,
+;i.e., the high halves of the dividend and divisor are equal. Assume a result
+;of 2^32-1, thus remainder = dividend - ( divisor * (2^32-1) )
+; = dividend - divisor * 2^32 + divisor. Since the high halves of the dividend
+;and divisor are equal, dividend - divisor * 2^32 can be computed by
+;subtracting only the low halves. When adding divisor (in esi) to this, note
+;that ecx == esi, and we want the result in ecx anyway.
+;
+;Note also that since the dividend is a previous remainder, the
+;dividend - divisor * 2^32 calculation must always be negative. Thus the 
+;addition of divisor back to it should generate a carry if it goes positive.
+;
+;With carry the digit is accepted and control rejoins DivSecondDigit;
+;without it DivCorrect2 reduces the digit until the remainder is
+;non-negative.
+
+	mov	ebp,-1			;Max quotient digit
+	sub	eax,edi			;Calculate correct remainder
+	add	ecx,eax			;Should set CY if quotient fit
+	mov	eax,edi			;ecx:eax has new remainder
+	jc	DivSecondDigit		;Remainder was positive
+	jmp	DivCorrect2
diff --git a/private/ntos/dll/i386/emfinit.asm b/private/ntos/dll/i386/emfinit.asm
new file mode 100644
index 000000000..baf0f0598
--- /dev/null
+++ b/private/ntos/dll/i386/emfinit.asm
@@ -0,0 +1,46 @@
+	subttl	emfinit.asm - Emulator initialization and FINIT instruction
+        page
+;*******************************************************************************
+;emfinit.asm - Emulator initialization and FINIT instruction
+;
+;        Microsoft Confidential
+;
+;	 Copyright (c) Microsoft Corporation 1991
+;        All Rights Reserved
+;
+;Revision History:
+;
+; []	09/05/91  TP	Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+;Emulator initialization and the FINIT instruction share this code.
+;It tags all Numlev stack registers empty with zero mantissas, sets the
+;stack top to INITstk, clears the status word (both the emulator's copy
+;and the copy saved at [esp+4].OldStatus) and the previous code/data
+;offsets, loads the initial control word, selects Round64near as the
+;rounding routine, points ZeroVector at SaveResult and sets Einstall
+;to 1.
+;Uses eax, ecx, esi.
+EM_ENTRY eEmulatorInit
+EmulatorInit:
+EM_ENTRY eFINIT
+eFINIT:
+	mov	esi,BEGstk
+	mov	EMSEG:[CURstk],INITstk
+	mov	ecx,Numlev
+	xor	eax,eax
+
+EmInitLoop:
+;One iteration per stack register, starting at BEGstk.
+	mov	EMSEG:[esi].ExpSgn,bTAG_EMPTY		;Exponent and sign are zero
+	mov	EMSEG:[esi].lManHi,eax
+	mov	EMSEG:[esi].lManLo,eax
+
+	add	esi, Reg87Len
+	loop	EmInitLoop
+
+	mov	EMSEG:[StatusWord],ax			; clear status word
+	mov	[esp+4].OldStatus,ax			; clear saved status word.
+	mov	EMSEG:[PrevCodeOff],eax
+	mov	EMSEG:[PrevDataOff],eax
+	mov	EMSEG:[LongControlWord],InitControlWord
+	mov	eax,offset Round64near
+	mov	EMSEG:[RoundMode],eax			;Address of round routine
+	mov	EMSEG:[TransRound],eax			;Address of round routine
+	mov	EMSEG:[SavedRoundMode],eax
+	mov	EMSEG:[ZeroVector],offset SaveResult
+	mov	EMSEG:[Einstall], 1
+	ret
diff --git a/private/ntos/dll/i386/emfmisc.asm b/private/ntos/dll/i386/emfmisc.asm
new file mode 100644
index 000000000..6adc08dc3
--- /dev/null
+++ b/private/ntos/dll/i386/emfmisc.asm
@@ -0,0 +1,81 @@
+        subttl  emfmisc.asm - FABS, FCHS, FFREE, FXCH
+        page
+;*******************************************************************************
+;emfmisc.asm - FABS, FCHS, FFREE, FXCH
+;
+;        Microsoft Confidential
+;
+;	 Copyright (c) Microsoft Corporation 1991
+;        All Rights Reserved
+;
+;Purpose:
+;       FABS, FCHS, FFREE, FXCH instructions
+;Inputs:
+;	edi = [CURstk]
+;	esi = pointer to st(i) from instruction field
+;
+;Revision History:
+;
+; []	09/05/91  TP	Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+;******
+EM_ENTRY eFABS
+eFABS:
+;******
+;FABS.  On entry edi = [CURstk] (see file header).
+;An empty register is routed to StackError; otherwise the whole sign byte
+;is cleared and mantissa, exponent and tag are left untouched.
+	cmp	EMSEG:[edi].bTag,bTAG_EMPTY
+	jz	StackError		;in emarith.asm
+	mov	EMSEG:[edi].bSgn,0		;Turn sign bit off
+	ret
+
+;******
+EM_ENTRY eFCHS
+eFCHS:
+;******
+;FCHS.  On entry edi = [CURstk] (see file header).
+;An empty register is routed to StackError; otherwise the sign byte is
+;complemented in place.
+;NOTE(review): NOT inverts all eight bits of bSgn, not only the sign bit;
+;presumably only bit 7 is ever examined -- confirm against bSign users.
+	cmp	EMSEG:[edi].bTag,bTAG_EMPTY
+	jz	StackError		;in emarith.asm
+	not	EMSEG:[edi].bSgn		;Flip the sign
+	ret
+
+;******
+EM_ENTRY eFFREE
+eFFREE:
+;******
+;FFREE.  On entry esi = pointer to st(i) (see file header).
+;Only the tag byte is rewritten; no check is made on the previous tag and
+;the rest of the register is left as it was.
+	mov	EMSEG:[esi].bTag,bTAG_EMPTY
+	ret
+
+;******
+EM_ENTRY eFXCH
+eFXCH:
+;******
+;FXCH.  On entry edi = [CURstk] and esi = pointer to st(i) (see file header).
+;If either register is empty, ReturnIndefinite is called for it first and
+;the exchange only goes ahead when that call comes back with ZF clear
+;(exception masked).  The exchange itself swaps the three dwords at
+;offsets 0, 4 and 8 of the two registers; eax is the only scratch register.
+	cmp	EMSEG:[esi].bTag,bTAG_EMPTY
+	jz	XchDestEmpty
+XchgChkSrc:
+	cmp	EMSEG:[edi].bTag,bTAG_EMPTY
+	jz	XchSrcEmpty
+DoSwap:
+;Swap [esi] with [edi]
+	mov	eax,EMSEG:[edi]
+	xchg	eax,EMSEG:[esi]
+	mov	EMSEG:[edi],eax
+	mov	eax,EMSEG:[edi+4]
+	xchg	eax,EMSEG:[esi+4]
+	mov	EMSEG:[edi+4],eax
+	mov	eax,EMSEG:[edi+8]
+	xchg	eax,EMSEG:[esi+8]
+	mov	EMSEG:[edi+8],eax
+	ret
+
+XchDestEmpty:
+;st(i) is empty; esi already points at it
+	call	ReturnIndefinite	;in emarith.asm - ZF set if unmasked
+	jnz	XchgChkSrc		;Continue if masked
+	ret
+
+XchSrcEmpty:
+;ST(0) is empty.  The pointers are exchanged around the call and put back
+;afterwards; XCHG leaves the flags alone, so ZF from ReturnIndefinite is
+;still valid at the JNZ.
+	xchg	edi,esi			;pass pointer in esi
+	call	ReturnIndefinite	;in emarith.asm - ZF set if unmasked
+	xchg	edi,esi
+	jnz	DoSwap			;Continue if masked
+	ret
diff --git a/private/ntos/dll/i386/emfmul.asm b/private/ntos/dll/i386/emfmul.asm
new file mode 100644
index 000000000..2a5fcca9f
--- /dev/null
+++ b/private/ntos/dll/i386/emfmul.asm
@@ -0,0 +1,238 @@
+	subttl  emfmul.asm - Multiplication
+	page
+;*******************************************************************************
+;	 Copyright (c) Microsoft Corporation 1991
+;	 All Rights Reserved
+;
+;emfmul.asm - long double multiply
+;	by Tim Paterson
+;
+;Purpose:
+;	Long double multiplication.
+;Inputs:
+;	ebx:esi = op1 mantissa
+;	ecx = op1 sign in bit 15, exponent in high half
+;	edi = pointer to op2 and result location
+;	[Result] = edi
+;
+;	Exponents are unbiased.  Denormals have been normalized using
+;	this expanded exponent range.  Neither operand is allowed to be zero.
+;Outputs:
+;	Jumps to [RoundMode] to round and store result.
+;
+;Revision History:
+;
+; []	09/05/91  TP	Initial 32-bit version.
+;
+;*******************************************************************************
+
+;Dispatch table for multiply
+;
+;One operand has been loaded into ecx:ebx:esi ("source"), the other is
+;pointed to by edi ("dest").  
+;
+;Tag of source is shifted.  Tag values are as follows:
+
+.erre	TAG_SNGL	eq	0	;SINGLE: low 32 bits are zero
+.erre	TAG_VALID	eq	1
+.erre	TAG_ZERO	eq	2
+.erre	TAG_SPCL	eq	3	;NAN, Infinity, Denormal, Empty
+
+;Any special case routines not found in this file are in emarith.asm
+
+;Seventeen dword entries: sixteen for the source-tag x dest-tag
+;combinations (source tag varies slowest), plus one extra entry for the
+;two-infinities case.
+tFmulDisp	label	dword		;Source (reg)	Dest (*[edi])
+	dd	MulSingle		;single		single
+	dd	MulDouble		;single		double
+	dd	XorDestSign		;single		zero
+	dd	MulSpclDest		;single		special
+	dd	MulDouble		;double		single
+	dd	MulDouble		;double		double
+	dd	XorDestSign		;double		zero
+	dd	MulSpclDest		;double		special
+	dd	XorSourceSign		;zero		single
+	dd	XorSourceSign		;zero		double
+	dd	XorDestSign		;zero		zero
+	dd	MulSpclDest		;zero		special
+	dd	MulSpclSource		;special	single
+	dd	MulSpclSource		;special	double
+	dd	MulSpclSource		;special	zero
+	dd	TwoOpBothSpcl		;special	special
+	dd	XorDestSign		;Two infinities
+
+
+;Memory-operand multiply entry points.  Each one pushes the address of
+;MulSetResult as a return address and jumps to the matching operand
+;loader, so the loader's RET lands in MulSetResult.
+;Presumably the loader leaves the converted operand in ecx:ebx:esi, which
+;is what MulSetResult expects -- confirm against the loaders.
+EM_ENTRY eFIMUL16
+eFIMUL16:
+	push	offset MulSetResult
+	jmp	Load16Int			;Returns to MulSetResult
+
+EM_ENTRY eFIMUL32
+eFIMUL32:
+	push	offset MulSetResult
+	jmp	Load32Int			;Returns to MulSetResult
+
+EM_ENTRY eFMUL32
+eFMUL32:
+	push	offset MulSetResult
+	jmp	Load32Real			;Returns to MulSetResult
+
+EM_ENTRY eFMUL64
+eFMUL64:
+	push	offset MulSetResult
+	jmp	Load64Real			;Returns to MulSetResult
+
+;Register-operand multiply entry points.  They fall through one another:
+;  eFMULPreg pushes PopWhenDone as an extra return address, then
+;  eFMULreg  exchanges esi and edi, then
+;  eFMULtop  loads the operand at [esi] into ecx:ebx:esi.
+;The result pointer is whatever is in edi when MulSetResult is reached.
+EM_ENTRY eFMULPreg
+eFMULPreg:
+	push	offset PopWhenDone
+
+EM_ENTRY eFMULreg
+eFMULreg:
+	xchg	esi,edi
+
+EM_ENTRY eFMULtop
+eFMULtop:
+	mov	ecx,EMSEG:[esi].ExpSgn
+	mov	ebx,EMSEG:[esi].lManHi
+	mov	esi,EMSEG:[esi].lManLo
+MulSetResult:
+;ecx:ebx:esi = source operand, edi = dest operand and result location.
+;OR the two tags together: 0 means both are SINGLE, 1 means at least one
+;is VALID (double) and neither is zero or special, and anything larger
+;involves a zero or special operand.
+	mov     ebp,offset tFmulDisp
+	mov	EMSEG:[Result],edi		;Save result pointer
+	mov	al,cl
+	or	al,EMSEG:[edi].bTag
+	cmp	al,bTAG_VALID
+.erre	bTAG_VALID	eq	1
+.erre	bTAG_SNGL	eq	0
+	jz	MulDouble
+	ja	TwoOpResultSet		;Zero/special: ebp holds the table address
+;.erre	MulSingle eq $			;Fall into MulSingle
+
+
+;*********
+MulSingle:
+;*********
+;Both operands are tagged SINGLE (low 32 mantissa bits zero), so one
+;32x32 multiply of the high mantissa dwords gives the whole product.
+;In:  ecx:ebx = source ExpSgn and high mantissa, edi = pointer to dest.
+;Out: jumps to [RoundMode] (directly or through ShiftOneBit) with the
+;     product in ebx:esi:eax, or to [ZeroVector] on masked underflow.
+
+	mov	edx,EMSEG:[edi].ExpSgn
+	mov	eax,EMSEG:[edi].lManHi
+
+;op1 mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
+;op2 high mantissa in eax, exponent in high edx, sign in dh bit 7
+
+	xor	ch,dh			;Compute result sign
+	xor	dx,dx			;Clear out sign and tag
+	add	ecx,edx			;Result exponent
+.erre	TexpBias eq 0			;Exponents not biased
+	jo	SMulBigUnderflow	;Multiplying two denormals
+ContSmul:
+
+;Value in ecx is correct exponent if result is not normalized.
+;If result comes out normalized, 1 will be added.
+
+	mul	ebx			;Compute product
+	mov	ebx,edx
+	mov	esi,eax
+	xor	eax,eax			;Extend with zero
+
+;Result in ebx:esi:eax
+;ecx = exponent minus one in high half, sign in ch
+	or	ebx,ebx			;Check for normalization
+	jns	ShiftOneBit		;In emfadd.asm
+	add	ecx,1 shl 16		;Adjust exponent
+	jmp	EMSEG:[RoundMode]
+
+SMulBigUnderflow:
+;The exponent sum overflowed 16 bits.  The exponent is re-biased before
+;the mask test; on the masked path the re-biased value is discarded by
+;the AND at SignedZero.
+	or	EMSEG:[CURerr],Underflow
+	add	ecx,Underbias shl 16	;Fix up exponent
+	test	EMSEG:[CWmask],Underflow	;Is exception masked?
+	jz	ContSmul		;No, continue with multiply
+UnderflowZero:
+	or	EMSEG:[CURerr],Precision
+SignedZero:
+;Return a zero that keeps only the sign bit already in ch
+	and	ecx,bSign shl 8		;Preserve sign bit
+	xor	ebx,ebx
+	mov	esi,ebx
+	mov	cl,bTAG_ZERO
+	jmp	EMSEG:[ZeroVector]
+
+;*******************************************************************************
+
+DMulBigUnderflow:
+;Overflow flag set could only occur with denormals (true exp < -32768)
+	or	EMSEG:[CURerr],Underflow
+	test	EMSEG:[CWmask],Underflow	;Is exception masked?
+	jnz	UnderflowZero		;Yes, return zero
+	add	ecx,Underbias shl 16	;Fix up exponent
+	jmp	ContDmul		;Continue with multiply
+
+PolyMulToZero:
+	ret				;Return the zero in registers
+
+PolyMulDouble:
+;This entry point is used by polynomial evaluator.
+;It checks the operand in registers for zero.
+	cmp	cl,bTAG_ZERO		;Is the register operand zero?
+	jz	PolyMulToZero
+
+;*********
+MulDouble:
+;*********
+;Full 64x64 mantissa multiply.
+;In:  ecx:ebx:esi = op1, edi = pointer to op2.
+;Out: jumps to [RoundMode] (directly or through ShiftOneBit) with the
+;     product in ebx:esi:eax; ebp, edx and edi are used as scratch.
+
+	mov	eax,EMSEG:[edi].ExpSgn
+	mov	edx,EMSEG:[edi].lManHi
+	mov	edi,EMSEG:[edi].lManLo
+
+MulDoubleReg:				;Entry point used by transcendentals
+;op1 mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
+;op2 mantissa in edx:edi, exponent in high eax, sign in ah bit 7
+
+	xor	ch,ah			;Compute result sign
+	xor	ax,ax			;Clear out sign and tag
+	add	ecx,eax			;Result exponent
+.erre	TexpBias eq 0			;Exponents not biased
+	jo	DMulBigUnderflow	;Multiplying two denormals
+ContDmul:
+
+;Value in ecx is correct exponent if result is not normalized.
+;If result comes out normalized, 1 will be added.
+
+	mov	ebp,edx			;edx is used by MUL instruction
+
+;Generate and sum partial products, from least to most significant
+
+	mov	eax,edi
+	mul	esi			;Lowest partial product
+	add	eax,-1			;CY set IFF eax<>0
+	sbb	cl,cl			;Sticky bit: 0 if zero, -1 if nz
+	xchg	edi,edx			;Save high result
+
+;First product: cl reflects low dword non-zero (sticky bit), edi has high dword
+;edx now holds op2's low mantissa dword (from the XCHG)
+
+	mov	eax,ebx
+	mul	edx
+	add	edi,eax
+	adc	edx,0			;Sum first results
+	xchg	edx,esi			;High result to esi
+
+;Second product: accumulated in esi:edi:cl
+;edx now holds op1's low mantissa dword (from the XCHG)
+
+	mov	eax,ebp			;Next mult. to eax
+	mul	edx
+	add	edi,eax			;Sum low results
+	adc	esi,edx			;Sum high results
+	mov	eax,ebx
+	mov	ebx,0			;Preserve CY flag
+	adc	ebx,ebx			;Keep carry out of high sum
+
+;Third product: accumulated in ebx:esi:edi:cl
+
+	mul	ebp
+	add	esi,eax
+	adc	ebx,edx
+	mov	eax,edi
+	or	al,cl			;Collapse sticky bits into eax
+
+;Result in ebx:esi:eax
+;ecx = exponent minus one in high half, sign in ch
+MulDivNorm:
+	or	ebx,ebx			;Check for normalization
+	jns	ShiftOneBit		;In emfadd.asm
+	add	ecx,1 shl 16		;Adjust exponent
+	jmp	EMSEG:[RoundMode]
diff --git a/private/ntos/dll/i386/emfprem.asm b/private/ntos/dll/i386/emfprem.asm
new file mode 100644
index 000000000..3cb8670bb
--- /dev/null
+++ b/private/ntos/dll/i386/emfprem.asm
@@ -0,0 +1,407 @@
+	subttl emfprem.asm - FPREM and FPREM1 instructions
+	page
+;*******************************************************************************
+;emfprem.asm - FPREM and FPREM1 instructions
+;	by Tim Paterson
+;
+;        Microsoft Confidential
+;
+;	 Copyright (c) Microsoft Corporation 1991
+;	 All Rights Reserved
+;
+;Inputs:
+;	edi = [CURstk]
+;	ST(1) loaded into ebx:esi & ecx
+;
+;Revision History:
+;
+; []	09/05/91  TP	Initial 32-bit version.
+;
+;*******************************************************************************
+
+;Dispatch table for remainder
+;
+;One operand has been loaded into ecx:ebx:esi ("source"), the other is
+;pointed to by edi ("dest").  
+;
+;Tag of source is shifted.  Tag values are as follows:
+
+.erre   TAG_SNGL        eq      0       ;SINGLE: low 32 bits are zero
+.erre   TAG_VALID       eq      1
+.erre   TAG_ZERO        eq      2
+.erre   TAG_SPCL        eq      3       ;NAN, Infinity, Denormal, Empty
+
+;Any special case routines not found in this file are in emarith.asm
+
+					;Divisor	Dividend
+;Seventeen dword entries: sixteen for the divisor-tag x dividend-tag
+;combinations (divisor tag varies slowest), plus one extra entry for the
+;two-infinities case.
+tFpremDisp	label	dword		;Source(ST(1))	Dest (ST(0))
+	dd	PremDouble		;single		single
+	dd	PremDouble		;single		double
+	dd	PremX			;single		zero
+	dd	PremSpclDest		;single		special
+	dd	PremDouble		;double		single
+	dd	PremDouble		;double		double
+	dd	PremX			;double		zero
+	dd	PremSpclDest		;double		special
+	dd	ReturnIndefinite	;zero		single
+	dd	ReturnIndefinite	;zero		double
+	dd	ReturnIndefinite	;zero		zero
+	dd	PremSpclDest		;zero		special
+	dd	PremSpclSource		;special	single
+	dd	PremSpclSource		;special	double
+	dd	PremSpclSource		;special	zero
+	dd	TwoOpBothSpcl		;special	special
+	dd	ReturnIndefinite	;Two infinities
+
+
+PremSpclDone:
+;Drop one dword return address and return through the one beneath it.
+;The full 32-bit stack pointer is adjusted: a 16-bit add would lose the
+;carry into the upper half of ESP whenever the stack sits just below a
+;64K boundary.
+	add	esp,4			;Clean off return address for normal
+	ret
+
+;***
+PremSpclDest:
+;Dividend (ST(0), at [edi]) has a special tag.  An infinite dividend is
+;an invalid operation; every other special goes to the common handler.
+	mov	al,EMSEG:[edi].bTag		;Pick up tag
+	cmp	al,bTAG_INF		;Dividing infinity?
+	jz	ReturnIndefinite	;Invalid operation if so
+	jmp	SpclDest		;In emarith.asm
+
+;***
+PremSpclSource:
+;Divisor (tag in cl) is special.  Only an infinite divisor is handled
+;here, by falling into PremX; anything else goes to the common handler.
+	cmp	cl,bTAG_INF		;Dividing by infinity?
+	jnz	SpclSource		;in emarith.asm
+PremX:
+;Return Dest unchanged, quotient = 0
+	mov     EMSEG:[SWcc],0
+	ret
+;*******************************************************************************
+
+;Map quotient bits to condition codes
+
+;Q0..Q2 name the condition-code bit that carries each of the three low
+;quotient bits.
+Q0	equ	C1
+Q1	equ	C3
+Q2	equ	C0
+
+;Eight-entry byte table indexed by the low three quotient bits; each entry
+;is the condition-code value written to [SWcc] at SavePremResult.
+MapQuo	label	byte
+	db	0
+	db	Q0
+	db	Q1
+	db	Q1+Q0
+	db	Q2
+	db	Q2+Q0
+	db	Q2+Q1
+	db	Q2+Q1+Q0
+
+Prem1Cont:
+
+;edx:eax = remainder, normalized
+;ebx:esi = divisor
+;ebp = quotient
+;edi = exponent difference, zero or less
+;ecx = 0 (positive sign)
+;
+;At this point, 0 <= remainder < divisor.  However, for FPREM1 we need
+; -divisor/2 <= remainder <= divisor/2.  If remainder = divisor/2, whether
+;we choose + or - is dependent on whichever gives us an even quotient
+;(the usual IEEE rounding rule).  Quotient must be incremented if we
+;use negative remainder.
+;
+;Falls into PremCont with ch = bSign when the remainder was negated.
+
+;The exponent difference is a signed quantity, so signed branches are
+;correct for this first comparison only.
+	cmp	edi,-1
+	jl	PremCont		;Remainder < divisor/2
+	jg	NegRemainExp0		;Remainder > divisor/2
+;Exponent is -1
+;Mantissas are unsigned 64-bit values, so they must be compared with
+;unsigned branches.  The low dwords can have any bit pattern; a signed
+;compare would misorder them whenever exactly one has bit 31 set.
+	cmp	edx,ebx
+	jb	PremCont		;Remainder < divisor/2
+	ja	NegRemain		;Remainder > divisor/2
+	cmp	eax,esi
+	jb	PremCont		;Remainder < divisor/2
+	ja	NegRemain		;Remainder > divisor/2
+;Remainder = divisor/2.  Ensure quotient is even
+	test	ebp,1			;Even?
+	jz	PremCont
+NegRemain:
+;Theoretically we subtract divisor from remainder once more, leaving us
+;with a negative remainder.  But since we use sign/magnitude representation,
+;we want the abs() of that with sign bit set--so subtract remainder from
+;(larger) divisor.  Note that exponent difference is -1, so we must align
+;binary points first.
+;The doubling carries out of 64 bits, but the subtraction that follows
+;borrows it back, so the 64-bit difference is exact.
+	add	esi,esi
+	adc	ebx,ebx			;Double divisor to align binary points
+NegRemainExp0:
+	sub	esi,eax
+	sbb	ebx,edx			;Subtract remainder
+	mov	eax,esi
+	mov	edx,ebx			;Result in edx:eax
+	inc	ebp			;Increase quotient
+;Must normalize result of subtraction
+;(ecx is scratch for BSR here; the sign is set in ch once we are done)
+	bsr	ecx,edx			;Look for 1 bit
+	jnz	@F
+	sub	edi,32
+	xchg	edx,eax			;Shift left 32 bits
+	bsr	ecx,edx
+@@:
+	lea     edi,[edi+ecx-31]        ;Fix up exponent for normalization
+	not     cl			;Low 5 bits = 31 - bit no. = shift count
+	shld	edx,eax,cl
+	shl	eax,cl
+	mov	ch,bSign		;Flip sign of remainder
+
+PremCont:
+;edx:eax = remainder, normalized
+;ebp = quotient
+;edi = exponent difference, zero or less
+;ch = sign
+;
+;Stores the remainder into the register at [CURstk] and the low three
+;quotient bits (through MapQuo) into [SWcc].
+	or	eax,eax			;Low bits zero?
+.erre	bTAG_VALID eq 1
+.erre	bTAG_SNGL eq 0
+	setnz   cl                      ;if low half==0 then cl=0 else cl=1
+	mov	esi,EMSEG:[CURstk]
+	mov     ebx,esi
+	NextStackElem   ebx,Prem	;ebx -> next stack element (the divisor)
+	add	di,EMSEG:[ebx].wExp		;Compute result exponent
+	cmp	di,IexpMin-IexpBias
+	jle	PremUnderflow
+SavePremResult:
+;cl = tag to store, di = exponent, ch = sign bit to toggle in the result
+	mov	EMSEG:[esi].lManLo,eax
+	xor	EMSEG:[esi].bSgn,ch
+	mov	EMSEG:[esi].lManHi,edx
+	and	ebp,7			;Keep last 3 bits of quotient only
+					;  and give write buffers a break
+	mov	EMSEG:[esi].wExp,di
+	mov	EMSEG:[esi].bTag,cl
+	mov	al,MapQuo[ebp]		;Get cond. codes for this quotient
+	mov	EMSEG:[SWcc],al
+	ret
+
+	NextStackWrap   ebx,Prem        ;Tied to NextStackElem above
+
+PremUnderflow:
+;Result exponent is at or below the minimum.  When the exception is
+;masked the result is stored with the denormal tag; no error bit is set
+;on that path.
+	test	EMSEG:[CWmask],Underflow	;Is exception unmasked?
+	jz	UnmaskedPremUnder
+	mov	cl,bTAG_DEN
+	jmp	SavePremResult
+
+UnmaskedPremUnder:
+;Only the low word of edi is stored by SavePremResult
+	add	edi,UnderBias		;Additional exp. bias for unmasked resp.
+	or	EMSEG:[CURerr],Underflow
+	jmp	SavePremResult
+
+;*******************************************************************************
+
+PremDouble:
+;edi = [CURstk]
+;ebx:esi = ST(1) mantissa, ecx = ExpSgn
+;
+;Loads the dividend from ST(0) and computes the exponent difference.
+;Returns through the remaining return address with the remainder in
+;edx:eax, the quotient in ebp, the exponent difference in edi and
+;ecx = 0, or exits through ExactPrem when the remainder is zero.
+
+;The full 32-bit stack pointer is adjusted: a 16-bit add would lose the
+;carry into the upper half of ESP whenever the stack sits just below a
+;64K boundary.
+	add	esp,4			;Clean off return address for special
+	mov	eax,EMSEG:[edi].lManLo
+	mov	edx,EMSEG:[edi].lManHi
+	movsx	edi,EMSEG:[edi].wExp
+	xor	ebp,ebp			;Quotient, in case we skip stage 1
+	sar	ecx,16			;Bring exponent down
+	sub	edi,ecx			;Get exponent difference
+	jl	ExitPremLoop		;If dividend is smaller, return it.
+
+;FPREM is performed in two stages.  The first stage is used only if the
+;exponent difference is greater than 31.  It reduces the exponent difference
+;by 32, and repeats until the difference is less than 32.  Note that
+;unlike the hardware FPREM instruction, we are not limited to reducing
+;the exponent by only 63--we just keep looping until it's done.
+;
+;The second stage performs ordinary 1-bit-at-a-time long division.
+;It stops when the exponent difference is zero, meaning we have an
+;integer quotient and the final remainder.
+;
+;edx:eax = dividend
+;ebx:esi = divisor
+;edi = exponent difference
+;ebp = 0 (initial quotient)
+
+	cmp	edi,32			;Do we need to do stage 1?
+	jl	FitDivisor		;No, start stage 2
+
+;FPREM stage 1
+;
+;Exponent difference is at least 32.  Use 32-bit division to compute
+;quotient and exact remainder, reducing exponent difference by 32.
+
+;DIV instruction will overflow if dividend >= divisor.  In this case,
+;subtract divisor from dividend to ensure no overflow.  This will change
+;the quotient, but that doesn't matter because we only need the last
+;3 bits of the quotient (and we're about to calculate 32 quotient bits).
+;This subtraction will not affect the remainder.
+
+	sub	eax,esi
+	sbb	edx,ebx
+	jnc	FpremReduce32		;Was dividend big?
+	add	eax,esi			;Restore dividend, it was smaller
+	adc	edx,ebx
+
+;Division algorithm from Knuth vol. 2, p. 237, using 32-bit "digits":
+;Guess a quotient digit by dividing two MSDs of dividend by the MSD of
+;divisor.  If divisor is >= 1/2 the radix (radix = 2^32 in this case), then
+;this guess will be no more than 2 larger than the correct value of that
+;quotient digit (and never smaller).  Divisor meets magnitude condition 
+;because it's normalized.
+;
+;This loop typically takes 117 clocks.
+
+;edx:eax = dividend
+;ebx:esi = divisor
+;edi = exponent difference
+;ebp = quotient (zero)
+
+FpremReduce32:
+;One pass of stage 1: produces one 32-bit quotient digit in ebp and
+;reduces the exponent difference in edi by 32.  ecx is scratch.
+;
+;We know that dividend < divisor, but it is still possible that 
+;high dividend == high divisor, which will cause the DIV instruction
+;to overflow.
+	cmp	edx,ebx			;Will DIV instruction overflow?
+	jae	PremOvfl
+	div	ebx			;Guess a quotient "digit"
+
+;Currently, remainder in edx = dividend - (quotient * high half divisor).
+;The definition of remainder is dividend - (quotient * all divisor).  So
+;if we subtract (quotient * low half divisor) from edx, we'll get
+;the true remainder.  If it's negative, our guess was too big.
+
+	mov	ebp,eax			;Save quotient
+	mov	ecx,edx			;Save remainder
+	mul	esi			;Quotient * low half divisor
+	neg	eax			;Subtract from dividend extended with 0
+	sbb	ecx,edx			;Subtract from remainder
+	mov	edx,ecx			;Remainder back to edx:eax
+	jnc	HavPremQuo		;Was quotient OK?
+FpremCorrect:
+;Remainder is negative: step the quotient down and add the divisor back
+;until the addition carries, i.e. the remainder is non-negative again.
+	dec	ebp			;Quotient was too big
+	add	eax,esi			;Add divisor back into remainder
+	adc	edx,ebx
+	jnc	FpremCorrect		;Repeat if quotient is still too big
+HavPremQuo:
+	sub	edi,32			;Exponent reduced
+	cmp	edi,32			;Exponent difference within 31?
+	jl	PremNormalize		;Do it a bit a time
+	or	edx,edx			;Check for zero remainder
+	jnz	FpremReduce32
+	or	eax,eax			;Remainder 0?
+	jz	ExactPrem
+;High dword of the remainder is zero: move the low dword up instead of
+;dividing, which accounts for 32 more bits of exponent difference.
+	xchg	edx,eax			;Shift left 32 bits
+	sub	edi,32			;Another 32 bits reduced
+	cmp	edi,32
+	jge	FpremReduce32
+	xor	ebp,ebp			;No quotient bits are valid
+	jmp	PremNormalize
+
+PremOvfl:
+;edx:eax = dividend
+;ebx:esi = divisor
+;On exit, ebp = second quotient "digit"
+;
+;Come here if divide instruction would overflow. This must mean that edx == ebx,
+;i.e., the high halves of the dividend and divisor are equal. Assume a result
+;of 2^32-1, thus remainder = dividend - ( divisor * (2^32-1) )
+; = dividend - divisor * 2^32 + divisor. Since the high halves of the dividend
+;and divisor are equal, dividend - divisor * 2^32 can be computed by
+;subtracting only the low halves. When adding divisor (in ebx) to this, note
+;that edx == ebx, and we want the result in edx anyway.
+;
+;Note also that since dividend < divisor, the
+;dividend - divisor * 2^32 calculation must always be negative. Thus the 
+;addition of divisor back to it should generate a carry if it goes positive.
+
+	mov	ebp,-1			;Max quotient digit
+	sub	eax,esi			;Calculate correct remainder
+	add	edx,eax			;Should set CY if quotient fit
+	mov	eax,esi			;edx:eax has new remainder
+					;(MOV leaves CY from the ADD intact)
+	jc	HavPremQuo		;Remainder was positive
+	jmp	FpremCorrect
+
+ExactPrem:
+;eax = 0
+;
+;Remainder is exactly zero: store a zero mantissa, zero exponent and the
+;ZERO tag into the register at [CURstk].  The sign byte is not written.
+	mov	esi,EMSEG:[CURstk]
+	mov	EMSEG:[esi].lManLo,eax
+	mov	EMSEG:[esi].lManHi,eax
+;The full 32-bit stack pointer is adjusted: a 16-bit add would lose the
+;carry into the upper half of ESP whenever the stack sits just below a
+;64K boundary.
+	add	esp,4			;Clean off first return address
+	mov	EMSEG:[esi].wExp,ax
+	mov	EMSEG:[esi].bTag,bTAG_ZERO
+	ret
+
+
+;FPREM stage 2
+;
+;Exponent difference is less than 32.  Use restoring long division to
+;compute quotient bits until exponent difference is zero.  Note that we
+;often get more than one bit/loop:  BSR is used to scan off leading
+;zeros each time around.  Since the divisor is normalized, we can
+;instantly compute a zero quotient bit for each leading zero bit.
+;
+;For reductions of 1 to 31 bits per loop, this loop requires 41 or 59 clocks
+;plus 3 clocks/bit (BSR time).  If we had to use this for 32-bit reductions
+;(without stage 1), we could expect (50+6)*16 = 896 clocks typ (2 bits/loop)
+;instead of the 112 required by stage 1!
+
+FpremLoop:
+;edx:eax = dividend (remainder) minus divisor
+;ebx:esi = divisor
+;ebp = quotient
+;edi = exponent difference, less than 32
+;
+;If R is current remainder and d is divisor, then we have edx:eax = R - d, 
+;which is negative.  We want 2*R - d, which is positive.  
+;2*R - d = 2*(R - d) + d.
+	add	eax,eax			;2*(R - d)
+	adc	edx,edx
+	add	eax,esi			;2*(R-d) + d = 2*R - d
+	adc	edx,ebx	
+	add	ebp,ebp			;Double quotient too
+	dec	edi			;Decrement exponent difference
+DivisorFit:
+	inc	ebp			;Count one in quotient
+PremNormalize:
+;Shift out leading zeros of the remainder.  Each one is a zero quotient
+;bit and one less of exponent difference.
+	bsr	ecx,edx			;Find first 1 bit
+	jz	PremHighZero
+	not     cl
+	and     cl,1FH                  ;Convert bit no. to shift count
+	shld	edx,eax,cl		;Normalize
+	shl	eax,cl
+	sub	edi,ecx			;Reduce exponent difference
+	jl	PremTooFar
+	shl	ebp,cl			;Shift quotient
+FitDivisor:
+;Dividend could be larger or smaller than divisor
+	sub	eax,esi
+	sbb	edx,ebx
+	jnc	DivisorFit
+;Couldn't subtract divisor from dividend.
+	or	edi,edi			;Is exponent difference zero or less?
+	jg	FpremLoop
+;Finished: return remainder in edx:eax, quotient in ebp, ecx = 0
+	add	eax,esi			;Restore dividend
+	adc	edx,ebx
+	xor	ecx,ecx			;Sign is positive
+	ret
+
+PremTooFar:
+;Exponent difference in edi went negative when reduced by shift count in ecx.
+;We need a quotient corresponding to exponent difference of zero.
+;Adding the now-negative edi to the shift count gives the exponent
+;difference as it was before the shift; edi itself is left negative.
+	add	ecx,edi			;Restore exponent difference
+	shl	ebp,cl			;Fix up quotient
+ExitPremLoop:
+;edx:eax = remainder, normalized
+;ebp = quotient
+;edi = exponent difference, zero or less
+	xor	ecx,ecx			;Sign is positive
+	ret
+
+PremHighZero:
+;High half of remainder is all zero, so we've reduced exponent difference
+;by 32 bits and overshot.  We need a quotient corresponding to exponent 
+;difference of zero, so we just shift it by the original difference.  Then
+;we need to normalize the low half remainder.
+;Returns with the remainder in edx:eax (eax = 0), ecx = 0, and edi = the
+;new exponent difference; exits through ExactPrem if the low half is
+;zero too.
+	mov	ecx,edi
+	shl	ebp,cl			;Fix up quotient
+	bsr	ecx,eax
+	jz	ExactPrem
+;Total left shift is 32 + (31 - bit no.) = 63 - bit no.
+	lea     edi,[edi+ecx-63]        ;Fix up exponent for normalization
+	xchg	eax,edx			;Shift by 32 bits
+	not     cl			;Low 5 bits = 31 - bit no.
+        shl     edx,cl                  ;Normalize remainder
+        xor     ecx,ecx                 ;Sign is positive
+        ret
diff --git a/private/ntos/dll/i386/emfsqrt.asm b/private/ntos/dll/i386/emfsqrt.asm
new file mode 100644
index 000000000..cc36f7b0a
--- /dev/null
+++ b/private/ntos/dll/i386/emfsqrt.asm
@@ -0,0 +1,267 @@
+	subttl	emfsqrt.asm - FSQRT instruction
+	page
+;*******************************************************************************
+;emfsqrt.asm - FSQRT instruction
+;	by Tim Paterson
+;
+;	 Microsoft Confidential
+;
+;	 Copyright (c) Microsoft Corporation 1991
+;	 All Rights Reserved
+;
+;Inputs:
+;	edi = [CURstk]
+;
+;Revision History:
+;
+; []	09/05/91  TP	Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+;A linear approximation of the square root function is used to get the
+;initial value for Newton-Raphson iteration.  This approximation gives
+;nearly 5-bit accuracy over the required input interval, [1,4).  The
+;equation for the linear approximation of y = sqrt(x) is y = mx + b,
+;where m is the slope (named SQRT_COEF) and b is the y-intercept (named
+;SQRT_INTERCEPT).
+;
+;(The values for m and b were computed with Excel Solver in two passes: 
+;the first pass computed them full precision, minimizing absolute error;
+;the second computed only b after m was rounded to an 8-bit value.)
+;
+;The resulting values have the following maximum error:
+;
+;inp. value -->		1		 2.18972	3.82505
+;----------------------------------------------------------------
+;abs. err., full prec.	0.04544		-0.03233	0.04423
+;abs. err., truncated	0.04544		-0.04546	0.04423
+;
+;The three input values shown represent the left end point, the maximum 
+;error (derivative of absolute error == 0), and the right end point.  
+;The right end point is not 4 because the approximation reaches 2.000
+;at the value given--we abandon the linear approximation at that point
+;and use that same value for all greater input values.	This linear
+;approximation is computed with 8-bit operations, so truncations can
+;add a negative error.  This increases maximum error only when it is
+;already negative, as shown in the table.
+;
+;Each iteration of Newton-Raphson approximation more than doubles the
+;number of bits of accuracy.  Suppose the current guess is A, and it has
+;an absolute error of e (i.e., A+e or A-e is the root).  Then the absolute
+;error after the next iteration is e^2/2A.  This error is always positive.
+;However, the divide instruction truncates, which introduces an error
+;that is always negative.  Sometimes a constant or rounding bit is added
+;to balance the positive and negative errors.  The maximum possible error 
+;is given in comments below for each iteration.  (Note that when we compute 
+;the error from e^2/2A, A could be in the range 1 to 2--we use 1 to get
+;max error.)  Remember that the binary point is to the RIGHT of the MSB
+;when looking at these error numbers.
+
+
+;SQRT_INTERCEPT is used when the binary point is to the right of the MSB.
+;Multiplying it by 64K would put the binary point to the left of the MSB,
+;so it must be divided by two to be aligned.
+SQRT_INTERCEPT	equ	23185		; 0.70755 * 65536 / 2
+
+;SQRT_COEF would have the binary point to the left of the MSB if multiplied
+;by 256.  However, this would leave it with a leading zero, so we multiply
+;it by two more to normalize it.
+SQRT_COEF	equ	173		; 0.33789 * 256 * 2
+
+SqrtSpcl:
+;Operand has a special tag.  al = tag and ah = sign byte, both from the
+;ExpSgn load in eFSQRT.  Denormals go to SqrtDen, anything that is not an
+;infinity goes to SpclDestNotDen, -infinity is invalid, and +infinity is
+;returned unchanged.
+	cmp	al,bTAG_DEN
+	jz	SqrtDen
+	cmp	al,bTAG_INF
+	jnz	SpclDestNotDen
+;Have infinity
+	or	ah,ah			;Is it negative?
+	js	ReturnIndefinite
+SqrtRet:
+	ret
+
+
+MaxStartRoot:
+;Reached from eFSQRT when the linear approximation overflows 16 bits.
+;ax = high word of the mantissa, esi:ecx = high 64 bits of the argument.
+;
+;The first iteration is calculated as  (ax / bh) * 100H + bx.  The first 
+;trial root in bx should be 10000H (which is too big).  But it's very
+;easy to calculate (ax / 100H) * 100H + 10000H = ax.
+	mov	bx,ax
+	cmp	ax,-1			;Would subsequent DIV overflow?
+	jb	FirstTrialRoot
+;The reduced argument is so close to 4.0 that the 16-bit DIV instruction
+;used in the next iteration would overflow.  If the argument is 4-A 
+;then a guess of 2.0 is in error by approximately A/4.  [This is not
+;an upper bound.  The error is a little more than this, by an
+;additional term with the magnitude of A^2.  This is an insignificant
+;amount when A is small.]  This means that the first guess of 2.0 is quite
+;accurate, and we'll use it to bypass some of the iteration steps. 
+;This will eliminate the DIV overflow by skipping the DIV.
+;
+;One iteration is performed by: (Arg/Guess + Guess)/2.  When Guess = 2,
+;this becomes (Arg/2 + 2)/2 = Arg/4 + 1.  We get Arg/2 just by assuming
+;the binary point is one bit further left; then a single right shift is
+;needed to get Arg/4.  By shifting in a 1 bit on the left, we account for
+;adding 1 at the same time.  [Note that if Arg = 4 - A, then Arg/4 + 1
+; = (4 - A)/4 + 1 = 1 - A/4 + 1 = 2 - A/4.  In other words, we just
+;subtract out exactly what we estimate our error to be, A/4.]
+;
+;Since the upper 16 bits are 0FFFFH, A <= 2^-14, so error <= 2^-16 =
+; +0.00001526, -0.
+	mov	ebx,esi			;Return root in ebx
+;The top bit of esi is set here, so SAR shifts a 1 in on the left
+	sar	ebx,1			;Trial root = arg/2
+	cmp	esi,ebx			;Will 32-bit division overflow?
+	jb	StartThirdIteration	;No, our 32-bit guess is good
+;Argument is really, really close to 4.0: with an initial trial root of
+;2.0, max absolute error is 2^-32 = +2.328E-10, -0.  One trivial
+;iteration will get us 65-bit accuracy, max abs. error = +2.71E-20, -0.
+	mov	ebx,esi
+	mov	eax,ecx			;65-bit root*2 in ebx:eax (MSB implied)
+	shl	ecx,2			;ecx = low half*4
+	jmp	RoundRoot
+
+SqrtDen:
+;Operand is a denormal.  eax still holds its ExpSgn (loaded in eFSQRT and
+;not modified by SqrtSpcl).  A set bit in CWmask means the exception is
+;masked.
+	mov	EMSEG:[CURerr],Denormal
+	test	EMSEG:[CWmask],Denormal ;Is denormal exception masked?
+	jz	SqrtRet			;If not, quit
+;Masked: take the root as for any other operand.  This must enter below
+;the tag dispatch in eFSQRT; re-entering at the top would see the DEN tag
+;again and come straight back here.
+	jmp	SqrtChkSign
+
+;******
+EM_ENTRY eFSQRT
+eFSQRT:
+;******
+;FSQRT.  On entry edi = [CURstk].
+;Zero is returned unchanged, special tags go to SqrtSpcl, and a negative
+;operand is an invalid operation.  Otherwise the exponent is halved in
+;place and the mantissa is aligned into esi:ecx:edi for the iteration
+;that starts at RootAligned.
+	mov	eax,EMSEG:[edi].ExpSgn
+	cmp	al,bTAG_ZERO
+	jz	SqrtRet
+	ja	SqrtSpcl
+SqrtChkSign:
+;al = tag, ah = sign byte
+	or	ah,ah
+	js	ReturnIndefinite
+	mov	esi,EMSEG:[edi].lManHi
+	mov	ecx,EMSEG:[edi].lManLo
+	sar	EMSEG:[edi].wExp,1	;Divide exponent by two
+;MOV rather than XOR: the carry out of the SAR must survive to the JC
+	mov	edi,0			;Extend mantissa
+	jc	RootAligned		;If odd exponent, leave it normalized
+	shrd	edi,ecx,1
+	shrd	ecx,esi,1
+	shr	esi,1			;Denormalize, extending into edi
+RootAligned:
+;esi:ecx:edi has mantissa, 2 MSBs are left of binary point. Range is [1,4).
+;
+;Builds a trial root in ebx from a linear estimate refined by successive
+;divide-and-average steps, then falls into RoundRoot with root*2 in
+;ebx:eax.  Exits through HaveRoot if the last division has no remainder.
+	shld	eax,esi,16		;Get high word of mantissa
+	movzx	ebx,ah			;High byte to bl
+;UNDONE:  MASM 6 bug!!
+;UNDONE:  SQRT_COEF (=0AEH) get sign extended!!
+;NOTE(review): SQRT_COEF is defined as 173 (0ADH) in this file; the 0AEH
+;quoted above does not match it.
+	mov	dx,SQRT_COEF		;UNDONE
+	imul	bx,dx			;UNDONE
+;UNDONE imul	bx,SQRT_COEF		;Product in bx
+;Multiply by SQRT_COEF causes binary point to shift left 1 bit.
+	add	bx,SQRT_INTERCEPT	;5-bit approx. square root in bh
+	jc	MaxStartRoot
+;Max absolute error is +/- 0.04546
+;ax still holds the high word of the mantissa
+	div	bh			;See how close we are
+	add	bh,al			;quotient + divisor (always sets CY)
+FirstTrialRoot:
+;Avoid RCR because it takes 9 clocks on 386.  Use SHRD (3 clocks) instead.
+	mov	dl,1			;Need bit set
+	shrd	bx,dx,1			;(quotient + divisor)/2
+;bx has 9-bit approx. square root, normalized
+;Max absolute error is +0.001033, -0.003906
+	movzx	eax,si
+	shld	edx,esi,16		;dx:ax has high half mantissa
+	div	bx			;Test our approximation
+	add	ebx,eax			;quotient + divisor
+	shl	ebx,15			;Normalize (quotient + divisor)/2
+;ebx has 17-bit approx. square root, normalized
+;Max absolute error is +0.000007629, -0.00001526
+;Add adjustment factor to center the error range at +/-0.00001144
+	or	bh,20H			;Add in 0.000003815
+StartThirdIteration:
+;esi:ecx = high 64 bits of argument, ebx = 32-bit trial root
+	mov	edx,esi
+	mov	eax,ecx
+	div	ebx			;Test approximation
+	stc				;Set bit for rounding (= 2.328E-10)
+	adc	ebx,eax			;quotient + divisor + round bit
+;Avoid RCR because it takes 9 clocks on 386.  Use SHRD (3 clocks) instead.
+	mov	dl,1			;Need bit set
+	shrd	ebx,edx,1		;(quotient + divisor)/2, rounded
+;ebx has 32-bit approx. square root, normalized
+;Max absolute error is +2.983E-10, -2.328E-10
+	mov	edx,esi			;Last time we need high half
+	mov	eax,ecx
+	shld	ecx,edi,2		;ecx = low half*4, w/extension back in
+	div	ebx			;Test approximation
+	xchg	edi,eax			;Save 1st quotient, get extension
+	mov	esi,eax
+	or	esi,edx			;Any remainder?
+	jz	HaveRoot		;Result is ebx:esi
+	div	ebx			;edi:eax is 64-bit quotient
+	add	ebx,edi			;quotient + divisor (always sets CY)
+RoundRoot:
+;In:  ebx:eax = root*2, ecx = argument low half * 4.
+;Out: ebx:esi = root*2, reduced by one if its square exceeds the argument.
+;     eax, edx, edi and cl are used as scratch.  Falls into ShiftRoot.
+	mov	esi,eax			;Save low half root*2
+
+;We have 65-bit root*2 in ebx:esi (eax==esi) (MSB is implied one).
+;Max absolute error is +4.450E-20, -5.421E-20.	This maximum error 
+;corresponds to just less than +/- 1 in the last (65th) bit.  
+;	
+;We have to determine if this error is positive or negative so
+;we can tell if we rounded up or down (and set the status bit
+;accordingly).	This is done by squaring the root and comparing
+;that result with the input.
+;
+;Squaring the sample root requires summing partial products:
+; lo*lo + lo*hi + hi*lo + hi*hi.  lo*hi == hi*lo, so only one multiply
+;is needed there.  The low half of lo*lo isn't relevant, we know it
+;is non-zero.  Only the low few bits of hi*hi are needed, so we can use
+;an 8-bit multiply there.  Since the MSB is implied, we need to add in
+;two 1*lo products (shifted up 64 bits).  We only need bits 64 - 71 of
+;the 130-bit product (the action happens near bit 65).	What we're 
+;squaring is root*2, so the result is square*4.  ecx already has arg*4.
+
+	mul	eax			;Low partial product of square
+	mov	edi,edx			;Only high half counts
+	mov	eax,ebx
+	mul	esi			;Middle partial product of square
+	add	eax,eax			;There are two of these
+	adc	edx,edx
+	add	edi,eax
+	adc	edx,0			;edx:edi = lo*lo + lo*hi + hi*lo
+	add	edx,esi			;lo*implied msb
+	add	edx,esi			;lo*implied msb again
+	mov	al,bl
+	mul	al			;hi*hi - only low 8 bits are valid
+	add	al,dl			;Bits 64 - 71 of product
+	or	al,1			;Account for sticky bits 0 - 63
+	sub	cl,al			;Compare product with argument
+;Sign flag set if product is larger.  In this case, subtract 1 from root.
+	add	cl,cl			;Set CY if sign is set
+SubOneFromRoot:
+;CY on entry selects whether one is subtracted from ebx:esi
+	sbb	esi,0			;Reduce root if product was too big
+	sbb	ebx,0
+ShiftRoot:
+;ebx:esi = root*2
+;Absolute error is in the range (0, -5.421E-20).  This is equivalent to
+;less than +1, -0 in last bit.	Thus LSB is correct rounding bit as 
+;long as we set a sticky bit below it.
+;
+;Now divide root*2 by 2, preserving LSB as rounding bit and filling
+;eax with 1's as sticky bits.
+;
+;Avoid RCR because it takes 9 clocks on 386.  Use SHRD (3 clocks) instead.
+	mov	eax,-1
+	shrd	eax,esi,1		;Move round bit to MSB of eax
+	shrd	esi,ebx,1
+;Bit 0 of eax is still 1 from the -1 load; it becomes the new MSB of ebx
+	shrd	ebx,eax,1		;Shift 1 into MSB of ebx
+StoreRoot:
+;Reload ExpSgn (its exponent was halved in place by eFSQRT) and hand
+;the result to the rounding routine.
+	mov	edi,EMSEG:[CURstk]
+	mov	EMSEG:[Result],edi
+	mov	ecx,EMSEG:[edi].ExpSgn
+;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+	jmp	EMSEG:[RoundMode]
+
+HaveRoot:
+;esi = eax = edx = 0
+;edi = quotient of the last division, ebx = the trial root it divided by
+	cmp	edi,ebx			;Does quotient == divisor?
+	jz	StoreRoot		;If so, we're done
+;Quotient != divisor, so answer is not exact.  Since remainder is zero,
+;the division was exact.  The only error in the result is e^2/2A, which
+;is always positive.  We need the error to be only negative so that
+;the rounding routine can properly tell if it rounded up.
+	add	ebx,edi			;quotient + divisor (always sets CY)
+;CY from the ADD makes SubOneFromRoot subtract one from ebx:esi
+	jmp	SubOneFromRoot		;Reduce root to ensure negative error
diff --git a/private/ntos/dll/i386/emftran.asm b/private/ntos/dll/i386/emftran.asm
new file mode 100644
index 000000000..116c3a29f
--- /dev/null
+++ b/private/ntos/dll/i386/emftran.asm
@@ -0,0 +1,1206 @@
+	subttl  emftran.asm - Transcendental instructions
+	page
+;*******************************************************************************
+;	 Copyright (c) Microsoft Corporation 1991
+;	 All Rights Reserved
+;
+;emftran.asm - Transcendental instructions
+;	by Tim Paterson
+;
+;Purpose:
+;	F2XM1, FPATAN, FYL2X, FYL2XP1 instructions
+;Inputs:
+;	edi = [CURstk]
+;
+;Revision History:
+;
+; []	09/05/91  TP	Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+;********************* Polynomial Coefficients *********************
+
+;These polynomial coefficients were all taken from "Computer Approximations"
+;by J.F. Hart (reprinted 1978 w/corrections).  All calculations and 
+;conversions to hexadecimal were done with a character-string calculator
+;written in Visual Basic with precision set to 30 digits.  Once the constants
+;were typed into this file, all transfers were done with cut-and-paste
+;operations to and from the calculator to help eliminate any typographical
+;errors.
+
+
+tAtanPoly	label	word
+
+;These constants are from Hart #5056: atan(x) = x * P(x^2) / Q(x^2),
+;accurate to 20.78 digits over interval [0, tan(pi/12)].
+
+	dd	4			;P() is degree four
+
+;  Hart constant
+;
+;+.16241 70218 72227 96595 08	      E0
+;Hex value:    0.A650A5D5050DE43A2C25A8C00 HFFFE
+	dq	0A650A5D5050DE43AH
+	dw	bTAG_VALID,0FFFEH-1
+
+;+.65293 76545 29069 63960 675	      E1
+;Hex value:    0.D0F0A714A9604993AC4AC49A0 H3
+	dq	0D0F0A714A9604994H
+	dw	bTAG_VALID,03H-1
+
+;+.39072 57269 45281 71734 92684      E2
+;Hex value:    0.9C4A507F16530AC3CDDEFA3DE H6
+	dq	09C4A507F16530AC4H
+	dw	bTAG_VALID,06H-1
+
+;+.72468 55912 17450 17145 90416 9    E2
+;Hex value:    0.90EFE6FB30465042CF089D1310 H7
+	dq	090EFE6FB30465043H
+	dw	bTAG_VALID,07H-1
+
+;+.41066 29181 34876 24224 77349 62   E2
+;Hex value:    0.A443E2004BB000B84A5154D44 H6
+	dq	0A443E2004BB000B8H
+	dw	bTAG_VALID,06H-1
+
+	dd	4			;Q() is degree four
+
+;  Hart constant
+;
+;+.15023 99905 56978 85827 4928	      E2
+;Hex value:    0.F0624CD575B782643AFB912D0 H4
+	dq	0F0624CD575B78264H
+	dw	bTAG_VALID,04H-1
+
+;+.59578 42201 83554 49303 22456      E2
+;Hex value:    0.EE504DDC907DEAEB7D7473B82 H6
+	dq	0EE504DDC907DEAEBH
+	dw	bTAG_VALID,06H-1
+
+;+.86157 32305 95742 25062 42472      E2
+;Hex value:    0.AC508CA5E78E504AB2032E864 H7
+	dq	0AC508CA5E78E504BH
+	dw	bTAG_VALID,07H-1
+
+;+.41066 29181 34876 24224 84140 84   E2
+;Hex value:    0.A443E2004BB000B84F542813C H6
+	dq	0A443E2004BB000B8H
+	dw	bTAG_VALID,06H-1
+
+
+;tan(pi/12) = tan(15 deg.) = 2 - sqrt(3) 
+;= 0.26794 91924 31122 70647 25536 58494 12763	;From Hart appendix
+;Hex value:    0.8930A2F4F66AB189B517A51F2 HFFFF
+Tan15Hi		equ	08930A2F4H
+Tan15Lo		equ	0F66AB18AH
+Tan15exp	equ	0FFFFH-1
+
+;1/tan(pi/6) = sqrt(3) = 1.73205 08075 68877 29352 74463 41505 87236	;From Hart appendix
+;Hex value:    0.DDB3D742C265539D92BA16B8 H1
+Sqrt3Hi		equ	0DDB3D742H
+Sqrt3Lo		equ	0C265539EH
+Sqrt3exp	equ	01H-1
+
+;pi = +3.14159265358979323846264338328
+;Hex value:    0.C90FDAA22168C234C4C6628B8 H2
+PiHi		equ	0C90FDAA2H
+PiLo		equ	02168C235H
+PiExp		equ	02H-1
+
+;3*pi = +9.42477796076937971538793014984
+;Hex value:    0.96CBE3F9990E91A79394C9E890 H4
+XThreePiHi	equ	096CBE3F9H
+XThreePiMid	equ	0990E91A7H
+XThreePiLo	equ	090000000H
+ThreePiExp	equ	04H-1
+
+
+;This is a table of multiples of pi/6.  It is used to adjust the
+;final result angle after atan().  Derived from Hart appendix
+;pi/180 = 0.01745 32925 19943 29576 92369 07684 88612
+;
+;When the reduced argument for atan() is very small, these correction
+;constants simply become the result.  These constants have all been
+;rounded to nearest, but the user may have selected a different rounding
+;mode.  The tag byte is not needed for these constants, so its space
+;is used to indicate if it was rounded.  To determine if a constant 
+;was rounded, 7FH is subtracted from this flag; CY set means it was
+;rounded up.
+
+RoundedUp	equ	040H
+RoundedDown	equ	0C0H
+
+tAtanPiFrac	label	dword
+;pi/2 = +1.57079632679489661923132169163
+;Hex value:    0.C90FDAA22168C234C4C6628B0 H1
+	dq	0C90FDAA22168C235H
+	dw	RoundedUp,01H-1
+
+;2*pi/3 = +2.09439510239319549230842892218
+;Hex value:    0.860A91C16B9B2C232DD997078 H2
+	dq	0860A91C16B9B2C23H
+	dw	RoundedDown,02H-1
+
+;none
+	dd	0,0,0
+
+;pi/6 = +0.523598775598298873077107230544E0
+;Hex value:    0.860A91C16B9B2C232DD99707A H0
+	dq	0860A91C16B9B2C23H
+	dw	RoundedDown,00H-1
+
+;pi/2 = +1.57079632679489661923132169163
+;Hex value:    0.C90FDAA22168C234C4C6628B0 H1
+	dq	0C90FDAA22168C235H
+	dw	RoundedUp,01H-1
+
+;pi/3 = +1.04719755119659774615421446109
+;Hex value:    0.860A91C16B9B2C232DD997078 H1
+	dq	0860A91C16B9B2C23H
+	dw	RoundedDown,01H-1
+
+;pi = +3.14159265358979323846264338328
+;Hex value:    0.C90FDAA22168C234C4C6628B8 H2
+	dq	0C90FDAA22168C235H
+	dw	RoundedUp,02H-1
+
+;5*pi/6 = +2.61799387799149436538553615272
+;Hex value:    0.A78D3631C681F72BF94FFCC96 H2
+	dq	0A78D3631C681F72CH
+	dw	RoundedUp,02H-1
+
+;*********************
+
+tExpPoly	label	word
+
+;These constants are from Hart #1324: 2^x - 1 = 
+; 2 * x * P(x^2) / ( Q(x^2) - x * P(x^2) )
+;accurate to 21.54 digits over interval [0, 0.5].
+
+	dd	2			;P() is degree two
+
+;  Hart constant
+;
+;+.60613 30790 74800 42574 84896 07	E2
+;Hex value:    0.F27406FCF405189818F68BB78 H6
+	dq	0F27406FCF4051898H
+	dw	bTAG_VALID,06H-1
+
+;+.30285 61978 21164 59206 24269 927	E5
+;Hex value:    0.EC9B3D5414E1AD0852E432A18 HF
+	dq	0EC9B3D5414E1AD08H
+	dw	bTAG_VALID,0FH-1
+
+;+.20802 83036 50596 27128 55955 242	E7
+;Hex value:    0.FDF0D84AC3A35FAF89A690CC4 H15
+	dq	0FDF0D84AC3A35FB0H
+	dw	bTAG_VALID,015H-1
+
+	dd	3			;Q() is degree three.  First 
+					;coefficient is 1.0 and is not listed.
+;  Hart constant
+;
+;+.17492 20769 51057 14558 99141 717	E4
+;Hex value:    0.DAA7108B387B776F212ECFBEC HB
+	dq	0DAA7108B387B776FH
+	dw	bTAG_VALID,0BH-1
+
+;+.32770 95471 93281 18053 40200 719	E6
+;Hex value:    0.A003B1829B7BE85CC81BD5309 H13
+	dq	0A003B1829B7BE85DH
+	dw	bTAG_VALID,013H-1
+
+;+.60024 28040 82517 36653 36946 908	E7
+;Hex value:    0.B72DF814E709837E066855BDD H17
+	dq	0B72DF814E709837EH
+	dw	bTAG_VALID,017H-1
+
+
+;sqrt(2) = 1.41421 35623 73095 04880 16887 24209 69808	;From Hart appendix
+;Hex value:    0.B504F333F9DE6484597D89B30 H1
+Sqrt2Hi		equ	0B504F333H
+Sqrt2Lo		equ	0F9DE6484H
+Sqrt2Exp	equ	01H-1
+
+;sqrt(2) - 1 = +0.4142135623730950488016887242E0
+;Hex value:    0.D413CCCFE779921165F626CC4 HFFFF
+Sqrt2m1Hi	equ	0D413CCCFH
+Sqrt2m1Lo	equ	0E7799211H
+XSqrt2m1Lo	equ	060000000H
+Sqrt2m1Exp	equ	0FFFFH-1
+
+;2 - sqrt(2) = +0.5857864376269049511983112758E0
+;Hex value:    0.95F619980C4336F74D04EC9A0 H0
+TwoMinusSqrt2Hi	equ	095F61998H
+TwoMinusSqrt2Lo	equ	00C4336F7H
+TwoMinusSqrt2Exp equ	00H-1
+
+;*********************
+
+tLogPoly	label	dword
+
+;These constants are derived from Hart #2355: log2(x) = z * P(z^2) / Q(z^2),
+; z = (x-1) / (x+1), accurate to 19.74 digits over the interval
+;[1/sqrt(2), sqrt(2)].  The original Hart coefficients were for log10(); 
+;the P() coefficients have been scaled by log2(10) to compute log2().
+;
+;log2(10) = 3.32192 80948 87362 34787 03194 29489 39017	;From Hart appendix
+
+	dd	3			;P() is degree three
+
+;  Original Hart constant	 	Scaled value
+;
+;+.18287 59212 09199 9337	 E0	+0.607500660543248917834110566373E0
+;Hex value:    0.9B8529CD54E72022A12BAEC53 H0
+	dq	09B8529CD54E72023H
+	dw	bTAG_VALID,00H-1
+
+;-.41855 96001 31266 20633	 E1	-13.9042489506087332809657007634
+;Hex value:    0.DE77CDBF64E8C53F0DCD458D0 H4
+	dq	0DE77CDBF64E8C53FH
+	dw	bSign shl 8 + bTAG_VALID,04H-1
+
+;+.13444 58152 27503 62236	 E2	+44.6619330844279438866067340334
+;Hex value:    0.B2A5D1C95708A0C9FE50F6F97 H6
+	dq	0B2A5D1C95708A0CAH
+	dw	bTAG_VALID,06H-1
+
+;-.10429 11213 72526 69497 44122 E2	-34.6447606134704282123622236943
+;Hex value:    0.8A943C20526AE439A98B30F6A H6
+	dq	08A943C20526AE43AH
+	dw	bSign shl 8 + bTAG_VALID,06H-1
+
+
+	dd	3			;Q() is degree three.  First 
+					;coefficient is 1.0 and is not listed.
+;  Hart constant
+;
+;-.89111 09060 90270 85654	 E1
+;Hex value:    0.8E93E7183AA998D74F45CDFF0 H4
+	dq	08E93E7183AA998D7H
+	dw	bSign shl 8 + bTAG_VALID,04H-1
+
+;+.19480 96618 79809 36524 155	 E2
+;Hex value:    0.9BD904CCFEE118D4BEF319716 H5
+	dq	09BD904CCFEE118D5H
+	dw	bTAG_VALID,05H-1
+
+;-.12006 95907 02006 34243 4218	 E2
+;Hex value:    0.C01C811D2EC1B5806304B1858 H4
+	dq	0C01C811D2EC1B580H
+	dw	bSign shl 8 + bTAG_VALID,04H-1
+
+;Log2(e) = 1.44269 50408 88963 40735 99246 81001 89213	;From Hart appendix
+;Hex value:    0.B8AA3B295C17F0BBBE87FED04 H1
+Log2OfEHi	equ	0B8AA3B29H
+Log2OfELo	equ	05C17F0BCH
+Log2OfEexp	equ	01H-1
+
+
+;********************* Generic polynomial evaluation *********************
+;
+;EvalPoly, EvalPolyAdd, EvalPolySetup, Eval2Poly
+;
+;Inputs:
+;	ebx:esi,ecx = floating point number, internal format
+;	edi = pointer to polynomial degree and coefficients
+;Outputs:
+;	result in ebx:esi,ecx
+;	edi incremented to start of last coefficient in list
+;
+;EvalPoly is the basic polynomial evaluator, using Horner's rule.  The
+;polynomial pointer in edi points to a list: the first dword in the list
+;is the degree of the polynomial (n); it is followed by the n+1 
+;coefficients in internal (12-byte) format.  The argument for EvalPoly
+;must be stored in the static FloatTemp in addition to being in
+;registers.
+;
+;EvalPolyAdd is an alternate entry point into the middle of EvalPoly.
+;It is used when the first coefficient is 1.0, so it skips the first
+;multiplication.  It requires that the degree of the polynomial be
+;already loaded into ebp.
+;
+;EvalPolySetup stores a copy of the argument in the static ArgTemp,
+;and stores the square of the argument in the static FloatTemp.  
+;Then it falls into EvalPoly to evaluate the polynomial on the square.
+;
+;Eval2Poly evaluates two polynomials on its argument.  The first
+;polynomial is  x * P(x^2), and its result is left at [[CURstk]].
+;The second polynomial is Q(x^2), and its result is left in registers.
+;The most significant coefficient of Q() is 1.
+;
+;Polynomial evaluation uses a slight variation on the standard add
+;and multiply routines.  PolyAddDouble and PolyMulDouble both check
+;to see if the argument in registers (the current accumulation) is 
+;zero.  The argument pointed to by edi is a coefficient and is never
+;zero.
+;
+;In addition, the [RoundMode] and [ZeroVector] vectors are "trapped",
+;i.e., redirected to special handlers for polynomial evaluation.
+;[RoundMode] ordinarily points to the routine that handles
+;the current rounding mode and precision control; however, during
+;polynomial evaluation, we always want full precision and round
+;nearest.  The normal rounding routines also store their result
+;at [[Result]], but we want the result left in registers.
+;[ZeroVector] exists solely so polynomial evaluation can trap
+;when AddDouble results in zero.  The normal response is to store
+;a zero at [[Result]], but we need the zero left in registers.
+;PolyRound and PolyZero handle these traps.
+
+
+EvalPolySetup:
+;Entry: ebx:esi,ecx = x (internal format), edi = pointer to polynomial list.
+;Leaves x in ArgTemp and x^2 in both FloatTemp and ebx:esi,ecx, then
+;falls through into EvalPoly with edi restored.
+;
+;Redirect rounding and zero-result handling to the polynomial handlers
+	mov	EMSEG:[ZeroVector],offset PolyZero
+	mov	EMSEG:[RoundMode],offset PolyRound
+;Keep a copy of x for later use
+	mov	EMSEG:[ArgTemp].lManLo,esi
+	mov	EMSEG:[ArgTemp].lManHi,ebx
+	mov	EMSEG:[ArgTemp].ExpSgn,ecx
+	push	edi			;edi is needed as an operand register
+;Duplicate x as the second operand: edx:edi,eax = ebx:esi,ecx
+	mov	eax,ecx
+	mov	edi,esi
+	mov	edx,ebx
+	call	MulDoubleReg		;ebx:esi,ecx = x * x
+;Keep a copy of x^2 as the polynomial argument
+	mov	EMSEG:[FloatTemp].lManLo,esi
+	mov	EMSEG:[FloatTemp].lManHi,ebx
+	mov	EMSEG:[FloatTemp].ExpSgn,ecx
+	pop	edi			;Recover polynomial pointer
+EvalPoly:
+;Evaluate a polynomial by Horner's rule.
+;
+;Inputs:
+;	ebx:esi,ecx = argument, which must also be stored in FloatTemp
+;	edi = pointer to dword degree followed by the coefficients
+;Outputs:
+;	ebx:esi,ecx = result
+;	edi = pointer to the last coefficient that was added
+;
+;EvalPolyAdd is an alternate entry used when the leading coefficient is
+;1.0: ebp = degree, edi = pointer to the next coefficient to add.
+	push	edi
+	mov	eax,cs:[edi+4].ExpSgn
+	mov	edx,cs:[edi+4].lManHi
+	mov	edi,cs:[edi+4].lManLo
+	call	MulDoubleReg		;Multiply arg by first coef.
+	pop	edi
+	mov	ebp,cs:[edi]		;Get polynomial degree
+	add	edi,4+Reg87Len		;Point to second coefficient
+	jmp	EvalPolyAdd
+
+PolyLoop:
+;Multiply the accumulation by the argument saved in FloatTemp.
+;The coefficient pointer is on the stack beneath the loop count.
+	push	ebp			;Save loop count
+ifdef NT386
+        mov	edi,YFloatTemp
+else
+	mov	edi,offset edata:FloatTemp
+endif
+        call	PolyMulDouble
+	pop	ebp
+	pop	edi
+;Advance the full 32-bit pointer.  A 16-bit "add di" would lose the carry
+;into the upper half of edi if the table crossed a 64K boundary.
+	add	edi,Reg87Len		;Point to next coefficient
+EvalPolyAdd:
+	push	edi			;Pointer is reloaded after the add
+	mov	eax,cs:[edi].ExpSgn
+	mov	edx,cs:[edi].lManHi
+	mov	edi,cs:[edi].lManLo
+	cmp	cl,bTAG_ZERO		;Adding to zero?
+	jz	AddToZero
+	call	AddDoubleReg		;ebp preserved
+ContPolyLoop:
+	dec	ebp
+	jnz	PolyLoop
+	pop	edi			;Return pointer to last coefficient
+	ret
+
+AddToZero:
+;Number in registers is zero, so just return value from memory.
+	mov	ecx,eax
+	mov	ebx,edx
+	mov	esi,edi
+	jmp	ContPolyLoop
+
+
+Eval2Poly:
+;Evaluate two consecutive polynomial lists on the argument x.
+;
+;Inputs:
+;	ebx:esi,ecx = x, internal format
+;	edi = pointer to the first list (degree, coefficients), with the
+;	      second list following it immediately
+;Outputs:
+;	[[CURstk]] = x * P(x^2)
+;	ebx:esi,ecx = Q(x^2), where Q's leading coefficient of 1.0 is implied
+	call	EvalPolySetup
+	push	edi
+ifdef NT386
+        mov	edi,YArgTemp
+else
+	mov	edi,offset edata:ArgTemp
+endif
+	call	PolyMulDouble		;Multiply first result by argument
+	pop	edi			;Pointer to last coefficient of P()
+;Save result of first polynomial at [[CURstk]]
+	mov	edx,EMSEG:[CURstk]
+	mov	EMSEG:[edx].ExpSgn,ecx
+	mov	EMSEG:[edx].lManHi,ebx
+	mov	EMSEG:[edx].lManLo,esi
+;Load x^2 back into registers
+	mov	ecx,EMSEG:[FloatTemp].ExpSgn
+	mov	ebx,EMSEG:[FloatTemp].lManHi
+	mov	esi,EMSEG:[FloatTemp].lManLo
+;Start second polynomial evaluation
+;Step over P()'s last coefficient and Q()'s degree dword
+	add	edi,4+Reg87Len		;Point to coefficient
+	mov	ebp,cs:[edi-4]		;Get polynomial degree
+	jmp	EvalPolyAdd		;x^2 stands in for 1.0 * x^2
+
+
+PolyRound:
+;This routine handles all rounding during polynomial evaluation.
+;It performs 64-bit round nearest, with result left in registers.
+;
+;Inputs:
+;	mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+;Outputs:
+;	same, plus tag in cl.
+;
+;To perform "round even" when the round bit is set and the sticky bits
+;are zero, we treat the LSB as if it were a sticky bit.  Thus if the LSB
+;is set, that will always force a round up (to even) if the round bit is
+;set.  If the LSB is zero, then the sticky bits remain zero and we always
+;round down.  This rounding rule is implemented by adding RoundBit-1
+;(7F..FFH), setting CY if round up.  
+;
+;This routine needs to be reversible in case we're at the last step
+;in the polynomial and final rounding uses a different rounding mode.
+;We do this by copying the LSB of esi into al.  While the rounding is 
+;reversible, you can't tell if the answer was exact.
+
+	mov	edx,esi
+	and	dl,1			;Look at LSB
+	or	al,dl			;Set LSB as sticky bit
+	add	eax,(1 shl 31)-1	;Sum LSB & sticky bits--CY if round up
+	adc	esi,0			;Propagate the round-up into the mantissa
+	adc	ebx,0
+	jc	PolyBumpExponent	;Overflowed, increment exponent
+	or      esi,esi			;Any bits in low half?
+;The setnz below yields the tag directly only for these tag values
+.erre   bTAG_VALID eq 1
+.erre   bTAG_SNGL eq 0
+	setnz   cl			;if low half==0 then cl=0 else cl=1
+	ret
+
+PolyBumpExponent:
+;Carry out of ebx means the mantissa wrapped to zero
+	add	ecx,1 shl 16		;Mantissa overflowed, bump exponent
+	or	ebx,1 shl 31		;Set MSB
+	mov     cl,bTAG_SNGL		;Low half is zero here
+PolyZero:
+;Enter here when result is zero
+	ret
+
+;*******************************************************************************
+
+;FPATAN instruction
+
+;Actual instruction entry point is in emarith.asm
+
+;Handler addresses for FPATAN, one per combination of operand classes
+;named in the right-hand columns, plus a final entry for two infinities.
+;The code that indexes this table is not in this file.
+tFpatanDisp	label	dword		;Source (ST(0))	Dest (*[edi] = ST(1))
+	dd	AtanDouble		;single		single
+	dd	AtanDouble		;single		double
+	dd	AtanZeroDest		;single		zero
+	dd	AtanSpclDest		;single		special
+	dd	AtanDouble		;double		single
+	dd	AtanDouble		;double		double
+	dd	AtanZeroDest		;double		zero
+	dd	AtanSpclDest		;double		special
+	dd	AtanZeroSource		;zero		single
+	dd	AtanZeroSource		;zero		double
+	dd	AtanZeroDest		;zero		zero
+	dd	AtanSpclDest		;zero		special
+	dd	AtanSpclSource		;special	single
+	dd	AtanSpclSource		;special	double
+	dd	AtanSpclSource		;special	zero
+	dd	TwoOpBothSpcl		;special	special
+	dd	AtanTwoInf		;Two infinities
+
+;Compute atan( st(1)/st(0) ).  Neither st(0) nor st(1) is zero or
+;infinity at this point.
+;
+;Argument reduction starts by dividing the smaller by the larger,
+;ensuring that the result x is <= 1.  The absolute value of the quotient
+;is used and the quadrant is fixed up later.  If x = st(0)/st(1), then 
+;the final atan result is subtracted from pi/2 (and normalized for the
+;correct range of -pi to +pi).  
+;
+;The range of x is further reduced using the formulas:
+;	t = (x - k) / (1 + kx)
+;	atan(x) = atan(k) + atan(t)
+;
+;Given that x <= 1, if we choose k = tan(pi/6) = 1/sqrt(3), then we
+;are assured that t <= tan(pi/12) = 2 - sqrt(3), and
+;for x >= tan(pi/12) = 2 - sqrt(3), t >= -tan(pi/12).
+;Thus we can always reduce the argument to abs(t) <= tan(pi/12).
+;
+;Since k = 1/sqrt(3), it is convenient to multiply the numerator
+;and denominator of t by 1/k, which gives
+;t = (x/k - 1) / (1/k + x) = ( x*sqrt(3) - 1 ) / ( sqrt(3) + x ).
+;This is the form found in Cody and Waite and in previous versions
+;of the emulator.  It requires one each add, subtract, multiply, and
+;divide.
+;
+;Hart has derived a simpler version of this formula:
+;t = 1/k - (1/k^2 + 1) / (1/k + x) = sqrt(3) - 4 / ( sqrt(3) + x ).
+;Note that this computation requires one each add, subtract, and
+;divide, but no multiply.
+
+;st(0) mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
+;[edi] points to st(1), where result is returned
+
+AtanDouble:
+;FPATAN for two finite, non-zero operands.
+;
+;Inputs:
+;	ebx:esi,ecx = ST(0)
+;	edi = pointer to ST(1), where the result is returned
+;
+;Pushes a flag dword (al = ST(0) sign bit, ah = ST(1) sign byte) followed
+;by AtanQuo as the return address, then jumps to the divide routine that
+;makes the smaller operand the dividend.
+	mov	EMSEG:[Result],edi
+	mov	EMSEG:[RoundMode],offset PolyRound
+	mov	EMSEG:[ZeroVector],offset PolyZero
+	mov	ah,EMSEG:[edi].bSgn	;Sign of result
+	mov	al,ch			;Affects quadrant of result
+	and	al,bSign		;Zero other bits, used as flags
+	push	eax			;Save flag
+;First figure out which is larger
+	push	offset AtanQuo		;Return address for DivDouble
+	shld	edx,ecx,16		;Get exponent to dx
+	cmp	dx,EMSEG:[edi].wExp	;Compare exponents
+;DivrDoubleSetFlag is defined elsewhere; presumably it records the operand
+;swap in the flag byte on the stack -- TODO confirm
+	jl	DivrDoubleSetFlag	;ST(0) is smaller, make it dividend
+	jg	DivDouble		;   ...is bigger, make it divisor
+;Exponents are equal, compare mantissas
+	cmp	ebx,EMSEG:[edi].lManHi
+	jb	DivrDoubleSetFlag	;ST(0) is smaller, make it dividend
+	ja	DivDouble		;   ...is bigger, make it divisor
+	cmp	esi,EMSEG:[edi].lManLo
+	jbe	DivrDoubleSetFlag	;ST(0) is smaller or equal, make it dividend
+	jmp	DivDouble
+
+TinyAtan:
+;Come here if the angle was reduced to zero, or the divide resulted in
+;unmasked underflow so that the quotient exponent was biased.
+;Note that an angle of zero means reduction was performed, and the
+;result will be corrected to a non-zero value.
+;
+;The flag dword pushed by AtanDouble is at the top of the stack here.
+	mov	dl,[esp]		;Get flag byte
+	or	dl,dl			;No correction needed?
+	jz	AtanSetSign		;Just return result of divide
+	and	EMSEG:[CURerr],not Underflow
+;Angle in registers is too small to affect correction amount.  Just
+;load up correction angle instead of adding it in.
+	add	dl,40H			;Change flags for correction lookup
+	shr	dl,5-2			;Now in bits 2,3,4
+	and	edx,7 shl 2
+;edx = 4*index, so edx+2*edx = 12*index, the size of one table entry
+	mov	ebx,[edx+2*edx+tAtanPiFrac].lManHi
+	mov	esi,[edx+2*edx+tAtanPiFrac].lManLo
+	mov	ecx,[edx+2*edx+tAtanPiFrac].ExpSgn
+;Low byte of the table's ExpSgn holds RoundedUp/RoundedDown, not a tag
+	shrd	eax,ecx,8		;Copy rounding flag to high eax
+	jmp	AtanSetSign
+
+AtanQuo:
+;Return here after divide.  Underflow flag is set only for "big underflow",
+;meaning the (15-bit) exponent couldn't even be kept in 16 bits.  This can
+;only happen dividing a denormal by one of the largest numbers.
+;
+;Rounded mantissa in ebx:esi:eax, exp/sign in high ecx
+	test	EMSEG:[CURerr],Underflow;Did we underflow?
+	jnz	TinyAtan
+;Now compare quotient in ebx:esi,ecx with tan(pi/12) = 2 - sqrt(3)
+	xor	cx,cx			;Use absolute value
+	cmp	ecx,Tan15exp shl 16	;Signed compare of exponents
+	jg	AtnNeedReduce
+	jl	AtnReduced
+	cmp	ebx,Tan15Hi		;Exponents equal, compare mantissas
+	ja	AtnNeedReduce
+	jb	AtnReduced
+	cmp	esi,Tan15Lo
+	jbe	AtnReduced
+AtnNeedReduce:
+	or	byte ptr [esp],20H	;Note reduction in flags on stack
+;Compute t = sqrt(3) - 4 / ( sqrt(3) + x ).
+	mov	eax,Sqrt3exp shl 16
+	mov	edx,Sqrt3Hi
+	mov	edi,Sqrt3Lo
+	call	AddDoubleReg		;x + sqrt(3)
+;Move the sum into the divisor registers
+	mov	edi,esi
+	mov	esi,ebx			;Mantissa in esi:edi
+	mov	ebx,ecx			;ExpSgn to ebx
+	mov	ecx,(2+TexpBias) shl 16
+	mov	edx,1 shl 31
+	xor	eax,eax			;edx:eax,ecx = 4.0
+;dividend mantissa in edx:eax, exponent in high ecx, sign in ch bit 7
+;divisor mantissa in esi:edi, exponent in high ebx, sign in bh bit 7
+	call	DivDoubleReg		;4 / ( x + sqrt(3) )
+;Inverts every bit of ch, including the sign in bit 7
+	not	ch			;Flip sign
+	mov	eax,Sqrt3exp shl 16
+	mov	edx,Sqrt3Hi
+	mov	edi,Sqrt3Lo
+	call	AddDoubleReg		;sqrt(3) - 4 / ( x + sqrt(3) )
+;Result in ebx:esi,ecx could be very small (or zero) if arg was near tan(pi/6).
+	cmp	cl,bTAG_ZERO
+	jz	TinyAtan
+AtnReduced:
+;Reduced argument is in ebx:esi,ecx; the flag dword is on top of the stack.
+;
+;If angle is small, skip the polynomial. atan(x) = x when x - x^3/3 = x
+;[or 1 - x^2/3 = 1], which happens when x < 2^-32.  This prevents underflow
+;in computing x^2.
+TinyAtanArg	equ	-32
+	cmp	ecx,TinyAtanArg shl 16
+	jl	AtanCorrection
+	mov	edi,offset tAtanPoly
+	call	Eval2Poly		;x*P(x^2) at [[CURstk]], Q(x^2) in regs
+	mov	edi,EMSEG:[CURstk]	;Point to first result
+	call	DivDouble		;x * P(x^2) / Q(x^2)
+AtanCorrection:
+;Rounded mantissa in ebx:esi:eax, exp/sign in high ecx
+;
+;Correct sign and add fraction of pi to account for various angle reductions:
+;
+;    flag bit	   indicates		correction
+;----------------------------------------------------
+;	5	arg > tan(pi/12)	add pi/6
+;	6	st(1) > st(0)		sub from pi/2
+;	7	st(0) < 0		sub from pi
+;
+;This results in the following correction for the result R:
+;
+;bit  7 6 5	correction
+;---------------------------
+;     0 0 0	none
+;     0 0 1	pi/6 + R
+;     0 1 0	pi/2 - R
+;     0 1 1	pi/3 - R
+;     1 0 0	pi - R
+;     1 0 1	5*pi/6 - R
+;     1 1 0	pi/2 + R
+;     1 1 1	2*pi/3 + R
+
+	mov	dl,[esp]		;Get flag byte
+	or	dl,dl			;No correction needed?
+	jz	AtanSetSign
+	add	dl,40H			;Set bit 7 for all -R cases
+
+;This changes the meaning of the flag bits to the following:
+;
+;bit  7 6 5	correction
+;---------------------------
+;     0 0 0	pi/2 + R
+;     0 0 1	2*pi/3 + R
+;     0 1 0	none
+;     0 1 1	pi/6 + R
+;     1 0 0	pi/2 - R
+;     1 0 1	pi/3 - R
+;     1 1 0	pi - R
+;     1 1 1	5*pi/6 - R
+
+	xor	ch,dl			;Flip sign bit in cases 4 - 7
+	shr	dl,5-2			;Now in bits 2,3,4
+	and	edx,7 shl 2
+;edx = 4*index, so edx+2*edx = 12*index, the size of one table entry.
+;edx is loaded last because it is the index register.
+	mov	eax,[edx+2*edx+tAtanPiFrac].ExpSgn
+	mov	edi,[edx+2*edx+tAtanPiFrac].lManLo
+	mov	edx,[edx+2*edx+tAtanPiFrac].lManHi
+	call	AddDoubleReg		;Add in correction angle
+AtanSetSign:
+	pop	edx			;Get flags again
+	mov	ch,dh			;Set sign to original ST(1)
+;Rounded mantissa in ebx:esi:eax, exp/sign in ecx
+	jmp     TransUnround
+
+
+;***
+AtanSpclDest:
+;ST(1) at [edi] is tagged special.  Anything but infinity is passed on to
+;SpclDest with the tag in al.
+	mov	al,EMSEG:[edi].bTag	;Pick up tag
+;	cmp     cl,bTAG_INF		;Is argument infinity?
+	cmp     al,bTAG_INF		;Is argument infinity?
+	jnz	SpclDest		;In emarith.asm
+AtanZeroSource:
+;Dividend is infinity or divisor is zero.  Return pi/2 with 
+;same sign as dividend.
+	mov	ecx,(PiExp-1) shl 16 + bTAG_VALID	;Exponent for pi/2
+PiMant:
+;For storing multiples of pi.  Exponent/tag is in ecx.
+;Entered with edi pointing to the dividend, ST(1).
+	mov	ch,EMSEG:[edi].bSgn	;Get dividend's sign
+	mov	ebx,XPiHi
+	mov	esi,XPiMid
+	mov	eax,XPiLo
+;A jump through [TransRound] is only valid if the number is known not to
+;underflow.  Unmasked underflow requires [RoundMode] be set.
+	jmp	EMSEG:[TransRound]
+
+;***
+AtanSpclSource:
+;ST(0) is tagged special, with its tag in cl.  Anything but infinity is
+;passed on to SpclSource.
+	cmp	cl,bTAG_INF		;Scaling by infinity?
+	jnz	SpclSource		;in emarith.asm
+AtanZeroDest:
+;Divisor is infinity or dividend is zero.  Return zero for +divisor, 
+;pi for -divisor.  Result sign is same as dividend.
+	or	ch,ch			;Check divisor's sign
+;mov leaves the flags from the sign test intact for the js below
+	mov	ecx,PiExp shl 16 + bTAG_VALID	;Exponent for pi
+	js	PiMant			;Store pi
+;Result is zero
+;The sign byte at [edi] is not written, so the dividend's sign is kept
+	mov	EMSEG:[edi].lManHi,0
+	mov	EMSEG:[edi].lManLo,0
+	mov	EMSEG:[edi].wExp,0
+	mov	EMSEG:[edi].bTAG,bTAG_ZERO
+	ret
+
+;***
+AtanTwoInf:
+;Both operands are infinite.  The magnitude is pi/4 when the divisor is
+;+infinity and 3*pi/4 when it is -infinity; the sign is copied from the
+;dividend at [edi].
+	test	ch,ch			;SF = divisor's sign
+	mov	ecx,(PiExp-2) shl 16 + bTAG_VALID	;pi scaled by 1/4
+	jns	PiMant			;+infinity: finish with pi's mantissa
+;-infinity divisor: build 3*pi/4 in ebx:esi:eax,ecx
+	mov	eax,XThreePiLo
+	mov	esi,XThreePiMid
+	mov	ebx,XThreePiHi
+	mov	ecx,(ThreePiExp-2) shl 16 + bTAG_VALID	;3*pi scaled by 1/4
+	mov	ch,EMSEG:[edi].bSgn	;Result takes dividend's sign
+;[TransRound] may only be used when the value cannot underflow;
+;unmasked underflow would need [RoundMode] to be set instead.
+	jmp	EMSEG:[TransRound]
+
+;*******************************************************************************
+
+ExpSpcl:
+;F2XM1 argument is tagged special.
+;ecx = ExpSgn of the argument, edi = pointer to it.
+	cmp	cl,bTAG_DEN
+	jz	ExpDenorm
+	cmp	cl,bTAG_INF
+        mov     al, cl			;mov keeps the flags for the jnz below
+	jnz	SpclDestNotDen		;Check for Empty or NAN
+;Have infinity, check its sign.  
+;Return -1 for -infinity, no change if +infinity
+	or	ch,ch			;Check sign
+	jns	ExpRet			;Just return the +infinity
+	mov	EMSEG:[edi].lManLo,0
+	mov	EMSEG:[edi].lManHi,1 shl 31
+	mov	EMSEG:[edi].ExpSgn,bSign shl 8 + bTAG_SNGL	;-1.0 (exponent is zero)
+	ret
+
+ExpDenorm:
+;NOTE(review): this stores to CURerr rather than OR-ing into it, so any
+;bits already set there are lost -- confirm CURerr is clear at this point.
+	mov	EMSEG:[CURerr],Denormal
+	test	EMSEG:[CWmask],Denormal	;Is denormal exception masked?
+	jnz	ExpCont			;Yes, continue
+ExpRet:
+	ret
+
+EM_ENTRY eF2XM1
+eF2XM1:
+;F2XM1 instruction: replace the value at [edi] with 2^x - 1.
+;
+;edi = [CURstk]
+	mov	ecx,EMSEG:[edi].ExpSgn
+	cmp	cl,bTAG_ZERO
+	jz	ExpRet			;Return same zero
+	ja	ExpSpcl			;Tags above zero are special
+ExpCont:
+
+;The input range specified for the function is (-1, +1).  The polynomial 
+;used for this function is valid only over the range [0, +0.5], so range
+;reduction is needed.  Range reduction is based on the identity:
+;
+;  2^(a+b) = 2^a * 2^b
+;
+;1.0 or 0.5 can be added/subtracted from the argument to bring it into
+;range.  We calculate 2^x - 1 with a polynomial, and then adjust the
+;result according to the amount added or subtracted, as shown in the table:
+;
+;Arg range	Adj	Polynomial result	Required result, 2^x - 1
+;
+; (-1, -0.5]	+1	P = 2^(x+1) - 1		(P - 1)/2
+;
+; (-0.5, 0)	+0.5	P = 2^(x+0.5) - 1	P * sqrt(2)/2 + (sqrt(2)/2 - 1)
+;
+; (0, 0.5)	0	P = 2^x - 1		P
+;
+; [0.5, 1)	-0.5	P = 2^(x-0.5) - 1	P * sqrt(2) + (sqrt(2)-1)
+;
+;Since the valid input range does not include +1.0 or -1.0, and zero is
+;handled separately, the precision exception will always be set.
+
+	mov	EMSEG:[Result],edi
+	mov	EMSEG:[RoundMode],offset PolyRound
+	mov	EMSEG:[ZeroVector],offset PolyZero
+	push	offset TransUnround		;Always exit through here
+	mov	ebx,EMSEG:[edi].lManHi
+	mov	esi,EMSEG:[edi].lManLo
+;Check for small argument, so that x^2 does not underflow.  Note that 
+;e^x = 1+x for small x, where small x means  x + x^2/2 = x  [or 1 + x/2 = 1], 
+;which happens when x < 2^-64, so 2^x - 1 = x * ln(2) for small x.
+TinyExpArg	equ	-64
+	cmp	ecx,TinyExpArg shl 16
+	jl	TinyExp
+;NOTE(review): assuming bSign is 80H and ExpSgn is laid out as exponent in
+;the high word with sign in bit 15, this signed compare against the ExpSgn
+;of -0.5 is also true for positive arguments with exponent -1, i.e.
+;[0.5, 1), and for all negative arguments with exponent <= -2.  That would
+;leave only the +1.0 reduction below reachable.  The comment and the table
+;above suggest a different limit was intended -- confirm.
+	cmp	ecx,-1 shl 16 + bSign shl 8	;See if positive, < 0.5
+	jl	ExpReduced
+;Argument was not in range (0, 0.5), so we need some kind of reduction
+	or	ecx,ecx			;Exp >= 0 means arg >= 1.0 --> too big
+;CONSIDER: this returns through TransUnround which restores the rounding
+;vectors, but it also randomly rounds the result because eax is not set.
+	jge	ExpRet			;Give up if arg out of range
+;We're going to need to add/subtract 1.0 or 0.5, so load up the constant
+;ebp is set to the cleanup routine that matches the constant chosen
+	mov	edx,1 shl 31
+	xor	edi,edi
+	mov	eax,-1 shl 16 + bSign shl 8	;edx:edi,eax = -0.5
+	mov	ebp,offset ExpReducedMinusHalf
+	or	ch,ch			;If it's positive, must be [0.5, 1)
+	jns	ExpReduction
+	xor	ah,ah			;edx:edi,eax = +0.5
+	mov	ebp,offset ExpReducedPlusHalf
+	cmp	ecx,eax			;See if abs(arg) >= 0.5
+	jl	ExpReduction		;No, adjust by .5
+	xor	eax,eax			;edx:edi,eax = 1.0
+	mov	ebp,offset ExpReducedPlusOne
+ExpReduction:
+;ebx:esi,ecx = argument, edx:edi,eax = reduction constant,
+;ebp = address of the matching cleanup routine
+	call	AddDoubleReg		;Argument now in range [0, 0.5]
+	cmp	cl,bTAG_ZERO		;Did reduction result in zero?
+	jz	ExpHalf			;If so, must have been exactly 0.5
+	push	ebp			;Address of reduction cleanup
+ExpReduced:
+;ebx:esi,ecx = argument for the polynomial
+	mov	edi,offset tExpPoly
+	call	Eval2Poly
+;2^x - 1 is approximated with 2 * x*P(x^2) / ( Q(x^2) - x*P(x^2) )
+;Q(x^2) is in registers, P(x^2) is at [[CURstk]]
+	mov	edi,EMSEG:[CURstk]
+	mov	dx,bSign shl 8		;Subtract memory operand
+;Note that Q() and P() have no roots over the input range
+;(they will never be zero).
+	call	AddDouble		;Q(x^2) - x*P(x^2)
+;Halving the divisor supplies the factor of 2 in the quotient
+	sub	ecx,1 shl 16		;Divide by two
+	mov	edi,EMSEG:[CURstk]
+	jmp	DivDouble		;2 * x*P(x^2) / ( Q(x^2) - x*P(x^2) )
+;Returns to correct argument reduction correction routine or TransUnround
+
+TinyExp:
+;Argument is too small for the polynomial and was not reduced.
+;Scale it by the FLDLN2 constant instead.
+	mov	eax,cFLDLN2exp shl 16
+	mov	edi,cFLDLN2lo
+	mov	edx,cFLDLN2hi
+;The product can underflow, though only slightly
+	jmp	MulDoubleReg		;Its return goes to TransUnround
+
+ExpHalf:
+;Reduction produced zero, so the argument was exactly 0.5.
+;Load the constant sqrt(2) - 1 as the result.
+	mov	ecx,Sqrt2m1Exp shl 16
+	mov	eax,XSqrt2m1Lo + 1 shl 31 - 1
+	mov	esi,Sqrt2m1Lo
+	mov	ebx,Sqrt2m1Hi
+	ret				;Return address on stack is TransUnround
+
+ExpReducedPlusOne:
+;Argument had 1.0 added.  With P in ebx:esi,ecx, form (P - 1)/2
+;as P/2 - 0.5.
+	mov	edx,1 shl 31
+	sub	ecx,1 shl 16		;P/2: decrement the exponent
+	xor	edi,edi
+	mov	eax,-1 shl 16 + bSign shl 8	;edx:edi,eax = -0.5
+	jmp	AddDoubleReg
+
+ExpReducedPlusHalf:
+;Argument had 0.5 added, so P = 2^(x+0.5) - 1 is in ebx:esi,ecx.
+;Correct result is P * sqrt(2)/2 - (1 - sqrt(2)/2)
+	mov	edx,Sqrt2Hi
+	mov	edi,Sqrt2Lo
+;Parenthesized because SHL binds tighter than binary minus; the
+;unparenthesized form gave the right value only while Sqrt2Exp is zero.
+	mov	eax,(Sqrt2exp-1) shl 16	;sqrt(2)/2
+	call	MulDoubleReg		;P * sqrt(2)/2
+	mov	edx,TwoMinusSqrt2Hi
+	mov	edi,TwoMinusSqrt2Lo
+	mov	eax,(TwoMinusSqrt2Exp-1) shl 16 + bSign shl 8	;-(2-sqrt(2))/2
+	jmp	AddDoubleReg
+
+ExpReducedMinusHalf:
+;Argument had 0.5 subtracted.  With P in ebx:esi,ecx, form
+;P * sqrt(2) + (sqrt(2)-1).
+	mov	eax,Sqrt2exp shl 16
+	mov	edi,Sqrt2Lo
+	mov	edx,Sqrt2Hi
+	call	MulDoubleReg		;P * sqrt(2)
+	mov	eax,Sqrt2m1Exp shl 16
+	mov	edi,Sqrt2m1Lo
+	mov	edx,Sqrt2m1Hi
+	jmp	AddDoubleReg		;... + (sqrt(2)-1)
+
+;*******************************************************************************
+
+;Dispatch table for log(x+1)
+;
+;One operand has been loaded into ecx:ebx:esi ("source"), the other is
+;pointed to by edi ("dest").  
+;
+;Tag of source is shifted.  Tag values are as follows:
+
+.erre	TAG_SNGL	eq	0	;SINGLE: low 32 bits are zero
+.erre	TAG_VALID	eq	1
+.erre	TAG_ZERO	eq	2
+.erre	TAG_SPCL	eq	3	;NAN, Infinity, Denormal, Empty
+
+;Any special case routines not found in this file are in emarith.asm
+
+;Handler addresses for FYL2XP1, one per combination of operand classes
+;named in the right-hand columns, plus a final entry for two infinities.
+;The code that indexes this table is not in this file.
+tFyl2xp1Disp	label	dword		;Source (ST(0))	Dest (*[edi] = ST(1))
+	dd	LogP1Double		;single		single
+	dd	LogP1Double		;single		double
+	dd	LogP1ZeroDest		;single		zero
+	dd	LogP1SpclDest		;single		special
+	dd	LogP1Double		;double		single
+	dd	LogP1Double		;double		double
+	dd	LogP1ZeroDest		;double		zero
+	dd	LogP1SpclDest		;double		special
+	dd	XorSourceSign		;zero		single
+	dd	XorSourceSign		;zero		double
+	dd	XorDestSign		;zero		zero
+	dd	LogP1SpclDest		;zero		special
+	dd	LogSpclSource		;special	single
+	dd	LogSpclSource		;special	double
+	dd	LogSpclSource		;special	zero
+	dd	TwoOpBothSpcl		;special	special
+	dd	LogTwoInf		;Two infinities
+
+
+LogP1Double:
+;FYL2XP1 for two finite, non-zero operands.
+;
+;st(0) mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
+;[edi] points to st(1), where result is returned
+;
+;This instruction is defined only for x+1 in the range [1/sqrt(2), sqrt(2)]
+;The approximation used (valid over exactly this range) is
+; log2(x) = z * P(z^2) / Q(z^2), z = (x-1) / (x+1), which is
+; log2(x+1) = r * P(r^2) / Q(r^2), r = x / (x+2)
+;
+;We're not too picky about this range check because the function is simply
+;"undefined" if out of range--EXCEPT, we're supposed to check for -1 and
+;signal Invalid if less, -infinity if equal.
+	or	ecx,ecx			;abs(x) >= 1.0?
+	jge	LogP1OutOfRange		;Valid range is approx [-0.3, +0.4]
+	mov	EMSEG:[Result],edi
+	mov	EMSEG:[RoundMode],offset PolyRound
+	mov	EMSEG:[ZeroVector],offset PolyZero
+	mov	eax,1 shl 16		;Exponent of 1 for adding 2.0
+	push	offset TotalLog		;Return address for BasicLog
+;Execution continues into BasicLog; the jump is left commented out
+;	jmp	BasicLog		;Fall into BasicLog
+;.erre	BasicLog eq $
+
+;BasicLog is used by eFYL2X and eFYL2XP1.
+;eax has exponent and sign to add 1.0 or 2.0 to argument
+;ebx:esi,ecx has argument, non-zero, tag not set
+;ST has argument to take log2 of, minus 1.  (This is the actual argument
+;of eFYL2XP1, or argument minus 1 of eFYL2X.)
+;The caller's continuation address must already be on the stack; both
+;exits below are jumps to routines that return to it.
+
+BasicLog:
+	mov	edx,1 shl 31
+	xor	edi,edi			;edx:edi,eax = +1.0 or +2.0
+	call	AddDoubleReg
+	mov	edi,EMSEG:[CURstk]	;Point to x-1
+	call	DivDouble		;Compute (x-1) / (x+1)
+;Result in registers is z = (x-1)/(x+1).  For tiny z, ln(x) = 2*z, so
+; log2(x) = 2 * log2(e) * z.  Tiny z is such that z + z^3/3 = z.
+	cmp	ecx,-32 shl 16		;Smallest exponent to bother with
+	jl	LogSkipPoly
+	mov	edi,offset tLogPoly
+	call	Eval2Poly		;r*P(r^2) at [[CURstk]], Q(r^2) in regs
+	mov	edi,EMSEG:[CURstk]	;Point to first result, r * P(r^2)
+	jmp	DivDouble		;Compute r * P(r^2) / Q(r^2)
+
+LogSkipPoly:
+;z is too small for the polynomial; scale it by 2 * log2(e) instead.
+	mov	eax,(Log2OfEexp+1) shl 16	;Exponent + 1 doubles the constant
+	mov	edi,Log2OfELo
+	mov	edx,Log2OfEHi
+	jmp	MulDoubleReg
+
+LogP1OutOfRange:
+;Input range isn't valid, so we can return anything we want--EXCEPT, for
+;numbers < -1 we must signal Invalid Operation, and Divide By Zero for
+;-1.  Otherwise, we return an effective log of one by just leaving the
+;second operand as the return value.
+;
+;Exponent in ecx >= 0  ( abs(x) >= 1 )
+	or	ch,ch			;Is it positive?
+	jns	LogP1Ret		;If so, skip it
+;x is negative.  It is exactly -1.0 only if the exponent is zero and the
+;mantissa is just the MSB, so OR together everything that must be zero.
+	and	ecx,0FFFFH shl 16	;Look at exponent only: 0 for -1.0
+	sub	ebx,1 shl 31		;Kill MSB
+	or	ebx,esi
+	or	ebx,ecx
+	jnz	ReturnIndefinite	;Must be < -1.0
+	jmp	DivideByMinusZero
+
+LogP1Ret:
+	ret
+	
+;***
+;LogP1ZeroDest - FYL2XP1 handler used when ST(1) is zero.
+;ecx holds the exponent (high word) and sign (ch bit 7) of x = ST(0).
+;  x not negative	the zero is left as the result
+;  -1 < x < 0		sign of the zero is flipped (XorDestSign)
+;  x <= -1		0 * log( <=0 ) is invalid (ReturnIndefinite)
+LogP1ZeroDest:
+	test	ch,ch			;Sign of x lives in ch bit 7
+	jns	LogP1Ret		;Not negative: zero stays as it is
+	test	ecx,ecx			;Negative exponent means abs(x) < 1.0
+	js	XorDestSign		;-1 < x < 0: go flip sign of the zero
+	jmp	ReturnIndefinite	;x <= -1: invalid operand
+
+;***
+;LogP1SpclDest - FYL2XP1 handler used when ST(1) has a special tag.
+;[edi] points to ST(1); x = ST(0) is in ebx:esi,ecx with its tag in cl.
+;Anything other than an infinite ST(1) is passed on to SpclDest.
+LogP1SpclDest:
+	mov	al,EMSEG:[edi].bTag		;Pick up tag
+	cmp	al,bTAG_INF		;Is argument infinity?
+	jnz	SpclDest		;In emarith.asm
+;Multiplying log(x+1) * infinity.
+;If x > 0, return original infinity.
+;If -1 <= x < 0, return infinity with sign flipped.
+;If x < -1 or x == 0, invalid operation.
+	cmp	cl,bTAG_ZERO
+	jz	ReturnIndefinite
+	or	ch,ch			;Is it positive?
+	jns	LogP1Ret
+;TEST clears OF, so JL is taken on a negative exponent ( abs(x) < 1 ),
+;JG on a positive one ( abs(x) >= 2 ), and a zero exponent falls through.
+	test	ecx,0FFFFH shl 16	;Is exponent zero?
+	jl	XorDestSign
+	jg	ReturnIndefinite
+;Exponent is zero: -2 < x <= -1.  Only x == -1.0 exactly is still valid.
+	sub	ebx,1 shl 31		;Kill MSB
+	or	ebx,esi
+	jnz	ReturnIndefinite	;Must be < -1.0
+	jmp	XorDestSign
+
+;***
+;LogSpclSource - FYL2X handler for a special ST(0) paired with a
+;non-special ST(1) (see tFyl2xDisp).  Tag of ST(0) is in cl, sign in ch.
+;Only +infinity is finished here; -infinity is invalid, and every other
+;special tag is handed to SpclSource.
+LogSpclSource:
+	cmp	cl,bTAG_INF		;Is the source an infinity?
+	jne	SpclSource		;No--generic handler in emarith.asm
+	test	ch,ch			;Sign of the infinity is ch bit 7
+	jns	MulByInf		;+infinity: finish in MulByInf
+	jmp	ReturnIndefinite	;-infinity: invalid operation
+
+;***
+;LogTwoInf - FYL2X handler for the last tFyl2xDisp entry, where both
+;operands are infinite.  Sign of the source (ST(0)) is in ch bit 7.
+LogTwoInf:
+	test	ch,ch			;Which infinity is the source?
+	jns	XorDestSign		;+infinity: finish in XorDestSign
+	jmp	ReturnIndefinite	;-infinity: invalid operation
+
+;*******************************************************************************
+
+;Dispatch table for log(x)
+;
+;One operand has been loaded into ecx:ebx:esi ("source"), the other is
+;pointed to by edi ("dest").  
+;
+;Tag of source is shifted.  Tag values are as follows:
+
+.erre	TAG_SNGL	eq	0	;SINGLE: low 32 bits are zero
+.erre	TAG_VALID	eq	1
+.erre	TAG_ZERO	eq	2
+.erre	TAG_SPCL	eq	3	;NAN, Infinity, Denormal, Empty
+
+;Any special case routines not found in this file are in emarith.asm
+;
+;Entries are ordered source-major: four dest tags for each source tag,
+;followed by one extra entry for the case where both operands are infinite.
+;NOTE(review): the code that indexes this table is not in this part of
+;the file; the ordering above is taken from the per-entry comments.
+
+tFyl2xDisp	label	dword		;Source (ST(0))	Dest (*[di] = ST(1))
+	dd	LogDouble		;single		single
+	dd	LogDouble		;single		double
+	dd	LogZeroDest		;single		zero
+	dd	LogSpclDest		;single		special
+	dd	LogDouble		;double		single
+	dd	LogDouble		;double		double
+	dd	LogZeroDest		;double		zero
+	dd	LogSpclDest		;double		special
+	dd	DivideByMinusZero	;zero		single
+	dd	DivideByMinusZero	;zero		double
+	dd	ReturnIndefinite	;zero		zero
+	dd	LogSpclDest		;zero		special
+	dd	LogSpclSource		;special	single
+	dd	LogSpclSource		;special	double
+	dd	LogSpclSource		;special	zero
+	dd	TwoOpBothSpcl		;special	special
+	dd	LogTwoInf		;Two infinities
+
+
+LogDouble:
+;FYL2X main path: ST(1) * log2(ST(0)) with both operands non-zero and
+;non-special.
+;
+;st(0) mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
+;[edi] points to st(1), where result is returned
+;
+;Outline:
+;  1. Split x into an integer power of two (kept on the stack) and a
+;     reduced value in [1/sqrt(2), sqrt(2)].
+;  2. Form (reduced x) - 1.  If that is zero, x was an exact power of two
+;     and BasicLog is skipped.
+;  3. Otherwise BasicLog produces the log2 of the reduced value.
+;  4. Add the integer part back in (LogDone), then multiply by st(1)
+;     (TotalLog / ExactPower).
+;
+;Must reduce the argument to the range [1/sqrt(2), sqrt(2)]
+	or	ch,ch			;Is it positive?
+	js	ReturnIndefinite	;Can't take log of negative number
+	mov	EMSEG:[Result],edi
+	mov	EMSEG:[RoundMode],offset PolyRound
+	mov	EMSEG:[ZeroVector],offset PolyZero
+	shld	eax,ecx,16		;Save exponent in ax as int part of log2
+	xor	ecx,ecx			;Zero exponent: 1 <= x < 2
+;64-bit unsigned compare of the mantissa against sqrt(2)
+	cmp	ebx,Sqrt2Hi		;x > sqrt(2)?
+	jb	LogReduced
+	ja	LogReduceOne
+	cmp	esi,Sqrt2Lo
+	jb	LogReduced
+LogReduceOne:
+;Mantissa >= sqrt(2): halve the reduced value and bump the integer part
+	sub	ecx,1 shl 16		;1/sqrt(2) < x < 1
+	inc	eax
+LogReduced:
+	push	eax			;Save integer part of log2
+	mov	ebp,ecx 		;Save reduced exponent (tag is wrong!)
+	mov	edx,1 shl 31
+	mov	eax,bSign shl 8		;Exponent of 0, negative
+	xor	edi,edi			;edx:edi,eax = -1.0
+	call	AddDoubleReg
+	cmp	cl,bTAG_ZERO		;Was it exact power of two?
+	jz	LogDone			;Skip log if power of two
+;Save (x - 1), reload x with reduced exponent
+;The xchg pair stores the (x - 1) mantissa into ST(0) while bringing the
+;original mantissa back into ebx:esi.
+	mov	edi,EMSEG:[CURstk]	;Point to original x again
+	xchg	EMSEG:[edi].lManHi,ebx
+	xchg	EMSEG:[edi].lManLo,esi
+	mov	EMSEG:[edi].ExpSgn,ecx
+	mov	ecx,ebp			;Get reduced exponent
+	xor	eax,eax			;Exponent of 0, positive
+	call	BasicLog
+LogDone:
+;ebx:esi,ecx = log2 of the reduced value (ebx == 0 if BasicLog was skipped)
+	pop	eax			;Get integer part back
+	cwde
+	or	eax,eax			;Is it zero?
+	jz	TotalLog
+;Next 3 instructions take abs() of integer
+	cdq				;Extend sign through edx
+	xor	eax,edx			;Complement...
+	sub	eax,edx			;  and increment if negative
+	bsr	dx,ax			;Look for MSB to normalize integer
+;Bit number in dx ranges from 0 to 15
+;The shift count is taken mod 32, so "not cl" yields 31 - (bit number),
+;which moves the MSB up to bit 31.  This overwrites the tag in cl.
+	mov	cl,dl
+	not	cl			;Convert to shift count
+	shl	eax,cl			;Normalize
+.erre	TexpBias eq 0
+;High half of edx still holds the sign extension left by cdq
+	rol	edx,16			;Move exponent high, sign low
+	or	ebx,ebx			;Was log zero?
+	jz	ExactPower
+	xchg	edx,eax			;Exp/sign to eax, mantissa to edx
+	xor	edi,edi			;Extend with zero
+	call	AddDoubleReg
+TotalLog:
+;Also the return address pushed by the FYL2XP1 path before it falls into
+;BasicLog.
+;Registers could be zero if input was exactly 1.0
+	cmp	cl,bTAG_ZERO
+	jz	ZeroLog
+TotalLogNotZero:
+	mov	edi,EMSEG:[Result]	;Point to second arg
+	push	offset TransUnround
+	jmp	MulDouble
+
+ExactPower:
+;Arg was a power of two, so log is exact (but not zero).
+	mov     ebx,eax			;Mantissa to ebx
+	mov     ecx,edx			;Exponent to ecx
+	xor     esi,esi			;Extend with zero
+;Exponent of arg [= log2(arg)] is now normalized in ebx:esi,ecx
+;
+;The result log is exact, so we don't want TransUnround, which is designed 
+;to ensure the result is never exact.  Instead we set the [RoundMode]
+;vector to [TransRound] before the final multiply.
+	mov	eax,EMSEG:[TransRound]
+	mov	EMSEG:[RoundMode],eax
+	mov	edi,EMSEG:[Result]	;Point to second arg
+	push	offset RestoreRound	;Return addr. for MulDouble in emtrig.asm
+	jmp	MulDouble
+
+ZeroLog:
+;log2 is exactly zero: put back the user's rounding vector and store the
+;value in registers through SaveResult.
+	mov	eax,EMSEG:[SavedRoundMode]
+	mov	EMSEG:[RoundMode],eax
+	mov	EMSEG:[ZeroVector],offset SaveResult
+	jmp	SaveResult
+
+;***
+;LogZeroDest - FYL2X handler used when ST(1) is zero and ST(0) is a
+;normal number.  ecx holds exponent (high word) and sign (ch bit 7) of
+;ST(0); [edi] points to ST(1).
+;  x < 0		invalid (ReturnIndefinite)
+;  exponent >= 0	log is not negative: the zero keeps its sign
+;  exponent < 0		log is negative: the sign of the zero is flipped
+;FlipDestSign is also entered directly from LogSpclDest.
+LogZeroDest:
+	test	ch,ch			;Sign of x lives in ch bit 7
+	js	ReturnIndefinite	;No log of a negative number
+	test	ecx,ecx			;Bit 31 is the sign of the exponent
+	jns	LogRet			;Exponent >= 0: leave the zero alone
+FlipDestSign:
+	not	EMSEG:[edi].bSgn	;Complement the sign byte of the dest
+	ret
+
+;***
+;LogSpclDest - FYL2X handler used when ST(1) has a special tag and ST(0)
+;does not.  [edi] points to ST(1); x = ST(0) is in ebx:esi,ecx with its
+;tag in cl.  Anything other than an infinite ST(1) goes to SpclDest.
+LogSpclDest:
+	mov	al,EMSEG:[edi].bTag		;Pick up tag
+	cmp	al,bTAG_INF		;Is argument infinity?
+	jnz	SpclDest		;In emarith.asm
+;Multiplying log(x) * infinity.
+;If x > 1, return original infinity.
+;If 0 <= x < 1, return infinity with sign flipped.
+;If x < 0 or x == 1, invalid operation.
+	cmp	cl,bTAG_ZERO
+	jz	FlipDestSign
+	or	ch,ch			;Is it positive?
+	js	ReturnIndefinite
+;TEST clears OF, so JG is taken on a positive exponent, JL on a negative
+;one, and a zero exponent ( 1 <= x < 2 ) falls through.
+	test	ecx,0FFFFH shl 16	;Is exponent zero?
+	jg	LogRet			;x > 1, just return infinity
+	jl	FlipDestSign
+;Exponent is zero: x == 1.0 only if the mantissa is 80000000:00000000H
+	sub	ebx,1 shl 31		;Kill MSB
+	or	ebx,esi
+	jz	ReturnIndefinite	;x == 1.0
+;Shared "return unchanged" exit, also used by LogZeroDest
+LogRet:
+	ret
diff --git a/private/ntos/dll/i386/emload.asm b/private/ntos/dll/i386/emload.asm
new file mode 100644
index 000000000..c2e68c561
--- /dev/null
+++ b/private/ntos/dll/i386/emload.asm
@@ -0,0 +1,416 @@
+        subttl  emload.asm - FLD and FILD instructions
+        page
+;*******************************************************************************
+;emload.asm - FLD and FILD instructions
+;
+;        Microsoft Confidential
+;
+;	 Copyright (c) Microsoft Corporation 1991
+;        All Rights Reserved
+;
+;Purpose:
+;       FLD and FILD instructions
+;Inputs:
+;	edi = [CURstk]
+;	dseg:esi = pointer to memory operand
+;
+;Revision History:
+;
+; []	09/05/91  TP	Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+	PrevStackWrap	edi,LdStk	;Tied to PrevStackElem below
+
+;*******
+EM_ENTRY eFLDreg
+eFLDreg:
+;*******
+;FLD ST(i): push a copy of ST(i) onto the emulated stack.
+;
+;	edi = [CURstk]
+;	esi = pointer to st(i) from instruction field
+;
+;Goes to FldErr if the receiving slot is not empty or if ST(i) is empty.
+;Otherwise the value is copied as is; the tag is not examined any further.
+
+	PrevStackElem	edi,LdStk	;Point to receiving location
+	cmp	EMSEG:[edi].bTag,bTAG_EMPTY	;Is it empty?
+	jnz	FldErr
+	mov	ecx,EMSEG:[esi].ExpSgn
+	cmp	cl,bTAG_EMPTY
+	jz	FldErr
+	mov	ebx,EMSEG:[esi].lManHi
+	mov	esi,EMSEG:[esi].lManLo
+	mov	EMSEG:[CURstk],edi	;New top of stack
+	mov	EMSEG:[edi].lManLo,esi
+	mov	EMSEG:[edi].lManHi,ebx
+	mov	EMSEG:[edi].ExpSgn,ecx
+	ret
+
+
+;This is common code that stores a value into the stack after being loaded
+;into registers by the appropriate routine.
+;
+;The load routines below push "offset FldCont" as a return address and fall
+;into their conversion routine, so FldCont is entered by that routine's ret.
+
+	PrevStackWrap	edi,Load	;Tied to PrevStackElem below
+
+FldCont:
+;mantissa in ebx:esi, exp/sign in ecx
+;edi = [CURstk]
+	PrevStackElem	edi,Load	;Point to receiving location
+	cmp	EMSEG:[edi].bTag,bTAG_EMPTY	;Is it empty?
+	jnz	FldErr
+	cmp	cl,bTAG_NAN		;Returning a NAN?
+	jz	FldNAN
+SaveStack:
+;Store ebx:esi,ecx at [edi] and make edi the new top of stack
+	mov	EMSEG:[CURstk],edi
+	mov	EMSEG:[edi].lManLo,esi
+	mov	EMSEG:[edi].lManHi,ebx
+	mov	EMSEG:[edi].ExpSgn,ecx
+	ret
+
+FldErr:
+;Receiving slot was not empty (or the source register was empty)
+	or	EMSEG:[SWcc],C1		;Signal overflow
+	mov	EMSEG:[CURerr],StackFlag;Kills possible denormal exception
+Unsupported:
+;Also entered from eFLD80 for unsupported 80-bit formats
+;The jz below tests ZF as returned by ReturnIndefinite.
+	call	ReturnIndefinite	;in emarith.asm
+	jz	FldExit			;Unmasked, do nothing
+	mov	EMSEG:[CURstk],edi	;Update top of stack
+FldExit:
+	ret
+
+FldNAN:
+;Is it a signaling NAN?
+	test	ebx,1 shl 30		;Check for SNAN
+	jnz	SaveStack		;If QNAN, just use it as result
+	or	EMSEG:[CURerr],Invalid	;Flag the error
+	or	ebx,1 shl 30		;Make it into a QNAN
+	test	EMSEG:[CWmask],Invalid	;Is it masked?
+	jnz	SaveStack		;If so, update with masked response
+	ret				;Unmasked: stack is left unchanged
+
+
+;****************
+;Load Single Real
+;****************
+
+EM_ENTRY eFLD32
+eFLD32:
+	push	offset FldCont		;Return address
+					;Fall into Load32Real
+Load32Real:
+;Convert an IEEE single to the internal register format.
+;
+;dseg:esi points to IEEE 32-bit real number
+;On exit:
+;	mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl
+;preserves edi.
+;The low seven bits of ch are not cleared (ch is a copy of the top byte of
+;the operand).
+
+        mov     EMSEG:[PrevDataOff],esi       ;Save operand pointer
+	mov	ecx,dseg:[esi]		;Get number
+	mov	ebx,ecx			;Save copy of mantissa
+	shl	ebx,8			;Normalize
+	shr	ecx,7			;Bring exponent down
+	and	ecx,0FFH shl 16		;Look at just exponent
+;mov does not change flags, so the jz below still tests the result of the
+;"and" above.
+	mov	ch,dseg:[esi+3]		;Get sign again
+	jz	short ZeroOrDenorm32	;Exponent is zero
+	xor	esi,esi			;Zero out the low bits
+	or	ebx,1 shl 31		;Set implied bit
+	cmp	ecx,SexpMax shl 16
+	jge	NANorInf		;Max exp., must be NAN or Infinity
+	add	ecx,(TexpBias-SexpBias) shl 16	;Change to extended format bias
+	mov	cl,bTAG_SNGL
+	ret
+
+ZeroOrDenorm32:
+;Exponent is zero. Number is either zero or denormalized
+	xor	esi,esi			;Zero out the low bits
+	and	ebx,not (1 shl 31)	;Keep just mantissa
+	jnz	Norm32
+	mov	cl,bTAG_ZERO
+	ret
+
+Norm32:
+;Denormal: FixDenorm (under Load64Real) normalizes it and sets the
+;Denormal exception flag.
+	add	ecx,(TexpBias-SexpBias+1-31) shl 16	;Fix up bias
+	jmp	FixDenorm
+
+NANorInf:
+;Shared by single and double real
+;Entered with the maximum source exponent; mantissa (implied bit already
+;set) is in ebx:esi and the sign in ch bit 7.
+;Returns with the exponent set to TexpMax and cl = bTAG_INF if the mantissa
+;is exactly 80000000:00000000H, else cl = bTAG_NAN.
+	and	ecx,bSign shl 8		;Save only sign in ch
+	or	ecx,TexpMax shl 16 + bTAG_NAN	;Max exp.
+	cmp	ebx,1 shl 31		;Only 1 bit set means infinity
+	jnz	@F
+	or	esi,esi
+	jnz	@F
+	mov	cl,bTAG_INF
+@@:
+	ret
+
+;****************
+;Load Double Real
+;****************
+
+EM_ENTRY eFLD64
+eFLD64:
+	push	offset FldCont		;Return address
+					;Fall into Load64Real
+Load64Real:
+;Convert an IEEE double to the internal register format.
+;
+;dseg:esi points to IEEE 64-bit real number
+;On exit:
+;	mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl
+;preserves edi.
+
+        mov     EMSEG:[PrevDataOff],esi       ;Save operand pointer
+	mov	ecx,dseg:[esi+4]		;Get sign, exp., and high mantissa
+	mov	ebx,ecx			;Save copy of mantissa
+	shr	ecx,4			;Bring exponent down
+	and	ecx,7FFH shl 16		;Look at just exponent
+;The two mov's below leave the flags from the "and" intact for the jz.
+	mov	ch,dseg:[esi+7]		;Get sign again
+	mov	esi,dseg:[esi]		;Get low 32 bits of op
+	jz	short ZeroOrDenorm64	;Exponent is zero
+;Move the 52 fraction bits up so they sit just below bit 31 of ebx
+	shld	ebx,esi,31-20
+	shl	esi,31-20		;Normalize
+	or	ebx,1 shl 31		;Set implied bit
+	cmp	ecx,DexpMax shl 16
+	jge	NANorInf		;Max exp., must be NAN or Infinity
+	add	ecx,(TexpBias-DexpBias) shl 16	;Change to extended format bias
+SetNormTag:
+	or	esi,esi			;Any bits in low half?
+.erre	bTAG_VALID eq 1
+.erre	bTAG_SNGL eq 0
+	setnz   cl                      ;if low half==0 then cl=0 else cl=1
+	ret
+
+ZeroOrDenorm64:
+;Exponent is zero. Number is either zero or denormalized
+	and	ebx,0FFFFFH		;Keep just mantissa
+	jnz	ShortNorm64		;Are top 20 bits zero?
+	or	esi,esi			;Are low 32 bits zero too?
+	jnz	LongNorm64
+	mov	cl,bTAG_ZERO
+	ret
+
+LongNorm64:
+	xchg	ebx,esi			;Shift up 32 bits
+	sub	ecx,32 shl 16		;Correct exponent
+ShortNorm64:
+	add	ecx,(TexpBias-DexpBias+12-31) shl 16	;Fix up bias
+FixDenorm:
+;Shared with Norm32.  ebx is non-zero here; normalize ebx:esi and add the
+;bit number of the old MSB to the exponent.
+	or	EMSEG:[CURerr],Denormal	;Set Denormal Exception
+	bsr	edx,ebx			;Scan for MSB
+;Bit number in edx ranges from 0 to 31
+;Shift counts are taken mod 32, so "not cl" gives 31 - (bit number).
+;cl is only scratch here; SetNormTag writes the real tag.
+	mov	cl,dl
+	not	cl			;Convert bit number to shift count
+	shld	ebx,esi,cl
+	shl	esi,cl
+	shl	edx,16			;Move exp. adjustment to high end
+	add	ecx,edx			;Adjust exponent
+	jmp	SetNormTag
+
+
+;******************
+;Load Short Integer
+;******************
+
+EM_ENTRY eFILD16
+eFILD16:
+	push	offset FldCont		;Return address
+					;Fall into Load16Int
+Load16Int:
+;Convert a 16-bit signed integer to the internal register format.
+;
+;dseg:esi points to 16-bit integer
+;On exit:
+;	mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl
+;preserves edi.
+;NormInt16 is an alternate entry with the integer already in ax.
+
+        mov     EMSEG:[PrevDataOff],esi       ;Save operand pointer
+	mov	ax,dseg:[esi]
+NormInt16:
+	xor	esi,esi			;Extend with zero
+	cwd				;extend sign through dx
+	xor	ax,dx
+	sub	ax,dx			;Take ABS() of integer
+	bsr	cx,ax			;Find MSB
+	jz	ZeroInt
+;Bit number in cx ranges from 0 to 15
+;Shift count is 31 - (bit number), i.e. at least 16, so whatever was in the
+;high half of eax is shifted out.  The second "not" restores the bit number.
+	not	ecx			;Convert to shift count
+	shl	eax,cl			;Normalize
+	not	ecx
+.erre	TexpBias eq 0
+	shl	ecx,16			;Move exponent to high half
+	mov	ch,dh			;Set sign
+	mov	ebx,eax			;Mantissa to ebx
+	mov	cl,bTAG_SNGL
+	ret
+
+ZeroInt:
+;Integer was zero; esi is already zero.  Also used by Load32Int.
+	xor	ebx,ebx
+	mov	ecx,ebx
+	mov	cl,bTAG_ZERO
+	ret
+
+
+;******************
+;Load Long Integer
+;******************
+
+EM_ENTRY eFILD32
+eFILD32:
+	push	offset FldCont		;Return address
+					;Fall into Load32Int
+Load32Int:
+;Convert a 32-bit signed integer to the internal register format.
+;
+;dseg:esi points to 32-bit integer
+;On exit:
+;	mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl
+;preserves edi.
+
+        mov     EMSEG:[PrevDataOff],esi       ;Save operand pointer
+	mov	eax,dseg:[esi]
+	xor	esi,esi			;Extend with zero
+	or	eax,eax			;Is it zero?
+	jz	ZeroInt
+	cdq				;extend sign through edx
+	xor	eax,edx
+	sub	eax,edx			;Take ABS() of integer
+	mov	ebx,eax			;Mantissa to ebx
+;BSR uses 3 clocks/bit, so speed it up by checking the top half
+;This saves 36 clocks on 386 (42 on 486sx)
+;Cost is 13 clocks on 386 if high word isn't zero (5 on 486sx)
+.erre	TexpBias eq 0
+	xor	eax,eax			;Initialize exponent
+	cmp	ebx,0FFFFH		;Upper bits zero?
+	ja	@F
+	shl	ebx,16
+	sub	eax,16
+@@:
+	bsr	ecx,ebx			;Find MSB
+	add	eax,ecx			;Compute exponent
+	not	cl			;Convert bit number to shift count
+	shl	ebx,cl			;Normalize
+;shrd puts ax (the exponent) into the high half of ecx; the low half is
+;overwritten by the next two instructions.
+	shrd	ecx,eax,16		;Move exponent to high half of ecx
+	mov	ch,dh			;Set sign
+	mov	cl,bTAG_SNGL
+	ret
+
+
+;*****************
+;Load Quad Integer
+;*****************
+
+EM_ENTRY eFILD64
+eFILD64:
+;Convert a 64-bit signed integer at dseg:esi and push it through FldCont.
+;edi = [CURstk]
+        mov     EMSEG:[PrevDataOff],esi       ;Save operand pointer
+	mov	ebx,dseg:[esi+4]		;Get high 32 bits
+	mov	eax,ebx			;Make copy of sign
+	mov	esi,dseg:[esi]		;Get low 32 bits
+	mov	ecx,ebx
+	or	ecx,esi			;Is it zero?
+	jz	ZeroQuad
+NormQuadInt:
+;Entry point from eFBLD
+;eax bit 31 = sign
+;ebx:esi = integer
+;edi = [CURstk]
+;The exponent is accumulated in ax while the high half of eax carries the
+;sign down to the final "rol".
+.erre	TexpBias eq 0
+	mov     ax,32                   ;Initialize exponent
+	or	ebx,ebx			;Check sign
+	jz	LongNormInt
+	jns	FindBit
+;Negative: take the two's complement of ebx:esi
+	not	ebx
+	neg	esi			;CY set if non-zero
+	sbb	ebx,-1			;Add one if esi == 0
+	jnz	FindBit			;Check for high bits zero
+LongNormInt:
+	xchg	ebx,esi			;Normalize 32 bits
+	xor     ax,ax                   ;Reduce exponent by 32
+FindBit:
+;BSR uses 3 clocks/bit, so speed it up by checking the top half
+;This saves 35 clocks on 386 (41 on 486sx)
+;Cost is 11 clocks on 386 if high word isn't zero (4 on 486sx)
+	cmp	ebx,0FFFFH		;Upper bits zero?
+	ja	@F
+	shld	ebx,esi,16
+	shl	esi,16
+;This is a 32-bit subtract.  If ax was zero it borrows from the high half
+;of eax, but the "add eax,ecx" below adds back at least 16 and undoes the
+;borrow.
+	sub	eax,16
+@@:
+	bsr	ecx,ebx			;Find MSB
+	add	eax,ecx			;Compute exponent
+	not	cl			;Convert bit number to shift count
+	shld	ebx,esi,cl		;Normalize
+	shl	esi,cl
+	mov     ecx,eax                 ;Move sign and exponent to ecx
+	rol     ecx,16                  ;Swap sign and exponent halves
+	or	esi,esi			;Any bits in low half?
+.erre	bTAG_VALID eq 1
+.erre	bTAG_SNGL eq 0
+	setnz   cl                      ;if low half==0 then cl=0 else cl=1
+	jmp	FldCont
+
+ZeroQuad:
+;ebx, esi and ecx are all zero here; only the tag needs to be set
+	mov	cl,bTAG_ZERO
+	jmp	FldCont
+
+
+;****************
+;Load Temp Real
+;****************
+;
+;eFLD80 - push an 80-bit extended real read from memory.
+;Inputs:
+;	edi = [CURstk]
+;	dseg:esi = pointer to the 10-byte memory operand
+;
+;LoadTempReal is a second entry point (eFRSTOR in emlsenv.asm calls it)
+;that expects edi to point at the stack slot to fill already.
+;
+;Every successful path ends at SaveStack, which stores ebx:esi,ecx at
+;[edi] and sets [CURstk] = edi.  Error paths go to FldErr (receiving slot
+;not empty) or Unsupported (exponent non-zero but mantissa MSB clear).
+
+	PrevStackWrap	edi,Ld80	;Tied to PrevStackElem below
+
+EM_ENTRY eFLD80
+eFLD80:
+;This is not considered an "arithmetic" operation (like all the others are),
+;so SNANs do NOT cause an exception.  However, unsupported formats do.
+        mov     EMSEG:[PrevDataOff],esi	;Save operand pointer
+	PrevStackElem	edi,Ld80	;Point to receiving location
+	cmp	EMSEG:[edi].bTag,bTAG_EMPTY	;Is it empty?
+	jnz	FldErr
+LoadTempReal:
+	mov	ebx,dseg:[esi+4]	;Get high half of mantissa
+	mov	cx,dseg:[esi+8]		;Get exponent and sign
+	mov	esi,dseg:[esi]		;Get low half of mantissa
+	mov	eax,ecx			;ah keeps the byte holding the sign
+	and	ch,7FH			;Mask off sign bit
+	shl	ecx,16			;Move exponent to high end
+;ZF from the shl is set only for a zero exponent; mov leaves flags alone
+	mov	ch,ah			;Restore sign
+	jz	ZeroOrDenorm80
+;Check for unsupported format: unnormals (MSB not set)
+	or	ebx,ebx
+	jns	Unsupported
+	sub	ecx,(IexpBias-TexpBias) shl 16	;Correct the bias
+	cmp	ecx,TexpMax shl 16
+	jge	NANorInf80
+SetupTag:
+	or	esi,esi			;Any bits in low half?
+.erre	bTAG_VALID eq 1
+.erre	bTAG_SNGL eq 0
+	setnz   cl                      ;if low half==0 then cl=0 else cl=1
+	jmp	SaveStack
+
+NANorInf80:
+;Maximum exponent: infinity if the mantissa is 80000000:00000000H, else NAN
+	mov	cl,bTAG_NAN
+	cmp	ebx,1 shl 31		;Only 1 bit set means infinity
+	jnz	SaveStack
+	or	esi,esi
+	jnz	SaveStack
+	mov	cl,bTAG_INF
+	jmp	SaveStack
+
+ZeroOrDenorm80:
+;Exponent is zero. Number is either zero or denormalized
+	or	ebx,ebx
+	jnz	ShortNorm80		;Are top 32 bits zero?
+	or	esi,esi			;Are low 32 bits zero too?
+	jnz	LongNorm80
+	mov	cl,bTAG_ZERO
+	jmp	SaveStack
+
+;This code accepts and works correctly with pseudo-denormals (MSB already set)
+LongNorm80:
+	xchg	ebx,esi			;Shift up 32 bits
+	sub	ecx,32 shl 16		;Correct exponent
+ShortNorm80:
+	add	ecx,(TexpBias-IexpBias+1-31) shl 16	;Fix up bias
+	bsr	edx,ebx			;Scan for MSB
+;Bit number in edx ranges from 0 to 31
+;Shift counts are taken mod 32, so "not cl" gives 31 - (bit number).
+;cl is only scratch here; SetupTag writes the real tag.
+	mov	cl,dl
+	not	cl			;Convert bit number to shift count
+	shld	ebx,esi,cl
+	shl	esi,cl
+	shl	edx,16			;Move exp. adjustment to high end
+	add	ecx,edx			;Adjust exponent
+;Label is spelled exactly as defined above, so this also assembles when
+;symbol names are case sensitive.
+	jmp	SetupTag
diff --git a/private/ntos/dll/i386/emlsbcd.asm b/private/ntos/dll/i386/emlsbcd.asm
new file mode 100644
index 000000000..f07d35b1e
--- /dev/null
+++ b/private/ntos/dll/i386/emlsbcd.asm
@@ -0,0 +1,279 @@
+	subttl	emlsbcd.asm - FBSTP and FBLD instructions
+        page
+;*******************************************************************************
+;emlsbcd.asm - FBSTP and FBLD instructions
+;
+;        Microsoft Confidential
+;
+;	 Copyright (c) Microsoft Corporation 1991
+;        All Rights Reserved
+;
+;Purpose:
+;	FBSTP and FBLD instructions.
+;
+;	These routines convert between 64-bit integer and 18-digit packed BCD
+;	format.  They work by splitting the number being converted in half
+;	and converting the two halves separately.  This works well because
+;	9 decimal digits fit nicely within 30 binary bits, so conversion of
+;	each half is strictly a 32-bit operation.
+;
+;Inputs:
+;	edi = [CURstk]
+;	dseg:esi = pointer to memory operand
+;
+;Revision History:
+;
+; []	09/05/91  TP	Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+;******
+eFBLD:
+;******
+;FBLD: load an 18-digit packed BCD number and push it.
+;
+;Inputs:
+;	edi = [CURstk]
+;	dseg:esi = pointer to 10-byte packed BCD operand
+;		   (digits in bytes 0-8, low digits first; sign in byte 9)
+;
+;The 18 digits are converted as two 9-digit halves (high half in edi, low
+;half in ebx), combined into a 64-bit binary integer, and then handed to
+;NormQuadInt in emload.asm.  An all-zero operand goes straight to FldCont.
+;
+;NOTE(review): unlike the load routines in emload.asm, no store to
+;[PrevDataOff] is visible on this path -- confirm whether it is done
+;somewhere else for FBLD.
+	mov	eax,dseg:[esi+5]		;Get high 8 digits
+	or	eax,eax			;Anything there?
+	jz	HighDigitsZero
+	mov	ecx,8
+	call	ReadDigits		;Convert first 8 digits to binary
+	mov	eax,dseg:[esi+1]		;Get next 8 digits
+	xor	edi,edi
+	shld	edi,eax,4		;Shift ninth digit into edi
+	imul	ebx,10
+	add	edi,ebx			;Accumulate ninth digit
+SecondNineDigits:
+;eax = bytes 1-4 of the operand, edi = binary value of high nine digits
+	xor	ebx,ebx			;In case eax==0
+	shl	eax,4			;Keep digits left justified
+	jz	LastTwoDigits
+	mov	ecx,7
+	call	ReadDigits		;Convert next 7 digits to binary
+LastTwoDigits:
+;Only al is reloaded; the shift by 24 discards whatever else is in eax
+	mov	al,dseg:[esi]		;Get last two digits
+	shl	eax,24			;Left justify
+	mov	ecx,2
+	call	InDigitLoop		;Accumulate last two digits
+;edi = binary value of high 9 digits
+;ebx = binary value of low 9 digits
+	mov	eax,1000000000		;One billion: shift nine digits left
+	mul	edi			;Left shift 9 digits. 9 cl. if edi==0
+	add	ebx,eax			;Add in low digits
+	adc	edx,0
+BcdReadyToNorm:
+;edx:ebx = integer converted to binary
+;The dword at offset 6 has the sign byte (offset 9) as its top byte
+	mov	eax,dseg:[esi+6]		;Get sign to high bit of eax
+	mov	esi,ebx
+	mov	ebx,edx
+	mov     edi,EMSEG:[CURstk]
+;mantissa in ebx:esi, sign in high bit of eax
+;edi = [CURstk]
+	jmp	NormQuadInt		;in emload.asm
+
+HighDigitsZero:
+;Top eight digits are zero, so the high half is at most the ninth digit
+	mov	eax,dseg:[esi+1]		;Get next 8 digits
+	or	eax,eax			;Anything there?
+	jz	CheckLastTwo
+	xor	edi,edi
+	shld	edi,eax,4		;Shift ninth digit into edi
+	jmp	SecondNineDigits
+       
+CheckLastTwo:
+;Only the lowest byte can be non-zero.  eax is zero on entry.
+	mov	bl,dseg:[esi]		;Get last two digits
+	or	bl,bl
+	jz	ZeroBCD
+	mov	al,bl
+	shr	al,4			;Bring down upper digit
+	imul	eax,10
+	and	ebx,0FH			;Keep lowest digit only
+	add	ebx,eax
+	xor	edx,edx
+	jmp	BcdReadyToNorm
+	
+ZeroBCD:
+;All eighteen digits are zero.  edi has not been changed on this path.
+	mov	ecx,bTAG_ZERO		;Exponent is zero
+	mov	ch,dseg:[esi+9]		;Get sign byte to ch
+	xor	ebx,ebx
+	mov	esi,ebx
+;mantissa in ebx:esi, exp/sign in ecx
+;edi = [CURstk]
+	jmp	FldCont			;in emload.asm
+
+			
+;*** ReadDigits
+;
+;Purpose:
+;	Convert left-justified packed BCD digits to a binary number.
+;Inputs:
+;	eax = packed BCD digits, left justified, non-zero
+;	ecx = no. of digits, 7 or 8
+;Outputs:
+;	ebx = number
+;Destroys eax, ecx, edx.
+;
+;InDigitLoop is a second entry point (used by eFBLD) that accumulates ecx
+;more digits from eax into the value already in ebx; ecx must be non-zero.
+
+SkipZeroDigits:
+        sub     ecx,3
+        shl     eax,12
+ReadDigits:
+;We start by scanning off leading zeros.  This costs 16 cl./nybble in
+;the ScanZero loop.  To reduce this cost for many leading zeros, we
+;check for three leading zeros at a time.  Adding this test saves
+;26 cl. for 3 leading zeros, 57 cl. for 6 leading zeros, at a cost
+;of only 5 cl. if less than 3 zeros.  We choose 3 at a time so we
+;can repeat it once (there are never more than 7 zeros).
+	test    eax,0FFF00000H          ;Check first 3 nybbles for zero
+	jz      SkipZeroDigits
+	xor	ebx,ebx
+ScanZero:
+;Note that bsr is 3 cl/bit, or 12 cl/nybble.  Add in the overhead and
+;this loop of 16 cl/nybble is cheaper for the 1 - 3 digits it does.
+;
+;The jz tests ZF from the shld (ebx still zero means the digit was zero);
+;rol is used instead of shl because it leaves ZF alone.  The nybble that
+;rol wraps into the bottom of eax is never read back: InDigitLoop takes at
+;most seven more digits from the top.
+	dec	ecx
+	shld	ebx,eax,4		;Shift digit into ebx
+	rol	eax,4			;Left justify **Doesn't affect ZF!**
+	jz	ScanZero		;Skip to next digit if zero
+	jecxz	ReadDigitsX
+InDigitLoop:
+;eax = digits to convert, left justified
+;ebx = result accumulation
+;ecx = number of digits to convert
+	xor	edx,edx
+	shld	edx,eax,4		;Shift digit into edx
+	shl	eax,4			;Keep digits left justified
+	imul	ebx,10			;Only 10 clocks on 386!
+	add	ebx,edx			;Accumulate number
+	dec	ecx
+	jnz	InDigitLoop
+ReadDigitsX:
+	ret
+		
+;*******************************************************************************
+
+;Out-of-line part of the eFBSTP range check.  Entered with the flags from
+;"cmp ebx,0DE0B6B3H": above means too big, equal means the low dword
+;decides.  1000000000*1000000000 = 0DE0B6B3A7640000H.
+ChkInvalidBCD:
+	ja	SetInvalidBCD
+	cmp	edi,0A7640000H		;(1000000000*1000000000) and 0ffffffffh
+	jb	ValidBCD
+SetInvalidBCD:
+;This is a mov, not an or: any flags already in [CURerr] are replaced
+	mov	EMSEG:[CURerr],Invalid
+InvalidBCD:
+	test	EMSEG:[CWmask],Invalid	;Is it masked?
+	jz	ReadDigitsX		;No--leave memory unchanged
+;Store Indefinite
+;Bytes 0-7 are zeroed and bytes 8-9 are set to 0FFH.
+;NOTE(review): Intel documents the hardware masked response as
+;FFFFC000000000000000H (byte 7 = 0C0H); byte 7 is written as zero here.
+;Confirm whether an exact match with hardware is wanted.
+	mov	dword ptr dseg:[esi],0
+	mov	dword ptr dseg:[esi+4],0
+	mov	word ptr dseg:[esi+8],-1	;Top two bytes of packed BCD indefinite
+	jmp	PopStack		;in emstore.asm
+
+;******
+eFBSTP:
+;******
+;FBSTP: round ST(0) to an integer, store it at dseg:esi as 18-digit packed
+;BCD with a sign byte, and pop the stack.
+;Values of magnitude 10^18 or more, or a carry return from RoundToInteger,
+;take the invalid-operation path above.
+	call	RoundToInteger		;Get integer in ebx:edi, sign in ch
+	jc	InvalidBCD
+	cmp	ebx,0DE0B6B3H		;(1000000000*1000000000) shr 32
+	jae	ChkInvalidBCD
+ValidBCD:
+	and	ch,bSign
+	mov	dseg:[esi+9],ch		;Fill in sign byte
+	mov	edx,ebx
+	mov	eax,edi			;Get number to edx:eax for division
+	mov	ebx,1000000000
+	div	ebx			;Break into two 9-digit halves
+;eax = high nine digits (quotient), edx = low nine digits (remainder)
+	xor	ecx,ecx			;Initial digits
+	mov	edi,eax			;Save quotient
+	mov	eax,edx
+	or	eax,eax
+	jz	SaveLowBCD
+	call	WriteDigits
+;ecx has the low seven digits, al the eighth and ah the ninth
+	shrd	ecx,eax,4		;Pack 8th digit
+	xor	al,al
+	shl	eax,20			;Move digit in ah to high end
+SaveLowBCD:
+	mov	dseg:[esi],ecx		;Save low 8 digits
+	mov	ecx,eax			;Get ready for next 8 digits
+	mov	eax,edi
+	or	eax,eax
+	jz	ZeroHighBCD
+	call	WriteDigits
+;The ninth digit has now been pushed down to the bottom nybble of ecx
+	shl	ah,4			;Move digit to upper nybble
+	or	al,ah			;Combine last two digits
+SaveHighBCD:
+	mov	dseg:[esi+4],ecx		;Save lower 8 digits
+	mov	dseg:[esi+8],al
+	jmp	PopStack
+
+ZeroHighBCD:
+;High nine digits are all zero (eax = 0, so al is zero for byte 8)
+	shr	ecx,28			;Position 9th digit
+	jmp	SaveHighBCD
+
+
+;*** WriteDigits
+;
+;Inputs:
+;	eax = binary number < 1,000,000,000 and > 0
+;	ecx = Zero or had one BCD digit left justified
+;Purpose:
+;	Convert binary integer to BCD.
+;
+;	The time required for the DIV instruction is dependent on operand
+;	size, at 6 + (no. of bits) clocks for 386.  (In contrast, multiply
+;	by 10 as used in FBLD/ReadDigits above takes the same amount of
+;	time regardless of operand size--only 10 clocks.)
+;
+;	The easy way to do this conversion would be to repeatedly do a
+;	32-bit division by 10 (at 38 clocks/divide).  Instead, the number
+;	is broken down so that mostly 8-bit division is used (only 14 clocks).
+;	AAM (17 clocks) is also used to save us from having to load the 
+;	constant 10 and zero ah.  AAM is faster than DIV on the 486sx.
+;
+;Outputs:
+;	ecx has seven more digits packed into it (from left)
+;	ah:al = most significant two digits (unpacked)
+;esi,edi preserved
+;Destroys ebx, edx.
+
+WriteDigits:
+;eax = binary number < 1,000,000,000
+	cdq				;Zero edx
+	mov	ebx,10000
+	div	ebx			;Break into 4-digit and 5-digit pieces
+	mov	bl,100
+	or	edx,edx
+	jz	ZeroLowDigits
+	xchg	edx,eax			;Get 4-digit remainder to eax
+;Compute low 4 digits
+; 0 < eax < 10000
+	div	bl			;Get two 2-digit pieces. 14cl on 386
+	mov	bh,al			;Save high 2 digits
+	mov	al,ah			;Get low digits
+	aam
+	shl	ah,4			;Move digit to upper nybble
+	or	al,ah
+	shrd	ecx,eax,8
+	mov	al,bh			;Get high 2 digits
+	aam
+	shl	ah,4			;Move digit to upper nybble
+	or	al,ah
+	shrd	ecx,eax,8
+;Compute high 5 digits
+	mov	eax,edx			;5-digit quotient to eax
+	or	eax,eax
+	jz	ZeroHighDigits
+ConvHigh5:
+;eax = 5-digit piece; bl = 100 (bh is cleared below)
+	cdq				;Zero edx
+	shld	edx,eax,16		;Put quotient in dx:ax
+	xor	bh,bh			;bx = 100
+	div	bx			;Get 2- and 3-digit pieces. 22cl on 386
+	xchg	edx,eax			;Save high 3 digits, get low 2 digits
+	aam
+	shl	ah,4			;Move digit to upper nybble
+	or	al,ah
+	shrd	ecx,eax,8
+	mov	eax,edx			;Get high 3 digits
+	mov	bl,10
+	div	bl
+	mov	bl,ah			;Remainder is next digit
+	shrd	ecx,ebx,4
+	aam				;Get last two digits
+;Last two digits in ah:al
+	ret
+
+ZeroLowDigits:
+;Low four digits are all zero: shift four zero nybbles into ecx
+	shr	ecx,16
+	jmp	ConvHigh5
+
+ZeroHighDigits:
+;High five digits are all zero: shift in three more zero nybbles.
+;eax is zero, so ah:al reports two zero digits.
+	shr	ecx,12
+	ret
diff --git a/private/ntos/dll/i386/emlsenv.asm b/private/ntos/dll/i386/emlsenv.asm
new file mode 100644
index 000000000..a3b725d9d
--- /dev/null
+++ b/private/ntos/dll/i386/emlsenv.asm
@@ -0,0 +1,457 @@
+	subttl	emlsenv.asm - Emulator Save/Restore
+	page
+;***
+;emlsenv.asm - Emulator Save/Restore
+;
+;
+;	 Copyright (c) Microsoft Corporation 1991
+;
+;	 All Rights Reserved
+;
+;Purpose:
+;	FLDCW, FSTCW, FSTSW, FSTENV, FLDENV, FSAVE, FRSTOR instructions
+;
+;Revision History:
+;
+; []	09/05/91  TP	Initial 32-bit version.
+;
+;
+;*******************************************************************************
+
+
+;When setting the control word, the [RoundMode] vector must be set
+;according to the rounding and precision modes.
+;
+;The nested irp's emit sixteen dwords, rounding control outermost:
+;for each of near/down/up/chop there are four entries named
+;Round24<rc>, Round24<rc>, Round53<rc>, Round64<rc>.
+;SetControlWord indexes the table with (RC*4 + PC) * 4 as a byte offset.
+
+tRoundMode	label	dword
+	irp	RC,<near,down,up,chop>
+	irp	PC,<24,24,53,64>
+	dd	Round&&PC&&RC
+	endm
+	endm
+
+
+EM_ENTRY eFLDCW
+eFLDCW:
+;FLDCW: load the control word from dseg:esi.
+;
+;SetControlWord is a second entry point (called by eFLDENV) that takes
+;the new control word in ax.  It updates [ControlWord], [ErrMask],
+;[RoundMode], [SavedRoundMode] and [TransRound].
+;
+;Uses only eax and ebx
+	mov	ax, dseg:[esi]		; Fetch control word from user memory
+SetControlWord:
+	and	ax,0F3FH		; Limit to valid values
+	mov	EMSEG:[ControlWord], ax	; Store in the emulated control word
+	not	al			;Flip mask bits for fast compare
+        and     al,3FH                  ;Limit to valid mask bits
+	mov	EMSEG:[ErrMask],al
+;The "and" on all of eax also discards al and the high half of eax, leaving
+;only the RC and PC fields.
+	and	eax,(RoundControl + PrecisionControl) shl 8
+.erre	RoundControl eq 1100B
+.erre	PrecisionControl eq 0011B
+	shr	eax,6			;Put PC and RC in bits 2-5
+;eax is now a byte offset into the dword table tRoundMode
+	mov	ebx,tRoundMode[eax]	;Get correct RoundMode vector
+	mov	EMSEG:[RoundMode],ebx
+	mov	EMSEG:[SavedRoundMode],ebx
+	and	eax,RoundControl shl (8-6)	;Mask off precision control
+	mov	ebx,tRoundMode[eax+PC64 shl (8-6)];Get correct RoundMode vector
+	mov	EMSEG:[TransRound],ebx	;Round mode w/o precision
+	ret
+
+
+EM_ENTRY eFSTCW
+eFSTCW:
+;FSTCW: store the emulated control word as a 16-bit value at dseg:esi.
+;Uses only eax 
+	mov	ax, EMSEG:[ControlWord]	; Fetch user control word
+	mov	dseg:[esi], ax		; Store into user memory
+	ret
+
+
+EM_ENTRY eFSTSW
+eFSTSW:
+;FSTSW m16: store the status word returned by GetStatusWord (in ax) as a
+;16-bit value at dseg:esi.
+;Uses only eax and ebx
+	call	GetStatusWord		; Fetch emulated Status word
+	mov	dseg:[esi], ax		; Store into user memory
+	ret
+
+
+eFSTSWax:
+;FSTSW AX: write the status word returned by GetStatusWord into the regAX
+;field of the structure at [esp+4].
+;NOTE(review): that structure is presumably the caller's saved register
+;frame -- its layout is not defined in this part of the file.
+;Uses only eax and ebx
+	call	GetStatusWord		; Fetch emulated Status word
+	mov	[esp+4].regAX,ax
+	ret
+
+
+EM_ENTRY eFDECSTP
+eFDECSTP:
+;FDECSTP: move the stack pointer [CURstk] down by one register, wrapping
+;from BEGstk around to INITstk.  No register contents or tags are touched.
+;edi = [CURstk]
+	cmp	edi,BEGstk
+	jbe	DecWrap
+	sub	EMSEG:[CURstk],Reg87Len
+	ret
+
+DecWrap:
+;Already at (or below) the lowest register: wrap to the highest one
+	mov	EMSEG:[CURstk],INITstk
+	ret
+
+
+EM_ENTRY eFINCSTP
+eFINCSTP:
+;FINCSTP: move the stack pointer [CURstk] up by one register, wrapping
+;from INITstk around to BEGstk.  No register contents or tags are touched.
+;edi = [CURstk]
+	cmp	edi,INITstk
+	jae	IncWrap
+	add	EMSEG:[CURstk],Reg87Len
+	ret
+
+IncWrap:
+;Already at (or above) the highest register: wrap to the lowest one
+	mov	EMSEG:[CURstk],BEGstk
+	ret
+
+
+eFCLEX:
+;FCLEX: clear the exception flags, both in [SWerr] and in bits 8-15 of
+;the OldLongStatus field of the structure at [esp+4].
+	mov	EMSEG:[SWerr],0
+	and	[esp+4].OldLongStatus,0FFFF00FFH		; clear saved SWerr
+	ret
+
+
+;*** eFSTENV - emulate FSTENV	[address]
+;
+;   ARGUMENTS
+;	    dseg:esi  = where to store environment
+;
+;
+;   DESCRIPTION
+;	    This routine emulates an 80387 FSTENV (store environment)
+;	    After the environment is stored, all exceptions are masked
+;	    ([CWmask] = 3FH, [ErrMask] = 0).
+;
+;	    SaveEnv is a second entry point, called by SaveState under
+;	    eFSAVE, that skips the update of [StatusWord] from the
+;	    OldStatus field.
+;
+
+EM_ENTRY eFSTENV
+eFSTENV:
+	mov	ax,[esp+4].OldStatus
+	mov	EMSEG:[StatusWord],ax
+SaveEnv:
+;Zero the reserved fields first
+	xor	ax,ax
+	mov	dseg:[esi.reserved1],ax
+	mov	dseg:[esi.reserved2],ax
+	mov	dseg:[esi.reserved3],ax
+	mov	dseg:[esi.reserved4],ax
+	mov	dseg:[esi.reserved5],ax
+	mov	ax,EMSEG:[ControlWord]
+	mov	dseg:[esi.E32_ControlWord],ax
+	call	GetEMSEGStatusWord
+	mov	dseg:[esi.E32_StatusWord],ax
+	call	GetTagWord
+	mov	dseg:[esi.E32_TagWord],ax
+;The current cs and ss are stored as the code and data selectors
+	mov	ax,cs
+	mov	dseg:[esi.E32_CodeSeg],ax
+	mov	ax,ss
+	mov	dseg:[esi.E32_DataSeg],ax
+	mov	eax,EMSEG:[PrevCodeOff]
+	mov	dseg:[esi.E32_CodeOff],eax
+	mov	eax,EMSEG:[PrevDataOff]
+	mov	dseg:[esi.E32_DataOff],eax
+        mov     EMSEG:[CWmask],03FH        ;Set all mask bits
+	mov	EMSEG:[ErrMask],0
+	ret
+
+
+;*** eFSAVE - emulate FSAVE   [address]
+;
+;   ARGUMENTS
+;	    dseg:esi  = where to store the state
+;
+;
+;   DESCRIPTION
+;	    This routine emulates an 80387 FSAVE (save state): the
+;	    environment followed by all NumLev registers, 10 bytes each,
+;	    starting with the current top of stack.
+;	    Once the data is stored an finit is executed.
+;
+;	    The store loop steps [CURstk] itself through every register;
+;	    after NumLev steps it is back at its starting value.
+;
+;   REGISTERS
+;	destroys ALL.
+
+EM_ENTRY eFSAVE
+eFSAVE:
+	mov	ax,[esp+4].OldStatus
+	mov	EMSEG:[StatusWord],ax
+        mov     eax,[esp+4].OldCodeOff
+        mov     EMSEG:[PrevCodeOff],eax
+;The final ret of SaveState will "return" into eFINIT
+	push	offset eFINIT		; After fsave we must do a finit
+SaveState:				; Enter here for debugger save state
+	call	SaveEnv
+	add	esi,size Env80x87_32	;Skip over environment
+	mov	ebp,NumLev		;Save entire stack
+	mov	edi,EMSEG:[CURstk]
+FsaveStoreLoop:
+	mov	eax,EMSEG:[edi].ExpSgn
+	call	StoreTempReal		;in emstore.asm
+        add     esi,10
+
+;edi is reloaded from [CURstk] rather than assumed to survive the call
+        mov     edi,EMSEG:[CURstk]
+        NextStackElem   edi,FSave
+        mov     EMSEG:[CURstk],edi
+
+        dec     ebp
+	jnz	FsaveStoreLoop
+	ret
+
+;Wrap case of the loop step: same as the tail of the loop above, but with
+;edi reset to BEGstk.
+WrapFSave:                              ; tied to NextStackElem above
+        mov     edi, BEGstk
+        mov     EMSEG:[CURstk],edi
+        dec     ebp
+        jnz     FsaveStoreLoop
+        ret
+
+
+;*** eFRSTOR - emulate FRSTOR  [address]
+;
+;   ARGUMENTS
+;	    dseg:esi  = where to get the environment
+;
+;   DESCRIPTION
+;	    This routine emulates an 80387 FRSTOR (restore state)
+;	    The NumLev registers that follow the environment are loaded
+;	    first; then control passes to eFLDENV with esi pointing back at
+;	    the start of the environment.
+
+	NextStackWrap	edi,Frstor
+
+EM_ENTRY eFRSTOR
+eFRSTOR:
+;First we set up the status word so that [CURstk] is initialized.
+;The floating-point registers are stored in logical ST(0) - ST(7) order,
+;not physical register order.  We don't do a full load of the environment
+;because we're not ready to use the tag word yet.
+
+    and		[esp+4].[OldLongStatus], NOT(LongSavedFlags)	;clear saved codes, errs
+	mov	ax, dseg:[esi.E32_StatusWord]
+	call	SetEmStatusWord		;Initialize [CURstk]
+	add	esi,size Env80x87_32	;Skip over environment
+
+;Load of temp real has one difference from real math chip: it is an invalid
+;operation to load an unsupported format.  By ensuring the exception is
+;masked, we will convert unsupported format to Indefinite.  Note that the
+;mask and [CURerr] will be completely restored by the FLDENV at the end.
+
+        mov     EMSEG:[CWmask],3FH              ;Mask off invalid operation exception
+	mov	edi,EMSEG:[CURstk]
+	mov	ebp,NumLev
+FrstorLoadLoop:
+;LoadTempReal replaces esi with the low mantissa dword, so the operand
+;pointer is saved around the call.
+	push	esi
+	call	LoadTempReal		;In emload.asm
+	pop	esi
+	add	esi,10		;Point to next temp real
+	NextStackElem	edi,Frstor
+	dec	ebp
+	jnz	FrstorLoadLoop
+	sub	esi,NumLev*10+size Env80x87_32	;Point to start of env.
+        jmp     eFLDENV                 ;Continue in eFLDENV
+
+
+;***	eFLDENV - emulate FLDENV   [address]
+;
+;	ARGUMENTS
+;	       dseg:esi	= where to load the environment from
+;
+;	       This routine emulates an 80387 FLDENV (load environment)
+;	       It loads the status word, control word, tag word and the
+;	       previous code/data offsets.  The code and data selector
+;	       fields of the environment are not read.
+;	       Also reached by a jmp from eFRSTOR.
+
+EM_ENTRY eFLDENV
+eFLDENV:
+    and		[esp+4].[OldLongStatus], NOT(LongSavedFlags)	;clear saved codes, errs
+	mov		ax, dseg:[esi.E32_StatusWord]
+	call	SetEmStatusWord			; set up status word
+	mov		ax, dseg:[esi.E32_ControlWord]
+	call	SetControlWord
+	mov		ax, dseg:[esi.E32_TagWord]
+	call	UseTagWord
+	mov		eax, dseg:[esi.E32_CodeOff]
+	mov     EMSEG:[PrevCodeOff], eax
+	mov		eax, dseg:[esi.E32_DataOff]
+	mov     EMSEG:[PrevDataOff], eax
+	ret
+
+
+;***	GetTagWord - figures out what the tag word is from the numeric stack
+;		   and returns the value of the tag word in ax.
+;
+;	Walks NumLev registers downward from INITstk in steps of Reg87Len,
+;	shifting the top two bits of each register's bTag into ax.
+;
+;	Returns:  ax = tag word (upper half of eax is zero)
+;	Preserves esi.  Modifies bh and ecx.
+;
+
+GetTagWord:
+	push	esi
+	xor	eax, eax
+	mov	ecx, NumLev		; get tags for regs. 0, 7 - 1
+	mov	esi,INITstk
+GetTagLoop:
+	mov	bh, EMSEG:[esi.bTag]	; The top 2 bits of Tag are the X87 tag bits.
+	shld	ax, bx, 2		; shift those 2 bits in at the bottom of ax
+	sub	esi, Reg87Len
+	loop	GetTagLoop
+	rol	ax, 2			; This moves Tag(0) into the low 2 bits
+	pop	esi
+	ret
+
+
+;***	UseTagWord - Set up tags using tag word from environment
+;
+;	ARGUMENTS
+;	       ax - should contain the tag word
+;
+;	Destroys ax,bx,cx,dx,di
+;
+;	For each of the NumLev registers (walking down from INITstk):
+;	  - new 2-bit tag is 11b (empty): the register is marked bTAG_EMPTY;
+;	  - otherwise, if the register is already non-empty, its tag is kept;
+;	  - otherwise (currently empty, to become non-empty) a tag is derived
+;	    from the register contents at SetTagNotEmpty.
+
+UseTagWord:
+	ror	ax, 2			; mov Tag(0) into top bits of ax
+	mov	edi,INITstk
+	mov	ecx, NumLev
+UseTagLoop:
+	mov	dl,bTAG_EMPTY
+	cmp	ah, 0c0h		;Is register to be tagged Empty?
+	jae	SetTag			;Yes, go mark it
+	mov	dl,EMSEG:[edi].bTag	;Get current tag
+	cmp	dl,bTAG_EMPTY		;Is register currently Empty?
+	je	SetTagNotEmpty		;If so, go figure out tag for it
+SetTag:
+	mov	EMSEG:[edi].bTag,dl
+UseTagLoopCheck:
+	sub	edi, Reg87Len
+	shl	eax, 2			;Bring next register's 2 tag bits to top of ah
+	loop	UseTagLoop
+	ret
+
+;NOTE(review): nothing in the visible part of this file jumps to SetTagEmpty;
+;confirm whether it is still referenced before removing it.
+SetTagEmpty:
+	mov	EMSEG:[edi.bTag], bTAG_EMPTY
+	jmp	UseTagLoopCheck
+
+SetTagNotEmpty:
+;Register is currently tagged empty, but new tag word says it is not empty.
+;Figure out a new tag for it.  The rules are:
+;
+;1. Everything is either normalized or zero--unnormalized formats cannot
+;get in.  So if the high half mantissa is zero, the number is zero.
+;
+;2. Although the exponent bias is different, NANs and Infinities are in
+;standard IEEE format - exponent is TexpMax, mantissa indicates NAN vs.
+;infinity (mantissa for infinity is 800..000H).
+;
+;3. Denormals have an exponent less than TexpMin.
+;
+;4. If the low half of the mantissa is zero, it is tagged bTAG_SNGL
+;
+;5. Everything else is bTAG_VALID
+
+	mov	ebx,EMSEG:[edi].lManHi
+	mov	dl,bTAG_ZERO		;Try zero first
+	or	ebx,ebx			;Is mantissa zero?
+	jz	SetTag
+	mov	edx,EMSEG:[edi].ExpSgn	;Exponent in high word; dl is replaced by the tag below
+	mov	dl,bTAG_DEN
+	cmp	edx,TexpMin shl 16	;Is it denormal?
+	jl	SetTag
+	cmp	EMSEG:[edi].lManLo,0	;Is low half zero?
+.erre	bTAG_VALID eq 1
+.erre	bTAG_SNGL eq 0
+	setnz	dl			;if low half==0 then dl=0 else dl=1
+	cmp	edx,TexpMax shl 16	;Is it NAN or Infinity?
+	jl	SetTag			;If not, it's valid
+.erre	(bTAG_VALID - bTAG_SNGL) shl TAG_SHIFT eq (bTAG_NAN - bTAG_INF)
+	shl	dl,TAG_SHIFT
+	add	dl,bTAG_INF - bTAG_SNGL
+;If the low bits were zero we have just changed bTAG_SNGL to bTAG_INF
+;If the low bits weren't zero, we changed bTAG_VALID to bTAG_NAN
+;See if infinity is really possible: is high half 80..00H?
+	cmp	ebx,1 shl 31		;Is it infinity?
+	jz	SetTag			;Store tag for infinity or NAN
+	mov	dl,bTAG_NAN		;High half is not 80..00H, so it must be a NAN
+	jmp	SetTag
+
+
+;***	GetStatusWord -
+;
+; User status word returned in ax.
+; Destroys ebx only.
+;
+; The stack-top field (bits 11-13) is rebuilt from [CURstk]; the remaining
+; bits come from the OldStatus value in the caller's stack frame.
+; NOTE(review): [esp+8].OldStatus is relative to esp at entry, so this can
+; only be called from a context with the expected frame -- confirm callers.
+
+GetStatusWord:
+	mov	eax, EMSEG:[CURstk]
+	sub	eax, BEGstk		; byte offset of current register
+	mov	bl,Reg87Len
+	div	bl			; al = register index (ax / Reg87Len)
+        inc     eax                     ; adjust for emulator's stack layout
+	and	eax, 7			; eax is now the stack number
+	shl	ax, 11			; position it in the status word's stack-top field
+	or	ax,[esp+8].OldStatus	; or in the rest of the status word.
+	ret
+
+
+;***	GetEMSEGStatusWord -
+;
+; User status word returned in ax.
+; Destroys ebx only.
+; Uses status word in per-thread data area, otherwise
+;   identical to GetStatusWord
+;
+; Unlike GetStatusWord this reads nothing relative to esp, so it does not
+; depend on the caller's stack frame.
+
+EM_ENTRY eGetStatusWord
+GetEMSEGStatusWord:
+	mov	eax, EMSEG:[CURstk]
+	sub	eax, BEGstk		; byte offset of current register
+	mov	bl,Reg87Len
+	div	bl			; al = register index (ax / Reg87Len)
+        inc     eax                     ; adjust for emulator's stack layout
+	and	eax, 7			; eax is now the stack number
+	shl	ax, 11			; position it in the status word's stack-top field
+	or	ax, EMSEG:[StatusWord]	; or in the rest of the status word.
+	ret
+
+
+;***	SetEmStatusWord -
+;
+; Given user status word in ax, set into emulator.
+; Destroys ebx only.
+;
+; Writes three things:
+;   [CURerr]     = low 6 bits of the status word (the exception flags)
+;   [StatusWord] = status word with bits 7 and 15 and the stack field cleared
+;   [CURstk]     = BEGstk + Reg87Len * ((stack field - 1) and 7)
+; eax is also modified (it ends up holding the new [CURstk]).
+
+
+SetEmStatusWord:
+	and	ax,7F7FH
+	mov	bx,ax
+        and     bx,3FH                  ; set up CURerr in case user
+	mov	EMSEG:[CURerr],bl	; wants to force an exception
+	mov	ebx, eax
+	and	ebx, not (7 shl 11)	; remove stack field.
+	mov	EMSEG:[StatusWord], bx
+
+	sub	ah, 8  			; adjust for emulator's stack layout
+	and	ah, 7 shl 3		; ah = 8 * stack number
+	mov	al, ah
+	shr	ah, 1			; ah = 4 * stack number
+	add	al, ah			; stack field * 3 * 4
+.erre	Reg87Len eq 12	
+	and	eax, 255	   	; eax is now 12*stack number
+        add     eax, BEGstk
+	mov	EMSEG:[CURstk], eax
+	ret
+
+
+public _SaveEm87Context
+_SaveEm87Context PROC
+;Saves the emulator state through SaveState into the buffer whose address is
+;the single stack argument, then returns in eax:
+;	Em87Idle  if [CURErr] has the Summary bit set
+;	Em87Busy  otherwise
+;ebx, edi, esi and ebp are preserved.  The push order is part of the contract
+;with SaveState's view of the stack, so it is left exactly as it was.
+
+	push	ebp
+	mov	ebp, esp
+	push	ebx
+	push	edi
+	push	esi
+	mov	esi, [ebp+8]		; esi = destination buffer
+	call	SaveState
+	mov	eax, Em87Idle		; default answer (mov leaves flags alone)
+	test	EMSEG:[CURErr], Summary
+	jnz	RetSaveEm		; Summary set: keep Em87Idle
+	mov	eax, Em87Busy
+RetSaveEm:
+	pop	esi
+	pop	edi
+	pop	ebx
+	pop	ebp
+	ret
+_SaveEm87Context ENDP
+
+
+public _RestoreEm87Context
+_RestoreEm87Context PROC
+;Restores the emulator state from the buffer whose address is the single
+;stack argument by running the FRSTOR emulation on it.
+;ebx, edi, esi and ebp are preserved (eFRSTOR uses edi and ebp itself).
+;
+;NOTE(review): eFRSTOR/eFLDENV modify [esp+4].[OldLongStatus] relative to
+;their entry esp, which here lands in this routine's own frame (the saved
+;registers / return address region).  Confirm that the OldLongStatus offset
+;makes this harmless, and do not reorder the pushes without checking.
+	push	ebp
+	mov	ebp, esp
+	push	ebx
+	push	edi
+	push	esi
+	mov	esi, [ebp+8]		; esi = source buffer
+	call	eFRSTOR
+	pop	esi
+	pop	edi
+	pop	ebx
+	pop	ebp
+	ret
+_RestoreEm87Context  ENDP
diff --git a/private/ntos/dll/i386/emround.asm b/private/ntos/dll/i386/emround.asm
new file mode 100644
index 000000000..34704c5bd
--- /dev/null
+++ b/private/ntos/dll/i386/emround.asm
@@ -0,0 +1,712 @@
+        subttl  emround.asm - Rounding and Precision Control and FRNDINT
+        page
+;*******************************************************************************
+;emround.asm - Rounding and Precision Control
+;
+;        Microsoft Confidential
+;
+;	 Copyright (c) Microsoft Corporation 1991
+;        All Rights Reserved
+;
+;Purpose:
+;       Rounding and precision control.  The correct routine is jumped
+;	to through the [RoundMode] vector.
+;
+;Revision History:
+;
+; []	09/05/91  TP	Initial 32-bit version.
+;	02/28/92  JWM   Minor bug fix in NotNearLow
+;
+;*******************************************************************************
+
+
+;RndIntSpcl - special-operand handling for eFRNDINT (tag above bTAG_ZERO).
+;Infinity is returned unchanged; anything other than a denormal goes to
+;SpclDestNotDen.  A denormal sets the Denormal flag and is rounded like a
+;normal number only if that exception is masked.
+RndIntSpcl:
+	cmp	cl,bTAG_INF
+	jz	RndIntX			;Leave infinity unchanged
+	cmp	cl,bTAG_DEN
+	jnz	SpclDestNotDen		;Handle NAN & empty - in emarith.asm
+;Handle denormal
+	mov	EMSEG:[CURerr],Denormal
+	test	EMSEG:[CWmask],Denormal	;Is it masked?
+	jnz	NormRndInt		;If so, ignore denormalization
+RndIntX:
+	ret
+
+;********
+EM_ENTRY eFRNDINT
+eFRNDINT:
+;********
+;edi points to top of stack
+;
+;Rounds the register at edi to an integer in place.  Zeros and values whose
+;exponent is already >= 63 are returned untouched.  Otherwise the mantissa is
+;handed to RoundToBit with SaveResult pushed as its return address, so the
+;rounded value is stored back through [Result].
+	mov	ecx,EMSEG:[edi].ExpSgn
+	cmp	cl,bTAG_ZERO
+.erre	bTAG_VALID lt bTAG_ZERO
+.erre	bTAG_SNGL lt bTAG_ZERO
+	jz	RndIntX	
+	ja	RndIntSpcl
+	cmp	ecx,63 shl 16		;Is it already integer?
+	jge	RndIntX
+NormRndInt:
+	mov	ebx,EMSEG:[edi].lManHi
+	mov	esi,EMSEG:[edi].lManLo
+	mov	EMSEG:[Result],edi	;Save result pointer
+	xor	eax,eax			;Extend mantissa
+	push	offset SaveResult	;RoundToBit's ret will land in SaveResult
+	jmp	RoundToBit
+
+;*******************************************************************************
+
+ResultOverflow:
+;mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl.
+;We were all ready to save the rounded result, but the exponent turned out
+;to be too large.
+;
+;Always sets the Overflow flag.  If the exception is unmasked the exponent is
+;reduced by UnderBias and the result saved as is.  If it is masked the result
+;becomes either infinity or the largest finite value for the current
+;precision control, depending on rounding mode and sign.  Exits through
+;SaveResult in every case.
+	or	EMSEG:[CURerr],Overflow
+	sub	ecx,UnderBias shl 16	;Unmasked response
+	test	EMSEG:[CWmask],Overflow	;Is exception unmasked?
+	jz	SaveResult		;If so, we're ready
+;Produce masked overflow response
+	mov	ebx,1 shl 31		;Assume infinity
+	xor	esi,esi
+	mov	cl,bTAG_INF
+	mov	al,EMSEG:[CWcntl]	;Get rounding control
+	mov	ah,al
+	and	ah,RCchop			;Rounding control only
+;Return max value if RCup bit = 1 and -, or RCdown bit = 1 and +
+;i.e., RCup & sign OR RCdown & not sign
+.erre	RCchop eq RCup + RCdown		;Always return max value
+.erre	RCnear eq 0			;Never return max value
+	sar	ch,7			;Expand sign through whole byte
+.erre	(RCdown and bSign) eq 0		;Don't want to change real sign
+	xor	ch,RCdown		;Flip sign for RCdown bit
+	and	ah,ch			;RCup & sign  OR  RCdown & not sign
+	jnz	SaveMax
+;NOTE(review): ch still holds the expanded/flipped sign byte here; only bit 7
+;is guaranteed to equal the original sign.  Confirm the other bits are ignored.
+	and	ecx,0FFFFH
+	or	ecx,TexpMax shl 16
+	jmp	SaveResult		;Save Infinity
+SaveMax:
+;Get max value for current precision
+	mov	ebx,0FFFFFF00H		;Max value for 24 bits
+	and	ecx,bSign shl 8		;Preserve only sign
+	or	ecx,(IexpMax-IexpBias-1) shl 16 + bTAG_VALID ;Set up max value
+	and	al,PrecisionControl
+.erre	PC24 eq 0
+	jz	SaveResult		;Save 24-bit max value
+	dec	esi			;esi == -1
+	mov	ebx,esi
+	cmp	al,PC53
+	jnz	SaveResult		;Save 64-bit max value
+	mov	esi,0FFFFF800H
+	jmp	SaveResult		;Save 53-bit max value
+
+;*******************************************************************************
+;
+;64-bit rounding routines
+;
+
+;***********
+Round64down:
+;***********
+;Round toward -infinity at 64-bit precision.
+;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+;Exits through SaveValidResult, RoundUp64 or the denormal path below.
+	cmp	ecx,(IexpMin-IexpBias+1) shl 16	;Test for Underflow
+	jl	RndDenorm64
+	or	eax,eax			;Exact result?
+	jz	SaveValidResult
+	or	EMSEG:[CURerr],Precision 	;Set flag on inexact result
+;Chop if positive, increase mantissa if negative
+	test	ch,bSign
+	jz	SaveValidResult		;Positive, so chop
+	jmp	RoundUp64		;Round up if negative
+
+RndDenorm64:
+	test	EMSEG:[CWmask],Underflow ;Is exception unmasked?
+	jz	RndSetUnder
+Denormalize:
+;We don't really store in denormalized format, but we need the number 
+;to be rounded as if we do.  If the exponent were -IexpBias, we would
+;lose 1 bit of precision; as it gets more negative, we lose more bits.
+;We'll do this by adjusting the exponent so that the bits we want to 
+;keep look like integer bits, and performing round-to-integer.
+	add	ecx,(IexpBias+62) shl 16 ;Adjust exponent so we're integer
+	call	RoundToBit
+;Set underflow exception if precision exception is set.
+;The Precision flag sits one bit above the Underflow flag, so a single right
+;shift moves it into place.  (The previous "ror al,Precision-Underflow" used
+;the difference of the two MASKS as a rotate count; with the 80x87 flag
+;layout that is a rotate by 16, which leaves an 8-bit register unchanged, so
+;Underflow was never signalled here.)
+	mov	al,EMSEG:[CURerr]
+	and	al,Precision
+.erre	Precision eq (Underflow shl 1)
+	shr	al,1			;Move Precision bit to Underflow pos.
+	or	EMSEG:[CURerr],al	;Signal Underflow if inexact
+	cmp	cl,bTAG_ZERO
+	jz	SaveResult
+	sub	ecx,(IexpBias+62) shl 16;Restore unbiased exponent
+	cmp	ecx,TexpMin shl 16	;Did we round out of denorm?
+	jae	SaveResult
+	mov	cl,bTAG_DEN
+	jmp	SaveResult
+
+RndSetUnder:
+;Underflow exception not masked.  Adjust exponent and try again.
+	or	EMSEG:[CURerr],Underflow
+	add	ecx,UnderBias shl 16
+	jmp	EMSEG:[RoundMode]	;Try again with revised exponent
+
+;***********
+Round64near:
+;***********
+;Round to nearest (even) at 64-bit precision.
+;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+;
+;This block also holds the shared tails used by the other rounding routines:
+;  RoundUp64       - add one unit in the last place of ebx:esi
+;  SaveValidResult - derive tag from esi, check overflow, then store
+;  SaveResult      - store ebx:esi and ecx to the register at [Result]
+;  SaveResultEdi   - same, with the destination already in edi
+;  SaveExpSgn      - store only ecx (exponent/sign/tag)
+	cmp	ecx,TexpMin shl 16	;Test for Underflow
+	jl	RndDenorm64
+	or	eax,eax			;Exact result?
+	jz	short SaveValidResult
+	or	EMSEG:[CURerr],Precision ;Set flag on inexact result
+
+;To perform "round even" when the round bit is set and the sticky bits
+;are zero, we treat the LSB as if it were a sticky bit.  Thus if the LSB
+;is set, that will always force a round up (to even) if the round bit is
+;set.  If the LSB is zero, then the sticky bits remain zero and we always
+;round down.  This rounding rule is implemented by adding RoundBit-1
+;(7F..FFH), setting CY if round up.  
+
+	bt	esi,0			;Is mantissa even or odd? (set CY)
+	adc	eax,(1 shl 31)-1	;Sum LSB & sticky bits--CY if round up
+	jnc	SaveValidResult
+RoundUp64:
+	mov	EMSEG:[SWcc],RoundUp
+	add	esi,1
+	adc	ebx,0
+	jc	BumpExponent		;Overflowed, increment exponent
+
+SaveValidResult:			;A jump to here requires 9 clocks
+	or	esi,esi			;Any bits in low half?
+.erre	bTAG_VALID eq 1
+.erre	bTAG_SNGL eq 0
+	setnz   cl                      ;if low half==0 then cl=0 else cl=1
+	cmp	ecx,TexpMax shl 16	;Test for overflow
+	jge	ResultOverflow
+
+SaveResult:				;A jump to here requires 10 clocks
+;mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl
+	mov	edi,EMSEG:[Result]
+SaveResultEdi:
+	mov	EMSEG:[edi].lManLo,esi
+	mov	EMSEG:[edi].lManHi,ebx
+SaveExpSgn:
+	mov	EMSEG:[edi].ExpSgn,ecx
+	ret
+
+;***********
+Round64up:
+;***********
+;Round toward +infinity at 64-bit precision.
+;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+	cmp	ecx,TexpMin shl 16	;Test for Underflow
+	jl	RndDenorm64
+	or	eax,eax			;Exact result?
+	jz	short SaveValidResult
+	or	EMSEG:[CURerr],Precision;Set flag on inexact result
+;Chop if negative, increase mantissa if positive
+	cmp	ch,bSign		;No CY iff sign bit is set
+	jc	RoundUp64		;Round up if positive
+	jmp	short SaveValidResult
+
+;***********
+Round64chop:
+;***********
+;Round toward zero at 64-bit precision: the extension bits in eax are simply
+;dropped, and Precision is flagged if any of them were set.
+;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+	cmp	ecx,TexpMin shl 16	;Test for Underflow
+	jl	RndDenorm64
+	or	eax,eax			;Exact result?
+	jz	short SaveValidResult
+	or	EMSEG:[CURerr],Precision;Set flag on inexact result
+	jmp	short SaveValidResult
+
+;*******************************************************************************
+;
+;53-bit rounding routines
+;
+
+;***********
+Round53down:
+;***********
+;Round toward -infinity at 53-bit precision (low 11 bits of esi discarded).
+;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+	cmp	ecx,TexpMin shl 16	;Test for Underflow
+	jl	RndDenorm53
+	mov	edx,esi			;Get low bits
+	and	edx,(1 shl 11) - 1	;Mask to last 11 bits
+	or	edx,eax			;Throwing away any bits?
+	jz	SaveValidResult
+	or	EMSEG:[CURerr],Precision;Set flag on inexact result
+;Chop if positive, increase mantissa if negative
+	and	esi,not ((1 shl 11)-1)	;Mask off low 11 bits
+	test	ch,bSign
+	jz	SaveValidResult		;Positive, go chop
+	jmp	RoundUp53
+
+RndDenorm53:
+	test	EMSEG:[CWmask],Underflow;Is exception unmasked?
+	jz	RndSetUnder
+;We don't really store in denormalized format, but we need the number 
+;to be rounded as if we do.  If the exponent were -IexpBias, we would
+;lose 1 bit of precision; as it gets more negative, we lose more bits.
+;We'll do this by adjusting the exponent so that the bits we want to 
+;keep look like integer bits, and performing round-to-integer.
+	add	ecx,(IexpBias+51) shl 16 ;Adjust exponent so we're integer
+	call	RoundToBit
+;Set underflow exception if precision exception is set.
+;The Precision flag sits one bit above the Underflow flag, so a single right
+;shift moves it into place.  (The previous "ror al,Precision-Underflow" used
+;the difference of the two MASKS as a rotate count; with the 80x87 flag
+;layout that is a rotate by 16, which leaves an 8-bit register unchanged, so
+;Underflow was never signalled here.)
+	mov	al,EMSEG:[CURerr]
+	and	al,Precision
+.erre	Precision eq (Underflow shl 1)
+	shr	al,1			;Move Precision bit to Underflow pos.
+	or	EMSEG:[CURerr],al	;Signal Underflow if inexact
+	cmp	cl,bTAG_ZERO
+	jz	SaveResult
+	sub	ecx,(IexpBias+51) shl 16;Restore unbiased exponent
+	cmp	ecx,(IexpMin-IexpBias+1) shl 16	;Did we round out of denorm?
+	jae	SaveResult
+	mov	cl,bTAG_DEN
+	jmp	SaveResult
+
+;***********
+Round53near:
+;***********
+;Round to nearest (even) at 53-bit precision.
+;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+;
+;Within esi: bit 11 is the LSB that survives, bit 10 is the round bit and
+;bits 0-9 (plus all of eax) are sticky bits.
+;This block also holds RoundUp53 (add one unit at bit 11) and BumpExponent
+;(mantissa carried out: bump exponent and set the MSB again), which the
+;other rounding routines jump to.
+	cmp	ecx,TexpMin shl 16	;Test for Underflow
+	jl	RndDenorm53
+	mov	edx,esi			;Get low bits
+	and	edx,(1 shl 11) - 1	;Mask to last 11 bits
+	or	edx,eax			;Throwing away any bits?
+	jz	SaveValidResult
+	or	EMSEG:[CURerr],Precision;Set flag on inexact result
+
+;To perform "round even" when the round bit is set and the sticky bits
+;are zero, we treat the LSB as if it were a sticky bit.  Thus if the LSB
+;is set, that will always force a round up (to even) if the round bit is
+;set.  If the LSB is zero, then the sticky bits remain zero and we always
+;round down.
+
+	mov	edx,esi
+	and	esi,not ((1 shl 11)-1)	;Mask off low 11 bits
+	test	edx,1 shl 10		;Is round bit set?
+	jz	SaveValidResult
+	and	edx,(3 shl 10)-1	;Keep only sticky bits and LSB
+	or	eax,edx			;Combine with other sticky bits
+	jz	SaveValidResult
+RoundUp53:
+	mov	EMSEG:[SWcc],RoundUp
+	add	esi,1 shl 11		;Round
+	adc	ebx,0
+	jnc	SaveValidResult
+BumpExponent:
+	add	ecx,1 shl 16		;Mantissa overflowed, bump exponent
+	or	ebx,1 shl 31		;Set MSB
+	jmp	SaveValidResult
+
+;***********
+Round53up:
+;***********
+;Round toward +infinity at 53-bit precision.
+;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+	cmp	ecx,TexpMin shl 16	;Test for Underflow
+	jl	RndDenorm53
+	mov	edx,esi			;Get low bits
+	and	edx,(1 shl 11) - 1	;Mask to last 11 bits
+	or	edx,eax			;Throwing away any bits?
+	jz	SaveValidResult
+	or	EMSEG:[CURerr],Precision;Set flag on inexact result
+;Chop if negative, increase mantissa if positive
+	and	esi,not ((1 shl 11)-1)	;Mask off low 11 bits
+	test	ch,bSign
+	jz	RoundUp53		;Round up if positive
+	jmp	SaveValidResult
+
+;***********
+Round53chop:
+;***********
+;Round toward zero at 53-bit precision: discard the low 11 bits of esi and
+;all of eax, flagging Precision if any discarded bit was set.
+;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+	cmp	ecx,TexpMin shl 16	;Test for Underflow
+	jl	RndDenorm53
+	mov	edx,esi			;Get low bits
+	and	edx,(1 shl 11) - 1	;Mask to last 11 bits
+	or	edx,eax			;Throwing away any bits?
+	jz	SaveValidResult
+	or	EMSEG:[CURerr],Precision;Set flag on inexact result
+	and	esi,not ((1 shl 11)-1)	;Mask off low 11 bits
+	jmp	SaveValidResult
+
+;*******************************************************************************
+;
+;24-bit rounding routines
+;
+
+;***********
+Round24down:
+;***********
+;Round toward -infinity at 24-bit precision (low 8 bits of ebx and all of
+;esi discarded).
+;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+	cmp	ecx,TexpMin shl 16	;Test for Underflow
+	jl	RndDenorm24
+	or	eax,esi			;Low dword is just sticky bits
+	mov	edx,ebx			;Get low bits
+	and	edx,(1 shl 8) - 1	;Mask to last 8 bits
+	or	edx,eax			;Throwing away any bits?
+	jz	SaveValidResult
+	or	EMSEG:[CURerr],Precision;Set flag on inexact result
+;Chop if positive, increase mantissa if negative
+	xor	esi,esi
+	and	ebx,not ((1 shl 8)-1)	;Mask off low 8 bits
+	test	ch,bSign
+	jz	SaveValidResult		;Chop if positive
+	jmp	RoundUp24
+
+RndDenorm24:
+	test	EMSEG:[CWmask],Underflow;Is exception unmasked?
+	jz	RndSetUnder
+;We don't really store in denormalized format, but we need the number 
+;to be rounded as if we do.  If the exponent were -IexpBias, we would
+;lose 1 bit of precision; as it gets more negative, we lose more bits.
+;We'll do this by adjusting the exponent so that the bits we want to 
+;keep look like integer bits, and performing round-to-integer.
+	add	ecx,(IexpBias+22) shl 16 ;Adjust exponent so we're integer
+	call	RoundToBit
+;Set underflow exception if precision exception is set.
+;The Precision flag sits one bit above the Underflow flag, so a single right
+;shift moves it into place.  (The previous "ror al,Precision-Underflow" used
+;the difference of the two MASKS as a rotate count; with the 80x87 flag
+;layout that is a rotate by 16, which leaves an 8-bit register unchanged, so
+;Underflow was never signalled here.)
+	mov	al,EMSEG:[CURerr]
+	and	al,Precision
+.erre	Precision eq (Underflow shl 1)
+	shr	al,1			;Move Precision bit to Underflow pos.
+	or	EMSEG:[CURerr],al	;Signal Underflow if inexact
+	cmp	cl,bTAG_ZERO
+	jz	SaveResult
+	sub	ecx,(IexpBias+22) shl 16;Restore unbiased exponent
+	cmp	ecx,(IexpMin-IexpBias+1) shl 16	;Did we round out of denorm?
+	jae	SaveResult
+	mov	cl,bTAG_DEN
+	jmp	SaveResult
+
+;***********
+Round24near:
+;***********
+;Round to nearest (even) at 24-bit precision.
+;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+;
+;Within ebx: bit 8 is the LSB that survives, bit 7 is the round bit and
+;bits 0-6 (plus all of esi and eax) are sticky bits.
+;RoundUp24 (add one unit at bit 8) is also the target used by Round24down
+;and Round24up.
+	cmp	ecx,TexpMin shl 16	;Test for Underflow
+	jl	RndDenorm24
+	or	eax,esi			;Low dword is just sticky bits
+	mov	edx,ebx			;Get low bits
+	and	edx,(1 shl 8) - 1	;Mask to last 8 bits
+	or	edx,eax			;Throwing away any bits?
+	jz	SaveValidResult
+	or	EMSEG:[CURerr],Precision;Set flag on inexact result
+	xor	esi,esi
+
+;To perform "round even" when the round bit is set and the sticky bits
+;are zero, we treat the LSB as if it were a sticky bit.  Thus if the LSB
+;is set, that will always force a round up (to even) if the round bit is
+;set.  If the LSB is zero, then the sticky bits remain zero and we always
+;round down.  
+
+	mov	edx,ebx
+	and	ebx,not ((1 shl 8)-1)	;Mask off low 8 bits
+	test	dl,1 shl 7		;Round bit set?
+	jz	SaveValidResult
+	and	edx,(3 shl 7)-1		;Mask to LSB and sticky bits
+	or	eax,edx			;Combine all sticky bits
+	jz	SaveValidResult
+RoundUp24:
+	mov	EMSEG:[SWcc],RoundUp
+	add	ebx,1 shl 8
+	jnc	SaveValidResult
+	jmp	BumpExponent		;Overflowed, increment exponent
+
+;***********
+Round24up:
+;***********
+;Round toward +infinity at 24-bit precision.
+;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+	cmp	ecx,TexpMin shl 16	;Test for Underflow
+	jl	RndDenorm24
+	or	eax,esi			;Low dword is just sticky bits
+	mov	edx,ebx			;Get low bits
+	and	edx,(1 shl 8) - 1	;Mask to last 8 bits
+	or	edx,eax			;Throwing away any bits?
+	jz	SaveValidResult
+	or	EMSEG:[CURerr],Precision;Set flag on inexact result
+;Chop if negative, increase mantissa if positive
+	xor	esi,esi
+	and	ebx,not ((1 shl 8)-1)	;Mask off low 8 bits
+	test	ch,bSign
+	jz	RoundUp24		;Round up if positive
+	jmp	SaveValidResult
+
+;***********
+Round24chop:
+;***********
+;Round toward zero at 24-bit precision: discard the low 8 bits of ebx, all
+;of esi and all of eax, flagging Precision if any discarded bit was set.
+;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+	cmp	ecx,TexpMin shl 16	;Test for Underflow
+	jl	RndDenorm24
+	or	eax,esi			;Low dword is just sticky bits
+	mov	edx,ebx			;Get low bits
+	and	edx,(1 shl 8) - 1	;Mask to last 8 bits
+	or	edx,eax			;Throwing away any bits?
+	jz	SaveValidResult
+	or	EMSEG:[CURerr],Precision;Set flag on inexact result
+	xor	esi,esi
+	and	ebx,not ((1 shl 8)-1)	;Mask off low 8 bits
+	jmp	SaveValidResult
+
+;*******************************************************************************
+
+;*** RoundToInteger
+;
+;This routine is used by FISTP Int64 and BSTP.  Unlike RoundToBit, this
+;unnormalizes the number into a 64-bit integer.
+;
+;Inputs:
+;	edi = pointer to number to round in stack
+;Outputs:
+;	CY set if invalid operation
+;	ebx:edi = rounded integer if CY clear
+;	ch = sign if CY clear
+;Note:
+;	FIST/FISTP/BSTP exception rules are used:  If the number is too big,
+;	Invalid Operation occurs.  Denormals are ignored.
+;
+;esi preserved
+;eax, ecx and edx are used as scratch.
+
+RoundSpcl64Int:
+	cmp	cl,bTAG_DEN
+	jz	NormRound64Int		;Ignore denormal
+	cmp	cl,bTAG_EMPTY
+	jnz	RoundInvalid		;All other specials are invalid
+	mov	EMSEG:[CURerr],StackFlag+Invalid
+	stc				;Flag exception to caller
+	ret
+
+RoundInvalid:
+;Overflow on integer store is invalid according to IEEE
+	mov	EMSEG:[CURerr],Invalid
+	stc				;Flag exception to caller
+	ret
+
+RoundToInteger:
+	mov	ebx,EMSEG:[edi].lManHi
+	mov	ecx,EMSEG:[edi].ExpSgn
+	mov	edi,EMSEG:[edi].lManLo
+;mantissa in ebx:edi, exponent in high ecx, sign in ch bit 7, tag in cl
+	mov	al,ch			;Save sign bit
+	cmp	cl,bTAG_ZERO
+.erre	bTAG_VALID lt bTAG_ZERO
+.erre	bTAG_SNGL lt bTAG_ZERO
+	jz	RoundIntX		;Just return zero
+	ja	RoundSpcl64Int
+NormRound64Int:
+	xor	edx,edx
+	sar	ecx,16			;Bring exponent down
+	cmp	ecx,-1			;Is it less than 1?
+	jle	Under64Int
+	cmp	ecx,63
+	jg	RoundInvalid
+	sub	ecx,63
+	neg	ecx			;cl = amount to shift right
+	mov	ch,al			;Get sign out of al
+	xor	eax,eax
+	cmp	cl,32			;Too big for one shift?
+	jl	ShortShft64
+;32-bit shift right
+	xchg	edx,edi
+	xchg	ebx,edi			;ebx=0 now
+	shrd	eax,edx,cl
+;Max total shift is 63 bits, so we know that the LSB of eax is still zero.
+;We can rotate this zero to the MSB so the sticky bits in eax can be combined
+;with those in edx without affecting the rounding bit in the MSB of edx.
+	ror	eax,1			;MSB is now zero
+ShortShft64:
+;Shift count in cl is modulo-32
+	shrd	edx,edi,cl
+	shrd	edi,ebx,cl
+	shr	ebx,cl
+	or	edx,eax			;Collapse sticky bits into one dword
+	jz	RoundIntX		;No sticky or round bits, so don't round
+;Result will not be exact--check rounding mode
+Round64Int:
+	mov	EMSEG:[CURerr],Precision;Set flag on inexact result
+	test	EMSEG:[CWcntl],RoundControl	;Check rounding control bits
+.erre	RCnear eq 0
+	jnz	NotNearest64Int		;Not just round-to-nearest
+
+;To perform "round even" when the round bit is set and the sticky bits
+;are zero, we treat the LSB as if it were a sticky bit.  Thus if the LSB
+;is set, that will always force a round up (to even) if the round bit is
+;set.  If the LSB is zero, then the sticky bits remain zero and we always
+;round down.
+
+	bt	edi,0			;Look at LSB (for round even)
+	adc	edx,(1 shl 31)-1	;CY set if round up
+	jnc	RoundIntX
+	mov	EMSEG:[SWcc],RoundUp
+	add	edi,1			;Round
+	adc	ebx,0
+	jc	RoundInvalid
+RoundIntX:
+	ret				;CY clear, no Invalid exception
+
+Shift64Round:
+;Exponent is -1: the whole mantissa is fraction.  edx holds the old high
+;dword (round bit in its MSB, sticky bits below it) and edi the old low
+;dword, which is all sticky.  Fold "edi != 0" into edx WITHOUT discarding
+;the sticky bits already in dl; overwriting dl made values such as
+;80000001:00000000H look like exactly 0.5 and round to even (0) instead of 1.
+	or	edi,edi
+	setnz	al			;al is free: sign was moved to ch
+	or	dl,al			;Merge sticky bit into edx
+	xor	edi,edi			;Mantissa is all zero
+	jmp	Round64Int
+
+Under64Int:
+;ZF set if exponent is -1
+;(xchg and mov below do not alter flags, so ZF from the cmp survives.)
+	xchg	ebx,edx			;64-bit right shift
+	mov	ch,al			;Restore sign to ch
+	jz	Shift64Round		;Exp. is -1, could need to round up
+	xor	edi,edi			;Mantissa is all zero
+	mov	EMSEG:[CURerr],Precision;Set flag on inexact result
+NotNearest64Int:
+;We want to increase the magnitude if RCup and +, or RCdown and -
+	mov	al,EMSEG:[CWcntl]	;Get rounding control
+.erre	(not RCup and RoundControl) eq RCdown
+	sar	ch,7			;Expand sign through whole byte
+	xor	al,ch			;Flip round mode if -
+	and	al,RoundControl
+	cmp	al,RCup			;Rounding up?
+	jnz	RoundIntOk		;No, chop it
+	mov	EMSEG:[SWcc],RoundUp
+	add	edi,1
+	adc	ebx,0
+	jc	RoundInvalid
+RoundIntOk:
+	clc
+	ret
+
+;*******************************************************************************
+
+;*** RoundToBit
+;
+;This is a relatively low performance routine used by FRNDINT and to
+;generate internal-format denormals.  It can round to any bit position.
+;
+;Inputs:
+;	mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
+;Purpose:
+;	Round number to integer.  Zero exponent means number is in the
+;	range [1,2), so only the MSB will survive (MSB-1 is round bit).
+;	Larger exponents keep more bits; 63 would mean no rounding.
+;Outputs:
+;	mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl
+;
+;Does NOT detect overflow.
+;
+;Uses eax, edx and edi as scratch.  On the directed-rounding paths ch is
+;sign-expanded (sar ch,7), so only bit 7 of ch is meaningful on return.
+
+NoSigBits:
+;Exponent was negative: no integer part
+	and	ecx,bSign shl 8		;Zero exponent, preserve sign
+	mov	cl,bTAG_ZERO
+	or	EMSEG:[CURerr],Precision;Set flag on inexact result
+	test	EMSEG:[CWcntl],RoundControl	;Check rounding control bits
+.erre	RCnear eq 0
+	jnz	NotNearNoSig		;Not just round-to-nearest
+	cmp	edx,-1			;Exponent of -1 ==> range [.5,1)
+	je	HalfBitRound
+RndIntToZero:
+	xor	ebx,ebx
+	mov	esi,ebx			;Just return zero
+	ret
+
+NotNearNoSig:
+;We want to increase the magnitude if RCup and +, or RCdown and -
+	mov	al,EMSEG:[CWcntl]	;Get rounding control
+	sar	ch,7			;Expand sign through whole byte
+	xor	al,ch			;Flip rounding bits if negative
+	and	al,RoundControl
+	cmp	al,RCup			;Rounding up?
+	jnz	RndIntToZero		;No, chop it
+RndIntToOne:
+	mov	ebx,1 shl 31
+	xor	esi,esi
+	mov	cl,bTAG_SNGL
+	mov	EMSEG:[SWcc],RoundUp
+	ret
+
+HalfBitRound:
+;Value is in [.5,1): it rounds to 1 unless it is exactly .5 (tie -> even = 0)
+	add	ebx,ebx			;Shift off MSB (round bit)
+	or	ebx,esi
+	or	ebx,eax
+	jnz	RndIntToOne
+	ret				;Return zero
+
+;**********
+RoundToBit:
+;**********
+	mov	edx,ecx			;Make copy of exponent
+	sar	edx,16			;Bring rounding exponent down
+;Test the sign flag directly: OF is undefined after a multi-bit shift, so a
+;signed-compare jump (jl) here would depend on an undefined flag.
+	js	NoSigBits
+	mov	cl,dl
+	cmp	cl,32			;Rounding in low word?
+	jae	RoundLow
+;When cl = 31, the RoundBit is in the low half while the LSB is in the
+;high half.  We must preserve the RoundBit when we move it to eax.
+	xchg    eax,esi                 ;Low half becomes sticky bits
+	or      ah,al                   ;Preserve lowest bits in ah
+	add     esi,-1                  ;Set CY if any original sticky bits
+	sbb     al,al                   ;Put original sticky bits in al
+	mov	esi,ebx
+	xor	ebx,ebx			;Shift mantissa right 32 bits
+RoundLow:
+	mov	edx,(1 shl 31) - 1
+	shr	edx,cl			;Make mask
+;Note in the case of cl = 31, edx is now zero.
+	mov	edi,esi
+	and	edi,edx
+	or	edi,eax			;Any bits being lost?
+	jz	RndSetTag		;All done
+	inc	edx			;Mask for LSB
+	or	EMSEG:[CURerr],Precision;Set flag on inexact result
+	test	EMSEG:[CWcntl],RoundControl	;Check rounding control bits
+.erre	RCnear eq 0
+	jnz	NotNearLow		;Not just round-to-nearest
+	mov	edi,edx			;Save LSB mask
+	shr	edi,1			;Mask for round bit
+	jc	SplitRound		;Round bit in eax?
+	test	esi,edi			;Round bit set?
+	jz	MaskOffLow
+	dec	edi			;Mask for sticky bits
+	or	edi,edx			;Sticky bits + LSB
+	and	edi,esi
+	or	edi,eax			;Any sticky bits set?
+	jz	MaskOffLow
+RoundUpThenMask:
+	mov	EMSEG:[SWcc],RoundUp
+	add	esi,edx			;Round up
+	adc	ebx,0
+	jc	RoundBumpExp
+MaskOffLow:
+	dec	edx			;Mask for round & sticky bits
+	not	edx
+	and	esi,edx			;Zero out low bits
+RndSetTag:
+	or	ebx,ebx			;Is it normalized?
+        jns     RoundedHighHalf
+        or      esi,esi                 ;Any bits in low half?
+.erre   bTAG_VALID eq 1
+.erre   bTAG_SNGL eq 0
+        setnz   cl                      ;if low half==0 then cl=0 else cl=1
+        ret
+
+SplitRound:
+;Rounding high half in esi on rounding bit in eax
+	bt	esi,0			;Look at LSB
+	adc	eax,(1 shl 31) - 1	;Set CY if round up
+	jc	RoundUpThenMask
+        or      ebx,ebx                 ;Will set ZF for jnz below
+RoundedHighHalf:
+;Rounding occurred in high half, which had been moved low.
+;Move it back up high.
+;
+;ZF set here on content of ebx.  If not zero, rounding high half in esi
+;rippled forward into zero in ebx.
+        mov     cl,bTAG_SNGL
+        jnz     RndIntNorm              ;Present high half should be zero
+        xchg    ebx,esi                 ;Shift left 32 bits
+        ret
+
+RndIntNorm:
+;Rounded up high half of mantissa, which rolled over to 0.
+	add	ecx,1 shl 16		;Increase exponent
+	mov	ebx,1 shl 31		;Restore MSB
+	ret				;Tag already set to SNGL
+
+RoundBumpExp:
+;Mantissa was FFFFF... and rolled over to 0 when we rounded
+	add	ecx,1 shl 16		;Increase exponent
+	mov	ebx,1 shl 31		;Restore MSB
+	jmp	MaskOffLow
+
+NotNearLow:
+;We want to increase the magnitude if RCup and +, or RCdown and -
+	mov	al,EMSEG:[CWcntl]	;Get rounding control
+	sar	ch,7			;Expand sign through whole byte
+.erre	(not RCup and RoundControl) eq RCdown
+	xor	al,ch			;Flip rounding bits if negative
+	and	al,RoundControl
+	cmp	al,RCup			;Rounding up?
+	jz	RoundUpThenMask		;yes
+	jmp	MaskOffLow		;No, chop it
diff --git a/private/ntos/dll/i386/emsincos.asm b/private/ntos/dll/i386/emsincos.asm
new file mode 100644
index 000000000..8cd3bc0ac
--- /dev/null
+++ b/private/ntos/dll/i386/emsincos.asm
@@ -0,0 +1,571 @@
+;      SCCSID = @(#)emsincos.asm	       13.5 90/03/27
+	page	,132
+	subttl	emsincos - fsin, fcos and fsincos
+;***
+;emsincos.asm -  80387 emulator: fsin, fcos and fsincos
+;
+;	 IBM/Microsoft Confidential
+;
+;	 Copyright (c) IBM Corporation 1987, 1989
+;	 Copyright (c) Microsoft Corporation 1987, 1989
+;
+;	 All Rights Reserved
+;
+;Purpose:
+;	Code for fsin, fcos and fsincos
+;
+;Revision History:
+;	See emulator.hst
+;
+;*******************************************************************************
+
+;eFsincosStackOver
+;	Branched to by eFSINCOS when the element in front of st(0) is not
+;	empty.  StackFlag and Invalid are always recorded in CURerr.  If the
+;	Invalid bit of CWmask is set, the same indefinite pattern is written
+;	to the elements addressed by rsi and rdi and rdi becomes CURstk;
+;	if it is clear, nothing else is modified.
+lab eFsincosStackOver
+	or	SEL[CURerr], StackFlag or Invalid
+	test	SEL[CWmask], Invalid
+	JSZ	eFsincosStackOverRet
+
+	; Fill both elements field by field: rsi = st(0), rdi = st(-1).
+	mov	SEL[rsi.lMan0], 0			; low mantissa dword
+	mov	SEL[rdi.lMan0], 0
+	mov	SEL[rsi.lMan1], 0c0000000h		; high mantissa dword
+	mov	SEL[rdi.lMan1], 0c0000000h
+	mov	SEL[rsi.wExp], 7fffh - IexpBias		; exponent
+	mov	SEL[rdi.wExp], 7fffh - IexpBias
+	mov	SEL[rsi.bTag], bTAG_NAN			; tag
+	mov	SEL[rdi.bTag], bTAG_NAN
+	mov	SEL[rsi.bFlags], bSign			; sign
+	mov	SEL[rdi.bFlags], bSign
+
+	mov	SEL[CURstk], rdi		; st(-1) becomes the new top
+lab eFsincosStackOverRet
+	ret
+
+
+;eFSINCOS
+;	Loads esi with CURStk (st(0)) and edi with the element PrevStackElem
+;	yields for it (st(-1)).  If st(-1) is not empty, control goes to
+;	eFsincosStackOver.  If st(0) is tagged NaN with bit 40h of bMan7 clear
+;	and the Invalid bit of CWmask clear, Invalid is recorded in CURerr and
+;	the routine returns without touching the stack.  Otherwise st(0) is
+;	copied into st(-1), eFSIN is called, PUSHST is executed and eFCOS is
+;	called.
+lab eFSINCOS
+	mov	esi, SEL[CURStk]    ; esi = st(0)
+	mov	edi, esi
+	PrevStackElem	 edi	    ; edi = st(-1)
+
+	cmp	SEL[edi.bTag], bTAG_EMPTY
+	JSNE	eFsincosStackOver   ; no free slot to push into
+
+	cmp	SEL[esi.bTag], bTAG_NAN
+	JSNE	eFsincosNotSNaN
+
+	test	SEL[esi.bMan7], 40h ; bit set => not treated as signaling
+	JSNZ	eFsincosNotSNaN
+
+	test	SEL[CWmask], Invalid
+	JSNZ	eFsincosNotSNaN	    ; CWmask bit set: continue normally
+
+	or	SEL[CURerr], Invalid
+	ret
+
+lab eFsincosNotSNaN
+ifdef NT386
+; Copy three dwords from [rsi] to [rdi] through eax, then advance both
+; pointers by Reg87Len.
+; NOTE(review): presumably this mirrors the pointer advance of the three
+; movsd instructions in the other branch -- confirm Reg87Len == 12.
+        push    eax
+        mov     eax, dword ptr SEL[rsi]
+        mov     dword ptr SEL[rdi], eax
+        mov     eax, dword ptr SEL[rsi+4]
+        mov     dword ptr SEL[rdi+4], eax
+        mov     eax, dword ptr SEL[rsi+8]
+        mov     dword ptr SEL[rdi+8], eax
+        add     rsi, Reg87Len
+        add     rdi, Reg87Len
+        pop     eax
+else
+        push	ds		    ; Copy current stack into st(-1)
+	pop	es
+	movsd
+	movsd
+	movsd
+endif
+
+	call	eFSIN		    ; eFSIN reloads esi from CURstk itself
+	PUSHST			    ; presumably makes the copy the new st(0) -- macro not visible here
+	call	eFCOS
+
+	ret
+
+
+;eFcosSpecial
+;	Jumped to from inside eFCOS when st(0) is not tagged bTAG_VALID.
+;	The first two instructions undo the frame through ebp before any
+;	return.  Dispatch on the tag in al:
+;	  bTAG_ZERO  -> store the constant built at eFcosRetOne and return
+;	  bTAG_INF   -> jump to RetIndInv
+;	  otherwise  -> jump to OneArgOpNaNRet
+lab eFcosSpecial
+	mov	esp, ebp		; discard eFCOS locals
+	pop	ebp
+
+	mov	SEL[RESULT], esi
+
+	mov	al, SEL[esi.bTag]
+	cmp	al, bTAG_ZERO
+	JSNE	eFcosInf
+
+lab eFcosRetOne
+	; mantissa 80000000:00000000, exponent 3fffh (less IexpBias), sign clear
+	mov	SEL[esi.lMan0], 0
+	mov	SEL[esi.lMan1], 080000000h
+	mov	SEL[esi.wExp], 3fffh - IexpBias
+	mov	SEL[esi.bFlags], 0
+	mov	SEL[esi.bTag], bTAG_VALID
+	ret
+
+lab eFcosInf
+	cmp	al, bTAG_INF
+	JE	RetIndInv
+
+lab eFcosNaN
+	jmp	OneArgOpNaNRet
+
+
+;eFCOS
+;	Replaces st(0) (the element at CURstk) with its cosine.
+;	Non-VALID tags are handed to eFcosSpecial.  For a VALID operand:
+;	  - Precision is recorded in CURerr and the sign bit is cleared;
+;	  - SinCosReduce reduces the operand and leaves octant bits in ah;
+;	  - the 10-byte operand (wExp, lMan1, lMan0) and the address of
+;	    temp are pushed for __FASTLDCOS or __FASTLDSIN;
+;	  - the 10 bytes at temp are copied back into st(0), re-applying
+;	    IExpBias to the exponent;
+;	  - the sign bit is set when SignFlag has odd parity.
+cProc  eFCOS,<PLM,PUBLIC>,<>
+
+	localT	temp
+	localB	SignFlag
+
+cBegin
+	mov	esi, SEL[CURstk]
+
+	cmp	SEL[esi.bTag], bTAG_VALID
+	jne	eFcosSpecial
+
+	or	SEL[CURerr], Precision
+
+	and	SEL[esi].bFlags, not bSign ; st(0) = fabs( st(0) );
+
+	call	SinCosReduce		; Set ah to condition code.
+
+	add	SEL[esi].wExp, IExpBias
+
+	push	SEL[esi].wExp		; argument x, pushed as three pieces
+	push	SEL[esi].lMan1
+	push	SEL[esi].lMan0
+	lea	ecx, [temp]
+	push	ecx			; where the callee writes its result
+
+	mov	bl, ah			; if octant 2, 3, 4, or 5 then final
+	and	bl, bOCT2 or bOCT4	; result must be negative
+	mov	[SignFlag], bl		; exactly one bit set => odd parity
+
+	test	ah, bOCT1 or bOCT2	; if octant is 1, 2, 5, 6 then must
+	jpo	CosCallSin		; do sin()
+
+	call	__FASTLDCOS
+	jmp	short CosCopyRes
+
+CosCallSin:
+	call	__FASTLDSIN
+
+CosCopyRes:
+	mov	eax, dword ptr [temp]
+	mov	SEL[esi].lMan0, eax
+	mov	eax, dword ptr [temp+4]
+	mov	SEL[esi].lMan1, eax
+
+	mov	ax,  word ptr [temp+8]
+	sub	ax, IExpBias
+	mov	SEL[esi].wExp, ax
+
+	cmp	[SignFlag], 0		; PF reflects parity of SignFlag
+	jpe	CosDone
+
+	or	SEL[esi].bFlags, bSign	; Make result negative.
+CosDone:
+
+cEnd
+
+
+
+
+
+;eFsinSpecial
+;	Jumped to from inside eFSIN when st(0) is not tagged bTAG_VALID.
+;	The frame set up by eFSIN is undone through ebp first.  Dispatch on
+;	the tag in al:
+;	  bTAG_ZERO  -> return with st(0) untouched
+;	  bTAG_INF   -> jump to RetIndInv
+;	  otherwise  -> jump to OneArgOpNaNRet
+lab eFsinSpecial
+	mov	esp, ebp		; discard eFSIN locals
+	pop	ebp
+
+	mov	al, SEL[esi.bTag]
+	cmp	al, bTAG_ZERO
+	JSNE	eFsinInf
+
+lab eFsinZero
+	ret
+
+lab eFsinInf
+	cmp	al, bTAG_INF
+	JE	RetIndInv
+
+lab eFsinNaN
+	jmp	OneArgOpNaNRet
+
+
+;eFSIN
+;	Replaces st(0) (the element at CURstk) with its sine.
+;	Non-VALID tags are handed to eFsinSpecial.  For a VALID operand:
+;	  - Precision is recorded in CURerr; the original sign is captured
+;	    in cl (-1 if set, else 0) and then cleared in st(0);
+;	  - SinCosReduce reduces the operand and leaves octant bits in ah
+;	    (ecx is saved around the call);
+;	  - if the reduced operand is tagged bTAG_ZERO the routine is done;
+;	  - otherwise the 10-byte operand and the address of temp are pushed
+;	    for __FASTLDSIN or __FASTLDCOS, the result is copied back, and
+;	    the sign bit is set when SignFlag has odd parity.
+cProc  eFSIN,<PLM,PUBLIC>,<>
+
+	localT	temp
+	localB	SignFlag
+
+cBegin
+	mov	esi, SEL[CURstk]
+
+	cmp	SEL[esi.bTag], bTAG_VALID
+	jne	eFsinSpecial
+
+	or	SEL[CURerr], Precision
+
+	mov	al, SEL[esi].bFlags
+	and	SEL[esi].bFlags, not bSign
+
+	shl	al, 1		    ; shift sign into carry.
+	sbb	cl, cl		    ; set cl to -1 if argument is negative.
+
+	push	ecx
+	call	SinCosReduce	    ; Set ah to condition code.
+	pop	ecx
+
+	cmp	SEL[esi].bTag, bTAG_ZERO
+	je	SinDone
+
+	add	SEL[esi].wExp, IExpBias
+
+	push	SEL[esi].wExp		; argument x, pushed as three pieces
+	push	SEL[esi].lMan1
+	push	SEL[esi].lMan0
+	lea	ebx, [temp]
+	push	ebx			; where the callee writes its result
+
+	mov	bl, ah			; if octant 4, 5, 6 or 7 then final
+	and	bl, bOCT4		; result must be negative
+
+	neg	cl			; set cl to odd parity if arg was < 0.0
+	xor	bl, cl			; set bl to odd parity if result must be negative
+
+	mov	[SignFlag], bl
+
+	test	ah, bOCT1 or bOCT2	; if octant is 1, 2, 5, 6 then must
+	jpo	SinCallCos		; do cos()
+
+	call	__FASTLDSIN
+	jmp	short SinCopyResult
+
+SinCallCos:
+	call	__FASTLDCOS
+
+SinCopyResult:
+	mov	eax, dword ptr [temp]
+	mov	SEL[esi].lMan0, eax
+	mov	eax, dword ptr [temp+4]
+	mov	SEL[esi].lMan1, eax
+
+	mov	ax, word ptr [temp+8]
+	sub	ax, IExpBias
+	mov	SEL[esi].wExp, ax
+
+	cmp	[SignFlag], 0		; PF reflects parity of SignFlag
+	jpe	SinDone
+
+	or	SEL[esi].bFlags, bSign	; Make result negative.
+SinDone:
+
+cEnd
+
+
+
+;SinCosReduce
+;	Entry:	esi = element to reduce (callers pass st(0)).
+;	Builds pi/4 in TEMP1, calls InternFPREM with edi addressing TEMP1,
+;	then loads ah from SWcc.  If bOCT1 is set in ah, the operand is
+;	passed together with PIBY4 to __FASTLDADD (with IExpBias applied
+;	around the call) and ah is reloaded from SWcc.
+;	Exit:	ah = SWcc byte; esi preserved across InternFPREM.
+lab SinCosReduce
+	mov	SEL[TEMP1.bFlags], 0		; TEMP1 = pi/4
+	mov	SEL[TEMP1.bTag], bTAG_VALID
+	mov	SEL[TEMP1.wExp], 3ffeh-IExpBias
+	mov	SEL[TEMP1.wMan3], 0c90fh
+	mov	SEL[TEMP1.wMan2], 0daa2h
+	mov	SEL[TEMP1.wMan1],	2168h
+	mov	SEL[TEMP1.wMan0], 0c235h
+
+ifdef NT386
+        mov     edi, TEMP1
+else
+	mov	edi, edataOFFSET TEMP1
+endif
+
+	push	esi
+	call	InternFPREM		    ; esi = st(0), edi = TEMP1 (pi/4)
+	pop	esi
+
+	mov	ah, SEL[SWcc]
+
+	test	ah, bOCT1		; check for even octant
+	jz	EvenOct 		;   yes
+
+	add	SEL[esi.wExp], IExpBias	; convert to true long double
+
+	push	ds			; destination = st(0)
+	push	esi
+	push	cs			; first operand = PIBY4 constant
+	push	ecodeOFFSET PIBY4
+	push	ds			; second operand = st(0)
+	push	esi
+	push	-1			; presumably selects subtraction -- confirm in __FASTLDADD
+	call	__FASTLDADD		; st(0) = pi/4 - st(0)
+	mov	ah, SEL[SWcc]
+
+	sub	SEL[esi.wExp], IExpBias	; convert back to the emulator's internal exponent form
+
+EvenOct:
+	retn
+
+
+
+;PIBY4: the same four mantissa words and exponent (3ffeh) that SinCosReduce
+;loads into TEMP1, stored low word first.
+labelW	PIBY4
+    dw	    0c235h, 02168h, 0daa2h, 0c90fh, 3ffeh
+
+;Coefficient tables used by __FASTLDSIN and __FASTLDCOS.  Each entry is
+;10 bytes; the polynomial loops start at the P7 entry and step backwards
+;10 bytes at a time, so the entries of each table must stay contiguous
+;and in P0..P7 order.
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; BUGBUG tedm: NT masm can't handle floating-point constants  ;
+;              because strtod and _strtold C-runtimes aren't  ;
+;              there.  So the constants below must be pre-    ;
+;              assembled and defined as a byte stream.        ;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ifdef NOTDEF
+
+staticT  FourByPI, +0.1273239544735162686151e+01
+
+staticT  SinP0, +0.7853981633974483096141845e+00
+staticT  SinP1, -0.8074551218828078152025820e-01
+staticT  SinP2, +0.2490394570192716275251900e-02
+staticT  SinP3, -0.3657620418214640005290000e-04
+staticT  SinP4, +0.3133616889173253480000000e-06
+staticT  SinP5, -0.1757247417617080600000000e-08
+staticT  SinP6, +0.6948152035052200000000000e-11
+staticT  SinP7, -0.2022531292930000000000000e-13
+
+staticT  CosP0, +0.99999999999999999996415e+00
+staticT  CosP1, -0.30842513753404245242414e+00
+staticT  CosP2, +0.15854344243815410897540e-01
+staticT  CosP3, -0.32599188692668755044000e-03
+staticT  CosP4, +0.35908604458858195300000e-05
+staticT  CosP5, -0.24611363826370050000000e-07
+staticT  CosP6, +0.11500497024263000000000e-09
+staticT  CosP7, -0.38577620372000000000000e-12
+
+else
+
+;Pre-assembled byte images of the constants listed in the NOTDEF branch.
+;SinP0 carries the same ten bytes as PIBY4.
+
+staticB  FourByPI, <02Ah,015h,044h,04Eh,06Eh,083h,0F9h,0A2h,0FFh,03Fh>
+
+staticB  SinP0   , <035h,0C2h,068h,021h,0A2h,0DAh,00Fh,0C9h,0FEh,03Fh>
+staticB  SinP1   , <0DAh,095h,0F2h,02Dh,031h,0E7h,05Dh,0A5h,0FBh,0BFh>
+staticB  SinP2   , <0E9h,0C6h,056h,0ADh,03Bh,0E3h,035h,0A3h,0F6h,03Fh>
+staticB  SinP3   , <0D5h,0E7h,05Dh,015h,073h,066h,069h,099h,0F0h,0BFh>
+staticB  SinP4   , <0BCh,032h,069h,0E1h,042h,01Ah,03Ch,0A8h,0E9h,03Fh>
+staticB  SinP5   , <021h,077h,004h,05Fh,0A1h,0A5h,083h,0F1h,0E1h,0BFh>
+staticB  SinP6   , <0FCh,01Ah,0D1h,006h,0CCh,063h,077h,0F4h,0D9h,03Fh>
+staticB  SinP7   , <04Ah,003h,086h,040h,07Ch,065h,02Ch,0B6h,0D1h,0BFh>
+
+staticB  CosP0   , <0FFh,0FFh,0FFh,0FFh,0FFh,0FFh,0FFh,0FFh,0FEh,03Fh>
+staticB  CosP1   , <02Fh,0F2h,02Eh,0F2h,04Dh,0E6h,0E9h,09Dh,0FDh,0BFh>
+staticB  CosP2   , <02Fh,04Eh,0D5h,0DAh,040h,0F8h,0E0h,081h,0F9h,03Fh>
+staticB  CosP3   , <09Dh,0DEh,06Ah,0E4h,0F1h,0E3h,0E9h,0AAh,0F3h,0BFh>
+staticB  CosP4   , <031h,01Eh,0F9h,081h,041h,083h,0FAh,0F0h,0ECh,03Fh>
+staticB  CosP5   , <076h,0B1h,000h,0A4h,01Eh,0F6h,068h,0D3h,0E5h,0BFh>
+staticB  CosP6   , <0D8h,005h,06Fh,08Ah,0EAh,00Ah,0E6h,0FCh,0DDh,03Fh>
+staticB  CosP7   , <003h,0D5h,00Ah,0ACh,0CCh,035h,02Ch,0D9h,0D5h,0BFh>
+
+endif
+
+;__FASTLDSIN
+;	Parameters:	x      - 10-byte operand, modified in place
+;			RetOff - offset (in ss) where the result is written
+;	Steps:	x = x * FourByPI; x2 = x * x; poly = SinP7; then seven times
+;		poly = poly * x2 + SinP[n] for n = 6 down to 0; finally
+;		[RetOff] = x * poly.
+;	Returns iax = RetOff and idx = ss.  isi and idi are listed in the
+;	cProc save list.
+cProc __FASTLDSIN,<PLM,PUBLIC>,<isi,idi>
+
+	parmT	x
+	parmI	RetOff
+
+	localT	x2
+	localT	poly
+	localI	count
+
+cBegin
+
+	lea	isi, [x]		    ; x = x * (4/PI)
+	push	ss			    ; destination
+	push	isi
+
+	push	ss			    ; first factor
+	push	isi
+
+	mov	iax, codeOFFSET FourByPI
+	push	cs			    ; second factor
+	push	iax
+
+	call	__FASTLDMULT
+
+
+	lea	idi, [x2]		    ; x2 = x * x
+	push	ss
+	push	idi
+
+	push	ss
+	push	isi
+
+	push	ss
+	push	isi
+
+	call	__FASTLDMULT
+
+; The string-move version below is disabled; the explicit moves after it
+; copy the same ten bytes.
+if 0
+	push	ss
+	pop	es
+	lea	idi, [poly]
+	mov	isi, codeOFFSET SinP7
+	movsw
+	movsw
+	movsw
+	movsw
+	movsw
+endif
+	mov	eax, dword ptr [SinP7]	    ; poly = SinP7
+	mov	dword ptr [poly], eax
+	mov	eax, dword ptr [SinP7+4]
+	mov	dword ptr [poly+4], eax
+	mov	ax, word ptr [SinP7+8]
+	mov	word ptr [poly+8], ax
+
+	lea	isi, [poly]		    ; isi -> running polynomial value
+	mov	idi, codeOFFSET SinP6	    ; idi -> next coefficient
+
+	mov	[count], 7		    ; SinP6 .. SinP0
+
+SinPolyLoop:
+	push	ss
+	push	isi		    ; poly = poly * x2
+
+	push	ss
+	push	isi
+
+	lea	iax, [x2]
+	push	ss
+	push	iax
+
+	call	__FASTLDMULT
+
+
+	push	ss
+	push	isi		    ; poly = poly + SinP[n]
+
+	push	ss
+	push	isi
+
+	push	cs
+	push	idi
+
+	xor	iax, iax	    ; last argument is 0 here (SinCosReduce passes -1)
+	push	iax
+	call	__FASTLDADD
+
+	sub	idi, 10		    ; step back to the previous 10-byte coefficient
+
+	dec	[count]
+	jnz	SinPolyLoop
+
+	push	ss
+	push	[RetOff]		; return x * poly
+
+	lea	iax, [x]
+	push	ss
+	push	iax
+
+	push	ss
+	push	isi
+
+	call	__FASTLDMULT
+
+	mov	iax, [RetOff]
+	mov	idx, ss
+cEnd
+
+
+
+
+;__FASTLDCOS
+;	Parameters:	x      - 10-byte operand, modified in place
+;			RetOff - offset (in ss) where the result is built
+;	Steps:	x = x * FourByPI; x2 = x * x; [RetOff] = CosP7; then seven
+;		times [RetOff] = [RetOff] * x2 + CosP[n] for n = 6 down to 0.
+;	Returns iax = RetOff (held in isi) and idx = ss.  isi and idi are
+;	listed in the cProc save list.
+cProc  __FASTLDCOS,<PLM,PUBLIC>,<isi,idi>
+
+	parmT	x
+	parmI	RetOff
+
+	localT	x2
+	localI	count
+
+cBegin
+
+	lea	isi, [x]		    ; x = x * (4/PI)
+	push	ss			    ; destination
+	push	isi
+
+	push	ss			    ; first factor
+	push	isi
+
+	mov	iax, codeOFFSET FourByPI
+	push	cs			    ; second factor
+	push	iax
+
+	call	__FASTLDMULT
+
+
+	lea	idi, [x2]		    ; x2 = x * x
+	push	ss
+	push	idi
+
+	push	ss
+	push	isi
+
+	push	ss
+	push	isi
+
+	call	__FASTLDMULT
+
+; The string-move version below is disabled; the explicit moves after it
+; copy the same ten bytes.
+if 0
+	push	ss			    ; (return) = CosP7
+	pop	es
+	mov	idi, [RetOff]
+	mov	isi, codeOFFSET CosP7
+	movsw
+	movsw
+	movsw
+	movsw
+	movsw
+endif
+	mov	isi, [RetOff]		    ; isi -> result buffer from here on
+	mov	eax, dword ptr [CosP7]	    ; (return) = CosP7
+	mov	dword ptr ss:[isi], eax
+	mov	eax, dword ptr [CosP7+4]
+	mov	dword ptr ss:[isi+4], eax
+	mov	ax, word ptr [CosP7+8]
+	mov	word ptr ss:[isi+8], ax
+
+	mov	idi, codeOFFSET CosP6	    ; idi -> next coefficient
+
+	mov	[count], 7		    ; CosP6 .. CosP0
+
+CosPolyLoop:
+	push	ss
+	push	isi		    ; (return) = (return) * x2
+
+	push	ss
+	push	isi
+
+	lea	iax, [x2]
+	push	ss
+	push	iax
+
+	call	__FASTLDMULT
+
+
+	push	ss
+	push	isi		    ; (return) = (return) + CosP[n]
+
+	push	ss
+	push	isi
+
+	push	cs
+	push	idi
+
+	xor	iax, iax	    ; last argument is 0 here (SinCosReduce passes -1)
+	push	iax
+
+	call	__FASTLDADD
+
+
+	sub	idi, 10		    ; step back to the previous 10-byte coefficient
+
+	dec	[count]
+	jnz	CosPolyLoop
+
+	mov	iax, isi
+	mov	idx, ss
+cEnd
diff --git a/private/ntos/dll/i386/emstack.inc b/private/ntos/dll/i386/emstack.inc
new file mode 100644
index 000000000..a60c03aff
--- /dev/null
+++ b/private/ntos/dll/i386/emstack.inc
@@ -0,0 +1,72 @@
+	subttl	emstack.asm - Emulator Stack Management Macros
+	page
+;***
+;emstack.asm - Emulator Stack Management Area
+;
+;	 Microsoft Confidential
+;	 Copyright (c) Microsoft Corporation 1991
+;	 All Rights Reserved
+;
+;Purpose:
+;	Handles emulator stack.
+;
+;Revision History:
+;
+; []	09/05/91  TP	Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+;POPSTret:  pops the stack and returns.  Uses esi.
+;
+;	reg - optional register already holding [CURstk].  When omitted,
+;	      esi is loaded from [CURstk] and used instead.
+;
+;The element at reg is tagged bTAG_EMPTY, reg is advanced with
+;NextStackElem, [CURstk] is updated and the macro executes RET.  The
+;wrap-around target that NextStackElem branches to is supplied here:
+;it stores BEGstk into [CURstk] and also returns.
+
+POPSTret        macro	reg
+        local   stackwrap
+IFB	<reg>
+        mov     esi,EMSEG:[CURstk]
+_popreg	equ	esi
+ELSE
+_popreg	equ	reg
+ENDIF
+	mov     EMSEG:[_popreg].bTag,bTAG_EMPTY
+        NextStackElem   _popreg,stackwrap
+        mov     EMSEG:[CURstk],_popreg
+        ret
+
+Wrap&stackwrap:
+	mov	EMSEG:[CURstk],BEGstk
+	ret
+	endm
+
+;NextStackElem:  Given pST(0) = [CURstk] in reg, returns pST(1)
+;Requires NextStackWrap macro with same arguments
+;
+;	reg       - register holding the element pointer; updated in place
+;	stackwrap - suffix used to form the Wrap/Cont label pair
+;
+;If reg is at or above INITstk, control leaves through Wrap<stackwrap>;
+;otherwise Reg87Len is added.  Flags are modified by the cmp/add.
+
+NextStackElem	macro	reg,stackwrap
+	cmp	reg,INITstk			;JWM
+	jae	Wrap&stackwrap
+	add	reg,Reg87Len
+Cont&stackwrap:
+	endm
+
+;NextStackWrap:  out-of-line wrap handler paired with NextStackElem.
+;Loads reg with BEGstk and rejoins at Cont<stackwrap>.
+
+NextStackWrap	macro	reg,stackwrap
+Wrap&stackwrap:
+	mov	reg,BEGstk			;JWM
+	jmp	Cont&stackwrap
+	endm
+
+
+;PrevStackElem:  Given pST(0) = [CURstk] in reg, returns new pST(0)
+;after a push onto the stack.
+;Requires PrevStackWrap macro with same arguments
+;
+;	reg       - register holding the element pointer; updated in place
+;	stackwrap - suffix used to form the Wrap/Cont label pair
+;
+;If reg is at or below BEGstk, control leaves through Wrap<stackwrap>;
+;otherwise Reg87Len is subtracted.  Flags are modified by the cmp/sub.
+
+PrevStackElem	macro	reg,stackwrap
+	cmp	reg,BEGstk			;JWM
+	jbe	Wrap&stackwrap
+	sub	reg,Reg87Len
+Cont&stackwrap:
+	endm
+
+;PrevStackWrap:  out-of-line wrap handler paired with PrevStackElem.
+;Loads reg with INITstk and rejoins at Cont<stackwrap>.
+
+PrevStackWrap	macro	reg,stackwrap
+Wrap&stackwrap:
+	mov	reg,INITstk			;JWM
+	jmp	Cont&stackwrap
+	endm
diff --git a/private/ntos/dll/i386/emstore.asm b/private/ntos/dll/i386/emstore.asm
new file mode 100644
index 000000000..aadeb6520
--- /dev/null
+++ b/private/ntos/dll/i386/emstore.asm
@@ -0,0 +1,803 @@
+        subttl  emstore.asm - FST, FSTP, FIST, FISTP instructions
+        page
+;*******************************************************************************
+;emstore.asm - FST, FSTP, FIST, FISTP instructions
+;
+;        Microsoft Confidential
+;
+;	 Copyright (c) Microsoft Corporation 1991
+;        All Rights Reserved
+;
+;Purpose:
+;       FST, FSTP, FIST, FISTP instructions
+;Inputs:
+;	edi = [CURstk]
+;	dseg:esi = pointer to memory destination
+;
+;Revision History:
+;
+; []	09/05/91  TP	Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+;******
+EM_ENTRY eFSTP
+eFSTP:
+;******
+;	edi = [CURstk]
+;	esi = pointer to st(i) from instruction field
+;
+;Copies st(0) into st(i) and pops the stack.  If st(0) is empty,
+;Invalid+StackFlag is stored into [CURerr] and nothing is copied or popped.
+
+	cmp	EMSEG:[edi].bTag,bTAG_EMPTY
+        jz      short efstp_StackError
+;UNDONE: temporary hack to preserve condition codes
+        mov     ax,[esp+4].OldStatus
+        mov     EMSEG:[StatusWord],ax
+;UNDONE: end of hack
+
+;A common use of this instruction is FSTP st(0) just to pop the stack.
+;We check for this case and optimize it.
+        cmp     esi,edi
+        jz      short JustPop
+;Copy the register
+        mov     eax,EMSEG:[edi].ExpSgn
+        mov     EMSEG:[esi].ExpSgn,eax
+        mov     eax,EMSEG:[edi].lManHi
+        mov     EMSEG:[esi].lManHi,eax
+        mov     eax,EMSEG:[edi].lManLo
+        mov     EMSEG:[esi].lManLo,eax
+JustPop:
+	POPSTret	edi		;Tags [edi] empty, advances CURstk, returns
+
+efstp_StackError:
+	mov	EMSEG:[CURerr],Invalid+StackFlag
+	ret
+
+
+;******
+EM_ENTRY eFST
+eFST:
+;******
+;	edi = [CURstk]
+;	esi = pointer to st(i) from instruction field
+;
+;Duplicates st(0) into st(i) without popping.  An empty st(0) is handed
+;to StackError.  The DontPop label on the final RET is shared with
+;PopStackChk.
+
+	cmp	EMSEG:[edi].bTag,bTAG_EMPTY
+	jz	StackError		;In emarith.asm
+;Transfer the three dwords of the register image through eax
+        mov     eax,EMSEG:[edi].lManHi  ;high mantissa dword
+        mov     EMSEG:[esi].lManHi,eax
+        mov     eax,EMSEG:[edi].ExpSgn  ;exponent/sign/tag dword
+        mov     EMSEG:[esi].ExpSgn,eax
+        mov     eax,EMSEG:[edi].lManLo  ;low mantissa dword (left in eax)
+        mov     EMSEG:[esi].lManLo,eax
+DontPop:
+	ret
+
+
+;Come here if the instruction wants to pop the stack
+;
+;The eFSTP*/eFISTP* entry points push the address of PopStackChk so that
+;the RET at the end of the store lands here.  The store routines return
+;with CY set when nothing should be popped and CY clear on success.
+
+PopStackChk:
+	jc	DontPop			;Get unmasked error?
+PopStack:
+	mov	edi,EMSEG:[CURstk]	;Store routines overwrite edi; reload it
+	POPSTret	edi
+
+
+;StoreSpcl64: special-tag handling for the 64-bit real store.
+;Reached from eFST64 when the tag in cl compares above bTAG_ZERO.
+;	Denormal  -> Denorm64
+;	below NAN -> stored through StoreIEEE64 with exponent DexpMax
+;	NAN       -> a NAN with bit 30 of ebx clear gets the bit set and
+;		     flags Invalid; if Invalid is unmasked, return with
+;		     CY set and memory untouched
+;	above NAN -> Empty64
+StoreSpcl64:
+	cmp	cl,bTAG_DEN
+	jz	Denorm64
+.erre	bTAG_NAN lt bTAG_EMPTY
+.erre	bTAG_NAN gt bTAG_INF
+	cmp	cl,bTAG_NAN
+	mov	ecx,DexpMax shl 16	;Insert special exponent for NAN/Inf.
+	jb	StoreIEEE64		;Go handle infinity
+	ja	Empty64
+;Have a NAN.
+	test	ebx,1 shl 30		;Check for SNAN
+	jnz	StoreIEEE64		;Go store QNAN
+	or	ebx,1 shl 30		;Make SNAN into a QNAN
+	mov	EMSEG:[CURerr],Invalid	;Flag the exception
+	test	EMSEG:[CWmask],Invalid	;Is it masked?
+	jnz	StoreIEEE64		;If so, update with masked response
+	stc				;Don't pop stack
+	ret
+
+Empty64:
+;It's empty--signal invalid operation
+	mov	EMSEG:[CURerr],StackFlag+Invalid
+	test	EMSEG:[CWmask],Invalid	;Is it masked?
+	jz	DoNothing64		;No--leave memory unchanged
+;Store Indefinite
+;For FSTP, we go ahead and do the pop even though it's empty
+	mov	dword ptr dseg:[esi],0
+	mov	dword ptr dseg:[esi+4],0FFF80000H	;64-bit IEEE indefinite
+	ret				;CY clear
+
+Denorm64:
+	mov	EMSEG:[CURerr],Denormal
+	test	EMSEG:[CWmask],Denormal	;Is it masked?
+	jnz	NormStore64		;If so, ignore denormalization
+DoNothing64:
+	stc				;Don't pop stack
+	ret
+
+;*****************
+;Store Double Real
+;*****************
+;
+;Entry:	edi = [CURstk], dseg:esi = 8-byte memory destination.
+;Exit:	CY clear if a value was stored, CY set if nothing was done.
+;eFSTP64 pushes PopStackChk as an extra return address and falls into
+;eFST64, so the final RET of the store continues at PopStackChk.
+
+EM_ENTRY eFSTP64
+eFSTP64:
+	push	offset PopStackChk	;Return here after store
+
+EM_ENTRY eFST64
+eFST64:
+        mov     EMSEG:[PrevDataOff],esi       ;Save operand pointer
+	mov	ebx,EMSEG:[edi].lManHi
+	mov	ecx,EMSEG:[edi].ExpSgn
+	mov	edi,EMSEG:[edi].lManLo
+;mantissa in ebx:edi, exponent in high ecx, sign in ch bit 7, tag in cl
+;memory destination is dseg:esi
+	mov	al,ch			;Save sign bit
+	cmp	cl,bTAG_ZERO
+.erre	bTAG_VALID lt bTAG_ZERO
+.erre	bTAG_SNGL lt bTAG_ZERO
+        jz      short SignAndStore64    ;Just set sign and exit
+        ja      StoreSpcl64
+NormStore64:
+;Note that we could have a denormal exception at this point.
+;Thus any additional exceptions must OR into [CURerr], not MOV.
+	xor	cx,cx			;Drop tag and sign, keep exponent in high word
+	add	ecx,(DexpBias-TexpBias) shl 16	;Correct bias
+        jl      short Under64
+        cmp     ecx,DexpMax shl 16      ;Exponent too big?
+        jge     Over64
+	test	edi,(1 shl 11) - 1	;Any bits to round?
+        jz      short StoreIEEE64
+
+Round64:
+	or	EMSEG:[CURerr],Precision 	;Set flag on inexact result
+	test	EMSEG:[CWcntl],RoundControl	;Check rounding control bits
+.erre	RCnear eq 0
+        jnz     NotNearest64            ;Not just round-to-nearest
+	test	edi,1 shl 10		;Check rounding bit
+        jz      short StoreIEEE64       ;If zero, don't round up
+	test	edi,(3 shl 10)-1	;Test LSB and sticky bits
+        jnz     RoundUp64b
+
+StoreIEEE64:
+        or      ecx, ecx                ;now that value is rounded,
+        je      short Under64           ;check exponent for underflow
+
+StoreIEEE64Continue:
+;Pack ebx:edi and the exponent in high ecx into the 64-bit IEEE layout
+	and	ebx,not (1 shl 31)	;Clear MSB--it's implied in IEEE64
+	shrd	edi,ebx,11
+	shr	ebx,11			;Move mantissa down
+	shl	ecx,4			;Exponent up to position
+	or	ebx,ecx			;Combine exponent
+SignAndStore64:
+	and	al,bSign		;Just sign bit
+	shl	eax,24			;Sign to MSB
+	or	ebx,eax			;Combine sign
+	mov	dseg:[esi],edi
+	mov	dseg:[esi+4],ebx
+;CY clear indicate no error
+	ret
+
+;SetUnderflow / DoNothing: shared exits that return with CY set.
+;SetUnderflow is also used by the 32-bit store (Under32).
+SetUnderflow:
+	or	EMSEG:[CURerr],Underflow	;Unmasked underflow--do nothing
+DoNothing:
+	stc				;Indicate nothing was done
+	ret
+
+;Under64: biased exponent is zero or negative.
+;Entry:	mantissa in ebx:edi, exponent in high ecx, sign in al.
+;If underflow is unmasked, flag it and store nothing.  Otherwise shift
+;the mantissa right to form a denormal (collapsing shifted-out bits into
+;a sticky bit in edi) and rejoin the rounding/store path.
+;NOTE(review): the denormal path leaves ecx = 1 and re-enters at
+;StoreIEEE64 or Round64.  StoreIEEE64 only branches back here when ecx
+;is zero, so the "dec cl / jz" test below does not appear to be reached
+;with cl == 1, and ecx = 1 would be shifted and ORed into ebx at
+;StoreIEEE64Continue -- confirm intended behavior.
+Under64:
+        dec     cl                      ; Is cx == 1?
+        jz      short StoreIEEE64Continue   ; Yes, we've already been here
+
+	test	EMSEG:[CWmask],Underflow	;Is underflow masked?
+	jz	SetUnderflow		;No, do nothing more
+;Produce masked underflow response
+;Note that the underflow exception does not occur if the number can be
+;represented exactly as a denormal.
+
+	sar	ecx,16			;Bring exponent down
+	cmp	ecx,DexpMin-52	;Allow for shift down to rounding bit
+	jl	BigUnder64		;Too small, just make it zero
+.erre	DexpMin eq 0
+	neg	ecx			;Use as shift count
+	inc	ecx			;Shift by at least one
+	xor	edx,edx			;Place for sticky bits
+	cmp	cl,32			;Long shift?
+	jb	ShortDenorm
+	neg	edi			;CY set if non-zero
+	sbb	edx,edx			;-1 if bits shifted off, else zero
+	mov	edi,ebx
+	xor	ebx,ebx			;32-bit right shift
+ShortDenorm:
+;Shift count is modulo-32
+	shrd	edx,edi,cl
+	shrd	edi,ebx,cl
+	shr	ebx,cl
+	cmp	edx,1			;CY set if zero, else clear
+	sbb	edx,edx			;Zero if bits shifted off, else -1
+	inc	edx			;1 if bits shifted off, else zero
+	or	edi,edx			;Collapse sticky bits into edi
+
+        mov     ecx, 1                  ;Biased exponent is zero, put 1 into CL (noticed by Under64)
+	test	edi,(1 shl 11) - 1	;Any bits to round?
+	jz	StoreIEEE64		;If not, no exception
+	or	EMSEG:[CURerr],Underflow
+	jmp	Round64
+
+;Over64: biased exponent reached DexpMax.
+;If overflow is unmasked, flag it and store nothing (CY set).  Otherwise
+;flag Overflow+Precision and store either infinity or the value just
+;below it, chosen from the rounding control and the sign in al.
+Over64:
+	test	EMSEG:[CWmask],Overflow	;Is overflow masked?
+	jz	SetOverflow		;No, do nothing more
+;Produce masked overflow response
+	or	EMSEG:[CURerr],Overflow+Precision
+	mov	ebx,DexpMax shl 20
+	xor	edi,edi			;ebx:edi = positive infinity
+	mov	ah,EMSEG:[CWcntl]	;Get rounding control
+;Return max value if RCup bit = 1 and -, or RCdown bit = 1 and +
+;i.e., RCup & sign OR RCdown & not sign
+.erre	RCchop eq RCup + RCdown		;Always return max value
+.erre	RCnear eq 0			;Never return max value
+	sar	al,7			;Expand sign through whole byte
+.erre	(RCdown and bSign) eq 0		;Don't want to change real sign
+	xor	al,RCdown		;Flip sign for RCdown bit
+	and	ah,al			;RCup & sign  OR  RCdown & not sign
+	test	ah,RoundControl		;Look only at RC bits
+	jz	SignAndStore64		;Return infinity
+	dec	ebx
+	dec	edi			;Max value == infinity-1
+	jmp	SignAndStore64
+
+;SetOverflow: unmasked overflow exit, shared with the 32-bit store.
+SetOverflow:
+	or	EMSEG:[CURerr],Overflow
+	stc				;Indicate nothing was done
+	ret
+
+;BigUnder64: value too small even for a denormal.  Start from zero and
+;fall into the directed-rounding test below.
+BigUnder64:
+	or	EMSEG:[CURerr],Underflow+Precision
+	xor	ebx,ebx
+	mov	edi,ebx			;Set it to zero
+	mov	ecx,ebx			;Including exponent
+NotNearest64:
+;We want to increase the magnitude if RCup and +, or RCdown and -
+	mov	ah,EMSEG:[CWcntl]		;Get rounding control
+	sar	al,7			;Expand sign through whole byte
+.erre	(not RCup and RoundControl) eq RCdown
+	xor	ah,al			;Flip rounding bits if negative
+	and	ah,RoundControl
+	cmp	ah,RCup
+        jnz     StoreIEEE64             ;No, chop it
+
+RoundUp64b:
+;Add one unit at bit 11 of edi (the lowest bit kept after the 11-bit
+;shift in StoreIEEE64Continue) and propagate the carry.
+        mov     EMSEG:[SWcc],RoundUp
+	add	edi,1 shl 11		;Round up
+	adc	ebx,0
+        jnc     StoreIEEE64
+
+	add	ecx,1 shl 16		;Mantissa overflowed, bump exponent
+        cmp     ecx,DexpMax shl 16      ;Exponent too big?
+        jge     Over64
+        jmp     StoreIEEE64
+
+;*******************************************************************************
+
+;StoreSpcl32: special-tag handling for the 32-bit real store.
+;Reached from eFST32 when the tag in cl compares above bTAG_ZERO.
+;	Denormal  -> Denorm32
+;	below NAN -> stored through StoreIEEE32 with exponent SexpMax
+;	NAN       -> a NAN with bit 30 of ebx clear gets the bit set and
+;		     flags Invalid; if Invalid is unmasked, return with
+;		     CY set and memory untouched
+;	above NAN -> Empty32
+StoreSpcl32:
+	cmp	cl,bTAG_DEN
+	jz	Denorm32
+.erre	bTAG_NAN lt bTAG_EMPTY
+.erre	bTAG_NAN gt bTAG_INF
+	cmp	cl,bTAG_NAN
+	mov	ecx,SexpMax shl 16	;Insert special exponent
+	jb	StoreIEEE32
+;The empty case must use the 32-bit handler: Empty64 writes two dwords
+;(the 64-bit indefinite) and would overrun the 4-byte destination.
+	ja	Empty32
+;Have a NAN.
+	test	ebx,1 shl 30		;Check for SNAN
+	jnz	StoreIEEE32		;Go store QNAN
+	or	ebx,1 shl 30		;Make SNAN into a QNAN
+	mov	EMSEG:[CURerr],Invalid	;Flag the exception
+	test	EMSEG:[CWmask],Invalid	;Is it masked?
+	jnz	StoreIEEE32		;If so, update with masked response
+	stc				;Don't pop stack
+	ret
+
+Empty32:
+;It's empty--signal invalid operation
+	mov	EMSEG:[CURerr],StackFlag+Invalid
+	test	EMSEG:[CWmask],Invalid	;Is it masked?
+	jz	DoNothing32		;No--leave memory unchanged
+;Store Indefinite
+;For FSTP, we go ahead and do the pop even though it's empty
+	mov	dword ptr dseg:[esi],0FFC00000H	;32-bit IEEE indefinite
+	ret				;CY clear
+
+Denorm32:
+	mov	EMSEG:[CURerr],Denormal
+	test	EMSEG:[CWmask],Denormal	;Is it masked?
+	jnz	NormStore32		;If so, ignore denormalization
+DoNothing32:
+	stc				;Don't pop stack
+	ret
+
+;*****************
+;Store Single Real
+;*****************
+;
+;Entry:	edi = [CURstk], dseg:esi = 4-byte memory destination.
+;Exit:	CY clear if a value was stored, CY set if nothing was done.
+;eFSTP32 pushes PopStackChk as an extra return address and falls into
+;eFST32, so the final RET of the store continues at PopStackChk.
+
+EM_ENTRY eFSTP32
+eFSTP32:
+	push	offset PopStackChk	;Return here after store
+
+EM_ENTRY eFST32
+eFST32:
+        mov     EMSEG:[PrevDataOff],esi       ;Save operand pointer
+	mov	ebx,EMSEG:[edi].lManHi
+	mov	ecx,EMSEG:[edi].ExpSgn
+	mov	edi,EMSEG:[edi].lManLo
+;mantissa in ebx:edi, exponent in high ecx, sign in ch bit 7, tag in cl
+;memory destination is dseg:esi
+	mov	al,ch			;Save sign bit
+	cmp	cl,bTAG_ZERO
+.erre	bTAG_VALID lt bTAG_ZERO
+.erre	bTAG_SNGL lt bTAG_ZERO
+	jz	SignAndStore32		;Just set sign and exit
+	ja	StoreSpcl32
+NormStore32:
+;Note that we could have a denormal exception at this point.
+;Thus any additional exceptions must OR into [CURerr], not MOV.
+	xor	cx,cx			;Drop tag and sign, keep exponent in high word
+	add	ecx,(SexpBias-TexpBias) shl 16	;Correct bias
+	jle	Under32
+	cmp	ecx,SexpMax shl 16	;Exponent too big?
+	jge	Over32
+;See if we need to round
+	mov	edx,ebx			;Get low bits
+	and	edx,(1 shl 8) - 1	;Mask to last 8 bits
+	or	edx,edi			;Throwing away any bits?
+	jz	StoreIEEE32
+;Result will not be exact--check rounding mode
+Round32:
+	or	EMSEG:[CURerr],Precision 	;Set flag on inexact result
+	test	EMSEG:[CWcntl],RoundControl	;Check rounding control bits
+.erre	RCnear eq 0
+	jnz	NotNearest32		;Not just round-to-nearest
+	test	bl,1 shl 7		;Round bit set?
+	jz	StoreIEEE32
+	mov	edx,ebx
+	and	edx,(3 shl 7)-1		;Mask to LSB and sticky bits
+	or	edx,edi			;Combine with remaining sticky bits
+	jz	StoreIEEE32
+	mov	EMSEG:[SWcc],RoundUp
+	add	ebx,1 shl 8		;Round up
+	jc	AddOneExp32
+StoreIEEE32:
+;Pack ebx and the exponent in high ecx into the 32-bit IEEE layout
+	and	ebx,not (1 shl 31)	;Clear MSB--it's implied in IEEE32
+	shr	ebx,8			;Move mantissa down
+	shl	ecx,7			;Exponent up to position
+	or	ebx,ecx			;Combine exponent
+SignAndStore32:
+	and	al,bSign		;Just sign bit
+	shl	eax,24			;Sign to MSB
+	or	ebx,eax			;Combine sign
+	mov	dseg:[esi],ebx
+;CY clear indicate no error
+	ret
+
+;Under32: biased exponent is zero or negative.
+;Entry:	mantissa in ebx:edi, exponent in high ecx, sign in al.
+;If underflow is unmasked, flag it and store nothing.  Otherwise shift
+;ebx right to form a denormal, fold the shifted-out bits into edi as
+;sticky bits, and rejoin the rounding/store path with exponent zero.
+Under32:
+	test	EMSEG:[CWmask],Underflow	;Is underflow masked?
+	jz	SetUnderflow		;No, do nothing more
+;Produce masked underflow response
+;Note that the underflow exception does not occur if the number can be
+;represented exactly as a denormal.
+	sar	ecx,16			;Bring exponent down
+	cmp	ecx,SexpMin-23	;Allow for shift down to rounding bit
+	jl	BigUnder32		;Too small, just make it zero
+.erre	SexpMin eq 0
+	neg	ecx			;Use as shift count
+	inc	ecx			;Shift by at least one
+	xor	edx,edx			;Place for sticky bits
+	shrd	edx,ebx,cl
+	shr	ebx,cl
+	xor	ecx,ecx			;Biased exponent is zero
+	or	edi,edx			;Combine sticky bits
+	mov	edx,ebx			;Get low bits
+	and	edx,(1 shl 8) - 1	;Mask to last 8 bits
+	or	edx,edi			;Throwing away any bits?
+	jz	StoreIEEE32
+	or	EMSEG:[CURerr],Underflow
+	jmp	Round32
+
+;AddOneExp32: rounding carried out of ebx.  Bump the exponent and fall
+;into the overflow handling if it reached SexpMax.
+AddOneExp32:
+	add	ecx,1 shl 16		;Mantissa overflowed, bump exponent
+	cmp	ecx,SexpMax shl 16	;Exponent too big?
+	jl	StoreIEEE32
+Over32:
+	test	EMSEG:[CWmask],Overflow	;Is overflow masked?
+	jz	SetOverflow		;No, do nothing more
+;Produce masked overflow response
+	or	EMSEG:[CURerr],Overflow+Precision
+	mov	ebx,SexpMax shl 23
+	mov	ah,EMSEG:[CWcntl]		;Get rounding control
+;Return max value if RCup bit = 1 and -, or RCdown bit = 1 and +
+;i.e., RCup & sign OR RCdown & not sign
+.erre	RCchop eq RCup + RCdown		;Always return max value
+.erre	RCnear eq 0			;Never return max value
+	sar	al,7			;Expand sign through whole byte
+.erre	(RCdown and bSign) eq 0		;Don't want to change real sign
+	xor	al,RCdown		;Flip sign for RCdown bit
+	and	ah,al			;RCup & sign  OR  RCdown & not sign
+	test	ah,RoundControl		;Look only at RC bits
+	jz	SignAndStore32		;Return infinity
+	dec	ebx			;Max value == infinity-1
+	jmp	SignAndStore32
+
+;BigUnder32: value too small even for a denormal.  Start from zero and
+;fall into the directed-rounding test below.
+BigUnder32:
+	or	EMSEG:[CURerr],Underflow+Precision
+	xor	ebx,ebx			;Set it to zero
+	xor	ecx,ecx			;Exponent too
+NotNearest32:
+;We want to increase the magnitude if RCup and +, or RCdown and -
+	mov	ah,EMSEG:[CWcntl]		;Get rounding control
+	sar	al,7			;Expand sign through whole byte
+.erre	(not RCup and RoundControl) eq RCdown
+	xor	ah,al			;Flip rounding bits if negative
+	and	ah,RoundControl
+	cmp	ah,RCup
+	jnz	StoreIEEE32		;No, chop it
+	mov	EMSEG:[SWcc],RoundUp
+	add	ebx,1 shl 8		;Round up
+	jnc	StoreIEEE32
+	jmp	AddOneExp32
+
+;*******************************************************************************
+
+;StoreSpcl32Int: special-tag handling for the 32-bit integer store.
+;Reached from eFIST32 when the tag in cl compares above bTAG_ZERO.
+;Denormals are converted like normal values; an empty register flags
+;StackFlag+Invalid; every other special tag is treated as overflow.
+StoreSpcl32Int:
+	cmp	cl,bTAG_DEN
+	jz	NormStore32Int		;Ignore denormal
+	cmp	cl,bTAG_EMPTY
+	jnz	Over32Int		;All other specials are invalid
+	mov	EMSEG:[CURerr],StackFlag+Invalid
+	jmp	Invalid32Int
+
+DoNothing32Int:
+	stc				;Don't pop stack
+	ret
+
+;CheckMax32: entered from StoreIEEE32Int with the flags of
+;"cmp ebx,1 shl 31" still live.  Only exactly 80000000h with a negative
+;sign is representable; it is stored as-is.
+CheckMax32:
+	ja	Over32Int
+	test	al,bSign		;Is it negative?
+	jnz	Store32Int		;If so, answer is OK
+Over32Int:
+;Overflow on integer store is invalid according to IEEE
+	mov	EMSEG:[CURerr],Invalid	;Must remove precision exception
+Invalid32Int:
+	test	EMSEG:[CWmask],Invalid	;Is it masked?
+	jz	DoNothing32Int		;No--leave memory unchanged
+;Store Indefinite
+;For FSTP, we go ahead and do the pop even though it's empty
+	mov	dword ptr dseg:[esi],80000000H	;32-bit integer indefinite
+	ret				;CY clear
+
+;******************
+;Store Long Integer
+;******************
+;
+;Entry:	edi = [CURstk], dseg:esi = 4-byte memory destination.
+;Exit:	CY clear if a value was stored, CY set if nothing was done.
+;eFISTP32 pushes PopStackChk as an extra return address and falls into
+;eFIST32, so the final RET of the store continues at PopStackChk.
+
+EM_ENTRY eFISTP32
+eFISTP32:
+	push	offset PopStackChk	;Return here after store
+
+EM_ENTRY eFIST32
+eFIST32:
+        mov     EMSEG:[PrevDataOff],esi       ;Save operand pointer
+	mov	ebx,EMSEG:[edi].lManHi
+	mov	ecx,EMSEG:[edi].ExpSgn
+	mov	edi,EMSEG:[edi].lManLo
+;mantissa in ebx:edi, exponent in high ecx, sign in ch bit 7, tag in cl
+;memory destination is dseg:esi
+	mov	al,ch			;Save sign bit
+	cmp	cl,bTAG_ZERO
+.erre	bTAG_VALID lt bTAG_ZERO
+.erre	bTAG_SNGL lt bTAG_ZERO
+	jz	Store32Int		;Just store zero and exit
+	ja	StoreSpcl32Int
+NormStore32Int:
+	xor	edx,edx
+	sar	ecx,16			;Bring exponent down
+	cmp	ecx,-1			;Is it less than 1?
+	jle	Under32Int
+	cmp	ecx,31
+	jg	Over32Int
+	sub	ecx,31
+	neg	ecx			;cl = amount to shift right
+;After the shifts: ebx = integer part, edi:edx = discarded fraction bits
+	shrd	edx,edi,cl
+	shrd	edi,ebx,cl		;Collect round and sticky bits
+	shr	ebx,cl			;Align integer
+;See if we need to round
+	mov	ecx,edi
+	or	ecx,edx			;Throwing away any bits?
+	jz	StoreIEEE32Int
+;Result will not be exact--check rounding mode
+Round32Int:
+	mov	EMSEG:[CURerr],Precision 	;Set flag on inexact result
+	test	EMSEG:[CWcntl],RoundControl	;Check rounding control bits
+.erre	RCnear eq 0
+	jnz	NotNearest32Int		;Not just round-to-nearest
+
+;To perform "round even" when the round bit is set and the sticky bits
+;are zero, we treat the LSB as if it were a sticky bit.  Thus if the LSB
+;is set, that will always force a round up (to even) if the round bit is
+;set.  If the LSB is zero, then the sticky bits remain zero and we always
+;round down.
+
+	bt	ebx,0			;Look at LSB (for round even)
+	adc	edx,-1			;CY set if sticky bits <>0
+	adc	edi,(1 shl 31)-1	;CY set if round up
+	jnc	StoreIEEE32Int
+	mov	EMSEG:[SWcc],RoundUp
+	inc	ebx
+	jz	Over32Int		;Wrapped to zero: magnitude too large
+StoreIEEE32Int:
+	cmp	ebx,1 shl 31		;Check for max value
+	jae	CheckMax32
+SignAndStore32Int:
+	shl	eax,24			;Sign to MSB
+	cdq				;Extend sign through edx
+	xor	ebx,edx			;Complement
+	sub	ebx,edx			;  and increment if negative
+	clc
+Store32Int:
+	mov	dseg:[esi],ebx
+;CY clear indicates no error
+	ret
+
+;Under32Int: exponent is -1 or less, so the integer part is zero.
+;Entered from NormStore32Int with the flags of "cmp ecx,-1" still live
+;(XCHG does not alter flags) and edx = 0.  The two exchanges move
+;ebx:edi down into edi:edx, leaving ebx = 0.
+Under32Int:
+;ZF set if exponent is -1
+	xchg	edx,edi			;32-bit right shift
+	xchg	edi,ebx			;ebx = 0 now
+	jz	Round32Int		;If exponent was -1, ready to round
+	mov	EMSEG:[CURerr],Precision 	;Set flag on inexact result
+NotNearest32Int:
+;We want to increase the magnitude if RCup and +, or RCdown and -
+	mov	ah,EMSEG:[CWcntl]		;Get rounding control
+	sar	al,7			;Expand sign through whole byte
+.erre	(not RCup and RoundControl) eq RCdown
+	xor	ah,al			;Flip rounding bits if negative
+	and	ah,RoundControl
+	cmp	ah,RCup			;Rounding up?
+	jnz	StoreIEEE32Int		;No, chop it
+	mov	EMSEG:[SWcc],RoundUp
+	inc	ebx
+;INC does not write CF, so a carry test here would only see the CF left
+;by the CMP above (always clear on this path) and a wrap of ebx to zero
+;would go unnoticed.  Test ZF instead, as Round32Int does.
+	jnz	StoreIEEE32Int
+	jmp	Over32Int
+
+;*******************************************************************************
+
+;StoreSpcl16Int: special-tag handling for the 16-bit integer store.
+;Reached from eFIST16 when the tag in cl compares above bTAG_ZERO.
+;Denormals are converted like normal values; an empty register flags
+;StackFlag+Invalid; every other special tag is treated as overflow.
+StoreSpcl16Int:
+	cmp	cl,bTAG_DEN
+	jz	NormStore16Int		;Ignore denormal
+	cmp	cl,bTAG_EMPTY
+	jnz	Over16Int		;All other specials are invalid
+	mov	EMSEG:[CURerr],StackFlag+Invalid
+	jmp	Invalid16Int
+
+DoNothing16Int:
+	stc				;Don't pop stack
+	ret
+
+;CheckMax16: entered from StoreIEEE16Int with the flags of
+;"cmp ebx,1 shl 15" still live.  Only exactly 8000h with a negative
+;sign is representable; it is stored as-is.
+CheckMax16:
+	ja	Over16Int
+	test	al,bSign		;Is it negative?
+	jnz	Store16Int		;If so, answer is OK
+Over16Int:
+;Overflow on integer store is invalid according to IEEE
+	mov	EMSEG:[CURerr],Invalid
+Invalid16Int:
+	test	EMSEG:[CWmask],Invalid	;Is it masked?
+	jz	DoNothing16Int		;No--leave memory unchanged
+;Store Indefinite
+;For FSTP, we go ahead and do the pop even though it's empty
+	mov	word ptr dseg:[esi],8000H	;16-bit integer indefinite
+	ret				;CY clear
+
+;*******************
+;Store Short Integer
+;*******************
+;
+;Entry:	edi = [CURstk], dseg:esi = 2-byte memory destination.
+;Exit:	CY clear if a value was stored, CY set if nothing was done.
+;eFISTP16 pushes PopStackChk as an extra return address and falls into
+;eFIST16, so the final RET of the store continues at PopStackChk.
+
+EM_ENTRY eFISTP16
+eFISTP16:
+	push	offset PopStackChk	;Return here after store
+
+EM_ENTRY eFIST16
+eFIST16:
+        mov     EMSEG:[PrevDataOff],esi       ;Save operand pointer
+	mov	ebx,EMSEG:[edi].lManHi
+	mov	ecx,EMSEG:[edi].ExpSgn
+	mov	edi,EMSEG:[edi].lManLo
+;mantissa in ebx:edi, exponent in high ecx, sign in ch bit 7, tag in cl
+;memory destination is dseg:esi
+	mov	al,ch			;Save sign bit
+	cmp	cl,bTAG_ZERO
+.erre	bTAG_VALID lt bTAG_ZERO
+.erre	bTAG_SNGL lt bTAG_ZERO
+	jz	Store16Int		;Just store zero and exit
+	ja	StoreSpcl16Int
+NormStore16Int:
+	xor	edx,edx
+	sar	ecx,16			;Bring exponent down
+	cmp	ecx,-1			;Is it less than 1?
+	jle	Under16Int
+	cmp	ecx,15
+	jg	Over16Int
+	sub	ecx,31
+	neg	ecx			;cl = amount to shift right
+;After the shifts: ebx = integer part, edi:edx = discarded fraction bits
+	shrd	edx,edi,cl
+	shrd	edi,ebx,cl		;Collect round and sticky bits
+	shr	ebx,cl			;Align integer
+;See if we need to round
+	mov	ecx,edi
+	or	ecx,edx			;Throwing away any bits?
+	jz	StoreIEEE16Int
+;Result will not be exact--check rounding mode
+Round16Int:
+	mov	EMSEG:[CURerr],Precision 	;Set flag on inexact result
+	test	EMSEG:[CWcntl],RoundControl	;Check rounding control bits
+.erre	RCnear eq 0
+	jnz	NotNearest16Int		;Not just round-to-nearest
+
+;To perform "round even" when the round bit is set and the sticky bits
+;are zero, we treat the LSB as if it were a sticky bit.  Thus if the LSB
+;is set, that will always force a round up (to even) if the round bit is
+;set.  If the LSB is zero, then the sticky bits remain zero and we always
+;round down.
+
+	bt	ebx,0			;Look at LSB (for round even)
+	adc	edx,-1			;CY set if sticky bits <>0
+	adc	edi,(1 shl 31)-1	;CY set if round up
+	jnc	StoreIEEE16Int
+	mov	EMSEG:[SWcc],RoundUp
+	inc	ebx			;Range check follows at StoreIEEE16Int
+StoreIEEE16Int:
+	cmp	ebx,1 shl 15		;Check for max value
+	jae	CheckMax16
+SignAndStore16Int:
+	shl	eax,24			;Sign to MSB
+	cdq				;Extend sign through edx
+	xor	ebx,edx			;Complement
+	sub	ebx,edx			;  and increment if negative
+	clc
+Store16Int:
+	mov	dseg:[esi],bx
+;CY clear indicates no error
+	ret
+
+Under16Int:
+;ZF set if exponent is -1
+	xchg	edx,edi			;16-bit right shift
+	xchg	edi,ebx			;ebx = 0 now
+	jz	Round16Int		;If exponent was -1, ready to round
+	mov	EMSEG:[CURerr],Precision 	;Set flag on inexact result
+NotNearest16Int:
+;We want to increase the magnitude if RCup and +, or RCdown and -
+	mov	ah,EMSEG:[CWcntl]		;Get rounding control
+	sar	al,7			;Expand sign through whole byte
+.erre	(not RCup and RoundControl) eq RCdown
+	xor	ah,al			;Flip rounding bits if negative
+	and	ah,RoundControl
+	cmp	ah,RCup			;Rounding up?
+	jnz	StoreIEEE16Int		;No, chop it
+	mov	EMSEG:[SWcc],RoundUp
+	inc	ebx
+	jnc	StoreIEEE16Int
+	jmp	Over16Int
+
+;*******************************************************************************
+
+;******************
+;Store Quad Integer
+;******************
+
+EM_ENTRY eFISTP64
+eFISTP64:
+;Store ST(0) as a 64-bit signed integer at dseg:esi and pop the stack.
+;RoundToInteger (defined elsewhere) delivers the rounded magnitude in
+;ebx:edi; a CY return from it is treated as an invalid operation.
+        mov     EMSEG:[PrevDataOff],esi       ;Save operand pointer
+	call	RoundToInteger
+	jc	Invalid64Int
+;Have integer in ebx:edi
+;Sign in ch
+	cmp	ebx,1 shl 31		;Check for max value
+	jae	CheckMax64
+	or	ch,ch			;Check sign
+	jns	Store64Int
+;64-bit negation
+	not	ebx
+	neg	edi
+	sbb	ebx,-1
+Store64Int:
+	mov	dseg:[esi],edi
+	mov	dseg:[esi+4],ebx
+	jmp	PopStack
+
+CheckMax64:
+;Entered with the flags of  cmp ebx,1 shl 31  (magnitude >= 2^63).
+;The only such value that fits is -2^63, i.e. magnitude exactly
+;8000 0000:0000 0000H with a negative sign.
+	ja	Over64Int
+;High dword is exactly 80000000H.  Any bit set in the low dword makes
+;the magnitude larger than 2^63, which overflows for either sign.
+	or	edi,edi			;Low dword must be zero
+	jnz	Over64Int
+;NOTE(review): the sign is tested in al here although the normal path
+;above tests ch -- confirm RoundToInteger also leaves the sign in al.
+	test	al,bSign		;Is it negative?
+;ebx:edi is already the two's complement form of -2^63; store as is
+	jnz	Store64Int		;If so, answer is OK
+Over64Int:
+;Overflow on integer store is invalid according to IEEE
+	mov	EMSEG:[CURerr],Invalid
+Invalid64Int:
+	test	EMSEG:[CWmask],Invalid	;Is it masked?
+	jz	DoNothing80		;No--leave memory unchanged
+;Store Indefinite
+;For FSTP, we go ahead and do the pop even though it's empty
+	mov	dword ptr dseg:[esi],0
+	mov	dword ptr dseg:[esi+4],80000000H	;64-bit integer indefinite
+	jmp	PopStack
+
+;*******************************************************************************
+
+Empty80:
+;Reached from eFSTP80 when the source stack element is tagged empty.
+;It's empty--signal invalid operation
+	mov	EMSEG:[CURerr],StackFlag+Invalid
+	test	EMSEG:[CWmask],Invalid	;Is it masked?
+	jz	DoNothing80		;No--leave memory unchanged
+;Store Indefinite
+;For FSTP, we go ahead and do the pop even though it's empty
+;The ten bytes written are mantissa C000 0000:0000 0000H followed by
+;the sign/exponent word FFFFH.
+	mov	dword ptr dseg:[esi],0
+	mov	dword ptr dseg:[esi+4],0C0000000H
+	mov	word ptr dseg:[esi+8],0FFFFH	;80-bit IEEE indefinite
+	jmp	PopStack
+
+DoNothing80:
+;Unmasked invalid operation: return without storing and without going
+;through PopStack.  Also used by the 64-bit integer store.
+	ret
+
+;***************
+;Store Temp Real
+;***************
+
+EM_ENTRY eFSTP80
+eFSTP80:
+;Store the stack element at EMSEG:[edi] as a 10-byte temp real at
+;dseg:esi, then continue at PopStack.  An empty source is diverted to
+;Empty80.
+        mov     EMSEG:[PrevDataOff],esi       ;Save operand pointer
+	mov	eax,EMSEG:[edi].ExpSgn
+	cmp	al,bTAG_EMPTY
+	jz	Empty80
+
+;The ret at the end of StoreTempReal returns to PopStack
+        push    offset PopStack
+
+StoreTempReal:
+;Entry: eax = ExpSgn of the source element, edi = pointer to it,
+;dseg:esi = destination.  Ends with ret.
+	mov	ebx,EMSEG:[edi].lManHi
+	mov	edi,EMSEG:[edi].lManLo
+;mantissa in ebx:edi, exponent in high eax, sign in ah bit 7, tag in al
+;memory destination is dseg:esi
+	mov	ecx,eax			;get copy of sign and tag
+	shr	ecx,16			;Bring exponent down
+	cmp	al,bTAG_ZERO
+	jz	StoreIEEE80		;Skip bias if zero
+	add	ecx,IexpBias-TexpBias	;Correct bias
+	cmp	al,bTAG_DEN
+	jz	Denorm80
+StoreIEEE80:
+;ecx = exponent field to store, ebx:edi = mantissa to store
+	and	eax,bSign shl 8
+	or	ecx,eax			;Combine sign with exponent
+	mov	dseg:[esi],edi
+	mov	dseg:[esi+4],ebx
+	mov	dseg:[esi+8],cx
+
+;	jmp	PopStack
+        ret
+
+Denorm80:
+;Must change it to a denormal
+;Shift count is 1 - (biased exponent); the mantissa is shifted right by
+;that amount and the exponent field is stored as zero.
+	dec	ecx
+	neg	ecx			;Use as shift count
+	cmp	cl,32			;Long shift?
+	jae	LongDenorm
+	shrd	edi,ebx,cl
+	shr	ebx,cl
+	xor	ecx,ecx			;Exponent is zero
+	jmp	StoreIEEE80
+
+LongDenorm:
+;edi must be zero if we have 32 bits to shift
+;After the exchange ebx (new high dword) is therefore zero and edi holds
+;the old high dword, which gets the remaining (count - 32) bit shift.
+	xchg	ebx,edi			;32-bit right shift
+	shr	edi,cl			;shift count is modulo-32
+	xor	ecx,ecx			;Exponent is zero
+	jmp	StoreIEEE80
diff --git a/private/ntos/dll/i386/emtrig.asm b/private/ntos/dll/i386/emtrig.asm
new file mode 100644
index 000000000..53d981aca
--- /dev/null
+++ b/private/ntos/dll/i386/emtrig.asm
@@ -0,0 +1,863 @@
+	subttl	emtrig.asm - Trig functions sine, cosine, tangent
+	page
+;*******************************************************************************
+;	 Copyright (c) Microsoft Corporation 1991
+;	 All Rights Reserved
+;
+;emtrig.asm - Trig functions sine, cosine, tangent
+;	by Tim Paterson
+;
+;Purpose:
+;	FCOS, FPTAN, FSIN, FSINCOS instructions
+;Inputs:
+;	edi = [CURstk]
+;
+;Revision History:
+;
+; []	09/05/91  TP	Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+;XPi is the 66-bit value of Pi from the Intel manual
+;It is held as three dwords, high to low.  Only the top two bits of
+;XPiLo are significant (64 + 2 = 66 bits); the reduction code relies on
+;XPiLo being exactly 3 shl 30.  The same mantissa serves as pi/4 by
+;pairing it with the exponent PiOver4exp.
+XPiHi		equ	0C90FDAA2H
+XPiMid		equ	02168C234H
+XPiLo		equ	0C0000000H	;Extension of pi
+PiOver4exp	equ	-1		;Pi/4 ~= 3/4, so exponent is -1
+
+;Reduced arguments with an exponent below TinyAngleExp take the
+;small-angle shortcuts instead of polynomial evaluation.
+TinyAngleExp	equ	-32		;Smallest angle we bother with
+;Arguments whose exponent is MaxAngleExp or more are not reduced
+;(see TrigOutOfRange).
+MaxAngleExp	equ	63		;Angle that's too big
+
+Trig1Result:
+;Trig function reduction routine used by functions returning 1 value
+;(FSIN and FCOS)
+;edi = [CURstk] = argument pointer
+;Argument has already been checked for zero.
+;Entered by call with the flags of the caller's  cmp tag,bTAG_ZERO:
+;ZF = (tag == bTAG_ZERO), CY = (tag below bTAG_ZERO)
+;Tags below bTAG_ZERO go straight to the reduction in TrigPrem, which
+;returns to the caller.  On the special-case exits the caller's return
+;address is discarded so control goes back to whoever invoked the
+;instruction routine.
+	jb	TrigPrem
+;Tagged special
+	mov	al,EMSEG:[edi].bTAG
+	cmp	al,bTAG_DEN
+	jz	TrigDenorm
+;The stack is a flat 32-bit stack, so the full ESP must be adjusted;
+;adjusting only SP would lose the carry into the upper 16 bits.
+	add	esp,4			;Don't return to caller
+	cmp	al,bTAG_INF
+	jnz	SpclDestNotDen		;Check for Empty or NAN
+	mov	EMSEG:[SWcc],C2		;Can't reduce infinity
+	jmp	ReturnIndefinite
+
+TrigDenorm:
+;Denormal argument (also reached from Trig2Special)
+	mov	EMSEG:[CURerr],Denormal
+	test	EMSEG:[CWmask],Denormal	;Is denormal exception masked?
+	jnz	TrigPrem		;Yes, continue
+	add	esp,4			;Don't return to caller
+TrigRet:
+	ret
+
+
+;Error exits of Trig2Result (FSINCOS and FPTAN).
+;edi = argument pointer, esi = second result location.
+
+Trig2Inf:
+	mov	EMSEG:[SWcc],C2		;Can't reduce infinity
+	jmp	Trig2Indefinite
+
+Trig2StackOver:
+	mov	EMSEG:[SWcc],C1		;Signal overflow
+Trig2StackUnder:
+	mov	EMSEG:[CURerr],Invalid+StackFlag
+Trig2Indefinite:
+;The stack is a flat 32-bit stack, so the full ESP must be adjusted;
+;adjusting only SP would lose the carry into the upper 16 bits.
+	add	esp,4			;Don't return to caller
+	call	ReturnIndefinite
+	jz	TrigRet			;Unmasked, don't change registers
+;Produce masked response
+;Both result locations receive the indefinite: the call above handled
+;the one at edi, the jump below handles the newly pushed one at esi.
+	mov	EMSEG:[CURstk],esi		;Push stack
+	mov	edi,esi
+	jmp	ReturnIndefinite
+
+Trig2Special:
+;Argument of a two-result function has a tag above bTAG_ZERO.
+;al = tag, edi = argument pointer, esi = second result location.
+	cmp	al,bTAG_DEN
+	jz	TrigDenorm
+	cmp	al,bTAG_INF
+	jz	Trig2Inf
+;Must be a NAN
+;The stack is a flat 32-bit stack, so the full ESP must be adjusted;
+;adjusting only SP would lose the carry into the upper 16 bits.
+	add	esp,4			;Don't return to caller
+	call	DestNAN
+	jz	TrigRet			;Unmasked, don't change registers
+;Produce masked response
+;Copy the element at edi into the newly pushed element at esi so both
+;results hold the same value.
+	mov	EMSEG:[CURstk],esi		;Push stack
+        mov     eax,EMSEG:[edi].ExpSgn
+        mov     EMSEG:[esi].ExpSgn,eax
+        mov     eax,EMSEG:[edi].lManHi
+        mov     EMSEG:[esi].lManHi,eax
+        mov     eax,EMSEG:[edi].lManLo
+        mov     EMSEG:[esi].lManLo,eax
+	ret
+
+Trig2Zero:
+;Zero argument to a two-result function.
+;esi = second result location, which becomes the new top of stack.
+;The stack is a flat 32-bit stack, so the full ESP must be adjusted;
+;adjusting only SP would lose the carry into the upper 16 bits.
+	add	esp,4			;Don't return to caller
+	mov	EMSEG:[CURstk],esi
+	mov	edi,esi
+;Amazing coincidence: both FSINCOS and FPTAN return the same result for
+;a zero argument:
+;	FSINCOS returns ST(0) = cos(0) = 1, ST(1) = sin(0) = 0.
+;	FPTAN returns ST(0) = 1 always, ST(1) = tan(0) = 0.
+;Return zero has same sign as argument zero, so we don't need to touch
+;it -- just push +1.0.
+	jmp	ReturnOne
+
+TrigOutOfRange:
+;Argument exponent is MaxAngleExp or more: leave the operand alone, set
+;C2 and skip the caller of the reduction routine.
+	mov	EMSEG:[SWcc],C2		;Signal argument not reduced
+;The stack is a flat 32-bit stack, so the full ESP must be adjusted;
+;adjusting only SP would lose the carry into the upper 16 bits.
+	add	esp,4
+	ret
+
+PrevStackWrap	esi,Trig2		;Tied to PrevStackElem below
+
+Trig2Result:
+;Trig function reduction routine used by functions returning 2 values
+;(FSINCOS and FPTAN)
+;edi = [CURstk] = argument pointer
+;Checks that the argument is present and that the element which will
+;receive the second result is empty, dispatches special and zero
+;arguments to their handlers, and otherwise falls into TrigPrem.
+	mov	esi,edi
+	PrevStackElem	esi,Trig2	;esi points to second result location
+	mov	al,EMSEG:[edi].bTAG	;Get tag
+	cmp	al,bTAG_EMPTY		;Stack underflow if empty
+	jz	Trig2StackUnder
+	cmp	EMSEG:[esi].bTAG,bTAG_EMPTY	;Stack overflow if not empty
+	jnz	Trig2StackOver
+	cmp	al,bTAG_ZERO		;Is it Special?
+	ja	Trig2Special
+	jz	Trig2Zero
+;Fall into TrigPrem
+;Fall into TrigPrem
+
+;****
+;TrigPrem
+;
+;This routine reduces an angle in radians to the range [0, pi/4].
+;Angles in odd-numbered octants have been subtracted from pi/4.
+;It uses a 66-bit value for pi, as required by the 387.
+;TrigPrem uses the same two-stage algorithm as FPREM (see 
+;emfprem.asm).	However, it is limited to an argument < 2^63.
+;
+;Inputs:
+;	edi = [CURstk]
+;Outputs:
+;	ebx:esi = remainder, normalized
+;	high ecx = exponent, cl = tag
+;	al = octant
+;	edi = [CURstk]
+;Arguments with an exponent of MaxAngleExp or more are not reduced;
+;they leave through TrigOutOfRange instead of returning here.
+
+TrigPrem:
+	mov	EMSEG:[Result],edi
+	mov	eax,EMSEG:[edi].lManLo
+	mov	edx,EMSEG:[edi].lManHi
+	movsx	ebx,EMSEG:[edi].wExp
+	cmp	ebx,MaxAngleExp
+	jge	TrigOutOfRange
+	xor	edi,edi			;Extend dividend
+	xor	esi,esi			;Quotient, in case we skip stage 1
+.erre	PiOver4exp eq -1
+	inc	ebx			;Subtract exponent of pi/4
+	jl	ExitTrigPrem		;If dividend is smaller, return it.
+;We now know that 0 <= ExpDif < 64, so it fits in bl.
+	cmp	bl,31			;Do we need to do stage 1?
+	jl	FitPi			;No, start stage 2
+
+;FPREM stage 1
+;
+;Exponent difference is at least 31.  Use 32-bit division to compute
+;quotient and exact remainder, reducing exponent difference by 31.
+;
+;edx:eax = dividend
+;ebx = exponent difference
+
+;Shift dividend right one bit to be sure DIV instruction won't overflow
+;This means we'll be reducing the exponent difference by 31, not 32
+	xor	ebp,ebp			;Dividend extension
+	shrd	ebp,eax,1
+	shrd	eax,edx,1
+	shr	edx,1
+
+	sub	bl,31			;Exponent reduced
+	mov	ecx,XPiHi
+	div	ecx			;Guess a quotient "digit"
+
+;Check out our guess.  
+;Currently, remainder in edx = (high dividend) - (quotient * high pi).
+;(High dividend is the upper 64 bits--ebp has 1 bit.)  The definition 
+;of remainder is (all dividend) - (quotient * all pi).  So if we
+;subtract (quotient * low pi) from edx:ebp, we'll get the true 
+;remainder.  If it's negative, our guess was too big.
+
+	mov	esi,eax			;Save quotient
+	mov	ecx,edx			;Save remainder
+
+;The pi/4 we use has two bits set below the first 64 bits.  This means
+;we must add another 3/4 of the quotient into the amount to subtract,
+;which we'll compute by rounding the low 32 bits up 1, then subtracting 
+;1/4 of quotient.  But since we're computing the amount to subtract from
+;the remainder, we'll add the 1/4 of the quotient to the remainder instead
+;of subtracting it from the amount to subtract.
+
+.erre	XPiLo eq (3 shl 30)
+	mov	eax,XPiMid+1
+	mul	esi			;Quotient * low pi
+;Note that ebp is either 0 or 800...00H
+	shr	ebp,30			;Move down to low end
+	shld	ebp,esi,30		;Move back up, adding 1/4 of quotient
+	mov	edi,esi			;Another copy of quotient
+	shl	edi,30			;Keep last two bits
+;edx:eax has amount to subtract to get correct remainder from ecx:ebp:edi
+	sub	ebp,eax
+	sbb	ecx,edx			;Subtract from remainder
+;The two mov instructions below do not alter flags, so the jnc still
+;tests the borrow out of the sbb above.
+	mov	eax,ebp
+	mov	edx,ecx			;Remainder back to edx:eax:edi
+	jnc	TrigPremNorm		;Was quotient OK?
+TrigCorrect:
+;Remainder went negative: step the quotient down and add the divisor
+;back until the addition carries out (remainder no longer negative).
+	dec	esi			;Quotient was too big
+	add	edi,XPiLo
+	adc	eax,XPiMid		;Add divisor back into remainder
+	adc	edx,XPiHi
+	jnc	TrigCorrect		;Repeat if quotient is still too big
+	jmp	TrigPremNorm
+
+;FPREM stage 2
+;
+;Exponent difference is less than 32.  Use restoring long division to
+;compute quotient bits until exponent difference is zero.  Note that we
+;often get more than one bit/loop:  BSR is used to scan off leading
+;zeros each time around.  Since the divisor is normalized, we can
+;instantly compute a zero quotient bit for each leading zero bit.
+
+TrigPremLoop:
+;edx:eax:edi = dividend (remainder) minus pi/4
+;esi = quotient
+;ebx = exponent difference
+;
+;If D is current dividend and p is pi/4, then we have edx:eax:edi = D - p, 
+;which is negative.  We want 2*D - p, which is positive.  
+;2*D - p = 2*(D - p) + p.
+	add	edi,edi			;2*(D - p)
+	adc	eax,eax
+	adc	edx,edx
+
+	add	edi,XPiLo		;2*(D-p) + p = 2*D - p
+	adc	eax,XPiMid
+	adc	edx,XPiHi
+
+	add	esi,esi			;Double quotient too
+	dec	ebx			;Decrement exponent difference
+PiFit:
+;A subtraction of pi/4 succeeded: count it in the quotient
+	inc	esi
+TrigPremNorm:
+;Normalize the remainder in edx:eax:edi, shifting the quotient along
+;and reducing the exponent difference by the same amount.
+	bsr	ecx,edx			;Find first 1 bit
+	jz	TrigPremZero
+	not	cl
+	and	cl,1FH			;Convert bit no. to shift count
+	sub	ebx,ecx			;Reduce exponent difference
+	jl	TrigTooFar
+	shld	edx,eax,cl
+	shld	eax,edi,cl
+	shl	edi,cl			;Finish normalize shift
+	shl	esi,cl			;Shift quotient
+FitPi:
+;Dividend could be larger or smaller than divisor
+	sub	edi,XPiLo
+	sbb	eax,XPiMid
+	sbb	edx,XPiHi
+	jnc	PiFit
+;Couldn't subtract pi/4 from dividend.	
+;edx:eax:edi = dividend - pi/4, which is negative
+	or	ebx,ebx			;Is exponent difference zero?
+	jg	TrigPremLoop
+;If quotient (octant number) is odd, we have subtracted an odd number of
+;pi/4's.  However, simple angle reductions work in multiples of pi/2.
+;We will keep the extra pi/4 we just subtracted if the octant was odd.
+;This will give a result range of [-pi/4, pi/4].  
+	test	esi,1			;Is octant odd?
+	jz	EvenOctant
+NegPremResult:
+;-pi/4 < dividend < 0.  Negate this since we use sign-magnitude representation.
+	not	edx			;96-bit negation
+	not	eax
+	neg	edi
+	sbb	eax,-1
+	sbb	edx,-1
+;May need to normalize
+	bsr	ecx,edx
+	jz	TrigNorm32
+	lea	ebx,[ebx+ecx-31]	;Fix up exponent for normalization
+;No masking of cl is needed: the shifts below use only its low 5 bits
+	not	cl			;Convert bit no. to shift count
+TrigShortNorm:
+	shld	edx,eax,cl
+	shld	eax,edi,cl
+	shl	edi,cl			;Finish normalize shift
+RoundPrem:
+;Must round 66-bit result to 64 bits.
+;To perform "round even" when the round bit is set and the sticky bits
+;are zero, we treat the LSB as if it were a sticky bit.  Thus if the LSB
+;is set, that will always force a round up (to even) if the round bit is
+;set.  If the LSB is zero, then the sticky bits remain zero and we always
+;round down.  This rounding rule is implemented by adding RoundBit-1
+;(7F..FFH), setting CY if round up.  
+	bt	eax,0			;Is mantissa even or odd? (set CY)
+	adc	edi,(1 shl 31)-1	;Sum LSB & sticky bits--CY if round up
+	adc	eax,0
+	adc	edx,0
+;Falls through into ExitTrigPrem
+ExitTrigPrem:
+;Common exit of TrigPrem: move the result into the output registers.
+;edx:eax = remainder, normalized
+;esi = quotient
+;ebx = exponent difference, zero or less
+.erre	PiOver4exp eq -1
+	dec	ebx			;True exponent
+.erre	bTAG_SNGL eq 0
+	shrd	ecx,ebx,16		;Exponent to high ecx
+	mov	ebx,edx			;High mant. to ebx
+	xchg	esi,eax			;Low mant. to esi, octant to eax
+	or      esi,esi			;Any bits in low half?
+.erre   bTAG_VALID eq 1
+.erre   bTAG_SNGL eq 0
+;cl becomes bTAG_SNGL or bTAG_VALID.  ch is not set here; each caller
+;loads the sign of its result into ch after the return.
+	setnz   cl			;if low half==0 then cl=0 else cl=1
+	mov	edi,EMSEG:[CURstk]
+	test	EMSEG:[edi].bSgn,bSign	;Was angle negative?
+	jnz	FlipOct			;Yes, flip octant over
+	ret
+
+FlipOct:
+;Angle was negative.  Subtract octant from 7.
+;al = 7 - al
+	neg	al
+	add	al,7
+	ret
+
+EvenOctant:
+;Octant is even, so the pi/4 that the last trial subtraction at FitPi
+;removed must not stay subtracted: add it back.
+;Restore dividend
+	add	edi,XPiLo
+	adc	eax,XPiMid
+	adc	edx,XPiHi
+	jmp	RoundPrem
+
+TrigTooFar:
+;Exponent difference in ebx went negative when reduced by shift count in ecx.
+;We need a quotient corresponding to exponent difference of zero.
+;Here ebx = (previous difference) - (shift count), so ecx + ebx
+;recovers the previous difference.
+	add	ecx,ebx			;Compute previous exponent difference
+	shl	esi,cl			;Fix up quotient
+	sub	ecx,ebx			;Restore shift count
+	test	esi,1			;Is octant odd?
+;Even octant: the remainder is normalized by the full shift count in
+;cl, with ebx keeping its negative value.
+	jz	TrigShortNorm		;No, go normalize
+	xor	ebx,ebx			;Restore old exponent difference (zero)
+SubPiOver4:
+;We are here if exponent difference was zero and octant is odd.
+;As noted above, we need to reduce the angle by a multiple of pi/2,
+;not pi/4.  We will subtract one more pi/4, which will make the
+;result range [-pi/4, pi/4].
+	sub	edi,XPiLo
+	sbb	eax,XPiMid
+	sbb	edx,XPiHi
+	jmp	NegPremResult
+
+TrigPremZero:
+;High dword of remainder is all zero, so we've reduced exponent difference
+;by 32 bits and overshot.  We need a quotient corresponding to exponent 
+;difference of zero, so we just shift it by the original difference.  Then
+;we need to normalize the rest of the remainder.
+	mov	ecx,ebx			;Get exponent difference
+	shl	esi,cl			;Fix up quotient
+	test	esi,1			;Is octant odd?
+	jnz	SubPiOver4		;Yes, go subtract another pi/4
+TrigNorm32:
+;edx = 0: normalize from eax:edi.  Also entered from NegPremResult.
+	bsr	ecx,eax
+	jz	TinyTrig
+	lea	ebx,[ebx+ecx-31-32]	;Fix up exponent for normalization
+	mov	edx,eax
+	mov	eax,edi			;Shift left by 32 bits
+	not	cl			;Convert bit no. to shift count
+	shld	edx,eax,cl		;Normalize remainder
+	shl	eax,cl
+;No rounding is applied on this path; it goes straight to the exit
+	jmp	ExitTrigPrem
+
+TinyTrig:
+;Upper 64 bits of remainder are all zero.  We are assured that the extended
+;remainder is never zero, though.
+;eax is zero here, so the result is edx:eax with edx taken from edi.
+	mov	edx,edi			;Shift left 64 bits
+	bsr	ecx,edi
+	lea	ebx,[ebx+ecx-31-64]	;Fix up exponent for normalization
+	not	cl			;Convert bit no. to shift count
+	shl	edx,cl			;Normalize
+	jmp	ExitTrigPrem
+
+;*******************************************************************************
+
+EM_ENTRY eFCOS
+eFCOS:
+;FCOS: replace the element at edi = [CURstk] with its cosine.
+;A zero argument returns 1.0 directly.  Otherwise the argument is
+;reduced by Trig1Result and, depending on the octant, either the cosine
+;or the sine polynomial is evaluated on the reduced argument.
+    and		[esp].[OldLongStatus+4],NOT(C2 SHL 16)	;clear C2
+	cmp	EMSEG:[edi].bTAG,bTAG_ZERO
+	jz	ReturnOne
+	call	Trig1Result
+;ebx:esi,ecx = reduced argument
+;eax = octant
+	mov	ch,80H			;Assume negative
+	test	al,110B			;Negative in octants 2 - 5
+	jpo	@F			;Occurs when 1 of these bits are set
+	xor	ch,ch			;Actually positive
+@@:
+	test	al,011B			;Look for octants 0,3,4,7
+	jpo	TakeSine		;Use sine if not
+TakeCosine:
+;Also entered from ReducedSine with ch = sign of the result
+	cmp	ecx,TinyAngleExp shl 16	;Is angle really small?
+	jl	CosReturnOne		;cos(x) = 1 for tiny x
+CosNotTiny:
+	mov	edi,offset tCosPoly
+;Note that argument needs to be saved in ArgTemp (by EvalPolySetup) in case 
+;we were called from eFSINCOS and we'll need the arg for the sine.  Argument
+;is not needed for cosine, however (just its square).
+	call	EvalPolySetup		;In emftran.asm
+	mov	ch,EMSEG:[ArgTemp].bSgn	;Get sign we already figured out
+TransUnround:
+;Common tail of the transcendental routines.
+;ebx:esi = result mantissa, eax = rounding bits, ecx = exponent and sign
+;The last operation performed a simple round nearest, without setting the 
+;C1 status bit if round up occurred.  We reverse this last rounding now
+;so we can do the user's selected rounding mode.  We also ensure that
+;the answer is never exact.
+	sub	eax,(1 shl 31)-1	;Sum LSB & sticky bits--CY if round up
+	jz	UnroundExact		;Answer looks exact, but it's not
+	sbb	esi,0
+	sbb	ebx,0
+	jns	PolyDropExponent	;We had rounded up exponent too
+FinalTransRound:
+;A jump through [TransRound] is only valid if the number is known not to
+;underflow.  Unmasked underflow requires [RoundMode] be set.
+	mov	edx,EMSEG:[TransRound]
+	mov	EMSEG:[RoundMode],edx
+	call	edx			;Perform user's rounding
+RestoreRound:
+;Restore rounding vectors
+	mov	EMSEG:[ZeroVector],offset SaveResult
+	mov	eax,EMSEG:[SavedRoundMode]
+	mov	EMSEG:[RoundMode],eax
+	ret
+
+UnroundExact:
+;eax is zero here; making it 1 marks the result as inexact.  The mov
+;instructions at FinalTransRound do not need the flags.
+	inc	eax			;Let's say our answer is a bit small
+	jmp	FinalTransRound
+
+PolyDropExponent:
+;Undoing the round-up cleared the mantissa MSB: renormalize by
+;lowering the exponent one step and setting the MSB again.
+	sub	ecx,1 shl 16		;Decrement exponent
+	or	ebx,1 shl 31		;Set MSB
+	jmp	FinalTransRound
+
+
+SinRet:
+;Used by eFSIN for a zero argument: the operand is left as it is
+	ret
+
+SaveTinySin:
+;Argument in ebx:esi,ecx is small enough so that sin(x) = x, which happens
+;when x - x^3/6 = x [or 1 - x^2/6 = 1].  Note that the infinitely precise
+;result is slightly less than the argument.  To get the correct answer for
+;any rounding mode, we decrement the argument and set up for rounding.
+;Also used for the tiny tangent (TinyTan) and by the tiny FSINCOS paths.
+	mov	eax,-1			;Set up rounding bits
+	sub	esi,1
+	sbb	ebx,0			;Drop mantissa by one
+	js	FinalTransRound		;Still normalized?
+;mantissa must have been 800..000H, set it to 0FFF...FFFH and drop exponent
+;(esi already wrapped to 0FFFFFFFFH in the subtraction above)
+	mov	ebx,eax			;ebx = -1
+	sub	ecx,1 shl 16		;Drop exponent by one
+	jmp	FinalTransRound
+
+
+EM_ENTRY eFSIN
+eFSIN:
+;FSIN: replace the element at edi = [CURstk] with its sine.
+;A zero argument is returned unchanged.  Otherwise the argument is
+;reduced by Trig1Result and, depending on the octant, either the sine
+;or the cosine polynomial is evaluated on the reduced argument.
+    and		[esp].[OldLongStatus+4],NOT(C2 SHL 16)	;clear C2
+	cmp	EMSEG:[edi].bTAG,bTAG_ZERO
+	jz	SinRet			;Return zero for zero argument
+	call	Trig1Result
+	mov	ch,al
+	shl	ch,7-2			;Move bit 2 to bit 7 as sign bit
+ReducedSine:
+;Also called from eFSINCOS once the signs have been worked out.
+;ebx:esi,ecx = reduced argument
+;ch = correct sign
+;eax = octant
+	test	al,011B			;Look for octants 0,3,4,7
+	jpo	TakeCosine		;Use cosine if not
+TakeSine:
+	cmp	ecx,TinyAngleExp shl 16	;Is angle really small?
+	jl	SaveTinySin		;sin(x) = x for tiny x
+
+;The polynomial for sine is  sin(x) = x * P(x^2).  However, the degree zero
+;coefficient of P() is 1, so  P() = R() + 1, where R() has no degree zero
+;term.	Thus  sin(x) = x * [R(x^2) + 1] = x * R(x^2) + x.
+;
+;What's important here is that adding 1 to R(x^2) can blow away a lot of
+;precision just before we do that last multiply by x.  Note that x < pi/4 < 1,
+;so that x^2 is often << 1.  The precision is lost when R(x^2) is shifted
+;right to align its binary point with 1.0.  This can cause a loss of at
+;least 1 bit of precision after the final multiply by x in addition to 
+;rounding errors.
+;
+;To avoid this precision loss, we use the alternate form given above,
+;sin(x) = x * R(x^2) + x.  Instead of adding 1.0 and multiplying by x,
+;we multiply by x and add x--exactly the same level of difficulty.  But
+;the multiply has all of R(x^2)'s precision available.
+;
+;Because the polynomial R() has no zero-degree term, we give EvalPoly
+;one degree less (so we don't have to add zero as the last term).
+;Then we have to multiply once more by x^2 since we left the loop early.
+
+SineNotTiny:
+	mov	edi,offset tSinPoly
+	call	EvalPolySetup		;In emftran.asm
+SineFinish:
+;Also used by eFSINCOS (FastSine) as the return point of EvalPoly.
+;FloatTemp is used here as x^2 and ArgTemp as the original x.
+
+ifdef NT386
+        mov	edi,YFloatTemp
+else
+	mov	edi,offset edata:FloatTemp
+endif
+	call	PolyMulDouble		;Last coefficient in R(x^2)
+
+ifdef NT386
+	mov	edi,YArgTemp		;Point to original x
+else
+	mov	edi,offset edata:ArgTemp ;Point to original x
+endif
+
+	call	PolyMulDouble		;Compute x * R(x^2)
+
+ifdef NT386
+	mov	edi,YArgTemp		;Point to original x
+else
+	mov	edi,offset edata:ArgTemp ;Point to original x
+endif
+
+;PolyAddDouble returns into TransUnround, which does the final rounding
+	push	offset TransUnround
+	jmp	PolyAddDouble		;Compute x * R(x^2) + x
+
+
+EM_ENTRY eFPTAN
+eFPTAN:
+;FPTAN: replace the element at [CURstk] with its tangent, then push 1.0.
+;The argument is reduced by Trig2Result; the result is the quotient of
+;two polynomials, or its reciprocal, depending on the octant.
+    and		[esp].[OldLongStatus+4],NOT(C2 SHL 16)	;clear C2
+	call	Trig2Result
+;TanPushOne runs last, after the rounding code has returned
+	push	offset TanPushOne	; Push 1.0 when we're all done
+;ebx:esi,ecx = reduced argument
+;eax = octant
+	mov	ch,al
+	shl	ch,7-1			;Move bit 1 to bit 7 as sign bit
+;Note that ch bit 6 now has even/odd octant, which we'll need when we're
+;done to see if we should take reciprocal.
+	cmp	ecx,TinyAngleExp shl 16	;Is angle really small?
+	jl	TinyTan
+	mov	edi,offset tTanPoly
+	call	Eval2Poly		;In emftran.asm
+	mov	edi,EMSEG:[CURstk]	;Point to first result
+	push	offset TransUnround	;Return address of divide
+	test	EMSEG:[ArgTemp].bSgn,0C0H	;Check low 2 bits of octant
+;Given the reduced input range, the result can never overflow or underflow.
+;It must then be safe to assume neither operand is zero.
+	jpe	DivDouble		;Tan() octants 0,3,4,7
+	jmp	DivrDouble		;CoTan()
+
+TinyTan:
+;Reduced argument is tiny: tan(x) = x, or 1/x when the reciprocal is
+;needed.
+	test	ch,0C0H			;Check low 2 bits of octant
+	jpe	SaveTinySin		;Octants 0,3,4,7: tan(x) = x for tiny x
+;Need reciprocal of reduced argument
+	mov	edi,esi
+	mov	esi,ebx			;Mantissa in esi:edi
+	mov	ebx,ecx			;ExpSgn to ebx
+	mov	edx,1 shl 31		;Load 1.0
+	xor	eax,eax
+.erre	TexpBias eq 0
+	xor	ecx,ecx			;Sign and exponent are zero
+;dividend mantissa in edx:eax, exponent in high ecx, sign in ch bit 7
+;divisor mantissa in esi:edi, exponent in high ebx, sign in bh bit 7
+	push	offset TransUnround	;Return address of divide
+;Note that this can never overflow, because the reduced argument is never
+;smaller than about 2^-65.
+	jmp	DivDoubleReg
+
+
+PrevStackWrap	edi,Tan			;Tied to PrevStackElem below
+
+TanPushOne:
+;Final step of FPTAN: make the previous stack element the new top of
+;stack and fall into ReturnOne to fill it with 1.0.
+	PrevStackElem	edi,Tan		;edi points to second result location
+	mov	EMSEG:[CURstk],edi
+ReturnOne:
+;Store +1.0 in the element at edi: mantissa 8000 0000:0000 0000H,
+;exponent zero (before bias), positive sign, tag bTAG_SNGL.
+	mov	EMSEG:[edi].lManLo,0
+	mov	EMSEG:[edi].lManHi,1 shl 31
+	mov	EMSEG:[edi].ExpSgn,(0-TexpBias) shl 16 + bTAG_SNGL
+	ret
+
+
+PrevStackWrap	edi,SinCos		;Tied to PrevStackElem below
+
+;NOTE(review): unlike eFCOS, eFSIN and eFPTAN, this entry point has no
+;EM_ENTRY line in front of it -- confirm whether that is intended.
+eFSINCOS:
+;FSINCOS: the sine replaces the argument element and the cosine is
+;written to the previous stack element, which becomes the new top.
+    and		[esp].[OldLongStatus+4],NOT(C2 SHL 16)	;clear C2
+	call	Trig2Result
+;Figure out signs
+	mov	ch,al			;Start with sign of sine
+	shl	ch,7-2			;Move bit 2 to bit 7 as sign bit
+	mov	ah,80H			;Assume sign of cosine is negative
+	test	al,110B			;Negative in octants 2 - 5
+	jpo	@F			;Occurs when 1 of these bits are set
+	xor	ah,ah			;Actually positive
+@@:
+;ch = sign of sine
+;ah = sign of cosine
+	cmp	ecx,TinyAngleExp shl 16	;Is angle really small?
+	jl	TinySinCos
+	push	eax			;Save octant and sign of cosine
+	call	ReducedSine		;On exit, edi = [CURstk]
+	pop	eax
+;The Sin() function restored the rounding vectors to normal.  Set them back.
+	mov	EMSEG:[RoundMode],offset PolyRound
+	mov	EMSEG:[ZeroVector],offset PolyZero
+	PrevStackElem	edi,SinCos	;edi points to second result location
+	mov	EMSEG:[CURstk],edi
+	mov	EMSEG:[Result],edi
+;Load x^2 back into registers
+	mov	ecx,EMSEG:[FloatTemp].ExpSgn
+	mov	ebx,EMSEG:[FloatTemp].lManHi
+	mov	esi,EMSEG:[FloatTemp].lManLo
+	mov	EMSEG:[ArgTemp].bSgn,ah	;Save sign
+;The first result used the sine polynomial in octants 0,3,4,7 and the
+;cosine polynomial otherwise; the second result uses the other one.
+	test	al,011B			;Look for octants 0,3,4,7
+	jpo	FastSine		;Use sine if not
+	mov	edi,offset tCosPoly
+	call	EvalPoly		;In emftran.asm
+	mov	ch,EMSEG:[ArgTemp].bSgn	;Get sign we already figured out
+	jmp	TransUnround
+
+FastSine:
+;EvalPoly returns into SineFinish, which completes x * R(x^2) + x
+	mov	edi,offset tSinPoly
+	push	offset SineFinish
+	jmp	EvalPoly		;In emftran.asm
+
+TinySinCos:
+;ch = sign of sine
+;ah = sign of cosine
+;ebx:esi,high ecx = reduced argument
+;edi = [CURstk]
+	test	al,011B			;Look for octants 0,3,4,7
+	jpo	TinyCosSin		;Take cosine first if not
+	push	eax
+	call	SaveTinySin		;For sine, arg is result
+;The pushed eax had the cosine sign in ah, so it arrives in ch
+	pop	ecx
+;edi = [CURstk]
+;ch = sign of cosine
+;Set cosine to 1.0
+	PrevStackElem	edi,TinySinCos	;edi points to second result location
+	mov	EMSEG:[CURstk],edi
+	mov	EMSEG:[Result],edi
+CosReturnOne:
+;Also entered from TakeCosine and TinyCosSin with ch = sign of result.
+;Cosine is nearly equal to 1.0.  Put in next smaller value and round it.
+	mov	ebx,-1
+	mov	esi,ebx			;Set mantissa to -1
+	mov	eax,ebx			;Set up rounding bits
+.erre	TexpBias eq 0
+	and	ecx,bSign shl 8		;Keep only sign
+	sub	ecx,1 shl 16		;Exponent of -1
+;A jump through [TransRound] is only valid if the number is known not to
+;underflow.  Unmasked underflow requires [RoundMode] be set.
+	jmp	EMSEG:[TransRound]
+
+	PrevStackWrap	edi,TinySinCos
+
+	PrevStackWrap	edi,TinyCosSin
+
+TinyCosSin:
+;Sine is nearly 1.0, cosine is argument
+;
+;ch = sign of sine
+;ah = sign of cosine
+;ebx:esi,high ecx = reduced argument
+;edi = [CURstk]
+	xchg	ah,ch			;Cosine sign to ch, sine sign to ah
+	push	edi			;Save place for sine
+	PrevStackElem	edi,TinyCosSin	;edi points to second result location
+	mov	EMSEG:[CURstk],edi
+	mov	EMSEG:[Result],edi
+	push	eax
+;The tiny-sine path is reused here to produce the cosine, since the
+;cosine is the (tiny) reduced argument itself.
+;presumably the result is written through [Result] -- TODO confirm
+	call	SaveTinySin		;For cosine, arg is result
+	pop	ecx
+;ch = sign of sine
+	pop	EMSEG:[Result]		;Set up location for sine
+	jmp	CosReturnOne
+
+;*******************************************************************************
+
+;********************* Polynomial Coefficients *********************
+
+;These polynomial coefficients were all taken from "Computer Approximations"
+;by J.F. Hart (reprinted 1978 w/corrections).  All calculations and 
+;conversions to hexadecimal were done with a character-string calculator
+;written in Visual Basic with precision set to 30 digits.  Once the constants
+;were typed into this file, all transfers were done with cut-and-paste
+;operations to and from the calculator to help eliminate any typographical
+;errors.
+
+
+tCosPoly	label	word
+
+;These constants are derived from Hart #3824: cos(x) = P(x^2),
+;accurate to 19.45 digits over interval [0, pi/4].  The original 
+;constants in Hart required that the argument x be divided by pi/4.  
+;These constants have been scaled so this is no longer required.
+;Scaling is done by multiplying the constant by a power of 4/pi.
+;The power is given in the table.
+;
+;Table layout: a dword holding the degree, followed by one 12-byte
+;record per coefficient, highest power first.  Each record is
+;	dq	64-bit mantissa (the listed hex value rounded to 64 bits)
+;	dw	tag in the low byte, sign in the high byte
+;	dw	exponent (one less than the H exponent listed in the comment)
+
+	dd	7			;Degree seven
+
+;  Original Hart constant	      power	Scaled constant
+;
+;-0.38577 62037 2		 E-12  14  -0.113521232057839395845871741043E-10
+;Hex value:    0.C7B56AF786699CF1BD13FD290 HFFDC
+	dq	0C7B56AF786699CF2H
+	dw	(bSign shl 8)+bTAG_VALID,0FFDCH-1
+
+;+0.11500 49702 4263		  E-9  12  +0.208755551456778828747793797596E-8
+;Hex value:    0.8F74AA3CCE49E68D6F5444A18 HFFE4
+	dq	08F74AA3CCE49E68DH
+	dw	bTAG_VALID,0FFE4H-1
+
+;-0.24611 36382 63700 5		  E-7  10  -0.275573128656960822243472872247E-6
+;Hex value:    0.93F27B7F10CC8A1703EFC8A04 HFFEB
+	dq	093F27B7F10CC8A17H
+	dw	(bSign shl 8)+bTAG_VALID,0FFEBH-1
+
+;+0.35908 60445 88581 953	  E-5	8  +0.248015872828994630247806807317E-4
+;Hex value:    0.D00D00CD6BB3ECD17E10D5830 HFFF1
+	dq	0D00D00CD6BB3ECD1H
+	dw	bTAG_VALID,0FFF1H-1
+
+;-0.32599 18869 26687 55044	  E-3	6  -0.138888888888589604343951947246E-2
+;Hex value:    0.B60B60B609B165894CFE522AC HFFF7
+	dq	0B60B60B609B16589H
+	dw	(bSign shl 8)+bTAG_VALID,0FFF7H-1
+
+;+0.15854 34424 38154 10897 54	  E-1	4  +0.416666666666664302573692446873E-1
+;Hex value:    0.AAAAAAAAAAA99A1AF53042B08 HFFFC
+	dq	0AAAAAAAAAAA99A1BH
+	dw	bTAG_VALID,0FFFCH-1
+
+;-0.30842 51375 34042 45242 414	  E0	2  -0.499999999999999992843582920899E0
+;Hex value:    0.FFFFFFFFFFFFFEF7F98D3BFA8 HFFFF
+	dq	0FFFFFFFFFFFFFEF8H
+	dw	(bSign shl 8)+bTAG_VALID,0FFFFH-1
+
+;+0.99999 99999 99999 99996 415	  E0	0  (no change)
+;Hex value     0.FFFFFFFFFFFFFFFF56B402618 H0
+	dq	0FFFFFFFFFFFFFFFFH
+	dw	bTAG_VALID,00H-1
+
+
+tSinPoly	label	word
+
+;These constants are derived from Hart #3044: sin(x) = x * P(x^2),
+;accurate to 20.73 digits over interval [0, pi/4].  The original 
+;constants in Hart required that the argument x be divided by pi/4.  
+;These constants have been scaled so this is no longer required.
+;Scaling is done by multiplying the constant by a power of 4/pi.
+;The power is given in the table.
+;
+;Table layout: a dword holding the degree, followed by one 12-byte
+;record per coefficient, highest power first.  Each record is
+;	dq	64-bit mantissa (the listed hex value rounded to 64 bits)
+;	dw	tag in the low byte, sign in the high byte
+;	dw	exponent (one less than the H exponent listed in the comment)
+
+	dd	7-1			;Degree seven, but the last coefficient
+					;is 1.0 and is not listed here.
+
+;  Original Hart constant	      power	Scaled constant
+;
+;-0.20225 31292 93		 E-13  15  -0.757786788401271156262125540409E-12
+;Hex value:    0.D54C4AF2B524F0F2D6411C90A HFFD8
+	dq	0D54C4AF2B524F0F3H
+	dw	(bSign shl 8)+bTAG_VALID,0FFD8H-1
+
+;+0.69481 52035 0522		 E-11  13  +0.160583476232246065559545749398E-9
+;Hex value:    0.B0903AF085DA66030F16E43BC HFFE0
+	dq	0B0903AF085DA6603H
+	dw	bTAG_VALID,0FFE0H-1
+
+;-0.17572 47417 61708 06	  E-8  11  -0.250521047382673309542092418731E-7
+;Hex value:    0.D73229320D2AF05971AC96FF4 HFFE7
+	dq	0D73229320D2AF059H
+	dw	(bSign shl 8)+bTAG_VALID,0FFE7H-1
+
+;+0.31336 16889 17325 348	  E-6	9  +0.275573192133901687156480447942E-5
+;Hex value:    0.B8EF1D2984D2FBA28A9CC9DEE HFFEE
+	dq	0B8EF1D2984D2FBA3H
+	dw	bTAG_VALID,0FFEEH-1
+
+;-0.36576 20418 21464 00052 9	  E-4	7  -0.198412698412531058609618529749E-3
+;Hex value:    0.D00D00D00C3FDDD7916E5CB28 HFFF4
+	dq	0D00D00D00C3FDDD8H
+	dw	(bSign shl 8)+bTAG_VALID,0FFF4H-1
+
+;+0.24903 94570 19271 62752 519	  E-2	5  +0.83333333333333203341753387264E-2
+;Hex value:    0.8888888888884C95D619A0343 HFFFA
+	dq	08888888888884C96H
+	dw	bTAG_VALID,0FFFAH-1
+
+;-0.80745 51218 82807 81520 2582  E-1	3  -0.166666666666666666281276062229E0
+;Hex value:    0.AAAAAAAAAAAAAA8E3AD80EAB8 HFFFE
+	dq	0AAAAAAAAAAAAAA8EH
+	dw	(bSign shl 8)+bTAG_VALID,0FFFEH-1
+
+;+0.78539 81633 97448 30961 41845 E0	1  +0.99999999999999999999812025812E0
+;Hex value:    0.FFFFFFFFFFFFFFFFF71F88110 H0
+;	dq	8000000000000000H	;This constant of 1.0 omitted here.
+;	dw	bTAG_VALID,0		;   It is handled in code.
+
+
+tTanPoly	label	word
+
+;These constants are derived from Hart #4286: tan(x) = x * P(x^2) / Q(x^2),
+;accurate to 19.94 digits over interval [0, pi/4].  The original 
+;constants in Hart required that the argument x be divided by pi/4.  
+;These constants have been scaled so this is no longer required.
+;Scaling is done by multiplying the constant by the same power of 4/pi
+;as the power of x the constant is used on.  However, the highest
+;degree coefficient of Q() is 1, and after scaling this way it would
+;become (4/pi)^8.  In order to keep this coefficient equal to one,
+;we scale everything again by (pi/4)^8.  This scaling is partially
+;canceled by the original scaling by powers of 4/pi, and the net
+;resulting power of pi/4 is given in the table.
+;
+;Table layout: two polynomials back to back.  Each starts with a dword
+;holding its degree, followed by 12-byte coefficient records, highest
+;power first.  Each record is
+;	dq	64-bit mantissa (the listed hex value rounded to 64 bits)
+;	dw	tag in the low byte, sign in the high byte
+;	dw	exponent (one less than the H exponent listed in the comment)
+
+
+	dd	3			;First poly is degree 3
+
+;  Original Hart constant	        power	Scaled constant
+;
+;-.45649 31943 86656 31873 96113 7    E2  1  -35.8528916474714232910463077546
+;Hex value:    0.8F695C6D93AF6F97B6E022AB3 H6
+        dq      08F695C6D93AF6F98H
+        dw      (bSign shl 8)+bTAG_VALID,06H-1
+
+;+.14189 85425 27617 78388 00394 831  E5  3  +6874.60229709782436592720603503
+;Hex value:    0.D6D4D181240D0D08C88DF4AA6 HD
+        dq      0D6D4D181240D0D09H
+        dw      bTAG_VALID,0DH-1
+
+;-.89588 84400 67680 41087 29639 541  E6  5  -267733.884797157298951145495276
+;Hex value:    0.82BABC504220C62B1D0722684 H13
+        dq      082BABC504220C62BH
+        dw      (bSign shl 8)+bTAG_VALID,013H-1
+
+;+.10888 60043 72816 87521 38857 983  E8  7  +2007248.9111748838841548144685
+;Hex value:    0.F506874A160EB9C0994AADD6A H15
+        dq      0F506874A160EB9C1H
+        dw      bTAG_VALID,015H-1
+
+
+
+	dd	4			;Second poly is degree 4
+;NOTE: Eval2Poly assumes the first coefficient is 1.0, so it is omitted
+
+;  Original Hart constant	        power	Scaled constant
+;
+;-.10146 56190 25288 53387 54401 947  E4  2  -625.890950057027419879480354834
+;Hex value:    0.9C790553635355A95241A5324 HA
+        dq      09C790553635355A9H
+        dw      (bSign shl 8)+bTAG_VALID,0AH-1
+
+;+.13538 27128 05119 09382 89294 872  E6  4  +51513.6992033752080924797647367
+;Hex value:    0.C939B2FEFE0DC585E649870FE H10
+        dq      0C939B2FEFE0DC586H
+        dw      bTAG_VALID,010H-1
+
+;-.39913 09518 03516 51504 43427 94   E7  6  -936816.855188785264866481436899
+;Hex value:    0.E4B70DAEDA6F89E5A7CE626FA H14
+        dq      0E4B70DAEDA6F89E6H
+        dw      (bSign shl 8)+bTAG_VALID,014H-1
+
+;+.13863 79666 35676 29165 33913 361  E8  8  +2007248.91117488388417770850458
+;Hex value:    0.F506874A160EB9C0CCD8313BC H15
+        dq      0F506874A160EB9C1H
+        dw      bTAG_VALID,015H-1
diff --git a/private/ntos/dll/i386/emulator.asm b/private/ntos/dll/i386/emulator.asm
new file mode 100644
index 000000000..73a2c1c36
--- /dev/null
+++ b/private/ntos/dll/i386/emulator.asm
@@ -0,0 +1,267 @@
+        page    78,132
+        title   emulator - 80387 emulator for flat 32-bit OS
+;*******************************************************************************
+;        Copyright (c) Microsoft Corporation 1991
+;        All Rights Reserved
+;
+;emulator.asm -  80387 emulator
+;       by Tim Paterson
+;
+;Revision History:
+;
+; []    09/05/91  TP    Initial 32-bit version.
+; []    11/13/92  JWM   Bug fixes for esp-indexed addressing, handling of denormals.
+; []    01/18/93  JWM   Bug fixes for preservation of condition & error codes.
+;
+;*******************************************************************************
+
+        .386p
+        .387
+        .model  flat,Pascal
+        option oldstructs                               ;JWM
+
+;*******************************************************************************
+;
+;   Define segments.
+;
+;*******************************************************************************
+
+
+;These equates give access to the program that's using floating point.
+dseg    equ     ss                      ;Segment of program's data
+cseg    equ     es                      ;Segment of program's code
+
+edata           segment dword public 'FAR_DATA'
+edata           ends
+
+ecode           segment dword public 'CODE'
+ecode           ends
+
+
+assume  cs:ecode
+
+ifdef NT386
+assume ds:nothing
+assume fs:edata
+else
+assume ds:edata
+assume fs:nothing
+endif
+
+assume  es:nothing
+assume  gs:nothing
+assume  ss:nothing
+
+ifdef NT386
+        include  ks386.inc
+        include  nt386npx.inc
+        include  callconv.inc
+        include ..\..\vdm\i386\vdmtb.inc
+endif                           ; NT386
+
+;*******************************************************************************
+;
+;   List external functions.
+;
+;*******************************************************************************
+
+ifdef  NT386
+        EXTRNP   _NtRaiseException,3
+        EXTRNP   _RtlRaiseStatus,1
+        EXTRNP   _ZwRaiseException,3
+        EXTRNP   _NpxNpSkipInstruction,1
+endif           ; NT386
+
+ifdef _DOS32EXT
+        extern  _SelKrnGetEmulData:NEAR
+        extern  DOS32RAISEEXCEPTION:NEAR
+endif           ; _DOS32EXT
+
+ifdef _CRUISER
+        extern  DOS32IRAISEEXCEPTION:near
+endif           ; CRUISER
+
+
+;*******************************************************************************
+;
+;   Segment override macro (for NT)
+;
+;*******************************************************************************
+
+ifdef NT386
+        EMSEG EQU FS
+else
+        EMSEG EQU DS
+endif
+
+;;*******************************************************************************
+;;
+;;   Include some more macros and constants.
+;;
+;;*******************************************************************************
+;
+        include em387.inc
+        include emstack.inc             ; stack management macros
+;**************************************************************************
+;**************************************************************************
+;**************************************************************************
+subttl  emulator.asm - Emulator Task DATA Segment
+page
+;*********************************************************************;
+;                                                                     ;
+;                 Emulator Task DATA Segment                          ;
+;                                                                     ;
+;*********************************************************************;
+
+edata   segment
+
+ifdef NT386
+        db size EmulatorTebData dup (?) ; Make space for variables
+else					; ifdef NT386
+
+Numlev          equ     8               ; Number of stack registers
+
+InitControlWord	equ	37FH		; Default - Round near,
+					; 64 bits, all exceptions masked
+
+RoundMode       dd      ?               ;Address of rounding routine
+SavedRoundMode  dd      ?               ;For restoring RoundMode
+ZeroVector      dd      ?               ;Address of sum-to-zero routine
+TransRound      dd      ?               ;Round mode w/o precision
+Result          dd      ?               ;Result pointer
+
+PrevCodeOff     dd      ?
+PrevDataOff     dd      ?
+
+;(See note below on 'Emulator stack area')
+CURstk          dd      ?               ;Current top-of-stack pointer
+
+;The three X* symbols below delimit the register area drawn in the
+;'Emulator stack area' diagram: registers 1 - 7 first, then register 0,
+;then the end marker.  BEGstk/INITstk/ENDstk are their offsets in edata.
+XBEGstk		db	(Numlev-1)*Reg87Len dup(?)	;Allocate register 1 - 7
+XINITstk	db	Reg87Len dup(?)			;Allocate register 0 (initial top of stack)
+XENDstk		label	byte				;First byte past the register area
+
+BEGstk EQU offset edata:XBEGstk
+INITstk EQU offset edata:XINITstk
+ENDstk EQU offset edata:XENDstk
+
+FloatTemp       db      Reg87Len dup(?)
+ArgTemp         db      Reg87Len dup(?)
+
+public Trap7Handler
+Trap7Handler    dd      0
+
+;We're DWORD aligned at this point
+
+;The .erre lines check that the offsets assumed for these fields (defined
+;elsewhere) match the byte layout that follows: Einstall at +0, StatusWord
+;at +1 and CURerr at +3.
+LongStatusWord  label   dword           ;Combined Einstall, CURerr, StatusWord
+.erre   Einstall eq $
+.erre   StatusWord eq $+1
+.erre   CURerr eq $+3
+
+Einstall        db      0               ; Emulator installed flag
+
+StatusWord      label   word
+    SWerr       db      ?               ; Initially no exceptions (sticky flags)
+CurErrCond      label   word            ; Combined error and condition codes
+    SWcc        db      ?               ; Condition codes from various operations
+
+    CURerr      db      ?               ; initially 8087 exception flags clear
+                                        ; this is the internal flag reset after
+                                        ; each operation to detect per instruction
+                                        ; errors
+
+LongControlWord label   dword           ;Combined ControlWord and ErrMask
+.erre   ControlWord eq $
+.erre   ErrMask eq $+2
+
+ControlWord     label   word
+    CWmask      db      ?               ; exception masks
+    CWcntl      db      ?               ; arithmetic control flags
+
+    ErrMask     db      ?
+    dummy       db      ?               ; pads LongControlWord to a full dword
+
+endif                                   ; ifdef NT386 else
+
+;*******************************************************************************
+;
+; Emulator stack area
+;
+;The top of stack pointer CURstk is initialized to the last register 
+;in the list; on a real 8087, this corresponds to hardware register 0.
+;The stack grows toward lower addresses, so the first push (which is
+;hardware register 7) is stored into the second-to-last slot.  This gives
+;the following relationship between hardware registers and memory
+;locations:
+;
+; BEGstk --> |    reg 1    |  (lowest memory address)
+; 	     |    reg 2    |
+; 	     |    reg 3    |
+; 	     |    reg 4    |
+; 	     |    reg 5    |
+; 	     |    reg 6    |
+; 	     |    reg 7    |
+; INITstk -> |    reg 0    |  <-- Initial top of stack (empty)
+; ENDstk -->
+;
+;This means that the wrap-around case on decrementing CURstk will not
+;occur until the last (8th) item is pushed.
+;
+;Note that the physical register numbers are only used in regard to
+;the tag word.  All other operations are relative to the current top.
+
+
+edata	ends
+
+subttl  emulator.asm
+page
+;*********************************************************************;
+;                                                                     ;
+;               Start of Code Segment                                 ;
+;                                                                     ;
+;*********************************************************************;
+
+
+ecode segment
+;The emulator is assembled as one module: every source file below is textually included between the begin/end markers.
+        public  __fpemulatorbegin
+__fpemulatorbegin equ       $           ; emulator really starts here
+
+        include emfinit.asm             ; presumably initialization -- confirm
+        include emerror.asm             ; error handler
+        include emdisp.asm              ; dispatch tables
+
+        include emf386.asm              ; Flat 386 emulation entry
+        include emdecode.asm            ; instruction decoder
+
+        include emarith.asm             ; arithmetic dispatcher
+        include emfadd.asm              ; add and subtract
+        include emfmul.asm              ; multiply
+        include emfdiv.asm              ; division
+        include emround.asm             ; rounding
+        include emload.asm              ; load memory operands
+        include emstore.asm             ; store memory operands
+        include emfmisc.asm             ; miscellaneous instructions
+        include emfcom.asm              ; compare
+        include emfconst.asm            ; constant loading
+        include emlsbcd.asm             ; packed BCD conversion
+        include emxtract.asm            ; xtract and scale
+        include emfprem.asm             ; partial remainder
+        include emtrig.asm              ; trig instructions
+        include emftran.asm             ; transcendentals
+        include emlsenv.asm             ; presumably load/store environment -- confirm
+        include emfsqrt.asm             ; square root
+ifndef NT386
+        include emccall.asm             ; only assembled when NT386 is not defined
+endif
+;All of the entry points below share the single RET that follows; no other work is done for them.
+UNUSED:
+eFSETPM:
+eFNOP:
+eFENI:
+eFDISI:
+        ret                     ;Return to EMLFINISH
+
+
+        public  __fpemulatorend
+__fpemulatorend equ     $       ; emulator ends here
+
+ecode   ends
+END
diff --git a/private/ntos/dll/i386/emxtract.asm b/private/ntos/dll/i386/emxtract.asm
new file mode 100644
index 000000000..ac8f7a6d1
--- /dev/null
+++ b/private/ntos/dll/i386/emxtract.asm
@@ -0,0 +1,309 @@
+	subttl	emxtract - FXTRACT and FSCALE instructions
+        page
+;*******************************************************************************
+;emxtract - FXTRACT and FSCALE instructions
+;
+;        Microsoft Confidential
+;
+;	 Copyright (c) Microsoft Corporation 1991
+;        All Rights Reserved
+;
+;Inputs:
+;	edi = [CURstk]
+;
+;Revision History:
+;
+; []	09/05/91  TP	Initial 32-bit version.
+;
+;*******************************************************************************
+
+
+XtractStackOver:			;Entered from eFXTRACT when the slot for the pushed result is not empty
+	mov	EMSEG:[SWcc],C1		;Flag stack overflow
+XtractEmpty:				;Entered directly when the operand itself is empty (C1 not set on that path)
+;Result is two Indefinites (if exception masked)
+	call	StackError		;Put first indefinite at [edi] = ST(0)
+	jz	XtractExit		;Error was unmasked--just exit
+	mov	EMSEG:[CURstk],edi	;Commit the push: edi becomes the new top of stack
+        mov     eax,EMSEG:[edi].ExpSgn  ;Copy all three dwords of the indefinite at [edi] ...
+        mov     EMSEG:[esi].ExpSgn,eax  ;... into the old top at [esi], which is now ST(1)
+        mov     eax,EMSEG:[edi].lManHi
+        mov     EMSEG:[esi].lManHi,eax
+        mov     eax,EMSEG:[edi].lManLo
+        mov     EMSEG:[esi].lManLo,eax
+	ret
+
+	PrevStackWrap	edi,Xtract	;Macro defined elsewhere; presumably the wrap-around half of PrevStackElem below -- confirm
+;FXTRACT: push one stack slot; new ST(0) gets the operand's mantissa with a zero exponent, ST(1) gets the operand's exponent as a number.
+EM_ENTRY eFXTRACT
+eFXTRACT:
+;edi = [CURstk]
+	mov	esi,edi			;Save current ST
+	PrevStackElem	edi,Xtract
+;edi = ST(0)
+;esi = ST(1) (operand)
+	mov	eax,EMSEG:[esi].ExpSgn	;al = tag, ah = sign, high half = exponent of operand
+;Exception priority requires reporting stack underflow (i.e., using an EMPTY)
+;before stack overflow (i.e., no place for result).  Yes, both can happen
+;together if they've screwed with the stack! (ST empty when ST(-1) isn't).
+	cmp	al,bTAG_EMPTY		;Is operand empty?
+	jz	XtractEmpty
+	cmp	EMSEG:[edi].bTag,bTAG_EMPTY	;Is there an empty spot?
+	jnz	XtractStackOver
+	cmp	al,bTAG_ZERO		;Is it special?
+	jae	XtractSpclOrZero	;Flags from this cmp are tested again at XtractSpclOrZero
+XtractNormal:
+	mov	EMSEG:[CURstk],edi	;Commit the push
+.erre   TexpBias eq 0
+        movzx   ebx,ax                  ;Zero exponent
+;Save mantissa in ST(0)
+        mov     EMSEG:[edi].ExpSgn,ebx  ;Sign and tag of operand, exponent field = 0
+        mov     ebx,EMSEG:[esi].lManHi
+        mov     EMSEG:[edi].lManHi,ebx
+        mov     ebx,EMSEG:[esi].lManLo
+        mov     EMSEG:[edi].lManLo,ebx
+	mov	edi,esi			;Save ST(1) pointer in edi
+	shr	eax,16			;Move exponent down
+	call	NormInt16		;in emload.asm
+;mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl
+	mov	EMSEG:[edi].lManLo,esi	;Store the converted exponent over the operand (now ST(1))
+	mov	EMSEG:[edi].lManHi,ebx
+	mov	EMSEG:[edi].ExpSgn,ecx
+XtractExit:
+	ret
+
+XtractSpcl:				;Operand tag is above bTAG_ZERO (and not empty); al = tag
+	cmp	al,bTAG_INF
+	jz	XtractInf
+	cmp	al,bTAG_NAN
+	jz	XtractNAN
+;Must be denormal.  Change tag to VALID or SNGL.
+	cmp	EMSEG:[esi].lManLo,0		;Any bits in low half?
+.erre	bTAG_VALID eq 1
+.erre	bTAG_SNGL eq 0
+	setnz	al			;if low half==0 then al=0 else al=1
+	mov	EMSEG:[CURerr],Denormal	;Record the denormal-operand exception
+	test	EMSEG:[CWmask],Denormal	;Is it masked?
+	jnz	XtractNormal		;If so, ignore denormalization
+	ret				;Unmasked: return without writing CURstk or either stack slot
+
+XtractSpclOrZero:
+	ja	XtractSpcl		;Flags are still those of cmp al,bTAG_ZERO in eFXTRACT
+;Operand is zero.  Result is ST(0) = 0 (same sign), ST(1) = -infinity
+	mov	EMSEG:[CURerr],ZeroDivide
+	test	EMSEG:[CWmask],ZeroDivide	;Exception masked?
+	jz	XtractExit		;Unmasked: leave the stack untouched
+	mov	EMSEG:[CURstk],edi	;Commit the push
+        mov     EMSEG:[edi].ExpSgn,eax  ;ST(0) = copy of the zero operand (eax = its ExpSgn)
+        mov     eax,EMSEG:[esi].lManHi
+        mov     EMSEG:[edi].lManHi,eax
+        mov     eax,EMSEG:[esi].lManLo
+        mov     EMSEG:[edi].lManLo,eax
+	mov	EMSEG:[esi].ExpSgn,(IexpMax-IexpBias+TexpBias) shl 16 + bSign shl 8 + bTAG_INF
+	mov	EMSEG:[esi].bMan7,80H	;Change zero to infinity
+	ret
+
+XtractInf:
+;Result is ST(0) = infinity (same sign), ST(1) = +infinity
+        mov     EMSEG:[esi].bSgn,0            ;Ensure ST(1) is positive
+XtractQNAN:                             ;Shared tail: push a copy of the operand at [esi] as the new ST(0)
+        mov     EMSEG:[CURstk],edi      ;Commit the push
+        mov     EMSEG:[edi].ExpSgn,eax  ;eax = ExpSgn loaded at eFXTRACT entry, so the original sign is kept
+        mov     eax,EMSEG:[esi].lManHi
+        mov     EMSEG:[edi].lManHi,eax
+        mov     eax,EMSEG:[esi].lManLo
+        mov     EMSEG:[edi].lManLo,eax
+        ret
+
+XtractNAN:
+;Result is two QNANs, signal Invalid Operation if SNAN
+	test	EMSEG:[esi].bMan7,40H		;Is it SNAN?
+	jnz	XtractQNAN		;Bit 40H already set: quiet NAN, just duplicate it
+	mov	EMSEG:[CURerr],Invalid
+	test	EMSEG:[CWmask],Invalid
+	jz	XtractExit		;Unmasked: leave the stack untouched
+	or	EMSEG:[esi].bMan7,40H		;Change to QNAN
+        jmp     XtractQNAN              ;Duplicate the quieted NAN into ST(0)
+
+;*******************************************************************************
+;
+;FSCALE instruction
+
+;Actual instruction entry point is in emarith.asm
+
+;Dispatch table for scale
+;
+;One operand has been loaded into ecx:ebx:esi ("source"), the other is
+;pointed to by edi ("dest").  
+;
+;Tag of source is shifted.  Tag values are as follows:
+
+.erre	TAG_SNGL	eq	0	;SINGLE: low 32 bits are zero
+.erre	TAG_VALID	eq	1
+.erre	TAG_ZERO	eq	2
+.erre	TAG_SPCL	eq	3	;NAN, Infinity, Denormal, Empty
+
+;Any special case routines not found in this file are in emarith.asm
+
+tFscaleDisp	label	dword		;Source (reg)	Dest (*[di] = ST)
+	dd	ScaleDouble		;single		single
+	dd	ScaleDouble		;single		double
+	dd	ScaleX			;single		zero
+	dd	ScaleSpclDest		;single		special
+	dd	ScaleDouble		;double		single
+	dd	ScaleDouble		;double		double
+	dd	ScaleX			;double		zero
+	dd	ScaleSpclDest		;double		special
+	dd	ScaleX			;zero		single
+	dd	ScaleX			;zero		double
+	dd	ScaleX			;zero		zero
+	dd	ScaleSpclDest		;zero		special
+	dd	ScaleSpclSource		;special	single
+	dd	ScaleSpclSource		;special	double
+	dd	ScaleSpclSource		;special	zero
+	dd	TwoOpBothSpcl		;special	special
+	dd	ScaleTwoInf		;Two infinities (17th entry, after the 4 x 4 tag combinations)
+
+
+;The unmasked response to overflow and underflow with FSCALE is complicated 
+;by the extreme range it can generate.  Normally, the exponent is biased
+;by 24,576 in the appropriate direction to bring it back into range.
+;This may not be enough, however.  If it isn't, a result of infinity
+;(with the correct sign) is returned for overflow, regardless of the 
+;rounding mode.  For underflow, zero (with the correct sign) is returned,
+;even if it could be represented as a denormal.  This may be the only 
+;operation in which the unmasked response destroys the operands beyond 
+;recovery.
+
+BigScale:
+;Scale factor is much too big.  Just shift mantissa right two bits to get
+;MSB out of sign bit and ensure no overflow when we add.
+	mov	cl,2			;Always shift 2 bits if it's big
+	jmp	ScaleCont
+;ScaleDouble: both operands are ordinary numbers; add the integer part of ST(1) to the exponent of ST(0).
+ScaleDouble:
+;ebx:esi = ST(1) mantissa
+;ecx = ST(1) sign in bit 15, exponent in high half
+;edi = pointer to ST(0)
+	rol	ecx,16			;Bring exponent down, sign to top
+	or	ch,ch			;Check sign of exponent
+	js	ScaleX			;No work if less than zero
+	cmp	cx,30			;Scale factor exceed 30 bits?
+	jge	BigScale
+	not	cl			;cl = amount to shift right (mod 32)
+ScaleCont:
+	shr	ebx,cl			;ebx = exponent adjustment for ST(0)
+;Use two's complement if negative (complement and increment)
+	mov     eax,ecx			;Bit 31 of ecx is the ST(1) sign after the rol above
+	cdq				;Extend sign through edx
+	xor	ebx,edx			;Complement if negative
+	sub	ebx,edx			;Increment if negative
+;Scale exponent
+	movsx	eax,EMSEG:[edi].wExp		;Get exponent to adjust
+	add	eax,ebx			;Can't overflow
+	cmp	eax,IexpMax-IexpBias	;Within normal range?
+	jge	ScaleOverflow
+	cmp	eax,IexpMin-IexpBias
+	jle	ScaleUnderflow
+SaveScaledExp:
+;Result fits within normal range
+	mov	EMSEG:[edi].wExp,ax		;Update exponent of ST(0)
+ScaleX:
+	ret
+
+ScaleOverflow:
+;eax = exponent that's too big
+	mov	EMSEG:[CURerr],Overflow
+	test	EMSEG:[CWmask],Overflow	;Is exception unmasked?
+	jz	UnmaskedScaleOvfl
+;Produce masked overflow response
+	mov	al,EMSEG:[CWcntl]		;Get rounding control
+	mov	ah,al
+;Return max value if RCup bit = 1 and -, or RCdown bit = 1 and +
+;i.e., RCup & sign OR RCdown & not sign
+.erre	RCchop eq RCup + RCdown		;Always return max value
+.erre	RCnear eq 0			;Never return max value
+	sar	ch,7			;Expand sign through whole byte
+.erre	(RCdown and bSign) eq 0		;Don't want to change real sign
+	xor	ch,RCdown		;Flip sign for RCdown bit
+	and	ah,ch			;RCup & sign  OR  RCdown & not sign
+	jz	ScaleToInfinity		;Save Infinity
+;Get max value.  NOTE(review): ecx was last written by the rol (or mov cl,2) in ScaleDouble/BigScale -- confirm it holds the result's sign/exponent/tag here
+	sub	ecx,1 shl 16		;Drop exponent by 1
+	xor	esi,esi
+	dec	esi			;esi == -1
+	mov	ebx,esi			;Mantissa ebx:esi = all ones
+SaveScaleMax:				;Common store: ebx:esi = mantissa, ecx = ExpSgn, edi = ST(0)
+	mov	EMSEG:[edi].lManLo,esi
+	mov	EMSEG:[edi].lManHi,ebx
+	mov	EMSEG:[edi].ExpSgn,ecx
+	ret
+
+UnmaskedScaleOvfl:
+	sub	eax,UnderBias		;Unmasked response
+	cmp	eax,IexpMax-IexpBias	;Within normal range now?
+	jl	SaveScaledExp		;Use exponent biased by 24K
+ScaleToInfinity:
+	mov	ebx,1 shl 31		;Mantissa = 80000000:00000000
+	xor	esi,esi
+	mov	ecx,(IexpMax-IexpBias+TexpBias) shl 16 + bTAG_INF
+	mov	ch,EMSEG:[edi].bSgn		;Give it same sign
+	jmp	SaveScaleMax		;Use infinity
+
+ScaleUnderflow:
+;eax = exponent that's too small
+	test	EMSEG:[CWmask],Underflow	;Is exception unmasked?
+	jz	ScaleSetUnder
+	cmp	eax,-32768		;Does exponent fit in 16 bits?
+	jg	@F
+	mov	ax,-32768		;Clamp to the most negative 16-bit value
+@@:
+;Set up for denormalizer
+	mov	ebx,EMSEG:[edi].lManHi
+	mov	esi,EMSEG:[edi].lManLo
+	shrd	ecx,eax,16		;Move exponent to high end of ecx
+	mov	ch,EMSEG:[edi].bSgn		;Keep sign
+	xor	eax,eax			;No sticky bits
+	mov	EMSEG:[Result],edi	;Result pointer = ST(0)
+	jmp	Denormalize		;In emround.asm
+
+ScaleSetUnder:
+;Underflow exception not masked.  Adjust exponent and try again.
+	mov	EMSEG:[CURerr],Underflow
+	add	eax,UnderBias		;Unmasked response
+	cmp	eax,IexpMin-IexpBias	;Within normal range now?
+	jg	SaveScaledExp		;Use exponent biased by 24K
+	mov	EMSEG:[CURerr],Underflow	;Same value as already stored at ScaleSetUnder
+ScaleToZero:				;Also entered from ScaleSpclSource (scaling by -infinity)
+	mov	ecx,bTAG_ZERO		;Exponent 0, tag = zero
+	mov	ch,EMSEG:[edi].bSgn		;Give it same sign
+	xor	ebx,ebx
+	mov	esi,ebx
+	jmp	SaveScaleMax		;Set to zero
+
+;*** ST(0) (dest) is special, ST(1) (source) is not
+ScaleSpclDest:
+	mov	al,EMSEG:[edi].bTag		;Pick up tag
+	cmp	al,bTAG_INF		;Scaling infinity?
+	jz	ScaleRet		;No change if so
+	jmp	SpclDest		;In emarith.asm
+
+ScaleRet:
+	ret
+
+;*** ST(1) (source) is special, ST(0) (dest) is not; cl = source tag, ch = source sign
+ScaleSpclSource:
+	cmp	cl,bTAG_INF		;Scaling by infinity?
+	jnz	SpclSource		;in emarith.asm
+	or	ch,ch			;Scaling by -infinity?
+	js	ScaleToZero
+	cmp	EMSEG:[edi].bTag,bTAG_ZERO	;Zero scaled by +infinity?
+	jnz	ScaleToInfinity
+	jmp	ReturnIndefinite	;Invalid operation
+
+;*** Both operands are infinity; ch = sign of the scale factor
+ScaleTwoInf:
+	or	ch,ch			;Scaling by +infinity?
+	jns	ScaleRet		;All done then
+;Scaling infinity by -infinity
+	jmp	ReturnIndefinite	;Invalid operation
diff --git a/private/ntos/dll/i386/ldrthunk.asm b/private/ntos/dll/i386/ldrthunk.asm
new file mode 100644
index 000000000..617c0ef00
--- /dev/null
+++ b/private/ntos/dll/i386/ldrthunk.asm
@@ -0,0 +1,89 @@
+        title   "LdrInitializeThunk"
+;++
+;
+;  Copyright (c) 1989  Microsoft Corporation
+;
+;  Module Name:
+;
+;     ldrthunk.asm
+;
+;  Abstract:
+;
+;     This module implements the thunk for the LdrpInitialize APC routine.
+;
+;  Author:
+;
+;     Steven R. Wood (stevewo) 27-Apr-1990
+;
+;  Environment:
+;
+;     Any mode.
+;
+;  Revision History:
+;
+;--
+
+.386p
+        .xlist
+include ks386.inc
+include callconv.inc                    ; calling convention macros
+        .list
+
+        EXTRNP  _LdrpInitialize,3
+
+_TEXT   SEGMENT DWORD PUBLIC 'CODE'
+        ASSUME  DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
+
+        page , 132
+
+;++
+;
+; VOID
+; LdrInitializeThunk(
+;    IN PVOID NormalContext,
+;    IN PVOID SystemArgument1,
+;    IN PVOID SystemArgument2
+;    )
+;
+; Routine Description:
+;
+;    This function computes a pointer to the context record on the stack
+;    and jumps to the LdrpInitialize function with that pointer as its
+;    parameter.
+;
+; Arguments:
+;
+;    NormalContext - User Mode APC context parameter (ignored).
+;
+;    SystemArgument1 - User Mode APC system argument 1 (ignored).
+;
+;    SystemArgument2 - User Mode APC system argument 2 (ignored).
+;
+; Return Value:
+;
+;    None.
+;
+;--
+
+cPublicProc _LdrInitializeThunk , 4
+; Stack-relative operand names.  Context is the slot directly above the three documented arguments (presumably why 4 is declared above -- confirm).
+NormalContext   equ [esp + 4]
+SystemArgument1 equ [esp + 8]
+SystemArgument2 equ [esp + 12]
+Context         equ [esp + 16]
+; No frame is built and no call is made: the first argument slot is rewritten in place, then control is transferred with a jump.
+        lea     eax,Context             ; Calculate address of context record
+        mov     NormalContext,eax       ; Overwrite the first argument slot with that address
+if DEVL
+        xor     ebp,ebp                 ; Mark end of frame pointer list
+endif
+IFDEF STD_CALL
+        jmp     _LdrpInitialize@12      ; Tail jump: LdrpInitialize sees our return address and argument slots
+ELSE
+        jmp     _LdrpInitialize         ; Tail jump: LdrpInitialize sees our return address and argument slots
+ENDIF
+
+stdENDP _LdrInitializeThunk
+
+_TEXT   ends
+        end
diff --git a/private/ntos/dll/i386/npxemltr.def b/private/ntos/dll/i386/npxemltr.def
new file mode 100644
index 000000000..0ddc11dbc
--- /dev/null
+++ b/private/ntos/dll/i386/npxemltr.def
@@ -0,0 +1,17 @@
+;      SCCSID = @(#)npxemltr.def	13.2 89/11/15
+;
+;	 IBM/Microsoft Confidential
+;
+;	 Copyright (c) IBM Corporation 1987, 1989
+;	 Copyright (c) Microsoft Corporation 1987, 1989
+;
+;	 All Rights Reserved
+LIBRARY npxemltr
+
+DESCRIPTION 'OS/2 V2.0 80387 emulator.  Version 6.00.001'
+
+DATA  NONE
+
+EXPORTS
+    NPXNPHandler
+    NPXFPDataSize
diff --git a/private/ntos/dll/i386/npxnp.c b/private/ntos/dll/i386/npxnp.c
new file mode 100644
index 000000000..745709033
--- /dev/null
+++ b/private/ntos/dll/i386/npxnp.c
@@ -0,0 +1,204 @@
+/*++
+
+Copyright (c) 1989  Microsoft Corporation
+
+Module Name:
+
+    npxnp.c
+
+Abstract:
+
+    This module contains support for non-Flat mode NPX faults when
+    the application has its CR0_EM bit clear.
+
+Author:
+
+    Ken Reneris (kenr) 8-Dec-1994
+
+Environment:
+
+    User Mode only
+
+Revision History:
+
+--*/
+
+
+#include "csrdll.h"
+
+static UCHAR MOD16[] = { 0, 1, 2, 0 };
+static UCHAR MOD32[] = { 0, 1, 4, 0 };
+
+UCHAR                                   // byte fetched from Context->SegCs : Context->Eip
+NpxNpReadCSEip (
+    IN PCONTEXT Context                 // SegCs/Eip locate the byte; Eip is incremented by one as a side effect
+    )
+#pragma warning(disable:4035)           // there is no C return statement: the _asm block leaves the result in AL
+{
+    _asm {
+        push    es
+        mov     ecx, Context
+        mov     eax, [ecx] CONTEXT.SegCs
+        mov     es, ax
+        mov     eax, [ecx] CONTEXT.Eip
+        inc     dword ptr [ecx] CONTEXT.Eip     ; Advance EIP
+        mov     al, es:[eax]
+        pop     es
+    }
+}
+#pragma warning(default:4035)
+
+
+VOID
+NpxNpSkipInstruction (
+    IN PCONTEXT Context
+    )
+/*++
+
+Routine Description:
+
+    Called when the system has no installed NPX support but the thread
+    has cleared its EM bit in CR0.
+
+    Advances Context->Eip past the NPX instruction found at
+    Context->SegCs:Context->Eip: any prefix bytes, the opcode byte, the
+    ModR/M byte and whatever SIB/displacement bytes the addressing form
+    implies.
+
+Environment:
+
+    16:16 mode
+
+Arguments:
+
+    Context - context record of the faulting thread.  Only SegCs is read
+              and only Eip is modified.
+
+Return Value:
+
+    None.
+
+--*/
+{
+    UCHAR       Opcode, ModRm, Sib;
+    UCHAR       Mod, Rm;
+    UCHAR       Address32Bits;
+    ULONG       CallerCs;
+
+    //
+    // Start from the caller's default address size: 16 bit unless the
+    // access-rights word returned by LAR for the caller's CS has bit 22 set.
+    //
+
+    Address32Bits = 0;
+    CallerCs = Context->SegCs;
+    _asm {
+        mov     eax, CallerCs
+        lar     eax, eax
+        test    eax, 400000h
+        jz      short IsDefault16Bit
+
+        mov     Address32Bits, 1
+
+IsDefault16Bit:
+    }
+
+    //
+    // The reads below are not guarded by try-except: we are not on the
+    // correct stack, so it would be of no use.  A fault is possible when
+    // an NPX instruction starts near the end of a selector and finishes
+    // beyond it, but that would kill the app anyway.
+    //
+
+    //
+    // Consume prefix bytes.  On exit Opcode holds the first non-prefix byte.
+    //
+
+    for (;;) {
+        Opcode = NpxNpReadCSEip(Context);
+
+        if (Opcode == 0x67) {
+            // address size override - flips the effective address size
+            Address32Bits ^= 1;
+            continue;
+        }
+
+        if (Opcode == 0x2e ||           // cs override
+            Opcode == 0x36 ||           // ss override
+            Opcode == 0x3e ||           // ds override
+            Opcode == 0x26 ||           // es override
+            Opcode == 0x64 ||           // fs override
+            Opcode == 0x65 ||           // gs override
+            Opcode == 0x66) {           // operand size override
+            continue;
+        }
+
+        break;
+    }
+
+    //
+    // FWait is a single byte opcode - nothing more to skip
+    //
+
+    if (Opcode == 0x9b) {
+        return;
+    }
+
+    //
+    // Anything outside D8..DF is not an ESC instruction
+    //
+
+    if (Opcode < 0xD8 || Opcode > 0xDF) {
+#if DBG
+        DbgPrint ("P5_FPU_PATCH: 16: Not NPX ESC instruction\n");
+#endif
+        return;
+    }
+
+    //
+    // Fetch the ModR/M byte.  Values above BF are register forms and
+    // carry no addressing bytes.
+    //
+
+    ModRm = NpxNpReadCSEip(Context);
+    if (ModRm > 0xbf) {
+        return;
+    }
+
+    Mod = ModRm >> 6;
+    Rm  = ModRm & 0x7;
+
+    if (!Address32Bits) {
+
+        //
+        // 16 bit addressing: displacement size from Mod, plus the
+        // disp 16 only form (Mod 0, r/m 6)
+        //
+
+        Context->Eip += MOD16 [Mod];
+        if (Mod == 0  &&  Rm == 6) {
+            Context->Eip += 2;
+        }
+        return;
+    }
+
+    //
+    // 32 bit addressing: displacement size from Mod, plus the
+    // disp 32 only form (Mod 0, r/m 5)
+    //
+
+    Context->Eip += MOD32 [Mod];
+    if (Mod == 0  &&  Rm == 5) {
+        Context->Eip += 4;
+    }
+
+    //
+    // r/m 4 means a SIB byte follows; with Mod 0 a SIB base of 5 adds
+    // a disp 32
+    //
+
+    if (Rm == 4) {
+        Sib = NpxNpReadCSEip(Context);
+        if (Mod == 0  &&  (Sib & 7) == 5) {
+            Context->Eip += 4;
+        }
+    }
+}
diff --git a/private/ntos/dll/i386/nt386npx.inc b/private/ntos/dll/i386/nt386npx.inc
new file mode 100644
index 000000000..b1095bc75
--- /dev/null
+++ b/private/ntos/dll/i386/nt386npx.inc
@@ -0,0 +1,40 @@
+NPX_CONTEXT_FULL		EQU 00001000Fh	; Full context
+;Emulator-local names for context-record fields.  The Cs*/Fp* symbols are defined elsewhere (presumably ks386.inc -- confirm).
+ContextFlags equ CsContextFlags
+ctx_env      equ CsFloatSave
+ctx_stack    equ CsFloatSave+FpRegisterArea  ;need to change this puppy
+ctx_Cr0NpxState equ CsFloatSave+FpCr0NpxState
+ctx_SegGs    equ CsSegGs
+ctx_SegFs    equ CsSegFs
+ctx_SegEs    equ CsSegEs
+ctx_SegDs    equ CsSegDs
+ctx_RegEdi   equ CsEdi
+ctx_RegEsi   equ CsEsi
+ctx_RegEbp   equ CsEbp
+ctx_RegEbx   equ CsEbx
+ctx_RegEdx   equ CsEdx
+ctx_RegEcx   equ CsEcx
+ctx_RegEax   equ CsEax
+ctx_RegEip   equ CsEip
+ctx_SegCs    equ CsSegCs
+ctx_EFlags   equ CsEflags
+ctx_RegEsp   equ CsEsp
+ctx_SegSs    equ CsSegSs
+;XCPT_* floating-point exception names, each equated to the STATUS_FLOAT_* code of the same meaning.
+XCPT_FLOAT_INVALID_OPERATION    EQU STATUS_FLOAT_INVALID_OPERATION
+XCPT_FLOAT_DENORMAL_OPERAND     EQU STATUS_FLOAT_DENORMAL_OPERAND
+XCPT_FLOAT_DIVIDE_BY_ZERO       EQU STATUS_FLOAT_DIVIDE_BY_ZERO
+XCPT_FLOAT_OVERFLOW             EQU STATUS_FLOAT_OVERFLOW
+XCPT_FLOAT_UNDERFLOW            EQU STATUS_FLOAT_UNDERFLOW
+XCPT_FLOAT_INEXACT_RESULT       EQU STATUS_FLOAT_INEXACT_RESULT
+XCPT_FLOAT_STACK_CHECK          EQU STATUS_FLOAT_STACK_CHECK
+;Emulator-local names for exception-record fields (Er* symbols are defined elsewhere).
+ExceptionNum                equ ErExceptionCode
+FHandlerFlags               equ ErExceptionFlags
+NestedExceptionReportRecord equ ErExceptionRecord
+ExceptionAddress            equ ErExceptionAddress
+CParameters                 equ ErNumberParameters
+
+;Two-state busy/idle flag values (their users are not visible in this file).
+Em87Busy    equ     1
+Em87Idle    equ     0
new file mode 100644
index 000000000..3c1c425f3
--- /dev/null
+++ b/private/ntos/dll/i386/ntnapntr.asm
@@ -0,0 +1,449 @@
+;++
+;
+; Copyright (c) 1991  Microsoft Corporation
+;
+; Module Name:
+;
+;    ntnapntr.asm
+;
+; Abstract:
+;
+;    This module implements the system service dispatch procedure.
+;    It also creates a "profile" of each service by counting and
+;    timing calls.
+;
+; Author:
+;
+;    Russ Blake (russbl) 22-Apr-1991
+;
+; Environment:
+;
+;    User or kernel mode.
+;
+; Revision History:
+;
+;--
+
+include ks386.inc
+include callconv.inc                    ; calling convention macros
+include mac386.inc
+include ntnap.inc
+
+.386
+
+EXTRN           _NapDllInit:near
+EXTRN           _NapRecordInfo:near
+; Locals addressed off ebp by _NapCalibrate and _NapProfileDispatch: NapStart and NapEnd are 8 bytes apart, NapServiceNum is 4 bytes below NapEnd.
+NapStart        equ     [ebp - 08h]
+NapEnd          equ     [ebp - 010h]
+NapServiceNum   equ     [ebp - 014h]
+; Bytes those routines reserve below ebp (20 = 014h, the deepest offset used above).
+NapLocalSize    equ     4 * 5
+; Service number that _NapCalibrate stores in NapServiceNum.
+NapCalSrvNum    equ     0FFFFFFFFh
+
+;++
+;
+; Routine Description:
+;
+;    This routine is called to save registers during API profiling.
+;    The objective is to preserve the caller's environment
+;    while timing takes place and, once, while dll initialization
+;    takes place.  This routine saves registers on the stack to
+;    permit recursive calls.
+;
+;    There should be a matching call to NapRestoreRegs to restore
+;    the registers.
+;
+; Arguments:
+;
+;    All registers.
+;
+; Return Value:
+;
+;    None.  All registers are preserved on the stack.
+;
+;--
+
+
+.386p
+
+_TEXT   SEGMENT DWORD USE32 PUBLIC 'CODE'
+        ASSUME CS:FLAT, DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
+
+cPublicProc _NapSaveRegs
+
+    ;
+    ; Stack layout on entry to this routine:
+    ;
+    ;    ---+----+----+----+----+----
+    ;       |   Return Address  |
+    ;    ---+----+----+----+----+----
+    ;        esp+                esp+
+    ;        0                   4
+    ;
+    ;
+    ; -> popping makes esp go ->
+    ; <- pushing makes esp go <-
+    ;
+
+        push    ebp             ; Caller's ebp; reloaded from this slot below
+        mov     ebp,esp         ; Remember where we are during this stuff
+                                ; ebp = Original esp - 4
+        push    eax             ; General registers
+        push    ebx
+        push    ecx
+        push    edx
+        push    esi
+        push    edi
+        pushfd                  ; Flags
+        push    ds              ; Segment registers (the diagram below
+        push    es              ; shows a dword slot for each)
+        push    ss
+        push    fs
+        push    gs
+
+        mov     eax,[ebp+4]     ; Grab Return Address (eax was saved above)
+        push    eax             ; Put a copy of the Return Address on top
+        mov     ebp,[ebp+0]     ; Restore original ebp
+
+    ;
+    ; Stack layout just before executing RET (offsets in hex):
+    ; RET pops only the copy at esp+0; eax returns holding that address.
+    ;
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+
+    ;    |  Return  Address  |        g s        |        f s        |
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+
+    ;     esp+
+    ;     0
+    ;
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+
+    ;    |        s s        |        e s        |        d s        |
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+
+    ;     esp+
+    ;     c
+    ;
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+
+    ;    |      eflags       |        edi        |        esi        |
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+
+    ;    |        edx        |        ecx        |        ebx        |
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+----
+    ;    |        eax        |   original  ebp   |   Return Address  |
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+----
+    ;                         was
+    ;                         ebp+
+    ;                         0
+    ;
+
+        stdRET    _NapSaveRegs
+
+stdENDP _NapSaveRegs
+
+cPublicProc _NapRestoreRegs,,near
+
+    ;
+    ; Stack layout on entry to this routine (offsets in hex):
+    ;
+    ;
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+
+    ;    |  Return  Address  |        g s        |        f s        |
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+
+    ;     esp+
+    ;     0
+    ;
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+
+    ;    |        s s        |        e s        |        d s        |
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+
+    ;     esp+
+    ;     c
+    ;
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+
+    ;    |      eflags       |        edi        |        esi        |
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+
+    ;     esp+
+    ;     18
+    ;
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+
+    ;    |        edx        |        ecx        |        ebx        |
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+
+    ;     esp+
+    ;     24
+    ;
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+----
+    ;    |        eax        |   original  ebp   |   Return Address  |
+    ;    +----+----+----+----+----+----+----+----+----+----+----+----+----
+    ;     esp+                esp+                esp+
+    ;     30                  34                  38
+    ;
+        pop     eax             ; Get this call's Return Address
+        push    ebp             ; Save a temporary copy of the current BP
+        mov     ebp,esp         ; BP = SP as on entry (pop and push cancel)
+
+        mov     [ebp+038h],eax  ; Put Return Address in the deepest slot
+        pop     eax             ; Get the BP copy saved just above
+        mov     [ebp+034h],eax  ; Put it in the original BP place, so the
+                                ; final pop ebp reloads the current BP
+        pop     gs              ; Unwind the slots in diagram order
+        pop     fs
+        pop     ss
+        pop     es
+        pop     ds
+        popfd
+        pop     edi
+        pop     esi
+        pop     edx
+        pop     ecx
+        pop     ebx
+        pop     eax
+        pop     ebp
+                                ; RET uses the address stored at +038h
+        stdRET    _NapRestoreRegs
+
+stdENDP _NapRestoreRegs
+
+
+;++
+;
+; Routine Description:
+;
+;    This routine is called by the initialization code in the
+;    Nt Api Profiler to calibrate the cost of profiling.
+;    It simulates the overhead of a profiled call to a system
+;    service, but carefully avoids doing any of the normal
+;    work associated with such a call.
+;
+;    NOTE:  This routine's code should exactly parallel that of
+;           _NapProfileDispatch, except for any operation normally
+;           (i.e., when not profiling) executed to call a system service.
+;           This amounts to an "int 2Eh" in the middle of the routine.
+;
+; Arguments:
+;
+;    eax - Service Number of the routine being called.  Must be -1
+;          for all calls to this routine.  The routine
+;          _NapRecordInfo notes this value and discards
+;          the call.
+;
+;    edx - Pointer to the parameters to the Service; ignored by
+;          this routine.
+;
+; Return Value:
+;
+;    None.
+;
+;--
+
+
+cPublicProc _NapCalibrate   , ,near
+
+; Same steps as _NapProfileDispatch, minus the system service call itself.
+        push    ebp                     ; Locals: the value of
+        mov     ebp, esp                ; the perf counter before and
+        sub     esp, NapLocalSize       ; after the API call
+
+        mov     eax, NapCalSrvNum       ; special routine number
+        mov     NapServiceNum, eax      ; is used for calibration
+                                        ; can't be passed in eax from
+                                        ; C routine, so load it here
+                                        ; and save it as the service number
+
+
+        stdCall    _NapSaveRegs           ; save register state so call to
+                                        ; get counter does not destroy them
+
+        stdCall    _NapDllInit            ; initialize dll if necessary
+
+; Now call NtQueryPerformanceCounter to get the starting count;
+; store it in the local NapStart.
+
+        push    0                       ; don't need frequency: pass 0
+        lea     eax, NapStart           ; (eax) = pointer to counter
+        push    eax                     ; pass pointer to counter
+        mov     eax, NapCounterServiceNumber ; (eax) = service to invoke
+        lea     edx, [esp]              ; (edx) -> arguments
+        int     2Eh                     ; get the current counter value
+        add     esp, 08h                ; remove counter parameters
+
+; Restore caller's registers
+
+        stdCall   _NapRestoreRegs
+
+; We're just calibrating the overhead, so we don't call the system
+; service here.
+
+; Save registers so we can complete the profile accounting.
+
+        stdCall   _NapSaveRegs
+
+; Now get the ending counter.
+
+        push    0                       ; don't need frequency: pass 0
+        lea     eax, NapEnd             ; (eax) = pointer to counter
+        push    eax                     ; pass pointer to counter
+        mov     eax, NapCounterServiceNumber ; (eax) = service to invoke
+        lea     edx, [esp]              ; (edx) -> arguments
+        int     2Eh                     ; get the current counter value
+        add     esp, 08h                ; remove counter parameters
+
+; Compute the time for this call and increment the number of calls.
+
+        lea     eax, NapEnd             ; (eax) -> NapEnd; NapStart is the
+                                        ; 8 bytes right above it
+        stdCall   _NapRecordInfo, <NapServiceNum, eax>
+
+        stdCall   _NapRestoreRegs
+                                        ; restore caller's registers
+        leave                           ; we needed this for pseudo locals
+        stdRET    _NapCalibrate
+stdENDP _NapCalibrate
+
+
+;++
+;
+; Routine Description:
+;
+;    This routine is called by the USRSTUBS_ENTRY1 MACRO in the
+;    services.prf to carry out profiling on an Nt system api call.
+;
+; Arguments:
+;
+;    eax - Service Number of the routine being called.  This number
+;          is assigned by genprof.c from the table in services.tab.
+;
+;    edx - Pointer to the parameters to the Service.
+;
+; Return Value:
+;
+;    Whatever the system service returns.
+;
+;--
+
+
+
+cPublicProc _NapProfileDispatch , ,near
+
+        push    ebp                     ; Locals: the value of
+        mov     ebp, esp                ; the perf counter before and
+        sub     esp, NapLocalSize       ; after the API call
+
+        mov     NapServiceNum, eax
+                                        ; save the service routine number
+
+        stdCall   _NapSaveRegs            ; save register state so call to
+                                        ; get counter does not destroy them
+
+        stdCall   _NapDllInit             ; initialize dll if necessary
+
+; Now call NtQueryPerformanceCounter to get the starting count;
+; store it in the local NapStart.
+
+        push    0                       ; don't need frequency: pass 0
+        lea     eax, NapStart           ; (eax) = pointer to counter
+        push    eax                     ; pass pointer to counter
+        mov     eax, NapCounterServiceNumber ; (eax) = service to invoke
+        lea     edx, [esp]              ; (edx) -> arguments
+        int     2Eh                     ; get the current counter value
+        add     esp, 08h                ; remove counter parameters
+
+; Restore caller's registers (eax and edx are again as saved above)
+
+        stdCall   _NapRestoreRegs
+
+        INT     2Eh                     ; invoke system service
+
+; Save registers (including the service's eax) to finish the accounting.
+
+        stdCall   _NapSaveRegs
+
+; Now get the ending counter.
+
+        push    0                       ; don't need frequency: pass 0
+        lea     eax, NapEnd             ; (eax) = pointer to counter
+        push    eax                     ; pass pointer to counter
+        mov     eax, NapCounterServiceNumber ; (eax) = service to invoke
+        lea     edx, [esp]              ; (edx) -> arguments
+        int     2Eh                     ; get the current counter value
+        add     esp, 08h                ; remove counter parameters
+
+; Compute the time for this call and increment the number of calls.
+
+        lea     eax, NapEnd             ; (eax) -> NapEnd; NapStart is the
+                                        ; 8 bytes right above it
+        stdCall   _NapRecordInfo, <NapServiceNum, eax>
+
+        stdCall   _NapRestoreRegs
+                                        ; restore caller's registers
+        leave                           ; we needed this for pseudo locals
+        stdRET    _NapProfileDispatch
+stdENDP _NapProfileDispatch
+
+;++
+;
+; Routine Description:
+;
+;    This routine is called to get the spin lock associated with
+;    a particular api.  It prevents the simultaneous update
+;    from multiple threads in this or other processors of the
+;    profiling data for the api.
+;
+; Arguments:
+;
+;    SpinLockAddr - address of the spin lock within the data
+;                   for the api being updated.
+;
+; Return Value:
+;
+;    None.
+;
+;--
+
+
+cPublicProc _NapAcquireSpinLock      , ,near
+; NOTE(review): the operand after the name is blank (presumably the argument count) yet one stack argument is read -- confirm who removes it.
+        push    eax                     ; eax is scratch here; restored below
+        mov     eax, [esp+8]            ; get address of lock (past saved eax and return address)
+WaitForLock:
+        lock bts dword ptr [eax], 0     ; atomically set bit 0; CF = its old value
+        jc      SHORT WaitForLock       ; bit was already set (lock owned): retry
+        pop     eax                     ; restore caller's eax
+
+        stdRET    _NapAcquireSpinLock
+
+stdENDP _NapAcquireSpinLock
+
+
+;++
+;
+; Routine Description:
+;
+;    This routine is called to release the spin lock associated with
+;    a particular api.
+;
+; Arguments:
+;
+;    SpinLockAddr - address of the spin lock within the data
+;                   for the api being updated.
+;
+; Return Value:
+;
+;    None.
+;
+;--
+
+
+cPublicProc _NapReleaseSpinLock     , ,near
+; Clears bit 0 of the dword whose address is the stack argument; eax is preserved.
+        push    eax                     ; eax is scratch here; restored below
+        mov     eax, [esp+8]            ; get address of lock (past saved eax and return address)
+        lock btr dword ptr [eax], 0     ; release spinlock: atomically clear bit 0
+        pop     eax                     ; restore caller's eax
+        stdRET    _NapReleaseSpinLock
+
+stdENDP _NapReleaseSpinLock
+
+
+_TEXT           ends
+
+                end
diff --git a/private/ntos/dll/i386/sources b/private/ntos/dll/i386/sources
new file mode 100644
index 000000000..5e5cd357a
--- /dev/null
+++ b/private/ntos/dll/i386/sources
@@ -0,0 +1,14 @@
+386_DLLLIBOBJECTS=$(BASEDIR)\public\sdk\lib\i386\exsup.lib
+# i386-specific source files; presumably merged into the build by the including sources file -- confirm.
+i386_SOURCES=..\i386\critsect.asm \
+             ..\i386\ldrthunk.asm \
+             ..\i386\emulator.asm \
+             ..\i386\npxnp.c      \
+             i386\usrstubs.asm
+# Defines NT386 (presumably on the assembler command line -- confirm).
+ASM_DEFINES=-DNT386
+# Appends emccall.asm and except32.inc to whatever CONDITIONAL_INCLUDES already holds.
+CONDITIONAL_INCLUDES=\
+	$(CONDITIONAL_INCLUDES) \
+	emccall.asm\
+	except32.inc