diff options
Diffstat (limited to 'private/ntos/dll/i386')
32 files changed, 11182 insertions, 0 deletions
diff --git a/private/ntos/dll/i386/critsect.asm b/private/ntos/dll/i386/critsect.asm new file mode 100644 index 000000000..4b1f87cb4 --- /dev/null +++ b/private/ntos/dll/i386/critsect.asm @@ -0,0 +1,285 @@ + title "Critical Section Support" +;++ +; +; Copyright (c) 1991 Microsoft Corporation +; +; Module Name: +; +; critsect.asm +; +; Abstract: +; +; This module implements functions to support user mode critical sections. +; +; Author: +; +; Bryan M. Willman (bryanwi) 2-Oct-91 +; +; Environment: +; +; Any mode. +; +; Revision History: +; +; +; +; WARNING!!!!!!!!!! This code is duplicated in +; windows\base\client\i386\critsect.asm +; +; Some day we should put it in a .inc file that both include. +; +;-- + +.486p + .xlist +include ks386.inc +include callconv.inc ; calling convention macros + .list + +_DATA SEGMENT DWORD PUBLIC 'DATA' + public _LdrpLockPrefixTable +_LdrpLockPrefixTable label dword ; addresses of the lock-prefixed instructions in this module + dd offset FLAT:Lock1 ; lock inc in RtlEnterCriticalSection + dd offset FLAT:Lock2 ; lock dec in RtlLeaveCriticalSection (final release) + dd offset FLAT:Lock3 ; lock dec in RtlLeaveCriticalSection (recursive leave) + dd offset FLAT:Lock4 ; lock cmpxchg in RtlTryEnterCriticalSection + dd offset FLAT:Lock5 ; lock inc in RtlTryEnterCriticalSection (recursive enter) + dd 0 ; trailing zero entry (presumably the table terminator - confirm against the consumer) +_DATA ENDS + +_TEXT SEGMENT PARA PUBLIC 'CODE' + ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING + + EXTRNP _RtlpWaitForCriticalSection,1 + EXTRNP _RtlpUnWaitCriticalSection,1 +if DEVL + EXTRNP _RtlpNotOwnerCriticalSection,1 ; declared under DEVL, but only called under 'if DBG' in RtlLeaveCriticalSection +endif + +CriticalSection equ [esp + 4] ; the single argument, addressed relative to the entry esp + + page , 132 + subttl "RtlEnterCriticalSection" + +;++ +; +; NTSTATUS +; RtlEnterCriticalSection( +; IN PRTL_CRITICAL_SECTION CriticalSection +; ) +; +; Routine Description: +; +; This function enters a critical section. +; +; Arguments: +; +; CriticalSection - supplies a pointer to a critical section. +; +; Return Value: +; +; STATUS_SUCCESS or raises an exception if an error occurred. 
+; +;-- + + align 16 +cPublicProc _RtlEnterCriticalSection,1 +cPublicFpo 1,0 + + mov ecx,fs:PcTeb ; (ecx) == NtCurrentTeb() + mov edx,CriticalSection ; (edx) -> CriticalSection + mov eax,TbClientId+4[ecx] ; (eax) == NtCurrentTeb()->ClientId.UniqueThread + +if DBG + cmp dword ptr TbSpare1[ecx],0 ; checked build: break into the debugger if TEB Spare1 is nonzero + jz @f + int 3 +@@: +endif ; DBG +Lock1: + lock inc dword ptr CsLockCount[edx] ; interlocked inc of CriticalSection->LockCount + jnz @F ; result nonzero: section is already owned, go see by whom + +setowner: + mov CsOwningThread[edx],eax ; this thread becomes the owner + mov dword ptr CsRecursionCount[edx],1 ; first (outermost) acquisition + +if DBG + inc dword ptr TbCountOfOwnedCriticalSections[ecx] + push edi ; edi is only borrowed for the debug-info pointer + mov edi,CsDebugInfo[edx] + inc dword ptr CsEntryCount[edi] + pop edi +endif ; DBG + + xor eax,eax ; return STATUS_SUCCESS + stdRET _RtlEnterCriticalSection + + align 16 +@@: + cmp CsOwningThread[edx],eax ; is the owner this thread? + jne @F ; no - owned by someone else, go wait + inc dword ptr CsRecursionCount[edx] ; yes - recursive entry +if DBG + mov eax,CsDebugInfo[edx] ; eax is free here, it is zeroed below + inc dword ptr CsEntryCount[eax] +endif ; DBG + xor eax,eax ; return STATUS_SUCCESS + stdRET _RtlEnterCriticalSection + +@@: + stdCall _RtlpWaitForCriticalSection, <edx> + mov ecx,fs:PcTeb ; (ecx) == NtCurrentTeb() (reloaded after the call) + mov eax,TbClientId+4[ecx] ; (eax) == NtCurrentTeb()->ClientId.UniqueThread + mov edx,CriticalSection ; (edx) -> CriticalSection (reloaded after the call) + jmp setowner ; wait finished: take ownership + +stdENDP _RtlEnterCriticalSection + + page , 132 + subttl "RtlLeaveCriticalSection" +;++ +; +; NTSTATUS +; RtlLeaveCriticalSection( +; IN PRTL_CRITICAL_SECTION CriticalSection +; ) +; +; Routine Description: +; +; This function leaves a critical section. +; +; Arguments: +; +; CriticalSection - supplies a pointer to a critical section. +; +; Return Value: +; +; STATUS_SUCCESS or raises an exception if an error occurred. 
+; +;-- + + align 16 +cPublicProc _RtlLeaveCriticalSection,1 +cPublicFpo 1,0 + + mov edx,CriticalSection ; (edx) -> CriticalSection +if DBG + mov ecx,fs:PcTeb ; (ecx) == NtCurrentTeb() + mov eax,TbClientId+4[ecx] ; (eax) == NtCurrentTeb()->ClientId.UniqueThread + cmp eax,CsOwningThread[edx] ; checked build: caller must be the owner + je @F + stdCall _RtlpNotOwnerCriticalSection, <edx> + mov eax,STATUS_INVALID_OWNER + stdRET _RtlLeaveCriticalSection +@@: +endif ; DBG + xor eax,eax ; Assume STATUS_SUCCESS + dec dword ptr CsRecursionCount[edx] + jnz leave_recurs ; skip if only leaving recursion + + mov CsOwningThread[edx],eax ; clear owning thread id + +if DBG + mov ecx,fs:PcTeb ; (ecx) == NtCurrentTeb() + dec dword ptr TbCountOfOwnedCriticalSections[ecx] +endif ; DBG + +Lock2: + lock dec dword ptr CsLockCount[edx] ; interlocked dec of + ; ... CriticalSection->LockCount + jge @F ; result >= 0: go call RtlpUnWaitCriticalSection (presumably to release a waiter) + stdRET _RtlLeaveCriticalSection + +@@: + stdCall _RtlpUnWaitCriticalSection, <edx> + xor eax,eax ; return STATUS_SUCCESS + stdRET _RtlLeaveCriticalSection + + align 16 +leave_recurs: +Lock3: + lock dec dword ptr CsLockCount[edx] ; interlocked dec of + ; ... CriticalSection->LockCount + stdRET _RtlLeaveCriticalSection ; eax is still 0 (STATUS_SUCCESS) from above + +stdENDP _RtlLeaveCriticalSection + + page ,132 + subttl "RtlTryEnterCriticalSection" +;++ +; +; BOOL +; RtlTryEnterCriticalSection( +; IN PRTL_CRITICAL_SECTION CriticalSection +; ) +; +; Routine Description: +; +; This function attempts to enter a critical section without blocking. +; +; Arguments: +; +; CriticalSection - Supplies a pointer to a critical section. +; +; Return Value: +; +; If the critical section was successfully entered, then a value of TRUE +; is returned as the function value. Otherwise, a value of FALSE is returned. 
+; +;-- + +CriticalSection equ [esp + 4] + +cPublicProc _RtlTryEnterCriticalSection,1 +cPublicFpo 1,0 + + mov ecx,CriticalSection ; (ecx) -> CriticalSection + mov eax, -1 ; set value to compare against (LockCount of an unowned section) + mov edx, 0 ; value to store if the compare succeeds +Lock4: + lock cmpxchg dword ptr CsLockCount[ecx],edx ; Attempt to acquire critsect + jnz short tec10 ; if nz, critsect already owned + + mov eax,fs:TbClientId+4 ; (eax) == NtCurrentTeb()->ClientId.UniqueThread + mov CsOwningThread[ecx],eax ; this thread becomes the owner + mov dword ptr CsRecursionCount[ecx],1 ; first (outermost) acquisition + +if DBG + mov eax,fs:PcTeb ; (eax) == NtCurrentTeb() + inc dword ptr TbCountOfOwnedCriticalSections[eax] +endif ; DBG + + mov eax, 1 ; return TRUE + + stdRET _RtlTryEnterCriticalSection + +tec10: +; +; The critical section is already owned. If it is owned by another thread, +; return FALSE immediately. If it is owned by this thread, we must increment +; the lock count here. +; + mov eax, fs:TbClientId+4 ; (eax) == NtCurrentTeb()->ClientId.UniqueThread + cmp CsOwningThread[ecx], eax + jz tec20 ; if eq, this thread is already the owner + xor eax, eax ; return FALSE + stdRET _RtlTryEnterCriticalSection + +tec20: +; +; This thread is already the owner of the critical section. Perform an atomic +; increment of the LockCount and a normal increment of the RecursionCount and +; return success. 
+; +Lock5: + lock inc dword ptr CsLockCount[ecx] + inc dword ptr CsRecursionCount[ecx] + mov eax, 1 + stdRET _RtlTryEnterCriticalSection + +stdENDP _RtlTryEnterCriticalSection + + +_TEXT ends + end diff --git a/private/ntos/dll/i386/emarith.asm b/private/ntos/dll/i386/emarith.asm new file mode 100644 index 000000000..3b09de0b6 --- /dev/null +++ b/private/ntos/dll/i386/emarith.asm @@ -0,0 +1,335 @@ + subttl emarith.asm - Arithmetic Operations + page +;******************************************************************************* +;emarith.asm - Arithmetic Operations +; +; Microsoft Confidential +; +; Copyright (c) Microsoft Corporation 1991 +; All Rights Reserved +; +;Purpose: +; Arithmetic Operations +; +;Revision History: +; +; [] 09/05/91 TP Initial 32-bit version. +; +;******************************************************************************* + + NextStackWrap esi,TwoOp ;Tied to NextStackElem below + +EM_ENTRY eFPREM +eFPREM: + and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3 + push offset PremCont ;Return address if normal +PremPointTopTwo: + push offset PremSpclDone ;Return address if special + mov ebp,offset tFpremDisp +PointTopTwo: + mov esi,edi + NextStackElem esi,TwoOp +TwoOpSiDi: + mov ecx,EMSEG:[esi].ExpSgn + mov ebx,EMSEG:[esi].lManHi + mov esi,EMSEG:[esi].lManLo +TwoOpSetResult: + mov EMSEG:[Result],edi ;Save result pointer +TwoOpResultSet: + mov ah,EMSEG:[edi].bTag +TwoOpDispAh: + mov al,cl +TwoOpDispatch: + and eax,TAG_MASK + 100H*TAG_MASK ;Look at internal tags only + shl al,TAG_SHIFT + or al,ah + xor ah,ah ;Zero ah +;UNDONE: masm bug! ebp + scaled index requires a displacement. +;UNDONE: No displacement is needed here, so masm should generate a +;UNDONE: zero. It doesn't! dec eax so we can add 4*1 back. + dec eax ;UNDONE + jmp dword ptr cs:[ebp+4*eax+4];UNDONE Go to appropriate routine. 
+ +EM_ENTRY eFPREM1 +eFPREM1: + and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3 + push offset Prem1Cont ;Return address if normal + jmp PremPointTopTwo + +EM_ENTRY eFSCALE +eFSCALE: + mov ebp,offset tFscaleDisp ;ebp = dispatch table for this operation + jmp PointTopTwo + +EM_ENTRY eFPATAN +eFPATAN: + mov ebp,offset tFpatanDisp ;ebp = dispatch table for this operation +TopTwoPop: + push offset PopWhenDone ;Return address + mov esi,edi ;esi = ST(0) + add edi,Reg87Len ;edi = ST(1) + cmp edi,ENDstk + jb TwoOpSiDi ;Still inside the register stack + mov edi,BEGstk ;Ran off the end, wrap to the beginning + jmp TwoOpSiDi + +EM_ENTRY eFYL2X +eFYL2X: + mov ebp,offset tFyl2xDisp ;ebp = dispatch table for this operation + jmp TopTwoPop + +EM_ENTRY eFYL2XP1 +eFYL2XP1: + mov ebp,offset tFyl2xp1Disp ;ebp = dispatch table for this operation + jmp TopTwoPop + +;******************************************************************************* + +page +;-----------------------------------------------------------; +; ; +; Special Case Routines for Arithmetic Functions ; +; ; +;-----------------------------------------------------------; + +;There are four kinds of "specials", encoded in the tag: +; +; Empty +; Infinity +; NAN (which can be QNAN or SNAN) +; Denormal +; +;Empty always results in an Invalid Operation exception with Stack Flag set +;and C1 (O/U#) bit clear, and returns Indefinite (a specific QNAN). +; +;Operations on NAN return the same NAN except it is always modified to a +;QNAN. If both operands are NAN, the one with the larger mantissa is +;returned. An SNAN causes an Invalid Operation exception except for +;internal FP stack operations, FCHS, and FABS. A QNAN does not cause +;an exception. +; +;Operations on Infinity return a result depending on the operation. +; +;UNDONE: Old code plays with sign of NAN when two NANs with equal +;mantissas are used. Why? + +;"***" means entry point from dispatch tables + +;*** +DivSpclSource: + cmp cl,bTAG_INF ;cl = source tag + jnz SpclSource +;Division by infinity always returns zero + xor ch,EMSEG:[edi].bSgn + jmp SignedZero ;in emfmul.asm + +;*** +MulSpclSource: + cmp cl,bTAG_INF ;cl = source tag + jnz SpclSource +MulByInf: + cmp EMSEG:[edi].bTag,bTAG_ZERO ;Infinity * zero? 
+ jz ReturnIndefinite +XorSourceSign: + xor ch,EMSEG:[edi].bSgn + jmp SaveResultEdi + +;*** +AddSpclSource: + cmp cl,bTAG_INF + jnz SpclSource + xor ch,dl ;Flip sign of infinity if subtracting + jmp SaveResultEdi + +DenormalSource: + mov cl,bTAG_VALID ;Change denormal to DOUBLE + mov EMSEG:[CURerr],Denormal + test EMSEG:[CWmask],Denormal ;Is denormal exception masked? + jnz TwoOpResultSet +AbortOp: + mov cl,bTAG_NOPOP ;Unmasked, don't pop stack + ret + +DenormalDisp: +;Repeat dispatch, but for normal ops + jmp dword ptr cs:[ebp+4*(TAG_VALID + TAG_VALID shl TAG_SHIFT)] + +;*** +DivrSpclSource: + cmp cl,bTAG_INF + jz XorSourceSign ;Return infinity +SpclSource: + cmp cl,bTAG_DEN + jz DenormalSource + cmp cl,bTAG_EMPTY + jz StackError +;Must be a NAN +SourceNAN: + test ebx,1 shl 30 ;Check for SNAN + jnz SaveResultEdi ;If QNAN, just use it as result +SourceSNAN: + or EMSEG:[CURerr],Invalid ;Flag the error + or ebx,1 shl 30 ;Make it into a QNAN + test EMSEG:[CWmask],Invalid ;Is it masked? + jnz SaveResultEdi ;If so, update with masked response + mov cl,bTAG_NOPOP ;Unmasked, don't pop stack + ret + + +;*** +DivrSpclDest: + mov eax,EMSEG:[edi].ExpSgn ;Pick up tag + cmp al,bTAG_INF + jnz SpclDest +;Division by infinity always returns zero + xor ch,ah + jmp SignedZero ;in emfmul.asm + +;*** +MulSpclDest: + mov al,EMSEG:[edi].bTag ;Pick up tag + cmp al,bTAG_INF + jnz SpclDest + cmp cl,bTAG_ZERO ;Infinity * zero? + jz ReturnIndefinite +XorDestSign: + xor EMSEG:[edi].bSgn,ch ;Xor signs + ret + +;*** +AddSpclDest: + mov al,EMSEG:[edi].bTag ;Pick up tag + cmp al,bTAG_INF + jnz SpclDest + xor EMSEG:[edi].bSgn,dh ;Flip sign of infinity if subtracting + ret + +DenormalDest: + mov ah,bTAG_VALID ;Change denormal to DOUBLE + mov EMSEG:[CURerr],Denormal + test EMSEG:[CWmask],Denormal ;Is denormal exception masked? 
+ jnz TwoOpDispAh + mov cl,bTAG_NOPOP ;Unmasked, don't pop stack + ret + +;*** +DivSpclDest: + mov al,EMSEG:[edi].bTag ;Pick up tag + cmp al,bTAG_INF + jz XorDestSign ;Return infinity +SpclDest: + cmp al,bTAG_DEN + jz DenormalDest +SpclDestNotDen: + cmp al,bTAG_EMPTY + jz StackError +;Must be a NAN +DestNAN: + test EMSEG:[edi].bMan7,40H ;Check for SNAN + jnz ReturnDest ;If QNAN, just use it as result +DestSNAN: + or EMSEG:[CURerr],Invalid ;Flag the error + test EMSEG:[CWmask],Invalid ;Is it masked? + jz AbortOp ;No - preserve value + or EMSEG:[edi].bMan7,40H ;Make it into a QNAN + ret + +StackError: + mov EMSEG:[CURerr],Invalid+StackFlag +ReturnIndefinite: + or EMSEG:[CURerr],Invalid + test EMSEG:[CWmask],Invalid ;Is it masked? + jz AbortOp ;No - preserve value + mov EMSEG:[edi].lManLo,0 ;Store Indefinite in the destination + mov EMSEG:[edi].lManHi,0C0000000H + mov EMSEG:[edi].ExpSgn,TexpMax shl 16 + bSign shl 8 + bTAG_NAN +ReturnDest: + ret + + +AddTwoInf: +;Adding two infinities. +;If signs are the same, return that infinity. Otherwise, Invalid Operation. + xor ch,dl ;Possibly subtracting source + xor ah,dh ;Possibly subtracting dest + xor ch,ah ;Compare signs + js ReturnIndefinite + mov EMSEG:[edi].bSgn,ah ;Correct the sign if subtracting + ret + +;*** +TwoOpBothSpcl: +;ebp = dispatch table address + mov al,EMSEG:[edi].bTag ;al = dest tag + mov ah,cl ;ah = source tag + cmp ax,(bTAG_NAN shl 8) + bTag_NAN ;Are both NAN? + jz TwoNANs + cmp cl,bTAG_EMPTY + jz StackError + cmp al,bTAG_EMPTY + jz StackError + cmp cl,bTAG_NAN + jz SourceNAN + cmp al,bTAG_NAN + jz DestNAN + cmp ax,(bTAG_INF shl 8) + bTag_INF ;Are both infinity? + jz TwoInfs +;At least one of the operands is a denormal + mov EMSEG:[CURerr],Denormal + test EMSEG:[CWmask],Denormal ;Is denormal exception masked? + jz AbortOp ;If not, don't do operation +;Denormal exception is masked, treat denormals as VALID +;Dispatch through operation table in ebp again + cmp ax,(bTAG_DEN shl 8) + bTag_DEN ;Are both denormal? 
+ jz DenormalDisp +;Have an infinity and a denormal + cmp al,bTAG_INF ;al = dest tag (loaded in TwoOpBothSpcl) + jz DestInf ;NOTE(review): taken when DEST is infinity, yet DestInf dispatches with source=SPCL, dest=VALID (index = source tag shl TAG_SHIFT + dest tag) - branch or comments look swapped; confirm +;Source is denormal, Dest is infinity + jmp dword ptr [ebp+4*(TAG_SPCL + TAG_VALID shl TAG_SHIFT)] + +DestInf: +;Source is infinity, Dest is denormal + jmp dword ptr [ebp+4*(TAG_VALID + TAG_SPCL shl TAG_SHIFT)] + +TwoNANs: +;Two NANs. Use largest mantissa + cmp ebx,EMSEG:[edi].lManHi ;ebx = source lManHi + ja BiggerNAN + jb DestBigger +;Now we know they're both the same type, SNAN or QNAN + cmp esi,EMSEG:[edi].lManLo ;esi = source lManLo + ja SourceNAN +;UNDONE: Old code did funny business with signs when mantissas were equal + jmp DestNAN + +BiggerNAN: + test EMSEG:[edi].bMan7,40H ;Is smaller one (dest) SNAN? + jz SourceSNAN + jmp SourceNAN + +DestBigger: + test ebx,1 shl 30 ;Is smaller one (source) SNAN? Quiet bit of lManHi is bit 30, as tested in SourceNAN + jz DestSNAN + jmp DestNAN + +TwoInfs: + mov ah,EMSEG:[edi].bSgn + jmp dword ptr [ebp+4*16] ;Go do code for two infinities + + +;*** +DivideByMinusZero: + mov ch,bSign +;*** +DivideByZero: + or EMSEG:[CURerr],ZeroDivide + test EMSEG:[CWmask],ZeroDivide ;Is exception masked? + jz AbortOp ;No - preserve value +;Set up a signed infinity + xor ch,EMSEG:[edi].bSgn ;Get result sign + and ecx,1 shl 15 ;Keep only sign bit + or ecx,(4000H+TexpBias) shl 16 + bTAG_INF ;ExpSgn of infinity + mov ebx,1 shl 31 + xor esi,esi + jmp SaveResultEdi diff --git a/private/ntos/dll/i386/emdecode.asm b/private/ntos/dll/i386/emdecode.asm new file mode 100644 index 000000000..242a8aa69 --- /dev/null +++ b/private/ntos/dll/i386/emdecode.asm @@ -0,0 +1,39 @@ + subttl emdecode.asm - Instruction decoding + page +;*** +;emdecode.asm - Instruction decoding +; +; Microsoft Confidential +; +; Copyright (c) Microsoft Corporation 1991 +; +; All Rights Reserved +; +;Purpose: +; Further decoding of instructions done here. +; +;Revision History: +; +; 8/23/91 TP Rewritten for 32 bits +; +;******************************************************************************* + +;On entry, eax = r/m bits * 4. This is used to jump directly to the +;correct instruction within the group. 
+ +GroupFCHS: + jmp tGroupFCHSdisp[eax] ;FCHS,FABS,FTST,FXAM + +GroupFLD1: + jmp tGroupFLD1disp[eax] ;FLD1,FLDL2T,FLDL2E,FLDPI,FLDLG2,FLDLN2,FLDZ + +GroupF2XM1: + jmp tGroupF2XM1disp[eax] ;F2XM1,FYL2X,FPTAN,FPATAN,FXTRACT,FPREM1,FDECSTP,FINCSTP + +GroupFPREM: + jmp tGroupFPREMdisp[eax] ;FPREM,FYL2XP1,FSQRT,FSINCOS,FRNDINT,FSCALE,FSIN,FCOS + +GroupFENI: + jmp tGroupFENIdisp[eax] ;FENI,FDISI,FCLEX,FINIT,FSETPM + + diff --git a/private/ntos/dll/i386/emdisp.asm b/private/ntos/dll/i386/emdisp.asm new file mode 100644 index 000000000..7e7402f66 --- /dev/null +++ b/private/ntos/dll/i386/emdisp.asm @@ -0,0 +1,298 @@ + subttl emdisp.asm - Emulator Dispatch Tables + page +; +; IBM/Microsoft Confidential +; +; Copyright (c) IBM Corporation 1987, 1989 +; Copyright (c) Microsoft Corporation 1987, 1989 +; +; All Rights Reserved +; +;Revision History: (also see emulator.hst) +; +; 1/21/92 JWM Minor modifications for DOSX32 emulator +; 8/23/91 TP Direct dispatch off of 6-bit opcode +; 10/30/89 WAJ Added this header. +; +;******************************************************************************* + +;*********************************************************************; +; ; +; Dispatch Tables ; +; ; +;*********************************************************************; + + +; These tables are based upon the layout of the 8087 instructions +; +; 8087 instruction fields: |escape|MF|Arith|MOD|Op|r/m|disp1|disp2| +; field length in bits: 5 2 1 2 3 3 8 8 +; +; Disp1 and Disp2 are optional address bytes present only if MOD <> 11. +; When (MOD <> 11) r/m describes which regs (SI,DI,BX,BP) are added to +; Disp1 and Disp2 to calculate the effective address. This form +; (memory format) is used for Loads, Stores, Compares, and Arithmetic. +; When using memory format MF determines the Type of the Memory operand +; i.e. Single Real, Double real, Single Integer, or Double Integer +; Arith is 0 for Arithmetic operations (and compares), set to 1 otherwise +; Op mostly determines which type of operation to do though when not in +; memory format some of that is coded into MF and r/m +; All of the tables are set up to do a jump based upon one or more of the +; above fields. 
The outline for decoding instructions is: +; +; IF (memory format) THEN +; Assemble Effective Address (using MOD and r/m and EffectiveAddressTab) +; Jump through table to operation, using MF, Arith and Op bits +; ELSE (Register format) +; Jump through table to operation, using MF, Arith and Op bits + + ALIGN 4 + +;*********************************************************************; +; +; Memory address calculation tables + +EA386Tab label dword ; Uses |r/m|MOD+1| for indexing + dd NoEffectiveAddress + dd Exx00 ; eax + dd Exx01 + dd Exx10 + dd NoEffectiveAddress + dd Exx00 ; ecx + dd Exx01 + dd Exx10 + dd NoEffectiveAddress + dd Exx00 ; edx + dd Exx01 + dd Exx10 + dd NoEffectiveAddress + dd Exx00 ; ebx + dd Exx01 + dd Exx10 + dd NoEffectiveAddress + dd SIB00 ; esp (S-I-B follows) + dd SIB01 + dd SIB10 + dd NoEffectiveAddress + dd Direct386 ; ebp (00 = direct addressing) + dd Exx01 + dd Exx10 + dd NoEffectiveAddress + dd Exx00 ; esi + dd Exx01 + dd Exx10 + dd NoEffectiveAddress + dd Exx00 ; edi + dd Exx01 + dd Exx10 + +;*********************************************************************; +; +;Opcode dispatching tables +;Indexed by | op1 | op2 |0 0| (op1 = MF|Arith) + + public tOpRegDisp +tOpRegDisp label dword + dd eFADDtop + dd eFMULtop + dd eFCOM + dd eFCOMP + dd eFSUBtop + dd eFSUBRtop + dd eFDIVtop + dd eFDIVRtop + + dd eFLDreg + dd eFXCH + dd eFNOP ;UNDONE: also reserved on 387 + dd eFSTP ;Special form 1 + dd GroupFCHS ;FCHS,FABS,FTST,FXAM + dd GroupFLD1 ;FLD1,FLDL2T,FLDL2E,FLDPI,FLDLG2,FLDLN2,FLDZ + dd GroupF2XM1 ;F2XM1,FYL2X,FPTAN,FPATAN,FXTRACT,FPREM1,FDECSTP,FINCSTP + dd GroupFPREM ;FPREM,FYL2XP1,FSQRT,FSINCOS,FRNDINT,FSCALE,FSIN,FCOS + + dd UNUSED + dd UNUSED + dd UNUSED + dd UNUSED + dd UNUSED + dd eFUCOMPP ;UNDONE: also reserved on 387 + dd UNUSED + dd UNUSED + + dd UNUSED + dd UNUSED + dd UNUSED + dd UNUSED + dd GroupFENI ;FENI,FDISI,FCLEX,FINIT + dd UNUSED + dd UNUSED + dd UNUSED + + dd eFADDreg + dd eFMULreg + dd eFCOM ;Special form 2 + dd eFCOMP 
;Special form 3 + dd eFSUBRreg + dd eFSUBreg + dd eFDIVRreg + dd eFDIVreg + + dd eFFREE + dd eFXCH ;Special form 4 + dd eFST + dd eFSTP + dd eFUCOM + dd eFUCOMP + dd UNUSED + dd UNUSED + + dd eFADDPreg + dd eFMULPreg + dd eFCOMP ;Special form 5 + dd eFCOMPP ;UNDONE: also reserved on 387 + dd eFSUBRPreg + dd eFSUBPreg + dd eFDIVRPreg + dd eFDIVPreg + + dd eFFREE ;Special form 6 UNDONE: "and pop stack"? + dd eFXCH ;Special form 7 + dd eFSTP ;Special form 8 + dd eFSTP ;Special form 9 + dd eFSTSWax ;UNDONE: also reserved on 387 + dd UNUSED + dd UNUSED + dd UNUSED + + +tOpMemDisp label dword +;MF = 00 (32-bit Real), Arith = 0 + dd eFADD32 + dd eFMUL32 + dd eFCOM32 + dd eFCOMP32 + dd eFSUB32 + dd eFSUBR32 + dd eFDIV32 + dd eFDIVR32 +;MF = 00 (32-bit Real), Arith = 1 + dd eFLD32 + dd UNUSED + dd eFST32 + dd eFSTP32 + dd eFLDENV + dd eFLDCW + dd eFSTENV + dd eFSTCW +;MF = 01 (32-bit Int), Arith = 0 + dd eFIADD32 + dd eFIMUL32 + dd eFICOM32 + dd eFICOMP32 + dd eFISUB32 + dd eFISUBR32 + dd eFIDIV32 + dd eFIDIVR32 +;MF = 01 (32-bit Int), Arith = 1 + dd eFILD32 + dd UNUSED + dd eFIST32 + dd eFISTP32 + dd UNUSED + dd eFLD80 + dd UNUSED + dd eFSTP80 +;MF = 10 (64-bit Real), Arith = 0 + dd eFADD64 + dd eFMUL64 + dd eFCOM64 + dd eFCOMP64 + dd eFSUB64 + dd eFSUBR64 + dd eFDIV64 + dd eFDIVR64 +;MF = 10 (64-bit Real), Arith = 1 + dd eFLD64 + dd UNUSED + dd eFST64 + dd eFSTP64 + dd eFRSTOR + dd UNUSED + dd eFSAVE + dd eFSTSW +;MF = 11 (16-bit Int), Arith = 0 + dd eFIADD16 + dd eFIMUL16 + dd eFICOM16 + dd eFICOMP16 + dd eFISUB16 + dd eFISUBR16 + dd eFIDIV16 + dd eFIDIVR16 +;MF = 11 (16-bit Int), Arith = 1 + dd eFILD16 + dd UNUSED + dd eFIST16 + dd eFISTP16 + dd eFBLD + dd eFILD64 + dd eFBSTP + dd eFISTP64 + + +tGroupFLD1disp label dword + dd eFLD1 + dd eFLDL2T + dd eFLDL2E + dd eFLDPI + dd eFLDLG2 + dd eFLDLN2 + dd eFLDZ + dd UNUSED + + +tGroupF2XM1disp label dword + dd eF2XM1 + dd eFYL2X + dd eFPTAN + dd eFPATAN + dd eFXTRACT + dd eFPREM1 + dd eFDECSTP + dd eFINCSTP + + 
+tGroupFCHSdisp label dword + dd eFCHS + dd eFABS + dd UNUSED + dd UNUSED + dd eFTST + dd eFXAM + dd UNUSED + dd UNUSED + + +tGroupFPREMdisp label dword + dd eFPREM + dd eFYL2XP1 + dd eFSQRT + dd eFSINCOS + dd eFRNDINT + dd eFSCALE + dd eFSIN + dd eFCOS + + +tGroupFENIdisp label dword + dd eFENI + dd eFDISI + dd eFCLEX + dd eFINIT + dd eFSETPM + dd UNUSED + dd UNUSED + dd UNUSED + + diff --git a/private/ntos/dll/i386/emerror.asm b/private/ntos/dll/i386/emerror.asm new file mode 100644 index 000000000..3fc135a06 --- /dev/null +++ b/private/ntos/dll/i386/emerror.asm @@ -0,0 +1,469 @@ + page ,132 + subttl emerror.asm - Emulator error handler +;*** +;emerror.asm - Emulator error handler +; +; Microsoft Confidential +; +; Copyright (c) Microsoft Corporation 1987, 1991 +; +; All Rights Reserved +; +;Purpose: +; Emulator error handler +; +;Revision History: (also see emulator.hst) +; +; 10/30/89 WAJ Added this header. +; 11/15/89 WAJ Major changes for Dos32RaiseExcpetion(). +; 12/01/89 WAJ Now set cbExceptionInfo correctly. +; 02/08/90 WAJ Fixed GP fault in 32 bit exception handler. +; 09/03/91 JWM Modified entry/exit sequence for DOSX32. +; 02/15/92 JWM Adapted for NT. +; +;******************************************************************************* + +ifdef _DOS32EXT +include except32.inc +endif + +;*** error_return - return to user code (regardless of error) +; +; This macro returns to user code. It goes to some lengths +; to restore the flags on the instruction immediately before +; the return so that any pending trace trap will be +; acknowledged immediately after the retfd (and before the +; next user instruction) instead of after the instruction +; following the return as would be the case if we returned +; using iretd. +; +; ENTRY ((SS:ESP)) = user's EAX +; ((SS:ESP)+4) = return EIP +; ((SS:ESP)+8) = return CS +; ((SS:ESP)+12) = user's EFLAGS +; EXIT return to user program, above arguments +; popped off stack, user's EAX and EFLAGS +; restored. 
+ +error_return macro noerror +ifdef _DOS32EXT + sti ; JWM, 9/3/91 + push dword ptr [esp+8] ; JWM, 9/6/91 + popfd ; JWM, 9/6/91 +endif ; DOS32EXT + +ifdef NT386 +if DBG + push dword ptr [esp+8] ; On checked build, allow + popfd ; single step to work +endif +endif + iretd + endm + + +TESTif macro nam + mov bl,err&nam ; default error number + if (nam ge 100h) + test ah,nam/256 + else ;not (nam ge 100h) + test al,nam + endif ;(nam ge 100h) + JSNZ signalerror + endm + +EM_ENTRY eCommonExceptions +CommonExceptions: + mov ebx,[esp].[OldLongStatus] + and ebx,LongSavedFlags ;preserve condition codes, error flags + or EMSEG:[LongStatusWord],ebx ;merge saved status word, condition codes + pop eax + pop ecx + pop edx + pop ebx + add esp,4 ; toss esp value + pop ebp + pop esi + pop edi + add esp,8 ;toss old PrevCodeOff and StatusWord + pop ds + call Emexcept + error_return noerror + +ifdef _DOS32EXT + +EmExcept PROC C, OldEIP:DWORD, OldCS:DWORD, OldFlags:DWORD + +LOCAL SSAR:DWORD +LOCAL ec:_DX32_CONTEXT + + ;* + ;* Set up SS access rights. + ;* + + push ds + mov [ec.R_Eax], eax + GetEmData ds,ax + + mov eax, ss + lar eax, eax + mov [SSAR], eax + + ;* + ;* Fill in ExceptionContext structure. + ;* + + + mov [ec.NPXContextFlags], NPX_CONTEXT_FULL + mov [ec.R_Edi], edi + mov [ec.R_Esi], esi + + mov eax, [ebp] + mov [ec.R_Ebp], eax + + lea eax, [OldFlags+4] + mov [ec.R_Esp], eax + + mov [ec.R_Ebx], ebx + mov [ec.R_Edx], edx + mov [ec.R_Ecx], ecx + + mov eax, EMSEG:[PrevCodeOff] + + mov [ec.R_Eip], eax + mov eax, [OldFlags] + mov [ec.EFlags], eax + + mov eax, [OldCS] + movzx eax,ax + mov [ec.SegCs], eax + mov ax,ss + movzx eax,ax + mov [ec.SegSs], eax + + pop eax + movzx eax,ax + mov [ec.SegDs], eax ; ds was pushed on entry. 
+ + mov ax,es + movzx eax,ax + mov [ec.SegEs], eax + + mov ax,fs + movzx eax,ax + mov [ec.SegFs], eax + + mov ax,gs + movzx eax,ax + mov [ec.SegGs], eax + + lea esi, [ec] + add esi, 4 + + push ebp + call SaveState + pop ebp + + lea eax, [ec] + push ds + push es + + mov bx, seg FLAT:CURstk + mov ds, ebx + mov es, ebx + push eax + + call DOS32RAISEEXCEPTION + + add esp, 4 + + pop es + pop ds + +RaiseExceptRet: + or eax, eax + JZ ExceptNotHandled + + ;* + ;* Copy new flags, cs, eip to new stack. + ;* + + mov ds, [ec.SegSs] + mov esi, [ec.R_Esp] ; ds:esi == new ss:esp + + mov eax, [ec.Eflags] ; set up iretd frame + mov [esi-4], eax + + mov eax, [ec.SegCs] + mov [esi-8], eax + + mov eax, [ec.R_Eip] + mov [esi-12], eax + + ;* + ;* Put new stack pointer on stack. + ;* + + push ds + sub esi, 12 + push esi + + ;* + ;* Reset other registers. + ;* + + mov edi, [ec.R_Edi] + mov esi, [ec.R_Esi] + mov ebx, [ec.R_Ebx] + mov edx, [ec.R_Edx] + mov ecx, [ec.R_Ecx] + mov eax, [ec.R_Eax] + mov ds, [ec.SegDs] + mov es, [ec.SegEs] + mov fs, [ec.SegFs] + mov gs, [ec.SegGs] + + mov ebp, [ec.R_Ebp] ; must do this last. + + lss esp, fword ptr [esp] ; reset ss:esp + + sti ; JWM, 9/3/91 + push [esp+8] ; JWM, 9/6/91 + popfd ; JWM, 9/6/91 + + iretd ; reset flags, cs, eip + +ExceptNotHandled: +EmExcept ENDP + +endif ; ifdef _DOS32EXT + +ifdef NT386 + +ISIZE equ 4 +ISizeEC equ (ContextFrameLength + ISIZE - 1) and (not (ISIZE - 1)) +ISizeExceptStruct equ (ExceptionRecordLength + ISIZE - 1) and (not (ISIZE - 1)) + +ec_off EQU 4+ISizeEc +estruct_off EQU ec_off+ISizeExceptStruct + +SSAR EQU <[ebp][-4]> +ec EQU <[ebp][-ec_off]> +eStruct EQU <[ebp][-estruct_off]> + +OldEIP EQU <ebp+8> +OldCS EQU <ebp+12> +OldFlags EQU <ebp+16> + + +EmExcept PROC NEAR + + push ebp + mov ebp,esp + sub esp,estruct_off + + + ;* + ;* Set up SS access rights. + ;* + + push ds + mov [ec.ctx_RegEax], eax + GetEmData ds,ax + + mov eax, ss + lar eax, eax + mov [SSAR], eax + + ;* + ;* Fill in ExceptionContext structure. 
+ ;* + + + mov dword ptr [ec.ContextFlags], NPX_CONTEXT_FULL + mov dword ptr [ec.ctx_Cr0NpxState], CR0_EM + mov [ec.ctx_RegEdi], edi + mov [ec.ctx_RegEsi], esi + + mov eax, [ebp] + mov [ec.ctx_RegEbp], eax + + lea eax, [OldFlags+4] + mov [ec.ctx_RegEsp], eax + + mov [ec.ctx_RegEbx], ebx + mov [ec.ctx_RegEdx], edx + mov [ec.ctx_RegEcx], ecx + + mov eax, [OldEIP] + + mov [ec.ctx_RegEip], eax + mov eax, [OldFlags] + mov [ec.ctx_EFlags], eax + + mov eax, [OldCS] + movzx eax,ax + mov [ec.ctx_SegCs], eax + mov ax,ss + movzx eax,ax + mov [ec.ctx_SegSs], eax + + pop eax + movzx eax,ax + mov [ec.ctx_SegDs], eax ; ds was pushed on entry. + + mov ax,es + movzx eax,ax + mov [ec.ctx_SegEs], eax + + mov ax,fs + movzx eax,ax + mov [ec.ctx_SegFs], eax + + mov ax,gs + movzx eax,ax + mov [ec.ctx_SegGs], eax + + lea esi, [ec] + add esi, ctx_env + + or EMSEG:[StatusWord], 8000H ; set 'busy' bit + or EMSEG:[SWerr], Summary ; set Summary bit + or EMSEG:[CURerr], Summary + + mov cl, EMSEG:[ErrMask] + push ecx + push ebp + call SaveState + pop ebp + pop ecx + + call GetEMSEGStatusWord ; EAX = status word + test al, cl ; test status word against mask + jne short Err00 + +ifdef TRACENPX + mov edx, 0C1020304h ; Raise bogus exception code, to trace with + jmp short Err50 +endif + mov al, Invalid + +; +; According to the floating error priority, we test what is the cause of +; the NPX error and raise an appropriate exception. +; + +Err00: + test al, Invalid ; Invalid Op? + jz short Err10 ; No, go check next + + mov edx, XCPT_FLOAT_INVALID_OPERATION + test al, StackFlag ; Stack fault? 
+ jz short Err50 ; No, go raise invalid op + mov edx, XCPT_FLOAT_STACK_CHECK + jmp short Err50 ; Go raise stack fault + +Err10: mov edx, XCPT_FLOAT_DIVIDE_BY_ZERO + test al, ZeroDivide + jnz short Err50 + mov edx, XCPT_FLOAT_DENORMAL_OPERAND + test al, Denormal + jnz short Err50 + mov edx, XCPT_FLOAT_OVERFLOW + test al, Overflow + jnz short Err50 + mov edx, XCPT_FLOAT_UNDERFLOW + test al, Underflow + jnz short Err50 + mov edx, XCPT_FLOAT_INEXACT_RESULT + +Err50: mov [eStruct.ExceptionNum], edx + + xor eax,eax + mov [eStruct.fHandlerFlags], eax + mov [eStruct.NestedExceptionReportRecord], eax + mov dword ptr [eStruct.CParameters], 1 ; GeorgioP convention + mov [eStruct.ErExceptionInformation], eax ; GeorgioP convention + + mov eax, EMSEG:[PrevCodeOff] + mov [eStruct.ExceptionAddress], eax + + lea edx, [eStruct] + + lea eax, [ec] + push ds + push es + + +;TRUE, this is a first-chance exception + + stdCall _NtRaiseException,<edx, eax, 1> + stdCall _RtlRaiseStatus, <eax> + + pop es + pop ds + +RaiseExceptRet: + or eax, eax + JZ ExceptNotHandled + + ;* + ;* Copy new flags, cs, eip to new stack. + ;* + + mov ds, [ec.ctx_SegSs] + mov esi, [ec.ctx_RegEsp] ; ds:esi == new ss:esp + + mov eax, [ec.ctx_Eflags] ; set up iretd frame + mov [esi-4], eax + + mov eax, [ec.ctx_SegCs] + mov [esi-8], eax + + mov eax, [ec.ctx_RegEip] + mov [esi-12], eax + + ;* + ;* Put new stack pointer on stack. + ;* + + push ds + sub esi, 12 + push esi + + ;* + ;* Reset other registers. + ;* + + mov edi, [ec.ctx_RegEdi] + mov esi, [ec.ctx_RegEsi] + mov ebx, [ec.ctx_RegEbx] + mov edx, [ec.ctx_RegEdx] + mov ecx, [ec.ctx_RegEcx] + mov eax, [ec.ctx_RegEax] + mov ds, [ec.ctx_SegDs] + mov es, [ec.ctx_SegEs] + mov fs, [ec.ctx_SegFs] + mov gs, [ec.ctx_SegGs] + + mov ebp, [ec.ctx_RegEbp] ; must do this last. 
+ + lss esp, fword ptr [esp] ; reset ss:esp + + sti ; JWM, 9/3/91 + push [esp+8] ; JWM, 9/6/91 + popfd ; JWM, 9/6/91 + + iretd ; reset flags, cs, eip + +ExceptNotHandled: +EmExcept ENDP + +endif ; ifdef NT386 + +ifdef DEBUG + +lab PageFault + mov al, byte ptr cs:[iax] + ret +endif diff --git a/private/ntos/dll/i386/emf386.asm b/private/ntos/dll/i386/emf386.asm new file mode 100644 index 000000000..40e92abda --- /dev/null +++ b/private/ntos/dll/i386/emf386.asm @@ -0,0 +1,552 @@ + subttl emf386.asm - 32 bit Emulator Interrupt Handler + page +;*** +;emf386.asm - 32 bit Emulator Interrupt Handler +; +; IBM/Microsoft Confidential +; +; Copyright (c) IBM Corporation 1987, 1989 +; Copyright (c) Microsoft Corporation 1987, 1989 +; +; All Rights Reserved +; +;Purpose: +; 32 bit Emulator Interrupt Handler +; +;Revision History: (also see emulator.hst) +; +; 1/21/92 JWM Minor modifications for DOSX32 emulator +; 8/23/91 TP Reduce to only two decoding steps +; +;******************************************************************************* + + +;*********************************************************************; +; ; +; Main Entry Point and Address Calculation Procedure ; +; ; +; 80386 version ; +; ; +;*********************************************************************; +; +; This routine fetches the 8087 instruction, calculates memory address +; if necessary into ES:ESI and calls a routine to emulate the instruction. +; Most of the dispatching is done through tables. 
(see comments in CONST) +; +; The instruction dispatching is designed to favor the 386 addressing modes + + +ifdef _DOS32EXT ; JWM +public __astart +__astart: + mov eax, 1 ; stub entry point: just return 1 in eax + ret + +public _Ms32KrnlHandler +_Ms32KrnlHandler: +endif + +ifdef NT386 + +; +; NPXEmulatorTable is a table read by the Windows/NT kernel in +; order to support the R3 emulator +; +public _NPXEMULATORTABLE +_NPXEMULATORTABLE label dword + dd offset NpxNpHandler ; Address of Ring3 Trap7 handler + dd offset tRoundMode ; Address of rounding vector table +endif + +public NPXNPHandler +NPXNPHandler: + +ifdef DEBUG + int 3 ; DEBUG builds break on every entry to the handler +endif + cld ; clear direction flag forever + +ifdef NT386 + + +;-- BUGBUG - bryanwi - 16Oct91 - Hack FP fix, not pointing IDT:7 at this +; routine for 16bit code is the right thing to do. +; +; Check to see if we are running on flat SS. If so, assume things +; are OK and proceed. (If a 16bit app loads the flat SS and then +; does an FP instruction, they're hosed, no skin off our nose.) +; +; If SS not what we expect, then either (a) a flat app is *very* +; confused, or (b) a 16 bit app has hit an FP instruction. In either +; case, this emulator is not going to work. Therefore, raise an exception. +; + + push ax ; use form that will work with any SS + mov ax,ss ; ax = current stack selector + or ax,RPL_MASK ; set the RPL bits so the compare ignores the RPL + cmp ax,(KGDT_R3_DATA OR RPL_MASK) ; is SS the flat data selector? + pop ax ; restore ax (pop leaves the flags from cmp intact) + jz OK_Segment ; Segments are OK, proceed normally. + + jmp Around ; jump over the data declared inline below + +_DATA SEGMENT DWORD USE32 PUBLIC 'DATA' + + align 4 + +EmerStk db 1024 dup (?) ; *** SaveContext is assumed to be +SaveContext db size ContextFrameLength dup (?) ; *** at the top of the EmerStk by +SaveException db size ExceptionRecordLength dup (?) 
; *** the function @ 13f:0 + +_DATA ENDS + +Around: +; +; Trap occurred in 16bit code, get to flat environment and raise exception +; + + push eax ; save EAX on old stack + mov ax, ds ; low word of eax = DS selector + push eax ; Save DS on old stack + + mov ax,(KGDT_R3_DATA OR RPL_MASK) + mov ds,ax ; DS is now flat, so SaveContext is addressable + ASSUME DS:FLAT + + pop dword ptr [SaveContext.CsSegDs] ; remove ds from old stack + pop dword ptr [SaveContext.CsEax] ; remove eax from old stack + pop dword ptr [SaveContext.CsEip] ; copy eip from old stack + pop dword ptr [SaveContext.CsSegCs] ; copy cs from old stack + pop dword ptr [SaveContext.CsEflags] ; copy eflag from old stack + + push dword ptr [SaveContext.CsEFlags] ; restore eflag to old stack + push dword ptr [SaveContext.CsSegCs] ; restore cs to old stack + push dword ptr [SaveContext.CsEip] ; restore eip to old stack + mov dword ptr [SaveContext.CsEsp], esp ; saved esp still points at the eip/cs/eflags frame + +; +; Build rest of context frame +; + + mov dword ptr [SaveContext.CsContextFlags],CONTEXT_CONTROL OR CONTEXT_SEGMENTS OR CONTEXT_INTEGER + mov dword ptr [SaveContext.CsEbx], ebx + mov dword ptr [SaveContext.CsEcx], ecx + mov dword ptr [SaveContext.CsEdx], edx + mov dword ptr [SaveContext.CsEsi], esi + mov dword ptr [SaveContext.CsEdi], edi + mov dword ptr [SaveContext.CsEbp], ebp + mov dword ptr [SaveContext.CsSegEs], es + mov dword ptr [SaveContext.CsSegFs], fs + mov dword ptr [SaveContext.CsSegGs], gs + mov dword ptr [SaveContext.CsSegSs], ss + + mov ss,ax ; Switch to new stack + mov esp,(OFFSET FLAT:EmerStk) + 1024 ; esp = end of the 1024-byte EmerStk buffer + ASSUME SS:FLAT + +; +; ss: flat, esp -> EmerStk +; + + mov ax,KGDT_R3_TEB OR RPL_MASK + mov fs, ax ; fs = TEB selector + mov ecx, fs:[TbVdm] ; ecx = TbVdm field of the TEB + or ecx, ecx ; non-zero? + jne short DoVdmFault ; yes, take the VDM path + + mov ecx, offset SaveContext ; (ecx) -> context record + mov edx, offset SaveException ; (edx) -> exception record + + mov dword ptr [edx.ErExceptionCode],STATUS_ILLEGAL_FLOAT_CONTEXT + mov dword ptr [edx.ErExceptionFlags],0 + mov dword ptr [edx.ErExceptionRecord],0 + mov ebx, [ecx.CsEip] + mov [edx.ErExceptionAddress],ebx ; exception address = trapping eip + mov [edx.ErNumberParameters],0 
+ +; +; ZwRaiseException(edx=ExceptionRecord, ecx=ContextRecord, TRUE=FirstChance) +; + + stdCall _ZwRaiseException, <edx, ecx, 1> + +; +; If we come back HERE, things are hosed. We cannot bugcheck because +; we are in user space, so int-3 and loop forever instead. +; + +Forever: + int 3 + jmp short Forever + +DoVdmFault: +; +; Does the VDM want the fault, or should the instruction be skipped +; + test ds:[ecx].VtVdmContext.CsFloatSave.FpCr0NpxState, CR0_EM ; EM bit set in the VDM's saved NPX state? + jz short SkipNpxInstruction ; no - just step over the instruction + + add dword ptr [SaveContext.CsEsp], 12 ; remove from old stack + +; jump to the dos extender NPX exception handler + +; jmp far ptr 013fh:0 + db 0eah ; hand-assembled far jmp opcode + dd 0 ; offset 0 + dw 013fh ; selector 013fh + +SkipNpxInstruction: + mov ax,(KGDT_R3_DATA OR RPL_MASK) + mov es,ax ; es = flat data selector for the call below + + stdCall _NpxNpSkipInstruction, <offset SaveContext> + + mov ebx, dword ptr [SaveContext.CsEbx] ; reload registers from the saved context + mov ecx, dword ptr [SaveContext.CsEcx] + mov edx, dword ptr [SaveContext.CsEdx] + mov edi, dword ptr [SaveContext.CsEdi] + mov esi, dword ptr [SaveContext.CsEsi] + mov ebp, dword ptr [SaveContext.CsEbp] + mov gs, dword ptr [SaveContext.CsSegGs] + mov fs, dword ptr [SaveContext.CsSegFs] + mov es, dword ptr [SaveContext.CsSegEs] + + mov eax, dword ptr [SaveContext.CsEsp] + mov ss, dword ptr [SaveContext.CsSegSs] ; switch to original stack + mov esp, eax ; esp = saved value, which still points at the old frame + + add esp, 12 ; remove eflag, cs, eip + push dword ptr [SaveContext.CsEflags] ; rebuild the iretd frame from the context + push dword ptr [SaveContext.CsSegCs] + push dword ptr [SaveContext.CsEip] ; presumably advanced by _NpxNpSkipInstruction - confirm + mov eax, dword ptr [SaveContext.CsEax] + mov ds, dword ptr [SaveContext.CsSegDs] ; restore ds last: SaveContext is addressed through ds + + iretd ; restore eflag, cs, eip + +OK_Segment: +endif + + + push ds ; save segment registers + + GetEmData ds + + push EMSEG:[LongStatusWord] ;In case we're saving status + push EMSEG:[PrevCodeOff] ;In case we save environment +;Save registers in order of their index number + push edi + push esi + push ebp + push esp + add dword ptr [esp],regFlg-regESP ; adjust to original esp + push ebx + push edx + push ecx + push eax + + cmp EMSEG:[Einstall], 0 ; Make sure 
emulator is initialized. + je InstalEm ; not yet - initialize, then come back to EmInstalled + +EmInstalled: + mov edi,[esp].regEIP ;edi = 387 instruction address + movzx edx, word ptr cseg:[edi] ;dx = esc and opcode + +; Check for unmasked errors + mov al, EMSEG:[CURerr] ; fetch errors + and al, EMSEG:[ErrMask] + jnz short PossibleException + +; UNDONE: rip test for FWAIT in final version + cmp dl, 9bh ;FWAIT? + je sawFWAIT + +NoException: +Execute387inst: +;Enter here if look-ahead found another 387 instruction + mov EMSEG:[PrevCodeOff],edi + mov EMSEG:[CurErrCond],0 ;clear error and cond. codes, show busy + add edi, 2 ; point past opcode + +;CONSIDER: remove the two instructions below and muck with EA386Tab +;CONSIDER: to optimize for mem ops instead of reg ops. + add dh,40h ; No effective address? + jc NoEffectiveAddress0 ; yes, go do instruction + rol dh,2 ; rotate MOD field next to r/m field + mov bl,dh + and ebx,1FH ; Mask to MOD and r/m fields +MemModeDispatch: ;Label for debugging + jmp EA386Tab[4*ebx] + + +InstalEm: + call EmulatorInit + mov edi,DefaultControlWord ; Default mode to start in + mov eax, edi + call SetControlWord ; Set it + mov EMSEG:[LongControlWord], edi ; reset reserved bits + jmp EmInstalled + +; ************************ + +; +; We are about to execute a new FP instruction and there is an +; unmasked exception. Check to see if the new FP instruction is +; a "no wait" instruction. If so, let it proceed; otherwise, raise +; the exception. +; +; The no-wait store forms (fnstenv, fnstcw, fnsave, fnstsw m16) always +; take a memory operand. D9/DD opcodes with mod = 11b and a reg field +; of 6 or 7 are ordinary waiting instructions, so they must not be +; allowed to bypass the pending exception. +; + +PossibleException: + cmp edx, 0E3DBh ; if fninit, no exception + je short NoException + + cmp edx, 0E2DBh ; if fnclex, no exception + je short NoException + + cmp edx, 0E0DFh ; if "fnstsw ax", no exception + je short NoException + + cmp dl, 0D9h ; possible encoding for fnstenv or fnstcw? + je short pe20 ; yes, check mod r/m + cmp dl, 0DDh ; possible encoding for fnsave or fnstsw? + jne short pe30 + +pe20: cmp dh, 0C0h ; mod = 11b (register form)? + jae short pe30 ; yes, a waiting instruction - raise the exception + mov bl, dh ; bl = mod r/m byte + shr bl, 3 + and bl, 7 ; bl = reg (opcode extension) field + cmp bl, 6 ; is it a 6 or 7? 
+ jnc short NoException ; yes, no exception + +pe30: + jmp CommonExceptions ; unmasked exception is pending, raise it + +; ************************ + + + +; 386 address modes + +; SIB does not handle SS overrides for ebp + +SIB macro modval + local SIBindex,SIBbase + + movzx ebx,byte ptr cseg:[edi] ; ebx = SIB field + inc edi ; bump past SIB field + mov eax,ebx + and al,7 ; mask down to base register + +if modval eq 0 + cmp al,5 ; base field = 5 (ebp encoding)? + jne short SIBbase ; no - get base register value + mov eax,cseg:[edi] ; eax = disp32 + add edi,4 ; bump past displacement + SKIP 3,SIBindex ; presumably skips the 3-byte load at SIBbase (SKIP is defined elsewhere - confirm) +endif + +SIBbase: + mov eax,[esp+4*eax] ; eax = base register value + +SIBindex: + mov [esp].regESP,0 ; no esp indexing allowed (index 100b reads this zeroed slot) + mov cl,bl ; cl = SIB byte + shr cl,6 ; cl = scale factor + and bl,7 shl 3 ; ebx = 8 * index register + shr bl,1 ; ebx = 4 * index register + mov esi,[esp+1*ebx] ; esi = index register value + shl esi,cl ; esi = scaled index register value + add esi,eax ; esi = SIB address value + endm + + + ALIGN 4 + +SIB00: + SIB 00 ; decode SIB field + jmp CommonMemory + + ALIGN 4 + +SIB01: + SIB 01 ; decode SIB field + movsx eax,byte ptr cseg:[edi] ; eax = sign-extended disp8 + inc edi ; bump past disp8 + add esi,eax + jmp short CommonMemory + + ALIGN 4 + +SIB10: + SIB 10 ; decode SIB field + mov eax,cseg:[edi] ; eax = disp32 + add edi,4 ; bump past disp32 + add esi,eax + jmp short CommonMemory + + +; 386 single register addressing + + ALIGN 4 + +Exx00: + and bl,7 shl 2 ; mask off mod bits + mov esi,[esp+1*ebx] ; esi = saved value of the r/m register (ebx = 4 * r/m) + jmp short CommonMemory + + ALIGN 4 + +Exx01: + and bl,7 shl 2 ; mask off mod bits + mov esi,[esp+1*ebx] ; esi = saved value of the r/m register + movsx eax,byte ptr cseg:[edi] ; eax = sign-extended disp8 + inc edi + add esi,eax + jmp short CommonMemory + + ALIGN 4 + +Exx10: + and bl,7 shl 2 ; mask off mod bits + mov esi,[esp+1*ebx] ; esi = saved value of the r/m register + add esi,cseg:[edi] ; add disp32 + add edi,4 + jmp short CommonMemory + + +; 386 direct addressing + + ALIGN 4 + +Direct386: + mov esi,cseg:[edi] ; esi = disp32 taken directly as the address + add edi,4 + +CommonMemory: + MOV [esp].regEIP,edi ; final return offset + + +; At this point ESI = memory address, dx = |Op|r/m|MOD|escape|MF|Arith| +; Current format of opcode and address 
mode bytes (after rol dh,2) +; +; 7 6 5 4 3 2 1 0 +; |1 1 0 1 1| op1 | dl +; +; 7 6 5 4 3 2 1 0 +; | op2 | r/m |mod| dh +; +;op1 and op2 fields together make the FP opcode + + rol dx,5 ; dl = | op1 | op2 |? ?| + and edx,0FCH ;Keep only op1 & op2 bits + push offset EMLFINISH ; the dispatched routine's ret lands at EMLFINISH + mov edi,EMSEG:[CURstk] +MemOpDisp: ;Debugging label +;edi = [CURstk] + jmp tOpMemDisp[edx] ; edx = 4 * (op1:op2) indexes the dword table + + + ALIGN 4 + + +NoEffectiveAddress0: + rol dh,2 ; same rotation as the memory path, so dh matches the layout below +NoEffectiveAddress: ; Either Register op or Miscellaneous + mov [esp].regEIP,edi ; final return offset + +;Current format of opcode and address mode bytes (after rol dh,2) +; +; 7 6 5 4 3 2 1 0 +; |1 1 0 1 1| op1 | dl +; +; 7 6 5 4 3 2 1 0 +; | op2 | r/m |mod| dh +; +;op1 and op2 fields together make the FP opcode + + mov al,dh ;Save r/m bits (contains reg. no.) + rol dx,5 ; dl = | op1 | op2 |? ?| + and edx,0FCH ;Keep only op1 & op2 bits + push offset EMLFINISH ; the dispatched routine's ret lands at EMLFINISH + and eax,7 shl 2 ;Mask to register number * 4 + mov edi,EMSEG:[CURstk] + lea esi,[2*eax+eax] ;Register no. * 12 + add esi,edi ;esi = [CURstk] + 12 * register no. + cmp esi,ENDstk ;Run past end? + jae RegWrap +RegOpDisp: ;Debugging label +;eax = r/m bits * 4 +;esi = FP register address +;edi = [CURstk] + jmp tOpRegDisp[edx] + + ALIGN 4 +RegWrap: + sub esi,ENDstk - BEGstk ;Wrap around JWM +RegOpDispWrap: ;Debugging label + jmp tOpRegDisp[edx] + + +SawFwait: + inc edi ; bump past FWAIT + mov [esp].regEIP,edi ; final return offset + mov EMSEG:[CURErr],0 ; clear current error and cond. 
codes + +; return from routine; restore registers and return + + align 4 +EMLFINISH: +; check for errors + mov al, EMSEG:[CURerr] ; fetch errors + or al, EMSEG:[SWerr] + mov EMSEG:[SWerr],al ; set errors in sticky error flag + and al,EMSEG:[ErrMask] + jnz CommonExceptions + +ifdef TRACENPX + jmp CommonExceptions +endif + +if DBG eq 0 + +; +; On a free build, look ahead to next instruction +; + +;09BH is FWAIT - just skip it +;0D8H - 0DFH is 387 instruction, emulate it + mov edi,[esp].regEIP ;edi = 387 instruction address + mov dx,cseg:[edi] ;dl = next opcode byte, dh = byte after it + cmp dl,09BH ;FWAIT? + jz short SawFwait + sub dl,0D8H ;dl = opcode - 0D8H + cmp dl,8 ;Was it 0D8H - 0DFH? + jb ReExecute ;Yes - emulate it without leaving the handler +endif + mov ebx,[esp].[OldLongStatus] + and ebx,LongSavedFlags ;preserve condition codes, error flags + or EMSEG:[LongStatusWord],ebx ;merge saved status word, condition codes + + pop eax + pop ecx + pop edx + pop ebx + add esp,4 ; toss esp value + pop ebp + pop esi + pop edi + add esp,8 ;toss old PrevCodeOff and StatusWord + mov EMSEG:[CURerr],Summary ;Indicate we are not busy + pop ds + error_return ; common exit sequence + +ReExecute: + mov eax,EMSEG:[LongStatusWord] + mov ebx,[esp].[OldLongStatus] + and ebx,LongSavedFlags ;preserve condition codes, error flags + or eax,ebx ;merge saved status word, condition codes + mov [esp].OldLongStatus,eax ;Refresh the saved status for the next instruction + mov eax,EMSEG:[PrevCodeOff] + mov [esp].OldCodeOff,eax ;Refresh the saved code offset likewise + lea eax,[esp+regFlg+4] ;must restore "saved" esp + mov [esp].RegEsp,eax + jmp Execute387inst diff --git a/private/ntos/dll/i386/emfadd.asm b/private/ntos/dll/i386/emfadd.asm new file mode 100644 index 000000000..cdd0a8f58 --- /dev/null +++ b/private/ntos/dll/i386/emfadd.asm @@ -0,0 +1,396 @@ + subttl emfadd.asm - Addition and Subtraction + page +;******************************************************************************* +; Copyright (c) Microsoft Corporation 1991 +; All Rights Reserved +; +;emfadd.asm - long double add and subtract +; by Tim Paterson +; +;Purpose: +; Long double add/subtract. 
+;Outputs: +; Jumps to [RoundMode] to round and store result. +; +;Revision History: +; +; [] 09/05/91 TP Initial 32-bit version. +; +;******************************************************************************* + +;******************************************************************************* +; Dispatch for Add/Sub/Subr +; +; Signs are passed in dx: +; xor source sign with dl +; xor dest sign with dh +; +;One operand has been loaded into ecx:ebx:esi ("source"), the other is +;pointed to by edi ("dest"). +; +;Tag of source is shifted. Tag values are as follows: +.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero +.erre TAG_VALID eq 1 +.erre TAG_ZERO eq 2 +.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty +;Any special case routines not found in this file are in emarith.asm +tFaddDisp label dword ;Source (reg) Dest (*[di]) + dd AddDouble ;single single + dd AddDouble ;single double + dd AddSourceSign ;single zero + dd AddSpclDest ;single special + dd AddDouble ;double single + dd AddDouble ;double double + dd AddSourceSign ;double zero + dd AddSpclDest ;double special + dd AddDestSign ;zero single + dd AddDestSign ;zero double + dd AddZeroZero ;zero zero + dd AddSpclDest ;zero special + dd AddSpclSource ;special single + dd AddSpclSource ;special double + dd AddSpclSource ;special zero + dd TwoOpBothSpcl ;special special + dd AddTwoInf ;Two infinities + +;Memory-operand forms: each calls a loader, puts the sign-flip mask in dx +;(dl flips the source sign, dh the dest sign) and joins AddSetResult. +EM_ENTRY eFISUB16 +eFISUB16: + call Load16Int + mov dx,bSign ;Change sign of source + jmp AddSetResult + +EM_ENTRY eFISUBR16 +eFISUBR16: + call Load16Int + mov dx,bSign shl 8 ;Change sign of dest + jmp AddSetResult + +EM_ENTRY eFIADD16 +eFIADD16: + call Load16Int + xor edx,edx ;Both signs positive + jmp AddSetResult + +EM_ENTRY eFISUB32 +eFISUB32: + call Load32Int + mov dx,bSign ;Change sign of source + jmp AddSetResult + +EM_ENTRY eFISUBR32 +eFISUBR32: + call Load32Int + mov dx,bSign shl 8 ;Change sign of dest + jmp AddSetResult + +EM_ENTRY eFIADD32 +eFIADD32: + call Load32Int + xor edx,edx ;Both signs 
positive + jmp AddSetResult + +EM_ENTRY eFSUB32 +eFSUB32: + call Load32Real + mov dx,bSign ;Change sign of source + jmp AddSetResult + +EM_ENTRY eFSUBR32 +eFSUBR32: + call Load32Real + mov dx,bSign shl 8 ;Change sign of dest + jmp AddSetResult + +EM_ENTRY eFADD32 +eFADD32: + call Load32Real + xor edx,edx ;Both signs positive + jmp AddSetResult + +EM_ENTRY eFSUB64 +eFSUB64: + call Load64Real + mov dx,bSign ;Change sign of source + jmp AddSetResult + +EM_ENTRY eFSUBR64 +eFSUBR64: + call Load64Real + mov dx,bSign shl 8 ;Change sign of dest + jmp AddSetResult + +EM_ENTRY eFADD64 +eFADD64: + call Load64Real + xor edx,edx ;Both signs positive + jmp AddSetResult + + +PolyAddDouble: +;This entry point is used by polynomial evaluator. +;It checks the operand in registers for zero, and doesn't require +;signs to be set up in dx. +; +;op1 mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl +;edi = pointer to op2 in ds + xor edx,edx ;Addition + cmp cl,bTAG_ZERO ;Adding to zero? + jnz AddDouble +;Number in registers is zero, so just return value from memory. + mov ecx,EMSEG:[edi].ExpSgn + mov ebx,EMSEG:[edi].lManHi + mov esi,EMSEG:[edi].lManLo + ret + +;Register forms. The P entries push PopWhenDone so the final ret goes there; +;the "reg" entries swap esi/edi and fall into the "top" entries below them. +EM_ENTRY eFSUBPreg +eFSUBPreg: + push offset PopWhenDone + +EM_ENTRY eFSUBreg +eFSUBreg: + xchg esi,edi ;edi = st(i) (dest), esi = [CURstk] (source) + +EM_ENTRY eFSUBtop +eFSUBtop: + mov dx,bSign ;Change sign of source + jmp AddHaveSgn + +EM_ENTRY eFSUBRPreg +eFSUBRPreg: + push offset PopWhenDone + +EM_ENTRY eFSUBRreg +eFSUBRreg: + xchg esi,edi ;edi = st(i) (dest), esi = [CURstk] (source) + +EM_ENTRY eFSUBRtop +eFSUBRtop: + mov dx,bSign shl 8 ;Change sign of dest + jmp AddHaveSgn + + +InsignifAdd: + mov eax,1 ;Set sticky bit + shl ch,1 ;Get sign, CY set IFF subtracting mant. 
+ jnc ReturnOp1 + sub esi,eax ;Subtract 1 from mantissa + sbb ebx,0 + neg eax ;eax = 0FFFFFFFFH: extension bits of a value just below the old mantissa +ReturnOp1: +;ebx:esi:eax = normalized unrounded mantissa +;high half of ecx = exponent +;high bit of ch = sign + jmp EMSEG:[RoundMode] + +EM_ENTRY eFADDPreg +eFADDPreg: + push offset PopWhenDone + +EM_ENTRY eFADDreg +eFADDreg: + xchg esi,edi ;edi = st(i) (dest), esi = [CURstk] (source) + +EM_ENTRY eFADDtop +eFADDtop: + xor edx,edx ;Both signs positive +AddHaveSgn: + mov ecx,EMSEG:[esi].ExpSgn ;Load source operand into ecx:ebx:esi + mov ebx,EMSEG:[esi].lManHi + mov esi,EMSEG:[esi].lManLo +AddSetResult: + mov ebp,offset tFaddDisp + mov EMSEG:[Result],edi ;Save result pointer + mov al,cl ;al = source tag + mov ah,EMSEG:[edi].bTag ;ah = dest tag + test ax,ZEROorSPCL * 100H + ZEROorSPCL ;Either operand zero or special? + jnz TwoOpDispatch ;Yes - presumably dispatches through the table in ebp (defined elsewhere) + +;.erre AddDouble eq $ ;Fall into AddDouble + +;********* +AddDouble: +;********* +; +;op1 mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7 +;dl = sign change for op1 +;dh = sign change for op2 +;edi = pointer to op2 + + xor ch,dl ;Flip sign if subtracting + mov eax,EMSEG:[edi].ExpSgn + xor ah,dh ;Flip sign if subtracting + mov edx,EMSEG:[edi].lManHi + mov edi,EMSEG:[edi].lManLo + +AddDoubleReg: +;op1 mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7 +;op2 mantissa in edx:edi, exponent in high eax, sign in ah bit 7 + + cmp eax,ecx ;Compare exponents +.erre TexpBias eq 0 ;Not biased, use signed jump + jle short HavLg ;op1 is larger, we have the right order + xchg esi,edi + xchg ebx,edx + xchg eax,ecx +HavLg: +;Larger in ebx:esi. Note that if the exponents were equal, things like +;the sign bit or tag may have determined which is "larger". It doesn't +;matter which is which if the exponents are equal, however. + and ah,80H ;Keep sign bit + sar ch,1 ;Extend sign into bit 6 of byte + xor ch,ah ;See if signs are the same + xor ax,ax ;Clear out sign and tag + neg eax ;ax still 0 + add eax,ecx ;Get exponent difference + shr eax,16 ;Bring exp. difference down to low end + jz short Aligned + cmp eax,64+1 ;Is difference in range? +;CONSIDER: tell me again why 1/4 LSB could have effect. 
It seems like +;CONSIDER: 1/2 LSB is the limit. + ja short InsignifAdd ; (Even 1/4 LSB could have effect) + mov cl,al ;Shift count to cl +;High half ecx = exponent +;ch bit 7 = sign difference +;ch bit 6 = sign +;cl = shift count + xor eax,eax ;Prepare to take bits shifted out + cmp cl,32 ;More than a whole word? + jb short ShortShift + xchg eax,edx ;Save bits shifted out in eax + xchg edi,eax ;Now edx:edi:eax = op2 mantissa moved down 32 bits + sub cl,32 ;Remaining shift count + cmp cl,8 ;Safe to shift this much + jb short ShortSticky +;Collapse all (sticky) bits of eax into LSB of edi + neg eax ;Sets CY if eax was not zero + sbb eax,eax ;-1 if CY was set, zero otherwise + neg eax ;Sticky bit in LSB only + or di,ax ;Move sticky bit up + cmp cl,32 ;Less than another Dword? + jb short ShortShift + mov eax,edi + xor edi,edi ;edx = edi = 0 +ShortSticky: +;Shift will not be more than 8 bits + or ah,al ;Move up sticky bits +ShortShift: + shrd eax,edi,cl ;Save bits shifted out in eax + shrd edi,edx,cl + shr edx,cl +Aligned: + shl ch,1 ;Were signs the same? + jc short SubMant ;No--go subtract mantissas +;Add mantissas + add esi,edi + adc ebx,edx + jnc short AddExit +;Addition of mantissas overflowed. Bump exponent and shift right + shrd eax,esi,1 + shrd esi,ebx,1 ;Faster than RCR + sar ebx,1 + or ebx,1 shl 31 ;Set MSB + add ecx,1 shl 16 ;Exponent lives in the high half of ecx +AddExit: +;ebx:esi:eax = normalized unrounded mantissa +;high half of ecx = exponent +;high bit of ch = sign + jmp EMSEG:[RoundMode] + +NegMant: +;To get here, exponents must have been equal and op2 was bigger than op1. +;Note that this means nothing ever got shifted into eax. + not ch ;Change sign of result + not ebx ;not/neg/sbb -1: 64-bit negate of ebx:esi + neg esi + sbb ebx,-1 + js short AddExit ;Already normalized? + test ebx,40000000H ;Only one bit out of normal? + jz short NormalizeAdd + jmp short NormOneBit + +SubMant: +;Subtract mantissas + neg eax ;Pretend minuend is zero extended + sbb esi,edi ;Borrow from the extension dword carries into the mantissa + sbb ebx,edx + jc short NegMant ;Result went negative - op2 was the bigger mantissa + js short AddExit ;Already normalized? +NormChk: + test ebx,40000000H ;Only one bit out of normal? 
+ jz short NormalizeAdd +;One bit normalization +NormOneBit: + sub ecx,1 shl 16 ;Adjust exponent +ShiftOneBit: ;Entry point from emfmul.asm + shld ebx,esi,1 + shld esi,eax,1 + shl eax,1 + jmp EMSEG:[RoundMode] + +;*********** +AddZeroZero: ;Entry point for adding two zeros +;*********** + mov ah,EMSEG:[edi].bSgn ;Get sign of op + xor ch,dl ;Possibly subtracting source + xor ah,dh ;Possibly subtracting dest + xor ch,ah ;Do signs match? + js FindZeroSign ;No - use rounding mode to set sign + mov EMSEG:[edi].bSgn,ah ;Correct the sign if subtracting + ret ;Result at [edi] is now correct + +ZeroChk: +;Upper 64 bits were all zero, but there could be 1 bit in the MSB +;of eax. + or eax,eax + jnz short OneBitLeft + mov ebx,eax + mov esi,eax ;Zero mantissa +FindZeroSign: +;Round to -0 if "round down" mode, round to +0 otherwise + xor ecx,ecx ;Zero exponent, positive sign + mov dl,EMSEG:[CWcntl] ;Get control word + and dl,RoundControl + cmp dl,RCdown ;Rounding down? + jnz ZeroJmp + mov ch,80H ;Set sign bit +ZeroJmp: + mov cl,bTAG_ZERO + jmp EMSEG:[ZeroVector] + +OneBitLeft: + xchg ebx,eax ;Bit now normalized + sub ecx,64 shl 16 ;Adjust exponent + jmp EMSEG:[RoundMode] + +NormalizeAdd: +;Inputs: +; ebx:esi:eax = 65-bit number +; ecx high half = exponent +; +;Since we are more than 1 bit out of normalization, exponents must have +;differed by 0 or 1. Thus rounding will not be necessary for 64 bits. +; +;When the high dword is zero the number is first moved up 32 bits. eax +;must be cleared once it has been copied into esi; otherwise the shld at +;ShortNorm would shift the same guard bit into esi a second time +;(e.g. 0:1:80000000H would become C0000000:40000000 instead of +;C0000000:00000000). + bsr edx,ebx ;Scan for MSB + jnz short ShortNorm + bsr edx,esi + jz short ZeroChk + sub ecx,32 shl 16 ;Adjust exponent + mov ebx,esi ;Push it up 32 bits + mov esi,eax + xor eax,eax ;Old eax now lives in esi - nothing left below it +ShortNorm: +;Bit number in edx ranges from 0 to 31 + mov cl,dl + not cl ;Convert bit number to shift count + shld ebx,esi,cl + shld esi,eax,cl + shl edx,16 ;Move exp. 
adjustment to high end + lea ecx,[ecx+edx-(31 shl 16)] ;Adjust exponent + xor eax,eax ;No extra bits + jmp EMSEG:[RoundMode] + +AddDestSign: + xor EMSEG:[edi].bSgn,dh ;Source is zero (see tFaddDisp): dest is the result, sign flipped if subtracting dest + ret + +AddSourceSign: + xor ch,dl ;Dest is zero (see tFaddDisp): source is the result, sign flipped if subtracting source + jmp SaveResult diff --git a/private/ntos/dll/i386/emfcom.asm b/private/ntos/dll/i386/emfcom.asm new file mode 100644 index 000000000..43b5672c6 --- /dev/null +++ b/private/ntos/dll/i386/emfcom.asm @@ -0,0 +1,402 @@ + subttl emfcom.asm - Comparison Instructions + page +;******************************************************************************* +;emfcom.asm - Comparison Instructions +; +; Microsoft Confidential +; +; Copyright (c) Microsoft Corporation 1991 +; All Rights Reserved +; +;Purpose: +; FCOM,FCOMP,FCOMPP,FUCOM,FUCOMP,FUCOMPP,FTST,FXAM instructions +; +;Revision History: +; +; [] 09/05/91 TP Initial 32-bit version. +; +;******************************************************************************* + + +;******************************************************************************* +;Dispatch table for compare +; +;One operand has been loaded into ecx:ebx:esi ("source"), the other is +;pointed to by edi ("dest"). +; +;Tag of source is shifted. 
Tag values are as follows: +.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero +.erre TAG_VALID eq 1 +.erre TAG_ZERO eq 2 +.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty +;Any special case routines not found in this file are in emarith.asm +tFcomDisp label dword ;Source (reg) Dest (*[di] = ST) + dd ComDouble ;single single + dd ComDouble ;single double + dd ComDestZero ;single zero + dd ComSpclDest ;single special + dd ComDouble ;double single + dd ComDouble ;double double + dd ComDestZero ;double zero + dd ComSpclDest ;double special + dd ComSrcZero ;zero single + dd ComSrcZero ;zero double + dd ComEqual ;zero zero + dd ComSpclDest ;zero special + dd ComSpclSource ;special single + dd ComSpclSource ;special double + dd ComSpclSource ;special zero + dd ComBothSpcl ;special special + + +EM_ENTRY eFICOMP16 +eFICOMP16: + and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3 (+4 skips the return address the dispatcher pushed) + push offset PopWhenDone + push offset ComOpLoaded + jmp Load16Int ;Returns to ComOpLoaded + +EM_ENTRY eFICOM16 +eFICOM16: + and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3 + push offset ComOpLoaded + jmp Load16Int ;Returns to ComOpLoaded + +EM_ENTRY eFICOMP32 +eFICOMP32: + and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3 + push offset PopWhenDone + push offset ComOpLoaded + jmp Load32Int ;Returns to ComOpLoaded + +EM_ENTRY eFICOM32 +eFICOM32: + and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3 + push offset ComOpLoaded + jmp Load32Int ;Returns to ComOpLoaded + +EM_ENTRY eFCOMP32 +eFCOMP32: + and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3 + push offset PopWhenDone + push offset ComOpLoaded + jmp Load32Real ;Returns to ComOpLoaded + +EM_ENTRY eFCOM32 +eFCOM32: + and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3 + push offset ComOpLoaded + jmp Load32Real ;Returns to ComOpLoaded + +EM_ENTRY eFCOMP64 +eFCOMP64: + and [esp].[OldLongStatus+4],NOT(ConditionCode 
SHL 16) ;clear C0,C1,C2,C3 + push offset PopWhenDone + push offset ComOpLoaded + jmp Load64Real ;Returns to ComOpLoaded + +EM_ENTRY eFCOM64 +eFCOM64: + and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3 + push offset ComOpLoaded + jmp Load64Real ;Returns to ComOpLoaded + +EM_ENTRY eFUCOMPP +eFUCOMPP: + and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3 + push offset ComPop2 ;Final ret goes to ComPop2 to pop both operands + jmp eFUCOM0 + +EM_ENTRY eFUCOMP +eFUCOMP: + and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3 + push offset PopWhenDone ;Final ret goes to PopWhenDone to pop ST + jmp eFUCOM0 + +EM_ENTRY eFUCOM +eFUCOM: + and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3 +eFUCOM0: +;esi = pointer to st(i) from instruction field +;edi = [CURstk] + mov ecx,EMSEG:[esi].ExpSgn + mov ebx,EMSEG:[esi].lManHi + mov esi,EMSEG:[esi].lManLo + mov dl,40H ;Flag FUCOM - Look for SNAN + jmp UComOpLoaded + +EM_ENTRY eFCOMPP +eFCOMPP: + and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3 + push offset ComPop2 ;Final ret goes to ComPop2 to pop both operands + jmp eFCOM0 + +EM_ENTRY eFCOMP +eFCOMP: + and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3 + push offset PopWhenDone ;Final ret goes to PopWhenDone to pop ST + jmp eFCOM0 + +EM_ENTRY eFCOM +eFCOM: + and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3 +eFCOM0: +;esi = pointer to st(i) from instruction field +;edi = [CURstk] + mov ecx,EMSEG:[esi].ExpSgn + mov ebx,EMSEG:[esi].lManHi + mov esi,EMSEG:[esi].lManLo + +ComOpLoaded: +; mov EMSEG:[UpdateCCodes],1 + mov dl,0 ;flag FCOM - Look for any NAN +UComOpLoaded: + mov ebp,offset tFcomDisp + mov al,cl ;al = source tag + mov ah,EMSEG:[edi].bTag ;ah = dest (ST) tag + test ax,ZEROorSPCL * 100H + ZEROorSPCL ;Either operand zero or special? + jnz TwoOpDispatch ;Yes - presumably dispatches through the table in ebp (defined elsewhere) + +;.erre ComDouble eq $ ;Fall into ComDouble + +;********* +ComDouble: +;********* +; +;ebx:esi = op1 mantissa +;ecx = op1 sign in bit 15, exponent in high half +;edi = pointer to op2 + mov eax,EMSEG:[edi].ExpSgn + and ax,bSign shl 8 ;Keep sign only + and cx,bSign shl 8 + cmp ah,ch ;Are signs the same? 
+ jnz StBigger + cmp eax,ecx ;Are exponents the same? + jl StSmaller + jg StBigger + cmp EMSEG:[edi].lManHi,ebx ;Compare mantissas + jnz MantDif + cmp EMSEG:[edi].lManLo,esi ;Set flags for ST - src + jz ComEqual +MantDif: + adc al,al ;Copy CY flag to bit 0 + rol ah,1 ;Rotate sign to bit 0 + xor al,ah ;Flip saved CY bit if negative + mov EMSEG:[SWcc],al ;Set condition code + ret + +StSmaller: + not ah ;Invert sign so the code below reports the opposite result +StBigger: +;ah = sign of ST +;ch = sign of other operand +;ST is bigger if it is positive (smaller if it is negative). +;Use the sign bit directly as the "less than" bit C0. +.erre C0 eq 1 + shr ah,7 ;Bring sign down to bit 0, clear CY + mov EMSEG:[SWcc],ah ;Bit set if ST smaller (negative) + ret + +ComEqual: + mov EMSEG:[SWcc],CCequal + ret + + + +PopWhenDone: +.erre bTAG_NOPOP eq -1 + inc cl ;OK to pop? + jz ComPopX ;No - had unmasked Invalid Operation + + POPSTret + +ComPop2: +.erre bTAG_NOPOP eq -1 + inc cl ;OK to pop? + jz ComPopX ;No - had unmasked Invalid Operation + mov esi,EMSEG:[CURstk] + mov EMSEG:[esi].bTag,bTAG_EMPTY ;Empty the current top + add esi,Reg87Len*2 ;esi = top of stack after two pops + cmp esi,ENDstk ;JWM + je PopOneOver ;New top lands exactly on ENDstk + ja PopTwoOver ;New top lands past ENDstk + mov EMSEG:[esi-Reg87Len].bTag,bTAG_EMPTY ;Empty the second register popped + mov EMSEG:[CURstk],esi +ComPopX: + ret + +PopOneOver: + mov EMSEG:[CURstk],BEGstk ;JWM +ifdef NT386 + mov EMSEG:[INITstk].bTAG,bTAG_EMPTY ;INITstk is presumably the last stack slot - confirm +else + mov EMSEG:[XINITstk].bTAG,bTAG_EMPTY +endif + ret + +PopTwoOver: + mov EMSEG:[CURstk],BEGstk+Reg87Len ;JWM +ifdef NT386 + mov EMSEG:[BEGstk].bTAG,bTAG_EMPTY ;Second register popped is the one at BEGstk +else + mov EMSEG:[XBEGstk].bTAG,bTAG_EMPTY +endif + ret + +;******************************************************************************* +;Special cases for FCOM/FUCOM. +;These don't share with those in emarith.asm because NANs are treated +;differently. +ComDestZero: +;ST is zero, so Src is bigger if it is positive (smaller if it is negative). +;Use the sign bit directly as the "less than" bit C0. 
+ not ch ;C0 is 1 if ST < Src +.erre C0 eq 1 + shr ch,7 ;Bring sign down to bit 0 + mov EMSEG:[SWcc],ch ;Bit set if ST smaller (Src positive) + ret + +ComSrcZero: +;ST is bigger if it is positive (smaller if it is negative). +;Use the sign bit directly as the "less than" bit C0. + mov al,EMSEG:[edi].bSgn +.erre C0 eq 1 + shr al,7 ;Bring sign down to bit 0 + mov EMSEG:[SWcc],al ;Bit set if ST smaller (negative) + ret + +ComSpclSource: + cmp cl,bTAG_NAN + jz ComSrcNAN + cmp cl,bTAG_INF + jz ComDestZero ;Src is infinity: its sign alone decides, as when ST is zero + cmp cl,bTAG_DEN + jz ComDenormal +;Must be empty +ComEmpty: + mov EMSEG:[CURerr],Invalid+StackFlag + jmp ComChkMask + +ComSrcNAN: + shl edx,24 ;Move dl to high byte + test ebx,edx ;See if we report error with this NAN +ComChkNAN: + jnz Incomp ;Tested bit set - no error reported (never happens when dl = 0, i.e. FCOM) +ComInvalid: + mov EMSEG:[CURerr],Invalid ;Flag the error +ComChkMask: + test EMSEG:[CWmask],Invalid ;Is exception masked? + jnz Incomp + mov cl,bTAG_NOPOP ;Unmasked, don't pop stack +Incomp: + mov EMSEG:[SWcc],CCincomprable + ret + +ComSpclDest: + mov al,EMSEG:[edi].bTag + cmp al,bTAG_INF + jz ComSrcZero ;ST is infinity: its sign alone decides, as when Src is zero + cmp al,bTAG_Empty + jz ComEmpty + cmp al,bTAG_DEN + jz ComDenormal +;Must be NAN +ComDestNAN: + test EMSEG:[edi].bMan7,dl ;See if we report error with this NAN + jmp ComChkNAN + +ComBothSpcl: + mov al,EMSEG:[edi].bTag + cmp cl,bTAG_EMPTY + jz ComEmpty + cmp al,bTAG_EMPTY + jz ComEmpty + cmp cl,bTAG_NAN + jz ComSrcNAN + cmp al,bTAG_NAN + jz ComDestNAN + mov ah,cl ;ax = source tag : dest tag + cmp ax,(bTAG_INF shl 8) + bTag_INF ;Are both Infinity? 
+ jz ComDouble ;If so, compare their signs +;Must have at least one denormal +ComDenormal: + or EMSEG:[CURerr],Denormal + jmp ComDouble + +;******************************************************************************* + +XAM_Unsupported equ 0 +XAM_NAN equ C0 +XAM_Norm equ C2 +XAM_Inf equ C2+C0 +XAM_Zero equ C3 +XAM_Empty equ C3+C0 +XAM_Den equ C3+C2 + +;Condition codes for FXAM, indexed by the low 4 bits of the tag. +;The .erre lines check that each entry sits at its tag value; the db 0 +;entries fill the tag values not checked here. +tXamTag label byte +.erre TAG_SNGL eq $-tXamTag + db XAM_Norm ;TAG_SNGL +.erre TAG_VALID eq $-tXamTag + db XAM_Norm ;TAG_VALID +.erre TAG_ZERO eq $-tXamTag + db XAM_Zero ;TAG_ZERO +.erre TAG_EMPTY eq $-tXamTag + db XAM_Empty ;TAG_EMPTY + db 0 + db 0 + db 0 +.erre TAG_INF eq $-tXamTag + db XAM_Inf ;TAG_INF + db 0 + db 0 + db 0 +.erre TAG_NAN eq $-tXamTag + db XAM_NAN ;TAG_NAN + db 0 + db 0 + db 0 +.erre TAG_DEN eq $-tXamTag + db XAM_Den ;TAG_DEN + +EM_ENTRY eFXAM +eFXAM: +;edi = [CURstk] + and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3 + mov eax,EMSEG:[edi].ExpSgn ;Get sign and tag + mov bl,ah ;Save sign + and bl,bSign ;Keep only sign bit + and eax,0FH ;Save low 4 bits of tag + mov al,tXamTag[eax] ;Lookup cond. codes for this tag +.erre C1 eq 2 ;Bit 1 +.erre bSign eq 80H ;Bit 7 + shr bl,7-1 ;Move sign bit to CC C1 + or al,bl + mov EMSEG:[SWcc],al + ret + +;******************************************************************************* + +EM_ENTRY eFTST +eFTST: +;edi = [CURstk] + and [esp].[OldLongStatus+4],NOT(ConditionCode SHL 16) ;clear C0,C1,C2,C3 + mov eax,EMSEG:[edi].ExpSgn + cmp al,bTAG_ZERO + jz ComEqual ;Zero compares equal + ja TestSpcl ;Tag above bTAG_ZERO - handle special cases +;Either single or double, non-zero. Just check sign. 
+TestSign: + shr ah,7 ;Bring sign down to bit 0 + mov EMSEG:[SWcc],ah ;Bit set if negative + ret + +TestSpcl: + cmp al,bTAG_INF + jz TestSign ;Normal test for Infinity + cmp al,bTAG_EMPTY + jz ComEmpty + cmp al,bTAG_NAN + jz ComInvalid +;Must be denormal + mov EMSEG:[CURerr],Denormal + jmp TestSign diff --git a/private/ntos/dll/i386/emfconst.asm b/private/ntos/dll/i386/emfconst.asm new file mode 100644 index 000000000..210cccf65 --- /dev/null +++ b/private/ntos/dll/i386/emfconst.asm @@ -0,0 +1,126 @@ + subttl emfconst.asm - Loading of 387 on chip constants + page +;******************************************************************************* +;emfconst.asm - Loading of 387 on chip constants +; +; Microsoft Confidential +; +; Copyright (c) Microsoft Corporation 1991 +; All Rights Reserved +; +;Purpose: +; FLDZ, FLD1, FLDPI, FLDL2T, FLDL2E, FLDLG2, FLDLN2 instructions +;Inputs: +; edi = [CURstk] +; +;Revision History: +; +; [] 09/05/91 TP Initial 32-bit version. +; +;******************************************************************************* + + + PrevStackWrap edi,Ld1 ;Tied to PrevStackElem below + +EM_ENTRY eFLD1 +eFLD1: +;edi = [CURstk] + PrevStackElem edi,Ld1 ;Point to receiving location + cmp EMSEG:[edi].bTag,bTAG_EMPTY ;Is it empty? + jnz FldErr ;in emload.asm + mov EMSEG:[CURstk],edi + mov EMSEG:[edi].lManLo,0 + mov EMSEG:[edi].lManHi,1 shl 31 + mov EMSEG:[edi].ExpSgn,bTAG_SNGL ;Exponent and sign are zero + ret + + + PrevStackWrap edi,Ldz ;Tied to PrevStackElem below + +EM_ENTRY eFLDZ +eFLDZ: +;edi = [CURstk] + PrevStackElem edi,Ldz ;Point to receiving location + cmp EMSEG:[edi].bTag,bTAG_EMPTY ;Is it empty? + jnz FldErr ;in emload.asm + mov EMSEG:[CURstk],edi + mov EMSEG:[edi].lManLo,0 + mov EMSEG:[edi].lManHi,0 + mov EMSEG:[edi].ExpSgn,bTAG_ZERO ;Exponent and sign are zero + ret + +;******************************************************************************* + +;The 5 irrational constants need to be adjusted according to rounding mode. 
+ +DefConst macro cName,low,high,expon,round +c&cName&lo equ low +c&cName&hi equ high +c&cName&exp equ expon +c&cName&rnd equ round + endm + +DefConst FLDL2T,0CD1B8AFEH,0D49A784BH,00001H,0 + +DefConst FLDL2E,05C17F0BCH,0B8AA3B29H,00000H,1 + +DefConst FLDLG2,0FBCFF799H,09A209A84H,0FFFEH,1 + +DefConst FLDLN2,0D1CF79ACH,0B17217F7H,0FFFFH,1 + +DefConst FLDPI,02168C235H,0C90FDAA2H,00001H,1 + + +LoadConstant macro cName,nojmp +EM_ENTRY e&cName +e&cName: + mov ebx,c&cName&hi + mov edx,c&cName&lo + mov ecx,c&cName&exp shl 16 + c&cName&rnd +ifb <nojmp> + jmp CommonConst +endif + endm + +LoadConstant FLDL2T + +LoadConstant FLDL2E + +LoadConstant FLDLG2 + +LoadConstant FLDLN2 + +LoadConstant FLDPI,nojmp + +CommonConst: +;ebx:edx = mantissa of constant, rounded to nearest +;high ecx = exponent +;ch = sign +;cl = rounding flag: 1 indicates roundup occurred for round nearest, else 0 +;edi = [CURstk] + test EMSEG:[CWcntl],RoundControl ;Check rounding control bits +.erre RCnear eq 0 + jnz NotNearConst ;Adjust constant if not round nearest +StoreConst: + mov cl,bTAG_VALID + mov esi,edx + jmp FldCont ;In emload.asm + +NotNearConst: +;It is known that the five constants are positive irrational numbers. +;This means they are never exact, and chop and round down always +;produce the same answer. It is also known that the values are such +;that rounding only alters bits in the last byte. +; +;A flag in cl indicates if the number has been rounded up for round +;nearest (1 = rounded up, 0 = rounded down). In chop and round down +;modes, this flag can be directly subtracted to reverse the rounding. +;In round up mode, we want to add (1-flag) = -(flag-1). +.erre RCchop eq 0CH ;Two bits set only for chop + test EMSEG:[CWcntl],RCdown ;DOWN bit set?
+ jnz DirectRoundConst ;If so, it's chop or down +;Round Up mode + dec cl ;-1 if round up needed, else 0 +DirectRoundConst: + sub dl,cl ;Directed rounding + jmp StoreConst diff --git a/private/ntos/dll/i386/emfdiv.asm b/private/ntos/dll/i386/emfdiv.asm new file mode 100644 index 000000000..a06b910f2 --- /dev/null +++ b/private/ntos/dll/i386/emfdiv.asm @@ -0,0 +1,473 @@ + subttl emfdiv.asm - Division + page +;******************************************************************************* +; Copyright (c) Microsoft Corporation 1991 +; All Rights Reserved +; +;emfdiv.asm - long double divide +; by Tim Paterson +; +;Purpose: +; Long double division. +;Inputs: +; ebx:esi = op1 mantissa +; ecx = op1 sign in bit 15, exponent in high half +; edi = pointer to op2 and result location +; [Result] = edi +; +; Exponents are unbiased. Denormals have been normalized using +; this expanded exponent range. Neither operand is allowed to be zero. +;Outputs: +; Jumps to [RoundMode] to round and store result. +; +;Revision History: +; +; [] 09/05/91 TP Initial 32-bit version. +; +;******************************************************************************* + + +;Dispatch tables for division +; +;One operand has been loaded into ecx:ebx:esi ("source"), the other is +;pointed to by edi ("dest"). edi points to dividend for fdiv, +;to divisor for fdivr. +; +;Tag of source is shifted. 
Tag values are as follows: +; +.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero +.erre TAG_VALID eq 1 +.erre TAG_ZERO eq 2 +.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty + +;dest = dest / source +tFdivDisp label dword ;Source (reg) Dest (*[di]) + dd DivSingle ;single single + dd DivSingle ;single double + dd XorDestSign ;single zero + dd DivSpclDest ;single special + dd DivDouble ;double single + dd DivDouble ;double double + dd XorDestSign ;double zero + dd DivSpclDest ;double special + dd DivideByZero ;zero single + dd DivideByZero ;zero double + dd ReturnIndefinite ;zero zero + dd DivSpclDest ;zero special + dd DivSpclSource ;special single + dd DivSpclSource ;special double + dd DivSpclSource ;special zero + dd TwoOpBothSpcl ;special special + dd ReturnIndefinite ;Two infinities + +;dest = source / dest +tFdivrDisp label dword ;Source (reg) Dest (*[di]) + dd DivrSingle ;single single + dd DivrDouble ;single double + dd DivideByZero ;single zero + dd DivrSpclDest ;single special + dd DivrSingle ;double single + dd DivrDouble ;double double + dd DivideByZero ;double zero + dd DivrSpclDest ;double special + dd XorSourceSign ;zero single + dd XorSourceSign ;zero double + dd ReturnIndefinite ;zero zero + dd DivrSpclDest ;zero special + dd DivrSpclSource ;special single + dd DivrSpclSource ;special double + dd DivrSpclSource ;special zero + dd TwoOpBothSpcl ;special special + dd ReturnIndefinite ;Two infinities + + +EM_ENTRY eFIDIV16 +eFIDIV16: + push offset DivSetResult + jmp Load16Int ;Returns to DivSetResult + +EM_ENTRY eFIDIVR16 +eFIDIVR16: + push offset DivrSetResult + jmp Load16Int + +EM_ENTRY eFIDIV32 +eFIDIV32: + push offset DivSetResult + jmp Load32Int + +EM_ENTRY eFIDIVR32 +eFIDIVR32: + push offset DivrSetResult + jmp Load32Int + +EM_ENTRY eFDIV32 +eFDIV32: + push offset DivSetResult + jmp Load32Real ;Returns to DivSetResult + +EM_ENTRY eFDIVR32 +eFDIVR32: + push offset DivrSetResult ;Returns to DivrSetResult + jmp Load32Real + +EM_ENTRY eFDIV64 
+eFDIV64: + push offset DivSetResult + jmp Load64Real ;Returns to DivSetResult + +EM_ENTRY eFDIVR64 +eFDIVR64: + push offset DivrSetResult + jmp Load64Real ;Returns to DivrSetResult + + +EM_ENTRY eFDIVRPreg +eFDIVRPreg: + push offset PopWhenDone ;Falls into eFDIVRreg + +EM_ENTRY eFDIVRreg +eFDIVRreg: + xchg esi,edi ;Falls into eFDIVRtop + +EM_ENTRY eFDIVRtop +eFDIVRtop: + mov ecx,EMSEG:[esi].ExpSgn + mov ebx,EMSEG:[esi].lManHi + mov esi,EMSEG:[esi].lManLo +DivrSetResult: +;cl has tag of dividend + mov ebp,offset tFdivrDisp + mov EMSEG:[Result],edi ;Save result pointer + mov ah,cl + mov al,EMSEG:[edi].bTag + and ah,not 1 ;Ignore single vs. double on dividend + cmp ax,1 +.erre bTAG_VALID eq 1 +.erre bTAG_SNGL eq 0 + jz DivrDouble ;Divisor was double + ja TwoOpResultSet +;.erre DivrSingle eq $ ;Fall into DivrSingle + +;********* +DivrSingle: +;********* +;Computes op1/op2 +;Op1 is double, op2 is single (low 32 bits are zero) + mov edx,ebx + mov eax,esi ;Mantissa in edx:eax + mov ebx,EMSEG:[edi].ExpSgn + mov edi,EMSEG:[edi].lManHi + jmp DivSingleReg + + +SDivBigUnderflow: +;Overflow flag set could only occur with denormals (true exp < -32768) + or EMSEG:[CURerr],Underflow + test EMSEG:[CWmask],Underflow ;Is exception masked? + jnz UnderflowZero ;Yes, return zero (in emfmul.asm) + add ecx,Underbias shl 16 ;Fix up exponent + jmp ContSdiv ;Continue with divide + + +EM_ENTRY eFDIVPreg +eFDIVPreg: + push offset PopWhenDone ;Falls into eFDIVreg + +EM_ENTRY eFDIVreg +eFDIVreg: + xchg esi,edi ;Falls into eFDIVtop + +EM_ENTRY eFDIVtop +eFDIVtop: + mov ecx,EMSEG:[esi].ExpSgn + mov ebx,EMSEG:[esi].lManHi + mov esi,EMSEG:[esi].lManLo +DivSetResult: +;cl has tag of divisor + mov ebp,offset tFdivDisp + mov EMSEG:[Result],edi ;Save result pointer + mov al,cl + mov ah,EMSEG:[edi].bTag + and ah,not 1 ;Ignore single vs.
double on dividend + cmp ax,1 +.erre bTAG_VALID eq 1 +.erre bTAG_SNGL eq 0 + jz DivDouble ;Divisor was double + ja TwoOpResultSet +;.erre DivSingle eq $ ;Fall into DivSingle + +;********* +DivSingle: +;********* +;Computes op2/op1 +;Op2 is double, op1 is single (low 32 bits are zero) + xchg edi,ebx ;Mantissa in edi, op2 ptr to ebx + xchg ebx,ecx ;ExpSgn to ebx, op2 ptr to ecx + mov edx,EMSEG:[ecx].lManHi + mov eax,EMSEG:[ecx].lManLo + mov ecx,EMSEG:[ecx].ExpSgn ;Op2 loaded + +DivSingleReg: +;dividend mantissa in edx:eax, exponent in high ecx, sign in ch bit 7 +;divisor mantissa in edi, exponent in high ebx, sign in bh bit 7 + + xor ch,bh ;Compute result sign + xor bx,bx ;Clear out sign and tag + sub ecx,1 shl 16 ;Exponent adjustment needed + sub ecx,ebx ;Compute result exponent +.erre TexpBias eq 0 ;Exponents not biased + jo SDivBigUnderflow ;Dividing denormal by large number +ContSdiv: + +;If dividend >= divisor, the DIV instruction will overflow. Check for +;this condition and shift the dividend right one bit if necessary. +; +;In previous versions of this algorithm for 24-bit and 53-bit mantissas, +;this shift was always performed without a test. This meant that a 1-bit +;normalization might be required at the end. This worked fine because +;32 or 64 bits were calculated, so extra precision was available for +;normalization. However, this version needs all 64 bits that are calculated, +;so we can't afford a normalization shift at the end. This test tells us +;up front how to align so we'll be normalized. + xor ebx,ebx ;Extend dividend + cmp edi,edx ;Will DIV overflow? + ja DoSdiv ;No, we're safe + shrd ebx,eax,1 + shrd eax,edx,1 + shr edx,1 + add ecx,1 shl 16 ;Bump exponent to account for shift +DoSdiv: + div edi + xchg ebx,eax ;Save quotient in ebx, extend remainder + div edi + mov esi,eax +;We have a 64-bit quotient in ebx:esi. Now compare remainder*2 with divisor +;to compute round and sticky bits. 
+ mov eax,-1 ;Set round and sticky bits + shl edx,1 ;Double remainder + jc RoundJmp ;If too big, round & sticky set + cmp edx,edi ;Is remainder*2 > divisor? + ja RoundJmp + +;Observe, oh wondering one, how you can assume the result of this last +;compare is not equality. Use the following notation: n=numerator, +;d=denominator,q=quotient,r=remainder,b=base(2^64 here). If +;initially we had n < d then there was no shift and we will find q and r +;so that q*d+r=n*b, if initially we had n >= d then there was a shift and +;we will find q and r so that q*d+r=n*b/2. If we have equality here +;then r=d/2 ==> n={possibly 2*}(2*q+1)*d/(2*b), since this can only +;be integral if d is a multiple of b, but by definition b/2 <= d < b, we +;have a contradiction. Equality is thus impossible at this point. + + cmp edx,1 ;Check for zero remainder + sbb eax,-2 ;eax==0 if CY, ==1 if NC (was -1) +RoundJmp: + jmp EMSEG:[RoundMode] + +;******************************************************************************* + +DDivBigUnderflow: +;Overflow flag set could only occur with denormals (true exp < -32768) + or EMSEG:[CURerr],Underflow + test EMSEG:[CWmask],Underflow ;Is exception masked? + jnz UnderflowZero ;Yes, return zero (in emfmul.asm) + add ecx,Underbias shl 16 ;Fix up exponent + jmp ContDdiv ;Continue with divide + +DivrDoubleSetFlag: +;Special entry point used by FPATAN to set bit 6 of flag dword pushed +;on stack before call. + or byte ptr [esp+4],40H +;********* +DivrDouble: +;********* +;Computes op1/op2 + mov edx,ebx + mov eax,esi ;Mantissa in edx:eax + mov ebx,EMSEG:[edi].ExpSgn + mov esi,EMSEG:[edi].lManHi + mov edi,EMSEG:[edi].lManLo + jmp short DivDoubleReg + +HighHalfEqual: +;edx:eax:ebp = dividend +;esi:edi = divisor +;ecx = exponent and sign of result +; +;High half of dividend is equal to high half of divisor. This will cause +;the DIV instruction to overflow. If whole dividend >= whole divisor, then +;we just shift the dividend right 1 bit.
+ cmp eax,edi ;Is dividend >= divisor? + jae ShiftDividend ;Yes, divide it by two +;DIV instruction would overflow, so skip it and calculate the effective +;result. Assume a quotient of 2^32-1 and calculate the remainder. See +;detailed comments under MaxQuo below--this is a copy of that code. + push ecx ;Save exp. and sign + mov ebx,-1 ;Max quotient digit + sub eax,edi ;Calculate correct remainder +;Currently edx == esi, but the next instruction ensures that is no longer +;true, since eax != 0. This will allow us to skip the MaxQuo check at +;DivFirstDigit. + add edx,eax ;Should set CY if quotient fit + mov eax,edi ;edx:eax has new remainder (ready for DIV at ComputeSecond) + jc ComputeSecond ;Remainder was positive +;Quotient doesn't fit. Note that we can no longer ensure that edx != esi +;after making a correction. + mov ecx,edx ;Need remainder in ecx:eax + jmp DivCorrect1 + +;********* +DivDouble: +;********* +;Computes op2/op1 + mov eax,edi ;Move op2 pointer + mov edi,esi + mov esi,ebx ;Mantissa in esi:edi + mov ebx,ecx ;ExpSgn to ebx + mov ecx,EMSEG:[eax].ExpSgn ;Op2 loaded + mov edx,EMSEG:[eax].lManHi + mov eax,EMSEG:[eax].lManLo + +DivDoubleReg: +;dividend mantissa in edx:eax, exponent in high ecx, sign in ch bit 7 +;divisor mantissa in esi:edi, exponent in high ebx, sign in bh bit 7 + + xor ch,bh ;Compute result sign + xor bx,bx ;Clear out sign and tag + sub ecx,1 shl 16 ;Exponent adjustment needed + sub ecx,ebx ;Compute result exponent +.erre TexpBias eq 0 ;Exponents not biased + jo DDivBigUnderflow ;Dividing denormal by large number +ContDdiv: + +;If dividend >= divisor, we must shift the dividend right one bit. +;This will ensure the result is normalized. +; +;In previous versions of this algorithm for 24-bit and 53-bit mantissas, +;this shift was always performed without a test. This meant that a 1-bit +;normalization might be required at the end. This worked fine because +;32 or 64 bits were calculated, so extra precision was available for +;normalization.
However, this version needs all 64 bits that are calculated, +;so we can't afford a normalization shift at the end. This test tells us +;up front how to align so we'll be normalized. + xor ebp,ebp ;Extend dividend + cmp esi,edx ;Dividend > divisor + ja DoDdiv + jz HighHalfEqual ;Go compare low halves +ShiftDividend: + shrd ebp,eax,1 + shrd eax,edx,1 + shr edx,1 + add ecx,1 shl 16 ;Bump exponent to account for shift +DoDdiv: + push ecx ;Save exp. and sign + +;edx:eax:ebp = dividend +;esi:edi = divisor +; +;Division algorithm from Knuth vol. 2, p. 237, using 32-bit "digits": +;Guess a quotient digit by dividing two MSDs of dividend by the MSD of +;divisor. If divisor is >= 1/2 the radix (radix = 2^32 in this case), then +;this guess will be no more than 2 larger than the correct value of that +;quotient digit (and never smaller). Divisor meets magnitude condition +;because it's normalized. + + div esi ;Guess first quotient "digit" + +;Check out our guess. +;Currently, remainder in edx = dividend - (quotient * high half divisor). +;The definition of remainder is dividend - (quotient * all divisor). So +;if we subtract (quotient * low half divisor) from edx, we'll get +;the true remainder. If it's negative, our guess was too big. + + mov ebx,eax ;Save quotient + mov ecx,edx ;Save remainder + mul edi ;Quotient * low half divisor + sub ebp,eax ;Subtract from dividend extension + sbb ecx,edx ;Subtract from remainder + mov eax,ebp ;Low remainder to eax + jnc DivFirstDigit ;Was quotient OK? +DivCorrect1: + dec ebx ;Quotient was too big + add eax,edi ;Add divisor back into remainder + adc ecx,esi + jnc DivCorrect1 ;Repeat if quotient is still too big +DivFirstDigit: + cmp ecx,esi ;Would DIV instruction overflow? 
+ jae short MaxQuo ;Yes, figure alternate quotient + mov edx,ecx ;Remainder back to edx:eax + +;Compute 2nd quotient "digit" + +ComputeSecond: + div esi ;Guess 2nd quotient "digit" + mov ebp,eax ;Save quotient + mov ecx,edx ;Save remainder + mul edi ;Quotient * low half divisor + neg eax ;Subtract from dividend extended with 0 + sbb ecx,edx ;Subtract from remainder + jnc DivSecondDigit ;Was quotient OK? +DivCorrect2: + dec ebp ;Quotient was too big + add eax,edi ;Add divisor back into remainder + adc ecx,esi + jnc DivCorrect2 ;Repeat if quotient is still too big +DivSecondDigit: +;ebx:ebp = quotient +;ecx:eax = remainder +;esi:edi = divisor +;Now compare remainder*2 with divisor to compute round and sticky bits. + mov edx,-1 ;Set round and sticky bits + shld ecx,eax,1 ;Double remainder + jc DDivEnd ;If too big, round & sticky set + shl eax,1 + sub edi,eax + sbb esi,ecx ;Subtract remainder*2 from divisor + jb DDivEnd ;If <0, use round & sticky bits set + +;Observe, oh wondering one, how you can assume the result of this last +;compare is not equality. Use the following notation: n=numerator, +;d=denominator,q=quotient,r=remainder,b=base(2^64 here). If +;initially we had n < d then there was no shift and we will find q and r +;so that q*d+r=n*b, if initially we had n >= d then there was a shift and +;we will find q and r so that q*d+r=n*b/2. If we have equality here +;then r=d/2 ==> n={possibly 2*}(2*q+1)*d/(2*b), since this can only +;be integral if d is a multiple of b, but by definition b/2 <= d < b, we +;have a contradiction. Equality is thus impossible at this point. + +;No round bit, but set sticky bit if remainder != 0. + or eax,ecx ;Is remainder zero? 
+ add eax,-1 ;Set CY if non-zero + adc edx,1 ;edx==0 if NC, ==1 if CY (was -1) +DDivEnd: + mov esi,ebp ;Result in ebx:esi + mov eax,edx ;Round/sticky bits to eax + pop ecx ;Recover sign/exponent + jmp EMSEG:[RoundMode] + + +MaxQuo: +;ebx = first quotient "digit" +;ecx:eax = remainder +;esi:edi = divisor +;On exit, ebp = second quotient "digit" +; +;Come here if divide instruction would overflow. This must mean that ecx == esi, +;i.e., the high halves of the dividend and divisor are equal. Assume a result +;of 2^32-1, thus remainder = dividend - ( divisor * (2^32-1) ) +; = dividend - divisor * 2^32 + divisor. Since the high halves of the dividend +;and divisor are equal, dividend - divisor * 2^32 can be computed by +;subtracting only the low halves. When adding divisor (in esi) to this, note +;that ecx == esi, and we want the result in ecx anyway. +; +;Note also that since the dividend is a previous remainder, the +;dividend - divisor * 2^32 calculation must always be negative. Thus the +;addition of divisor back to it should generate a carry if it goes positive. + + mov ebp,-1 ;Max quotient digit + sub eax,edi ;Calculate correct remainder + add ecx,eax ;Should set CY if quotient fit + mov eax,edi ;ecx:eax has new remainder + jc DivSecondDigit ;Remainder was positive + jmp DivCorrect2 diff --git a/private/ntos/dll/i386/emfinit.asm b/private/ntos/dll/i386/emfinit.asm new file mode 100644 index 000000000..baf0f0598 --- /dev/null +++ b/private/ntos/dll/i386/emfinit.asm @@ -0,0 +1,46 @@ + subttl emfinit.asm - Emulator initialization and FINIT instruction + page +;******************************************************************************* +;emfinit.asm - Emulator initialization and FINIT instruction +; +; Microsoft Confidential +; +; Copyright (c) Microsoft Corporation 1991 +; All Rights Reserved +; +;Revision History: +; +; [] 09/05/91 TP Initial 32-bit version. 
+; +;******************************************************************************* + + +EM_ENTRY eEmulatorInit +EmulatorInit: +EM_ENTRY eFINIT +eFINIT: +;Reset the emulator state: mark Numlev stack entries starting at BEGstk as +;empty with zero mantissas, clear the status word (and the saved copy on +;the stack) and the previous code/data offsets, load InitControlWord, and +;point the rounding vectors at Round64near. eEmulatorInit shares this code. + mov esi,BEGstk + mov EMSEG:[CURstk],INITstk + mov ecx,Numlev + xor eax,eax + +EmInitLoop: + mov EMSEG:[esi].ExpSgn,bTAG_EMPTY ;Exponent and sign are zero + mov EMSEG:[esi].lManHi,eax + mov EMSEG:[esi].lManLo,eax + + add esi, Reg87Len + loop EmInitLoop + + mov EMSEG:[StatusWord],ax ; clear status word + mov [esp+4].OldStatus,ax ; clear saved status word. + mov EMSEG:[PrevCodeOff],eax + mov EMSEG:[PrevDataOff],eax + mov EMSEG:[LongControlWord],InitControlWord + mov eax,offset Round64near + mov EMSEG:[RoundMode],eax ;Address of round routine + mov EMSEG:[TransRound],eax ;Address of round routine + mov EMSEG:[SavedRoundMode],eax + mov EMSEG:[ZeroVector],offset SaveResult + mov EMSEG:[Einstall], 1 + ret diff --git a/private/ntos/dll/i386/emfmisc.asm b/private/ntos/dll/i386/emfmisc.asm new file mode 100644 index 000000000..6adc08dc3 --- /dev/null +++ b/private/ntos/dll/i386/emfmisc.asm @@ -0,0 +1,81 @@ + subttl emfmisc.asm - FABS, FCHS, FFREE, FXCH + page +;******************************************************************************* +;emfmisc.asm - FABS, FCHS, FFREE, FXCH +; +; Microsoft Confidential +; +; Copyright (c) Microsoft Corporation 1991 +; All Rights Reserved +; +;Purpose: +; FABS, FCHS, FFREE, FXCH instructions +;Inputs: +; edi = [CURstk] +; esi = pointer to st(i) from instruction field +; +;Revision History: +; +; [] 09/05/91 TP Initial 32-bit version.
+; +;******************************************************************************* + + +;****** +EM_ENTRY eFABS +eFABS: +;****** +;Clear the sign byte of the entry at [edi]; an empty entry goes to StackError + cmp EMSEG:[edi].bTag,bTAG_EMPTY + jz StackError ;in emarith.asm + mov EMSEG:[edi].bSgn,0 ;Turn sign bit off + ret + +;****** +EM_ENTRY eFCHS +eFCHS: +;****** +;Negate the entry at [edi]; an empty entry goes to StackError + cmp EMSEG:[edi].bTag,bTAG_EMPTY + jz StackError ;in emarith.asm + not EMSEG:[edi].bSgn ;Flip the sign (NOT inverts all 8 bits of bSgn) + ret + +;****** +EM_ENTRY eFFREE +eFFREE: +;****** +;Tag the entry at [esi] empty; only the tag byte is written + mov EMSEG:[esi].bTag,bTAG_EMPTY + ret + +;****** +EM_ENTRY eFXCH +eFXCH: +;****** +;Exchange the entries at [esi] and [edi]. Each side is checked for an empty +;tag first; an empty side calls ReturnIndefinite and the swap only proceeds +;if that call returns with ZF clear (exception masked). + cmp EMSEG:[esi].bTag,bTAG_EMPTY + jz XchDestEmpty +XchgChkSrc: + cmp EMSEG:[edi].bTag,bTAG_EMPTY + jz XchSrcEmpty +DoSwap: +;Swap the 12 bytes at [esi] with the 12 bytes at [edi], one dword at a time + mov eax,EMSEG:[edi] + xchg eax,EMSEG:[esi] + mov EMSEG:[edi],eax + mov eax,EMSEG:[edi+4] + xchg eax,EMSEG:[esi+4] + mov EMSEG:[edi+4],eax + mov eax,EMSEG:[edi+8] + xchg eax,EMSEG:[esi+8] + mov EMSEG:[edi+8],eax + ret + +XchDestEmpty: + call ReturnIndefinite ;in emarith.asm - ZF set if unmasked + jnz XchgChkSrc ;Continue if masked + ret + +XchSrcEmpty: + xchg edi,esi ;pass pointer in esi + call ReturnIndefinite ;in emarith.asm - ZF set if unmasked + xchg edi,esi ;restore original pointers (XCHG leaves flags alone) + jnz DoSwap ;Continue if masked + ret diff --git a/private/ntos/dll/i386/emfmul.asm b/private/ntos/dll/i386/emfmul.asm new file mode 100644 index 000000000..2a5fcca9f --- /dev/null +++ b/private/ntos/dll/i386/emfmul.asm @@ -0,0 +1,238 @@ + subttl emfmul.asm - Multiplication + page +;******************************************************************************* +; Copyright (c) Microsoft Corporation 1991 +; All Rights Reserved +; +;emfmul.asm - long double multiply +; by Tim Paterson +; +;Purpose: +; Long double multiplication. +;Inputs: +; ebx:esi = op1 mantissa +; ecx = op1 sign in bit 15, exponent in high half +; edi = pointer to op2 and result location +; [Result] = edi +; +; Exponents are unbiased. Denormals have been normalized using +; this expanded exponent range. Neither operand is allowed to be zero.
+;Outputs: +; Jumps to [RoundMode] to round and store result. +; +;Revision History: +; +; [] 09/05/91 TP Initial 32-bit version. +; +;******************************************************************************* + +;Dispatch table for multiply +; +;One operand has been loaded into ecx:ebx:esi ("source"), the other is +;pointed to by edi ("dest"). +; +;Tag of source is shifted. Tag values are as follows: + +.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero +.erre TAG_VALID eq 1 +.erre TAG_ZERO eq 2 +.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty + +;Any special case routines not found in this file are in emarith.asm + +tFmulDisp label dword ;Source (reg) Dest (*[di]) + dd MulSingle ;single single + dd MulDouble ;single double + dd XorDestSign ;single zero + dd MulSpclDest ;single special + dd MulDouble ;double single + dd MulDouble ;double double + dd XorDestSign ;double zero + dd MulSpclDest ;double special + dd XorSourceSign ;zero single + dd XorSourceSign ;zero double + dd XorDestSign ;zero zero + dd MulSpclDest ;zero special + dd MulSpclSource ;special single + dd MulSpclSource ;special double + dd MulSpclSource ;special zero + dd TwoOpBothSpcl ;special special + dd XorDestSign ;Two infinities + + +EM_ENTRY eFIMUL16 +eFIMUL16: + push offset MulSetResult + jmp Load16Int ;Returns to MulSetResult + +EM_ENTRY eFIMUL32 +eFIMUL32: + push offset MulSetResult + jmp Load32Int ;Returns to MulSetResult + +EM_ENTRY eFMUL32 +eFMUL32: + push offset MulSetResult + jmp Load32Real ;Returns to MulSetResult + +EM_ENTRY eFMUL64 +eFMUL64: + push offset MulSetResult + jmp Load64Real ;Returns to MulSetResult + +EM_ENTRY eFMULPreg +eFMULPreg: + push offset PopWhenDone + +EM_ENTRY eFMULreg +eFMULreg: + xchg esi,edi + +EM_ENTRY eFMULtop +eFMULtop: + mov ecx,EMSEG:[esi].ExpSgn + mov ebx,EMSEG:[esi].lManHi + mov esi,EMSEG:[esi].lManLo +MulSetResult: + mov ebp,offset tFmulDisp + mov EMSEG:[Result],edi ;Save result pointer + mov al,cl + or al,EMSEG:[edi].bTag + cmp al,bTAG_VALID 
+.erre bTAG_VALID eq 1 +.erre bTAG_SNGL eq 0 + jz MulDouble + ja TwoOpResultSet +;.erre MulSingle eq $ ;Fall into MulSingle + + +;********* +MulSingle: +;********* + + mov edx,EMSEG:[edi].ExpSgn + mov eax,EMSEG:[edi].lManHi + +;op1 mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7 +;op2 high mantissa in eax, exponent in high edx, sign in dh bit 7 + + xor ch,dh ;Compute result sign + xor dx,dx ;Clear out sign and tag + add ecx,edx ;Result exponent +.erre TexpBias eq 0 ;Exponents not biased + jo SMulBigUnderflow ;Multiplying two denormals +ContSmul: + +;Value in ecx is correct exponent if result is not normalized. +;If result comes out normalized, 1 will be added. + + mul ebx ;Compute product + mov ebx,edx + mov esi,eax + xor eax,eax ;Extend with zero + +;Result in ebx:esi:eax +;ecx = exponent minus one in high half, sign in ch + or ebx,ebx ;Check for normalization + jns ShiftOneBit ;In emfadd.asm + add ecx,1 shl 16 ;Adjust exponent + jmp EMSEG:[RoundMode] + +SMulBigUnderflow: + or EMSEG:[CURerr],Underflow + add ecx,Underbias shl 16 ;Fix up exponent + test EMSEG:[CWmask],Underflow ;Is exception masked? + jz ContSmul ;No, continue with multiply +UnderflowZero: + or EMSEG:[CURerr],Precision +SignedZero: + and ecx,bSign shl 8 ;Preserve sign bit + xor ebx,ebx + mov esi,ebx + mov cl,bTAG_ZERO + jmp EMSEG:[ZeroVector] + +;******************************************************************************* + +DMulBigUnderflow: +;Overflow flag set could only occur with denormals (true exp < -32768) + or EMSEG:[CURerr],Underflow + test EMSEG:[CWmask],Underflow ;Is exception masked? + jnz UnderflowZero ;Yes, return zero + add ecx,Underbias shl 16 ;Fix up exponent + jmp ContDmul ;Continue with multiply + +PolyMulToZero: + ret ;Return the zero in registers + +PolyMulDouble: +;This entry point is used by polynomial evaluator. +;It checks the operand in registers for zero. + cmp cl,bTAG_ZERO ;Multiplying by zero?
+ jz PolyMulToZero + +;********* +MulDouble: +;********* + + mov eax,EMSEG:[edi].ExpSgn + mov edx,EMSEG:[edi].lManHi + mov edi,EMSEG:[edi].lManLo + +MulDoubleReg: ;Entry point used by transcendentals +;op1 mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7 +;op2 mantissa in edx:edi, exponent in high eax, sign in ah bit 7 + + xor ch,ah ;Compute result sign + xor ax,ax ;Clear out sign and tag + add ecx,eax ;Result exponent +.erre TexpBias eq 0 ;Exponents not biased + jo DMulBigUnderflow ;Multiplying two denormals +ContDmul: + +;Value in ecx is correct exponent if result is not normalized. +;If result comes out normalized, 1 will be added. + + mov ebp,edx ;edx is used by MUL instruction + +;Generate and sum partial products, from least to most significant + + mov eax,edi + mul esi ;Lowest partial product + add eax,-1 ;CY set IFF eax<>0 + sbb cl,cl ;Sticky bit: 0 if zero, -1 if nz + xchg edi,edx ;Save high result + +;First product: cl reflects low dword non-zero (sticky bit), edi has high dword + + mov eax,ebx + mul edx + add edi,eax + adc edx,0 ;Sum first results + xchg edx,esi ;High result to esi + +;Second product: accumulated in esi:edi:cl + + mov eax,ebp ;Next mult. 
to eax + mul edx + add edi,eax ;Sum low results + adc esi,edx ;Sum high results + mov eax,ebx + mov ebx,0 ;Preserve CY flag + adc ebx,ebx ;Keep carry out of high sum + +;Third product: accumulated in ebx:esi:edi:cl + + mul ebp + add esi,eax + adc ebx,edx + mov eax,edi + or al,cl ;Collapse sticky bits into eax + +;Result in ebx:esi:eax +;ecx = exponent minus one in high half, sign in ch +MulDivNorm: + or ebx,ebx ;Check for normalization + jns ShiftOneBit ;In emfadd.asm + add ecx,1 shl 16 ;Adjust exponent + jmp EMSEG:[RoundMode] diff --git a/private/ntos/dll/i386/emfprem.asm b/private/ntos/dll/i386/emfprem.asm new file mode 100644 index 000000000..3cb8670bb --- /dev/null +++ b/private/ntos/dll/i386/emfprem.asm @@ -0,0 +1,407 @@ + subttl emfprem.asm - FPREM and FPREM1 instructions + page +;******************************************************************************* +;emfprem.asm - FPREM and FPREM1 instructions +; by Tim Paterson +; +; Microsoft Confidential +; +; Copyright (c) Microsoft Corporation 1991 +; All Rights Reserved +; +;Inputs: +; edi = [CURstk] +; ST(1) loaded into ebx:esi & ecx +; +;Revision History: +; +; [] 09/05/91 TP Initial 32-bit version. +; +;******************************************************************************* + +;Dispatch table for remainder +; +;One operand has been loaded into ecx:ebx:esi ("source"), the other is +;pointed to by edi ("dest"). +; +;Tag of source is shifted. 
Tag values are as follows: + +.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero +.erre TAG_VALID eq 1 +.erre TAG_ZERO eq 2 +.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty + +;Any special case routines not found in this file are in emarith.asm + + ;Divisor Dividend +tFpremDisp label dword ;Source(ST(1)) Dest (ST(0)) + dd PremDouble ;single single + dd PremDouble ;single double + dd PremX ;single zero + dd PremSpclDest ;single special + dd PremDouble ;double single + dd PremDouble ;double double + dd PremX ;double zero + dd PremSpclDest ;double special + dd ReturnIndefinite ;zero single + dd ReturnIndefinite ;zero double + dd ReturnIndefinite ;zero zero + dd PremSpclDest ;zero special + dd PremSpclSource ;special single + dd PremSpclSource ;special double + dd PremSpclSource ;special zero + dd TwoOpBothSpcl ;special special + dd ReturnIndefinite ;Two infinites + + +PremSpclDone: + add sp,4 ;Clean off return address for normal + ret + +;*** +PremSpclDest: + mov al,EMSEG:[edi].bTag ;Pick up tag + cmp al,bTAG_INF ;Dividing infinity? + jz ReturnIndefinite ;Invalid operation if so + jmp SpclDest ;In emarith.asm + +;*** +PremSpclSource: + cmp cl,bTAG_INF ;Dividing by infinity? + jnz SpclSource ;in emarith.asm +PremX: +;Return Dest unchanged, quotient = 0 + mov EMSEG:[SWcc],0 + ret +;******************************************************************************* + +;Map quotient bits to condition codes + +Q0 equ C1 +Q1 equ C3 +Q2 equ C0 + +MapQuo label byte + db 0 + db Q0 + db Q1 + db Q1+Q0 + db Q2 + db Q2+Q0 + db Q2+Q1 + db Q2+Q1+Q0 + +Prem1Cont: + +;edx:eax = remainder, normalized +;ebx:esi = divisor +;ebp = quotient +;edi = exponent difference, zero or less +;ecx = 0 (positive sign) +; +;At this point, 0 <= remainder < divisor. However, for FPREM1 we need +; -divisor/2 <= remainder <= divisor/2. If remainder = divisor/2, whether +;we choose + or - is dependent on whichever gives us an even quotient +;(the usual IEEE rounding rule). 
;Quotient must be incremented if we
;use negative remainder.
;
;The mantissa comparisons below must be UNSIGNED (jb/ja). The high dwords
;are both normalized (bit 31 set), so a signed compare happened to order
;them correctly, but the low dwords in eax and esi are arbitrary 32-bit
;values: a signed compare misorders them whenever exactly one of them has
;bit 31 set, which made FPREM1 pick the wrong side of divisor/2.

	cmp	edi,-1			;Exponent difference is signed
	jl	PremCont		;Remainder < divisor/2
	jg	NegRemainExp0		;Remainder > divisor/2
;Exponent is -1
	cmp	edx,ebx
	jb	PremCont		;Remainder < divisor/2
	ja	NegRemain		;Remainder > divisor/2
	cmp	eax,esi
	jb	PremCont		;Remainder < divisor/2
	ja	NegRemain		;Remainder > divisor/2
;Remainder = divisor/2. Ensure quotient is even
	test	ebp,1			;Even?
	jz	PremCont
NegRemain:
;Theoretically we subtract divisor from remainder once more, leaving us
;with a negative remainder. But since we use sign/magnitude representation,
;we want the abs() of that with sign bit set--so subtract remainder from
;(larger) divisor. Note that exponent difference is -1, so we must align
;binary points first.
	add	esi,esi
	adc	ebx,ebx			;Double divisor to align binary points
NegRemainExp0:
	sub	esi,eax
	sbb	ebx,edx			;Subtract remainder
	mov	eax,esi
	mov	edx,ebx			;Result in edx:eax
	inc	ebp			;Increase quotient
;Must normalize result of subtraction
;(The sign is loaded into ch only after normalization: BSR writes all of
;ecx, so setting ch before this point would be overwritten.)
	bsr	ecx,edx			;Look for 1 bit
	jnz	@F
	sub	edi,32
	xchg	edx,eax			;Shift left 32 bits
	bsr	ecx,edx
@@:
	lea	edi,[edi+ecx-31]	;Fix up exponent for normalization
	not	cl			;Shift count = 31 - bit no. (CPU uses low 5 bits)
	shld	edx,eax,cl
	shl	eax,cl
	mov	ch,bSign		;Flip sign of remainder

PremCont:
;edx:eax = remainder, normalized
;ebp = quotient
;edi = exponent difference, zero or less
;ch = sign
	or	eax,eax			;Low bits zero?
.erre	bTAG_VALID	eq	1
.erre	bTAG_SNGL	eq	0
	setnz	cl			;if low half==0 then cl=0 else cl=1
	mov	esi,EMSEG:[CURstk]
	mov	ebx,esi
	NextStackElem	ebx,Prem
	add	di,EMSEG:[ebx].wExp	;Compute result exponent
	cmp	di,IexpMin-IexpBias
	jle	PremUnderflow
SavePremResult:
	mov	EMSEG:[esi].lManLo,eax
	xor	EMSEG:[esi].bSgn,ch
	mov	EMSEG:[esi].lManHi,edx
	and	ebp,7			;Keep last 3 bits of quotient only
					;  and give write buffers a break
	mov	EMSEG:[esi].wExp,di
	mov	EMSEG:[esi].bTag,cl
	mov	al,MapQuo[ebp]		;Get cond.
;codes for this quotient
	mov	EMSEG:[SWcc],al
	ret

	NextStackWrap	ebx,Prem		;Tied to NextStackElem above

PremUnderflow:
;Reached from PremCont when the 16-bit result exponent in di is at or
;below IexpMin-IexpBias.
	test	EMSEG:[CWmask],Underflow	;Is exception unmasked?
	jz	UnmaskedPremUnder
	mov	cl,bTAG_DEN
	jmp	SavePremResult

UnmaskedPremUnder:
	add	edi,UnderBias		;Additional exp. bias for unmasked resp.
	or	EMSEG:[CURerr],Underflow
	jmp	SavePremResult

;*******************************************************************************

PremDouble:
;edi = [CURstk]
;ebx:esi = ST(1) mantissa, ecx = ExpSgn

;Drop the 4-byte return address used by the special-case handlers. The
;whole of ESP is adjusted: "add sp,4" touches only the low word and drops
;the carry into the high word when SP is 0FFFCH or above on a flat stack.
	add	esp,4			;Clean off return address for special
	mov	eax,EMSEG:[edi].lManLo
	mov	edx,EMSEG:[edi].lManHi
	movsx	edi,EMSEG:[edi].wExp
	xor	ebp,ebp			;Quotient, in case we skip stage 1
	sar	ecx,16			;Bring exponent down
	sub	edi,ecx			;Get exponent difference
	jl	ExitPremLoop		;If dividend is smaller, return it.

;FPREM is performed in two stages. The first stage is used only if the
;exponent difference is greater than 31. It reduces the exponent difference
;by 32, and repeats until the difference is less than 32. Note that
;unlike the hardware FPREM instruction, we are not limited to reducing
;the exponent by only 63--we just keep looping until it's done.
;
;The second stage performs ordinary 1-bit-at-a-time long division.
;It stops when the exponent difference is zero, meaning we have an
;integer quotient and the final remainder.
;
;edx:eax = dividend
;ebx:esi = divisor
;edi = exponent difference
;ebp = 0 (initial quotient)

	cmp	edi,32			;Do we need to do stage 1?
	jl	FitDivisor		;No, start stage 2

;FPREM stage 1
;
;Exponent difference is at least 32. Use 32-bit division to compute
;quotient and exact remainder, reducing exponent difference by 32.

;DIV instruction will overflow if dividend >= divisor. In this case,
;subtract divisor from dividend to ensure no overflow.
;This will change
;the quotient, but that doesn't matter because we only need the last
;3 bits of the quotient (and we're about to calculate 32 quotient bits).
;This subtraction will not affect the remainder.

	sub	eax,esi
	sbb	edx,ebx
	jnc	FpremReduce32		;Was dividend big?
	add	eax,esi			;Restore dividend, it was smaller
	adc	edx,ebx

;Division algorithm from Knuth vol. 2, p. 237, using 32-bit "digits":
;Guess a quotient digit by dividing two MSDs of dividend by the MSD of
;divisor. If divisor is >= 1/2 the radix (radix = 2^32 in this case), then
;this guess will be no more than 2 larger than the correct value of that
;quotient digit (and never smaller). Divisor meets magnitude condition
;because it's normalized.
;
;This loop typically takes 117 clocks.

;edx:eax = dividend
;ebx:esi = divisor
;edi = exponent difference
;ebp = quotient (zero)

FpremReduce32:
;We know that dividend < divisor, but it is still possible that
;high dividend == high divisor, which will cause the DIV instruction
;to overflow.
	cmp	edx,ebx			;Will DIV instruction overflow?
	jae	PremOvfl
	div	ebx			;Guess a quotient "digit"

;Currently, remainder in edx = dividend - (quotient * high half divisor).
;The definition of remainder is dividend - (quotient * all divisor). So
;if we subtract (quotient * low half divisor) from edx, we'll get
;the true remainder. If it's negative, our guess was too big.

	mov	ebp,eax			;Save quotient
	mov	ecx,edx			;Save remainder
	mul	esi			;Quotient * low half divisor
	neg	eax			;Subtract from dividend extended with 0
					;  (CY set when eax was non-zero = borrow)
	sbb	ecx,edx			;Subtract from remainder
	mov	edx,ecx			;Remainder back to edx:eax
					;  (MOV leaves CY from the SBB intact)
	jnc	HavPremQuo		;Was quotient OK?
FpremCorrect:
;Each pass takes one off the quotient guess and adds the divisor back.
;A carry out of the add means the remainder has gone non-negative.
	dec	ebp			;Quotient was too big
	add	eax,esi			;Add divisor back into remainder
	adc	edx,ebx
	jnc	FpremCorrect		;Repeat if quotient is still too big
HavPremQuo:
	sub	edi,32			;Exponent reduced
	cmp	edi,32			;Exponent difference within 31?
;(HavPremQuo continues; flags are from the preceding "cmp edi,32")
	jl	PremNormalize		;Do it a bit at a time
	or	edx,edx			;Check for zero remainder
	jnz	FpremReduce32
	or	eax,eax			;Remainder 0?
	jz	ExactPrem
	xchg	edx,eax			;Shift left 32 bits
	sub	edi,32			;Another 32 bits reduced
	cmp	edi,32
	jge	FpremReduce32
	xor	ebp,ebp			;No quotient bits are valid
	jmp	PremNormalize

PremOvfl:
;edx:eax = dividend
;ebx:esi = divisor
;On exit, ebp = second quotient "digit"
;
;Come here if divide instruction would overflow. This must mean that edx == ebx,
;i.e., the high halves of the dividend and divisor are equal. Assume a result
;of 2^32-1, thus remainder = dividend - ( divisor * (2^32-1) )
; = dividend - divisor * 2^32 + divisor. Since the high halves of the dividend
;and divisor are equal, dividend - divisor * 2^32 can be computed by
;subtracting only the low halves. When adding divisor (in ebx) to this, note
;that edx == ebx, and we want the result in edx anyway.
;
;Note also that since dividend < divisor, the
;dividend - divisor * 2^32 calculation must always be negative. Thus the
;addition of divisor back to it should generate a carry if it goes positive.

	mov	ebp,-1			;Max quotient digit
	sub	eax,esi			;Calculate correct remainder
	add	edx,eax			;Should set CY if quotient fit
	mov	eax,esi			;edx:eax has new remainder
					;  (MOV leaves CY from the ADD intact)
	jc	HavPremQuo		;Remainder was positive
	jmp	FpremCorrect

ExactPrem:
;eax = 0
;Remainder is exactly zero: store a zero mantissa/exponent with the zero
;tag at [CURstk], then return past the FPREM/FPREM1 continuation.
	mov	esi,EMSEG:[CURstk]
	mov	EMSEG:[esi].lManLo,eax
	mov	EMSEG:[esi].lManHi,eax
;Adjust all of ESP: "add sp,4" changes only the low word and drops the
;carry into the high word when SP is 0FFFCH or above on a flat stack.
	add	esp,4			;Clean off first return address
	mov	EMSEG:[esi].wExp,ax
	mov	EMSEG:[esi].bTag,bTAG_ZERO
	ret


;FPREM stage 2
;
;Exponent difference is less than 32. Use restoring long division to
;compute quotient bits until exponent difference is zero. Note that we
;often get more than one bit/loop: BSR is used to scan off leading
;zeros each time around. Since the divisor is normalized, we can
;instantly compute a zero quotient bit for each leading zero bit.
;
;For reductions of 1 to 31 bits per loop, this loop requires 41 or 59 clocks
;plus 3 clocks/bit (BSR time). If we had to use this for 32-bit reductions
;(without stage 1), we could expect (50+6)*16 = 896 clocks typ (2 bits/loop)
;instead of the 112 required by stage 1!

FpremLoop:
;edx:eax = dividend (remainder) minus divisor
;ebx:esi = divisor
;ebp = quotient
;edi = exponent difference, less than 32
;
;If R is current remainder and d is divisor, then we have edx:eax = R - d,
;which is negative. We want 2*R - d, which is positive.
;2*R - d = 2*(R - d) + d.
	add	eax,eax			;2*(R - d)
	adc	edx,edx
	add	eax,esi			;2*(R-d) + d = 2*R - d
	adc	edx,ebx
	add	ebp,ebp			;Double quotient too
	dec	edi			;Decrement exponent difference
DivisorFit:
	inc	ebp			;Count one in quotient
PremNormalize:
	bsr	ecx,edx			;Find first 1 bit
	jz	PremHighZero		;ZF set: edx is all zero
	not	cl
	and	cl,1FH			;Convert bit no. to shift count
					;  (ecx = 31 - bit no.)
	shld	edx,eax,cl		;Normalize
	shl	eax,cl
	sub	edi,ecx			;Reduce exponent difference
	jl	PremTooFar
	shl	ebp,cl			;Shift quotient
FitDivisor:
;Dividend could be larger or smaller than divisor
	sub	eax,esi
	sbb	edx,ebx
	jnc	DivisorFit		;No borrow: divisor fit
;Couldn't subtract divisor from dividend.
	or	edi,edi			;Is exponent difference zero or less?
	jg	FpremLoop
	add	eax,esi			;Restore dividend
	adc	edx,ebx
	xor	ecx,ecx			;Sign is positive
	ret

PremTooFar:
;Exponent difference in edi went negative when reduced by shift count in ecx.
;We need a quotient corresponding to exponent difference of zero.
	add	ecx,edi			;Restore exponent difference
	shl	ebp,cl			;Fix up quotient
ExitPremLoop:
;edx:eax = remainder, normalized
;ebp = quotient
;edi = exponent difference, zero or less
	xor	ecx,ecx			;Sign is positive
	ret

PremHighZero:
;High half of remainder is all zero, so we've reduced exponent difference
;by 32 bits and overshot. We need a quotient corresponding to exponent
;difference of zero, so we just shift it by the original difference.
;Then we need to normalize the low half remainder.
	mov	ecx,edi
	shl	ebp,cl			;Fix up quotient
	bsr	ecx,eax
	jz	ExactPrem		;Low half zero too: remainder is exactly 0
	lea	edi,[edi+ecx-63]	;Fix up exponent for normalization
					;  (32 for the dword swap + 31 - bit no.)
	xchg	eax,edx			;Shift by 32 bits
	not	cl			;Shift count = 31 - bit no. (CPU uses low 5 bits)
	shl	edx,cl			;Normalize remainder
	xor	ecx,ecx			;Sign is positive
	ret
diff --git a/private/ntos/dll/i386/emfsqrt.asm b/private/ntos/dll/i386/emfsqrt.asm new file mode 100644 index 000000000..cc36f7b0a --- /dev/null +++ b/private/ntos/dll/i386/emfsqrt.asm @@ -0,0 +1,267 @@
	subttl	emfsqrt.asm - FSQRT instruction
	page
;*******************************************************************************
;emfsqrt.asm - FSQRT instruction
;	by Tim Paterson
;
;	Microsoft Confidential
;
;	Copyright (c) Microsoft Corporation 1991
;	All Rights Reserved
;
;Inputs:
;	edi = [CURstk]
;
;Revision History:
;
; []	09/05/91  TP	Initial 32-bit version.
;
;*******************************************************************************


;A linear approximation of the square root function is used to get the
;initial value for Newton-Raphson iteration. This approximation gives
;nearly 5-bit accuracy over the required input interval, [1,4). The
;equation for the linear approximation of y = sqrt(x) is y = mx + b,
;where m is the slope (named SQRT_COEF) and b is the y-intercept (named
;SQRT_INTERCEPT).
;
;(The values for m and b were computed with Excel Solver in two passes:
;the first pass computed them full precision, minimizing absolute error;
;the second computed only b after m was rounded to an 8-bit value.)
;
;The resulting values have the following maximum error:
;
;inp. value -->		1	2.18972	3.82505
;----------------------------------------------------------------
;abs. err., full prec.	0.04544	-0.03233	0.04423
;abs. err., truncated	0.04544	-0.04546	0.04423
;
;The three input values shown represent the left end point, the maximum
;error (derivative of absolute error == 0), and the right end point.
;The right end point is not 4 because the approximation reaches 2.000
;at the value given--we abandon the linear approximation at that point
;and use that same value for all greater input values. This linear
;approximation is computed with 8-bit operations, so truncations can
;add a negative error. This increases maximum error only when it is
;already negative, as shown in the table.
;
;Each iteration of Newton-Raphson approximation more than doubles the
;number of bits of accuracy. Suppose the current guess is A, and it has
;an absolute error of e (i.e., A+e or A-e is the root). Then the absolute
;error after the next iteration is e^2/2A. This error is always positive.
;However, the divide instruction truncates, which introduces an error
;that is always negative. Sometimes a constant or rounding bit is added
;to balance the positive and negative errors. The maximum possible error
;is given in comments below for each iteration. (Note that when we compute
;the error from e^2/2A, A could be in the range 1 to 2--we use 1 to get
;max error.) Remember that the binary point is to the RIGHT of the MSB
;when looking at these error numbers.


;SQRT_INTERCEPT is used when the binary point is to the right of the MSB.
;Multiplying it by 64K would put the binary point to the left of the MSB,
;so it must be divided by two to be aligned.
SQRT_INTERCEPT	equ	23185		; 0.70755 * 65536 / 2

;SQRT_COEF would have the binary point to the left of the MSB if multiplied
;by 256. However, this would leave it with a leading zero, so we multiply
;it by two more to normalize it.
SQRT_COEF	equ	173		; 0.33789 * 256 * 2

SqrtSpcl:
;Reached from eFSQRT ("ja SqrtSpcl") when the tag in al is above bTAG_ZERO.
;eax still holds ExpSgn as loaded there, so ah bit 7 is tested as the sign.
	cmp	al,bTAG_DEN
	jz	SqrtDen
	cmp	al,bTAG_INF
	jnz	SpclDestNotDen
;Have infinity
	or	ah,ah			;Is it negative?
	js	ReturnIndefinite
SqrtRet:
	ret


MaxStartRoot:
;The first iteration is calculated as (ax / bh) * 100H + bx. The first
;trial root in bx should be 10000H (which is too big).
;But it's very
;easy to calculate (ax / 100H) * 100H + 10000H = ax.
	mov	bx,ax
	cmp	ax,-1			;Would subsequent DIV overflow?
	jb	FirstTrialRoot
;The reduced argument is so close to 4.0 that the 16-bit DIV instruction
;used in the next iteration would overflow. If the argument is 4-A
;then a guess of 2.0 is in error by approximately A/4. [This is not
;an upper bound. The error is a little more than this, by an
;additional term on the order of A^2. This is an insignificant amount
;when A is small.] This means that the first guess of 2.0 is quite
;accurate, and we'll use it to bypass some of the iteration steps.
;This will eliminate the DIV overflow by skipping the DIV.
;
;One iteration is performed by: (Arg/Guess + Guess)/2. When Guess = 2,
;this becomes (Arg/2 + 2)/2 = Arg/4 + 1. We get Arg/2 just by assuming
;the binary point is one bit further left; then a single right shift is
;needed to get Arg/4. By shifting in a 1 bit on the left, we account for
;adding 1 at the same time. [Note that if Arg = 4 - A, then Arg/4 + 1
; = (4 - A)/4 + 1 = 1 - A/4 + 1 = 2 - A/4. In other words, we just
;subtract out exactly what we estimate our error to be, A/4.]
;
;Since the upper 16 bits are 0FFFFH, A <= 2^-14, so error <= 2^-16 =
; +0.00001526, -0.
	mov	ebx,esi			;Return root in ebx
	sar	ebx,1			;Trial root = arg/2
					;  (SAR copies the set MSB back in: the "+1")
	cmp	esi,ebx			;Will 32-bit division overflow?
	jb	StartThirdIteration	;No, our 32-bit guess is good
;Argument is really, really close to 4.0: with an initial trial root of
;2.0, max absolute error is 2^-32 = +2.328E-10, -0. One trivial
;iteration will get us 65-bit accuracy, max abs. error = +2.71E-20, -0.
	mov	ebx,esi
	mov	eax,ecx			;65-bit root*2 in ebx:eax (MSB implied)
	shl	ecx,2			;ecx = low half*4
	jmp	RoundRoot

SqrtDen:
;NOTE(review): this MOV overwrites CURerr, whereas UnmaskedPremUnder ORs
;its bit in ("or EMSEG:[CURerr],Underflow") -- confirm which is intended.
	mov	EMSEG:[CURerr],Denormal
	test	EMSEG:[CWmask],Denormal	;Is denormal exception masked?
;NOTE(review): PremUnderflow treats a set CWmask bit as "masked" (its jz
;goes to UnmaskedPremUnder). Under that polarity the jnz below returns when
;the denormal exception IS masked, which disagrees with the "If not, quit"
;comment. The other case falls through into eFSQRT, which reloads the same
;tag. Confirm the intended behavior.
	jnz	SqrtRet			;If not, quit

;******
EM_ENTRY eFSQRT
eFSQRT:
;******
	mov	eax,EMSEG:[edi].ExpSgn
	cmp	al,bTAG_ZERO
	jz	SqrtRet
	ja	SqrtSpcl
	or	ah,ah
	js	ReturnIndefinite
	mov	esi,EMSEG:[edi].lManHi
	mov	ecx,EMSEG:[edi].lManLo
	sar	EMSEG:[edi].wExp,1	;Divide exponent by two
	mov	edi,0			;Extend mantissa
					;  (MOV, not XOR: CY from the SAR must survive)
	jc	RootAligned		;If odd exponent, leave it normalized
	shrd	edi,ecx,1
	shrd	ecx,esi,1
	shr	esi,1			;Denormalize, extending into edi
RootAligned:
;esi:ecx:edi has mantissa, 2 MSBs are left of binary point. Range is [1,4).
	shld	eax,esi,16		;Get high word of mantissa
	movzx	ebx,ah			;High byte to bl
;UNDONE: MASM 6 bug!!
;UNDONE: SQRT_COEF (=0ADH) get sign extended!!
	mov	dx,SQRT_COEF		;UNDONE
	imul	bx,dx			;UNDONE
;UNDONE	imul	bx,SQRT_COEF		;Product in bx
;Multiply by SQRT_COEF causes binary point to shift left 1 bit.
	add	bx,SQRT_INTERCEPT	;5-bit approx. square root in bh
	jc	MaxStartRoot
;Max absolute error is +/- 0.04546
	div	bh			;See how close we are
	add	bh,al			;quotient + divisor (always sets CY)
FirstTrialRoot:
;Avoid RCR because it takes 9 clocks on 386. Use SHRD (3 clocks) instead.
	mov	dl,1			;Need bit set
	shrd	bx,dx,1			;(quotient + divisor)/2
;bx has 9-bit approx. square root, normalized
;Max absolute error is +0.001033, -0.003906
	movzx	eax,si
	shld	edx,esi,16		;dx:ax has high half mantissa
	div	bx			;Test our approximation
	add	ebx,eax			;quotient + divisor
	shl	ebx,15			;Normalize (quotient + divisor)/2
;ebx has 17-bit approx. square root, normalized
;Max absolute error is +0.000007629, -0.00001526
;Add adjustment factor to center the error range at +/-0.00001144
	or	bh,20H			;Add in 0.000003815
StartThirdIteration:
	mov	edx,esi
	mov	eax,ecx
	div	ebx			;Test approximation
	stc				;Set bit for rounding (= 2.328E-10)
	adc	ebx,eax			;quotient + divisor + round bit
;Avoid RCR because it takes 9 clocks on 386. Use SHRD (3 clocks) instead.
;(The SHRD below shifts bit 0 of edx, forced to 1 here, into the MSB of ebx.)
	mov	dl,1			;Need bit set
	shrd	ebx,edx,1		;(quotient + divisor)/2, rounded
;ebx has 32-bit approx. square root, normalized
;Max absolute error is +2.983E-10, -2.328E-10
	mov	edx,esi			;Last time we need high half
	mov	eax,ecx
	shld	ecx,edi,2		;ecx = low half*4, w/extension back in
	div	ebx			;Test approximation
	xchg	edi,eax			;Save 1st quotient, get extension
	mov	esi,eax
	or	esi,edx			;Any remainder?
	jz	HaveRoot		;Result is ebx:esi
	div	ebx			;edi:eax is 64-bit quotient
	add	ebx,edi			;quotient + divisor (always sets CY)
RoundRoot:
	mov	esi,eax			;Save low half root*2

;We have 65-bit root*2 in ebx:esi (eax==esi) (MSB is implied one).
;Max absolute error is +4.450E-20, -5.421E-20. This maximum error
;corresponds to just less than +/- 1 in the last (65th) bit.
;
;We have to determine if this error is positive or negative so
;we can tell if we rounded up or down (and set the status bit
;accordingly). This is done by squaring the root and comparing
;that result with the input.
;
;Squaring the sample root requires summing partial products:
; lo*lo + lo*hi + hi*lo + hi*hi. lo*hi == hi*lo, so only one multiply
;is needed there. The low half of lo*lo isn't relevant, we know it
;is non-zero. Only the low few bits of hi*hi are needed, so we can use
;an 8-bit multiply there. Since the MSB is implied, we need to add in
;two 1*lo products (shifted up 64 bits). We only need bits 64 - 71 of
;the 130-bit product (the action happens near bit 65). What we're
;squaring is root*2, so the result is square*4. ecx already has arg*4.
;eax = esi = low half of root*2 here (set at RoundRoot)
	mul	eax			;Low partial product of square
	mov	edi,edx			;Only high half counts
	mov	eax,ebx
	mul	esi			;Middle partial product of square
	add	eax,eax			;There are two of these
	adc	edx,edx
	add	edi,eax
	adc	edx,0			;edx:edi = lo*lo + lo*hi + hi*lo
	add	edx,esi			;lo*implied msb
	add	edx,esi			;lo*implied msb again
	mov	al,bl
	mul	al			;hi*hi - only low 8 bits are valid
	add	al,dl			;Bits 64 - 71 of product
	or	al,1			;Account for sticky bits 0 - 63
	sub	cl,al			;Compare product with argument
;Sign flag set if product is larger. In this case, subtract 1 from root.
	add	cl,cl			;Set CY if sign is set
SubOneFromRoot:
;Entered with CY set when one must be taken off the 64-bit value ebx:esi.
	sbb	esi,0			;Reduce root if product was too big
	sbb	ebx,0
ShiftRoot:
;ebx:esi = root*2
;Absolute error is in the range (0, -5.421E-20). This is equivalent to
;less than +1, -0 in last bit. Thus LSB is correct rounding bit as
;long as we set a sticky bit below it.
;
;Now divide root*2 by 2, preserving LSB as rounding bit and filling
;eax with 1's as sticky bits.
;
;Avoid RCR because it takes 9 clocks on 386. Use SHRD (3 clocks) instead.
	mov	eax,-1
	shrd	eax,esi,1		;Move round bit to MSB of eax
	shrd	esi,ebx,1
	shrd	ebx,eax,1		;Shift 1 into MSB of ebx
StoreRoot:
	mov	edi,EMSEG:[CURstk]
	mov	EMSEG:[Result],edi
	mov	ecx,EMSEG:[edi].ExpSgn
;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
	jmp	EMSEG:[RoundMode]

HaveRoot:
;esi = eax = edx = 0
	cmp	edi,ebx			;Does quotient == divisor?
	jz	StoreRoot		;If so, we're done
;Quotient != divisor, so answer is not exact. Since remainder is zero,
;the division was exact. The only error in the result is e^2/2A, which
;is always positive. We need the error to be only negative so that
;the rounding routine can properly tell if it rounded up.
;The carry out of this add is what the SBB pair at SubOneFromRoot consumes.
	add	ebx,edi			;quotient + divisor (always sets CY)
	jmp	SubOneFromRoot		;Reduce root to ensure negative error
diff --git a/private/ntos/dll/i386/emftran.asm b/private/ntos/dll/i386/emftran.asm new file mode 100644 index 000000000..116c3a29f --- /dev/null +++ b/private/ntos/dll/i386/emftran.asm @@ -0,0 +1,1206 @@
	subttl	emftran.asm - Transcendental instructions
	page
;*******************************************************************************
;	Copyright (c) Microsoft Corporation 1991
;	All Rights Reserved
;
;emftran.asm - Transcendental instructions
;	by Tim Paterson
;
;Purpose:
;	F2XM1, FPATAN, FYL2X, FYL2XP1 instructions
;Inputs:
;	edi = [CURstk]
;
;Revision History:
;
; []	09/05/91  TP	Initial 32-bit version.
;
;*******************************************************************************


;********************* Polynomial Coefficients *********************

;These polynomial coefficients were all taken from "Computer Approximations"
;by J.F. Hart (reprinted 1978 w/corrections). All calculations and
;conversions to hexadecimal were done with a character-string calculator
;written in Visual Basic with precision set to 30 digits. Once the constants
;were typed into this file, all transfers were done with cut-and-paste
;operations to and from the calculator to help eliminate any typographical
;errors.


tAtanPoly	label	word

;These constants are from Hart #5056: atan(x) = x * P(x^2) / Q(x^2),
;accurate to 20.78 digits over interval [0, tan(pi/12)].
;Table layout: a dword degree, then 12-byte coefficients. Each coefficient
;is a 64-bit mantissa (dq) followed by two words: tag in the low byte (sign
;in the high byte where "bSign shl 8" appears), then the exponent, written
;here as the "H" exponent of the hex value minus one.
	dd	4			;P() is degree four

;	Hart constant
;
;+.16241 70218 72227 96595 08 E0
;Hex value:    0.A650A5D5050DE43A2C25A8C00 HFFFE
	dq	0A650A5D5050DE43AH
	dw	bTAG_VALID,0FFFEH-1

;+.65293 76545 29069 63960 675 E1
;Hex value:    0.D0F0A714A9604993AC4AC49A0 H3
	dq	0D0F0A714A9604994H
	dw	bTAG_VALID,03H-1

;+.39072 57269 45281 71734 92684 E2
;Hex value:    0.9C4A507F16530AC3CDDEFA3DE H6
	dq	09C4A507F16530AC4H
	dw	bTAG_VALID,06H-1

;+.72468 55912 17450 17145 90416 9 E2
;Hex value:    0.90EFE6FB30465042CF089D1310 H7
	dq	090EFE6FB30465043H
	dw	bTAG_VALID,07H-1

;+.41066 29181 34876 24224 77349 62 E2
;Hex value:    0.A443E2004BB000B84A5154D44 H6
	dq	0A443E2004BB000B8H
	dw	bTAG_VALID,06H-1

;Only four coefficients follow for Q(): the file's polynomial-evaluation
;notes state that the most significant coefficient of Q() is 1, and it is
;not stored.
	dd	4			;Q() is degree four

;	Hart constant
;
;+.15023 99905 56978 85827 4928 E2
;Hex value:    0.F0624CD575B782643AFB912D0 H4
	dq	0F0624CD575B78264H
	dw	bTAG_VALID,04H-1

;+.59578 42201 83554 49303 22456 E2
;Hex value:    0.EE504DDC907DEAEB7D7473B82 H6
	dq	0EE504DDC907DEAEBH
	dw	bTAG_VALID,06H-1

;+.86157 32305 95742 25062 42472 E2
;Hex value:    0.AC508CA5E78E504AB2032E864 H7
	dq	0AC508CA5E78E504BH
	dw	bTAG_VALID,07H-1

;+.41066 29181 34876 24224 84140 84 E2
;Hex value:    0.A443E2004BB000B84F542813C H6
	dq	0A443E2004BB000B8H
	dw	bTAG_VALID,06H-1


;tan(pi/12) = tan(15 deg.)
;  = 2 - sqrt(3)
;= 0.26794 91924 31122 70647 25536 58494 12763	;From Hart appendix
;Hex value:    0.8930A2F4F66AB189B517A51F2 HFFFF
Tan15Hi		equ	08930A2F4H
Tan15Lo		equ	0F66AB18AH
Tan15exp	equ	0FFFFH-1

;1/tan(pi/6) = sqrt(3) = 1.73205 08075 68877 29352 74463 41505 87236	;From Hart appendix
;Hex value:    0.DDB3D742C265539D92BA16B8 H1
Sqrt3Hi		equ	0DDB3D742H
Sqrt3Lo		equ	0C265539EH
Sqrt3exp	equ	01H-1

;pi = +3.14159265358979323846264338328
;Hex value:    0.C90FDAA22168C234C4C6628B8 H2
PiHi		equ	0C90FDAA2H
PiLo		equ	02168C235H
PiExp		equ	02H-1

;3*pi = +9.42477796076937971538793014984
;Hex value:    0.96CBE3F9990E91A79394C9E890 H4
XThreePiHi	equ	096CBE3F9H
XThreePiMid	equ	0990E91A7H
XThreePiLo	equ	090000000H
ThreePiExp	equ	04H-1


;This is a table of multiples of pi/6. It is used to adjust the
;final result angle after atan(). Derived from Hart appendix
;pi/180 = 0.01745 32925 19943 29576 92369 07684 88612
;
;When the reduced argument for atan() is very small, these correction
;constants simply become the result. These constants have all been
;rounded to nearest, but the user may have selected a different rounding
;mode. The tag byte is not needed for these constants, so its space
;is used to indicate which way it was rounded. To determine the direction
;in which a constant was rounded, 7FH is subtracted from this flag; CY set
;means it was rounded up.
;Flag values stored in the tag slot of tAtanPiFrac entries:
;040H - 7FH borrows (CY set), 0C0H - 7FH does not.
RoundedUp	equ	040H
RoundedDown	equ	0C0H

tAtanPiFrac	label	dword
;pi/2 = +1.57079632679489661923132169163
;Hex value:    0.C90FDAA22168C234C4C6628B0 H1
	dq	0C90FDAA22168C235H
	dw	RoundedUp,01H-1

;2*pi/3 = +2.09439510239319549230842892218
;Hex value:    0.860A91C16B9B2C232DD997078 H2
	dq	0860A91C16B9B2C23H
	dw	RoundedDown,02H-1

;none
	dd	0,0,0

;pi/6 = +0.523598775598298873077107230544E0
;Hex value:    0.860A91C16B9B2C232DD99707A H0
	dq	0860A91C16B9B2C23H
	dw	RoundedDown,00H-1

;pi/2 = +1.57079632679489661923132169163
;Hex value:    0.C90FDAA22168C234C4C6628B0 H1
	dq	0C90FDAA22168C235H
	dw	RoundedUp,01H-1

;pi/3 = +1.04719755119659774615421446109
;Hex value:    0.860A91C16B9B2C232DD997078 H1
	dq	0860A91C16B9B2C23H
	dw	RoundedDown,01H-1

;pi = +3.14159265358979323846264338328
;Hex value:    0.C90FDAA22168C234C4C6628B8 H2
	dq	0C90FDAA22168C235H
	dw	RoundedUp,02H-1

;5*pi/6 = +2.61799387799149436538553615272
;Hex value:    0.A78D3631C681F72BF94FFCC96 H2
	dq	0A78D3631C681F72CH
	dw	RoundedUp,02H-1

;*********************

tExpPoly	label	word

;These constants are from Hart #1324: 2^x - 1 =
; 2 * x * P(x^2) / ( Q(x^2) - x * P(x^2) )
;accurate to 21.54 digits over interval [0, 0.5].

	dd	2			;P() is degree two

;	Hart constant
;
;+.60613 30790 74800 42574 84896 07 E2
;Hex value:    0.F27406FCF405189818F68BB78 H6
	dq	0F27406FCF4051898H
	dw	bTAG_VALID,06H-1

;+.30285 61978 21164 59206 24269 927 E5
;Hex value:    0.EC9B3D5414E1AD0852E432A18 HF
	dq	0EC9B3D5414E1AD08H
	dw	bTAG_VALID,0FH-1

;+.20802 83036 50596 27128 55955 242 E7
;Hex value:    0.FDF0D84AC3A35FAF89A690CC4 H15
	dq	0FDF0D84AC3A35FB0H
	dw	bTAG_VALID,015H-1

	dd	3			;Q() is degree three. First
					;coefficient is 1.0 and is not listed.
;	Hart constant
;
;+.17492 20769 51057 14558 99141 717 E4
;Hex value:    0.DAA7108B387B776F212ECFBEC HB
	dq	0DAA7108B387B776FH
	dw	bTAG_VALID,0BH-1

;+.32770 95471 93281 18053 40200 719 E6
;Hex value:    0.A003B1829B7BE85CC81BD5309 H13
	dq	0A003B1829B7BE85DH
	dw	bTAG_VALID,013H-1

;+.60024 28040 82517 36653 36946 908 E7
;Hex value:    0.B72DF814E709837E066855BDD H17
	dq	0B72DF814E709837EH
	dw	bTAG_VALID,017H-1


;sqrt(2) = 1.41421 35623 73095 04880 16887 24209 69808	;From Hart appendix
;Hex value:    0.B504F333F9DE6484597D89B30 H1
Sqrt2Hi		equ	0B504F333H
Sqrt2Lo		equ	0F9DE6484H
Sqrt2Exp	equ	01H-1

;sqrt(2) - 1 = +0.4142135623730950488016887242E0
;Hex value:    0.D413CCCFE779921165F626CC4 HFFFF
Sqrt2m1Hi	equ	0D413CCCFH
Sqrt2m1Lo	equ	0E7799211H
XSqrt2m1Lo	equ	060000000H
Sqrt2m1Exp	equ	0FFFFH-1

;2 - sqrt(2) = +0.5857864376269049511983112758E0
;Hex value:    0.95F619980C4336F74D04EC9A0 H0
TwoMinusSqrt2Hi	equ	095F61998H
TwoMinusSqrt2Lo	equ	00C4336F7H
TwoMinusSqrt2Exp equ	00H-1

;*********************

tLogPoly	label	dword

;These constants are derived from Hart #2355: log2(x) = z * P(z^2) / Q(z^2),
; z = (x-1) / (x+1) accurate to 19.74 digits over interval
;[1/sqrt(2), sqrt(2)]. The original Hart coefficients were for log10();
;the P() coefficients have been scaled by log2(10) to compute log2().
;NOTE(review): this comment originally read z = (x+1) / (x-1), which is
;singular at x = 1 inside the stated interval; confirm against the FYL2X
;argument-reduction code.
;
;log2(10) = 3.32192 80948 87362 34787 03194 29489 39017	;From Hart appendix

	dd	3			;P() is degree three

;	Original Hart constant		Scaled value
;
;+.18287 59212 09199 9337 E0		+0.607500660543248917834110566373E0
;Hex value:    0.9B8529CD54E72022A12BAEC53 H0
	dq	09B8529CD54E72023H
	dw	bTAG_VALID,00H-1

;-.41855 96001 31266 20633 E1		-13.9042489506087332809657007634
;Hex value:    0.DE77CDBF64E8C53F0DCD458D0 H4
	dq	0DE77CDBF64E8C53FH
	dw	bSign shl 8 + bTAG_VALID,04H-1	;negative: sign in high byte of tag word

;+.13444 58152 27503 62236 E2		+44.6619330844279438866067340334
;Hex value:    0.B2A5D1C95708A0C9FE50F6F97 H6
	dq	0B2A5D1C95708A0CAH
	dw	bTAG_VALID,06H-1

;-.10429 11213 72526 69497 44122 E2	-34.6447606134704282123622236943
;Hex value:    0.8A943C20526AE439A98B30F6A H6
	dq	08A943C20526AE43AH
	dw	bSign shl 8 + bTAG_VALID,06H-1


	dd	3			;Q() is degree three. First
					;coefficient is 1.0 and is not listed.
;	Hart constant
;
;-.89111 09060 90270 85654 E1
;Hex value:    0.8E93E7183AA998D74F45CDFF0 H4
	dq	08E93E7183AA998D7H
	dw	bSign shl 8 + bTAG_VALID,04H-1

;+.19480 96618 79809 36524 155 E2
;Hex value:    0.9BD904CCFEE118D4BEF319716 H5
	dq	09BD904CCFEE118D5H
	dw	bTAG_VALID,05H-1

;-.12006 95907 02006 34243 4218 E2
;Hex value:    0.C01C811D2EC1B5806304B1858 H4
	dq	0C01C811D2EC1B580H
	dw	bSign shl 8 + bTAG_VALID,04H-1

;Log2(e) = 1.44269 50408 88963 40735 99246 81001 89213	;From Hart appendix
;Hex value:    0.B8AA3B295C17F0BBBE87FED04 H1
Log2OfEHi	equ	0B8AA3B29H
Log2OfELo	equ	05C17F0BCH
Log2OfEexp	equ	01H-1


;********************* Generic polynomial evaluation *********************
;
;EvalPoly, EvalPolyAdd, EvalPolySetup, Eval2Poly
;
;Inputs:
;	ebx:esi,ecx = floating point number, internal format
;	edi = pointer to polynomial degree and coefficients
;Outputs:
;	result in ebx:esi,ecx
;	edi incremented to start of last coefficient in list
;
;EvalPoly is the basic polynomial evaluator, using Horner's rule.
;The
;polynomial pointer in edi points to a list: the first dword in the list
;is the degree of the polynomial (n); it is followed by the n+1
;coefficients in internal (12-byte) format. The argument for EvalPoly
;must be stored in the static FloatTemp in addition to being in
;registers.
;
;EvalPolyAdd is an alternate entry point into the middle of EvalPoly.
;It is used when the first coefficient is 1.0, so it skips the first
;multiplication. It requires that the degree of the polynomial be
;already loaded into ebp.
;
;EvalPolySetup stores a copy of the argument in the static ArgTemp,
;and stores the square of the argument in the static FloatTemp.
;Then it falls into EvalPoly to evaluate the polynomial on the square.
;
;Eval2Poly evaluates two polynomials on its argument. The first
;polynomial is x * P(x^2), and its result is left at [[CURstk]].
;The second polynomial is Q(x^2), and its result is left in registers.
;The most significant coefficient of Q() is 1.
;
;Polynomial evaluation uses a slight variation on the standard add
;and multiply routines. PolyAddDouble and PolyMulDouble both check
;to see if the argument in registers (the current accumulation) is
;zero. The argument pointed to by edi is a coefficient and is never
;zero.
;
;In addition, the [RoundMode] and [ZeroVector] vectors are "trapped",
;i.e., redirected to special handlers for polynomial evaluation.
;[RoundMode] ordinarily points to the routine that handles the
;current rounding mode and precision control; however, during
;polynomial evaluation, we always want full precision and round
;nearest. The normal rounding routines also store their result
;at [[Result]], but we want the result left in registers.
;[ZeroVector] exists solely so polynomial evaluation can trap
;when AddDouble produces a result of zero. The normal response is to store
;a zero at [[Result]], but we need the zero left in registers.
;PolyRound and PolyZero handle these traps.


EvalPolySetup:
;Inputs:
;	ebx:esi,ecx = argument x, internal format
;	edi = pointer to polynomial degree and coefficients (read through cs:)
;Saves x in ArgTemp and x^2 in FloatTemp, redirects [RoundMode] and
;[ZeroVector] to PolyRound/PolyZero, then falls into EvalPoly with x^2.
;Save x in ArgTemp
	mov	EMSEG:[ArgTemp].ExpSgn,ecx
	mov	EMSEG:[ArgTemp].lManHi,ebx
	mov	EMSEG:[ArgTemp].lManLo,esi
	mov	EMSEG:[RoundMode],offset PolyRound
	mov	EMSEG:[ZeroVector],offset PolyZero
	push	edi			;Save pointer to polynomials
;op1 mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
	mov	edx,ebx
	mov	edi,esi
	mov	eax,ecx
;op2 mantissa in edx:edi, exponent in high eax, sign in ah bit 7
	call	MulDoubleReg		;Compute x^2
;Save x^2 in FloatTemp
	mov	EMSEG:[FloatTemp].ExpSgn,ecx
	mov	EMSEG:[FloatTemp].lManHi,ebx
	mov	EMSEG:[FloatTemp].lManLo,esi
	pop	edi
EvalPoly:
;ebx:esi,ecx = arg to evaluate, also in FloatTemp
;edi = pointer to degree and list of coefficients.
	push	edi
	mov	eax,cs:[edi+4].ExpSgn
	mov	edx,cs:[edi+4].lManHi
	mov	edi,cs:[edi+4].lManLo
	call	MulDoubleReg		;Multiply arg by first coef.
	pop	edi
	mov	ebp,cs:[edi]		;Get polynomial degree
	add	edi,4+Reg87Len		;Point to second coefficient
	jmp	EvalPolyAdd

PolyLoop:
;One Horner step: multiply the accumulation by the value in FloatTemp,
;then fall into EvalPolyAdd to add the next coefficient.
	push	ebp			;Save loop count
ifdef NT386
	mov	edi,YFloatTemp
else
	mov	edi,offset edata:FloatTemp
endif
	call	PolyMulDouble
	pop	ebp
	pop	edi			;Coefficient pointer saved by EvalPolyAdd
;Advance the full 32-bit pointer. "add di,Reg87Len" would change only the
;low word and wrap if the table crossed a 64K boundary; every other pointer
;step in these routines already uses edi.
	add	edi,Reg87Len
EvalPolyAdd:
;ebp = number of coefficients still to add; edi = next coefficient
	push	edi
	mov	eax,cs:[edi].ExpSgn
	mov	edx,cs:[edi].lManHi
	mov	edi,cs:[edi].lManLo
	cmp	cl,bTAG_ZERO		;Adding to zero?
	jz	AddToZero
	call	AddDoubleReg		;ebp preserved
ContPolyLoop:
	dec	ebp
	jnz	PolyLoop
	pop	edi			;edi = start of last coefficient used
	ret

AddToZero:
;Number in registers is zero, so just return value from memory.
+ mov ecx,eax + mov ebx,edx + mov esi,edi + jmp ContPolyLoop + + +Eval2Poly: + call EvalPolySetup + push edi +ifdef NT386 + mov edi,YArgTemp +else + mov edi,offset edata:ArgTemp +endif + call PolyMulDouble ;Multiply first result by argument + pop edi +;Save result of first polynomial at [[CURstk]] + mov edx,EMSEG:[CURstk] + mov EMSEG:[edx].ExpSgn,ecx + mov EMSEG:[edx].lManHi,ebx + mov EMSEG:[edx].lManLo,esi +;Load x^2 back into registers + mov ecx,EMSEG:[FloatTemp].ExpSgn + mov ebx,EMSEG:[FloatTemp].lManHi + mov esi,EMSEG:[FloatTemp].lManLo +;Start second polynomial evaluation + add edi,4+Reg87Len ;Skip last coef. of first poly and degree dword of second + mov ebp,cs:[edi-4] ;Get polynomial degree + jmp EvalPolyAdd + + +PolyRound: +;This routine handles all rounding during polynomial evaluation. +;It performs 64-bit round nearest, with result left in registers. +; +;Inputs: +; mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7 +;Outputs: +; same, plus tag in cl. +; +;To perform "round even" when the round bit is set and the sticky bits +;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB +;is set, that will always force a round up (to even) if the round bit is +;set. If the LSB is zero, then the sticky bits remain zero and we always +;round down. This rounding rule is implemented by adding RoundBit-1 +;(7F..FFH), setting CY if round up. +; +;This routine needs to be reversible in case we're at the last step +;in the polynomial and final rounding uses a different rounding mode. +;We do this by copying the LSB of esi into al. While the rounding is +;reversible, you can't tell if the answer was exact. + + mov edx,esi + and dl,1 ;Look at LSB + or al,dl ;Set LSB as sticky bit + add eax,(1 shl 31)-1 ;Sum LSB & sticky bits--CY if round up + adc esi,0 + adc ebx,0 + jc PolyBumpExponent ;Overflowed, increment exponent + or esi,esi ;Any bits in low half?
+.erre bTAG_VALID eq 1 +.erre bTAG_SNGL eq 0 + setnz cl ;if low half==0 then cl=0 else cl=1 + ret + +PolyBumpExponent: + add ecx,1 shl 16 ;Mantissa overflowed, bump exponent + or ebx,1 shl 31 ;Set MSB + mov cl,bTAG_SNGL ;Carry out of ebx:esi left esi zero +PolyZero: +;Enter here when result is zero + ret + +;******************************************************************************* + +;FPATAN instruction + +;Actual instruction entry point is in emarith.asm + +tFpatanDisp label dword ;Source (ST(0)) Dest (*[di] = ST(1)) + dd AtanDouble ;single single + dd AtanDouble ;single double + dd AtanZeroDest ;single zero + dd AtanSpclDest ;single special + dd AtanDouble ;double single + dd AtanDouble ;double double + dd AtanZeroDest ;double zero + dd AtanSpclDest ;double special + dd AtanZeroSource ;zero single + dd AtanZeroSource ;zero double + dd AtanZeroDest ;zero zero + dd AtanSpclDest ;zero special + dd AtanSpclSource ;special single + dd AtanSpclSource ;special double + dd AtanSpclSource ;special zero + dd TwoOpBothSpcl ;special special + dd AtanTwoInf ;Two infinities + +;Compute atan( st(1)/st(0) ). Neither st(0) nor st(1) is zero or +;infinity at this point. +; +;Argument reduction starts by dividing the smaller by the larger, +;ensuring that the result x is <= 1. The absolute value of the quotient +;is used and the quadrant is fixed up later. If x = st(0)/st(1), then +;the final atan result is subtracted from pi/2 (and normalized for the +;correct range of -pi to +pi). +; +;The range of x is further reduced using the formulas: +; t = (x - k) / (1 + kx) +; atan(x) = atan(k) + atan(t) +; +;Given that x <= 1, if we choose k = tan(pi/6) = 1/sqrt(3), then we +;are assured that t <= tan(pi/12) = 2 - sqrt(3), and +;for x >= tan(pi/12) = 2 - sqrt(3), t >= -tan(pi/12). +;Thus we can always reduce the argument to abs(t) <= tan(pi/12).
+; +;Since k = 1/sqrt(3), it is convenient to multiply the numerator +;and denominator of t by 1/k, which gives +;t = (x/k - 1) / (1/k + x) = ( x*sqrt(3) - 1 ) / ( sqrt(3) + x ). +;This is the form found in Cody and Waite and in previous versions +;of the emulator. It requires one each add, subtract, multiply, and +;divide. +; +;Hart has derived a simpler version of this formula: +;t = 1/k - (1/k^2 + 1) / (1/k + x) = sqrt(3) - 4 / ( sqrt(3) + x ). +;Note that this computation requires one each add, subtract, and +;divide, but no multiply. + +;st(0) mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7 +;[edi] points to st(1), where result is returned + +AtanDouble: + mov EMSEG:[Result],edi + mov EMSEG:[RoundMode],offset PolyRound + mov EMSEG:[ZeroVector],offset PolyZero + mov ah,EMSEG:[edi].bSgn ;Sign of result + mov al,ch ;Affects quadrant of result + and al,bSign ;Zero other bits, used as flags + push eax ;Save flag (al) and result sign (ah) +;First figure out which is larger + push offset AtanQuo ;Return address for DivDouble + shld edx,ecx,16 ;Get exponent to dx + cmp dx,EMSEG:[edi].wExp ;Compare exponents + jl DivrDoubleSetFlag ;ST(0) is smaller, make it dividend + jg DivDouble ; ...is bigger, make it divisor +;Exponents are equal, compare mantissas + cmp ebx,EMSEG:[edi].lManHi + jb DivrDoubleSetFlag ;ST(0) is smaller, make it dividend + ja DivDouble ; ...is bigger, make it divisor + cmp esi,EMSEG:[edi].lManLo + jbe DivrDoubleSetFlag ;ST(0) is smaller or equal, make it dividend + jmp DivDouble + +TinyAtan: +;Come here if the angle was reduced to zero, or the divide resulted in +;unmasked underflow so that the quotient exponent was biased. +;Note that an angle of zero means reduction was performed, and the +;result will be corrected to a non-zero value. + mov dl,[esp] ;Get flag byte + or dl,dl ;No correction needed? + jz AtanSetSign ;Just return result of divide + and EMSEG:[CURerr],not Underflow +;Angle in registers is too small to affect correction amount.
Just +;load up correction angle instead of adding it in. + add dl,40H ;Change flags for correction lookup + shr dl,5-2 ;Now in bits 2,3,4 + and edx,7 shl 2 + mov ebx,[edx+2*edx+tAtanPiFrac].lManHi + mov esi,[edx+2*edx+tAtanPiFrac].lManLo + mov ecx,[edx+2*edx+tAtanPiFrac].ExpSgn + shrd eax,ecx,8 ;Copy rounding flag to high eax + jmp AtanSetSign + +AtanQuo: +;Return here after divide. Underflow flag is set only for "big underflow", +;meaning the (15-bit) exponent couldn't even be kept in 16 bits. This can +;only happen dividing a denormal by one of the largest numbers. +; +;Rounded mantissa in ebx:esi:eax, exp/sign in high ecx + test EMSEG:[CURerr],Underflow;Did we underflow? + jnz TinyAtan +;Now compare quotient in ebx:esi,ecx with tan(pi/12) = 2 - sqrt(3) + xor cx,cx ;Use absolute value + cmp ecx,Tan15exp shl 16 + jg AtnNeedReduce + jl AtnReduced + cmp ebx,Tan15Hi + ja AtnNeedReduce + jb AtnReduced + cmp esi,Tan15Lo + jbe AtnReduced +AtnNeedReduce: + or byte ptr [esp],20H ;Note reduction in flags on stack +;Compute t = sqrt(3) - 4 / ( sqrt(3) + x ). + mov eax,Sqrt3exp shl 16 + mov edx,Sqrt3Hi + mov edi,Sqrt3Lo + call AddDoubleReg ;x + sqrt(3) + mov edi,esi + mov esi,ebx ;Mantissa in esi:edi + mov ebx,ecx ;ExpSgn to ebx + mov ecx,(2+TexpBias) shl 16 + mov edx,1 shl 31 + xor eax,eax ;edx:eax,ecx = 4.0 +;dividend mantissa in edx:eax, exponent in high ecx, sign in ch bit 7 +;divisor mantissa in esi:edi, exponent in high ebx, sign in bh bit 7 + call DivDoubleReg ;4 / ( x + sqrt(3) ) + not ch ;Flip sign + mov eax,Sqrt3exp shl 16 + mov edx,Sqrt3Hi + mov edi,Sqrt3Lo + call AddDoubleReg ;sqrt(3) - 4 / ( x + sqrt(3) ) +;Result in ebx:esi,ecx could be very small (or zero) if arg was near tan(pi/6). + cmp cl,bTAG_ZERO + jz TinyAtan +AtnReduced: +;If angle is small, skip the polynomial. atan(x) = x when x - x^3/3 = x +;[or 1 - x^2/3 = 1], which happens when x < 2^-32. This prevents underflow +;in computing x^2.
+TinyAtanArg equ -32 + cmp ecx,TinyAtanArg shl 16 + jl AtanCorrection + mov edi,offset tAtanPoly + call Eval2Poly + mov edi,EMSEG:[CURstk] ;Point to first result + call DivDouble ;x * P(x^2) / Q(x^2) +AtanCorrection: +;Rounded mantissa in ebx:esi:eax, exp/sign in high ecx +; +;Correct sign and add fraction of pi to account for various angle reductions: +; +; flag bit indicates correction +;---------------------------------------------------- +; 5 arg > tan(pi/12) add pi/6 +; 6 st(1) > st(0) sub from pi/2 +; 7 st(0) < 0 sub from pi +; +;This results in the following correction for the result R: +; +;bit 7 6 5 correction +;--------------------------- +; 0 0 0 none +; 0 0 1 pi/6 + R +; 0 1 0 pi/2 - R +; 0 1 1 pi/3 - R +; 1 0 0 pi - R +; 1 0 1 5*pi/6 - R +; 1 1 0 pi/2 + R +; 1 1 1 2*pi/3 + R + + mov dl,[esp] ;Get flag byte + or dl,dl ;No correction needed? + jz AtanSetSign + add dl,40H ;Set bit 7 for all -R cases + +;This changes the meaning of the flag bits to the following: +; +;bit 7 6 5 correction +;--------------------------- +; 0 0 0 pi/2 + R +; 0 0 1 2*pi/3 + R +; 0 1 0 none +; 0 1 1 pi/6 + R +; 1 0 0 pi/2 - R +; 1 0 1 pi/3 - R +; 1 1 0 pi - R +; 1 1 1 5*pi/6 - R + + xor ch,dl ;Flip sign bit in cases 4 - 7 + shr dl,5-2 ;Now in bits 2,3,4 + and edx,7 shl 2 ;edx = 4 * case number; edx+2*edx = 12 * case number +;Load the table entry as the second operand; edx is loaded last because +;it is also the index register. + mov eax,[edx+2*edx+tAtanPiFrac].ExpSgn + mov edi,[edx+2*edx+tAtanPiFrac].lManLo + mov edx,[edx+2*edx+tAtanPiFrac].lManHi + call AddDoubleReg ;Add in correction angle +AtanSetSign: + pop edx ;Get flags again + mov ch,dh ;Set sign to original ST(1) +;Rounded mantissa in ebx:esi:eax, exp/sign in ecx + jmp TransUnround + + +;*** +AtanSpclDest: + mov al,EMSEG:[edi].bTag ;Pick up tag of dest (loaded from [edi]) +; cmp cl,bTAG_INF ;Is argument infinity? + cmp al,bTAG_INF ;Is dest infinity? + jnz SpclDest ;In emarith.asm +AtanZeroSource: +;Dividend is infinity or divisor is zero. Return pi/2 with +;same sign as dividend. + mov ecx,(PiExp-1) shl 16 + bTAG_VALID ;Exponent for pi/2 +PiMant: +;For storing multiples of pi.
Exponent/tag is in ecx. + mov ch,EMSEG:[edi].bSgn ;Get dividend's sign + mov ebx,XPiHi + mov esi,XPiMid + mov eax,XPiLo +;A jump through [TransRound] is only valid if the number is known not to +;underflow. Unmasked underflow requires [RoundMode] be set. + jmp EMSEG:[TransRound] + +;*** +AtanSpclSource: + cmp cl,bTAG_INF ;Scaling by infinity? + jnz SpclSource ;in emarith.asm +AtanZeroDest: +;Divisor is infinity or dividend is zero. Return zero for +divisor, +;pi for -divisor. Result sign is same as dividend. + or ch,ch ;Check divisor's sign + mov ecx,PiExp shl 16 + bTAG_VALID ;Exponent for pi (MOV keeps the flags) + js PiMant ;Store pi +;Result is zero + mov EMSEG:[edi].lManHi,0 + mov EMSEG:[edi].lManLo,0 + mov EMSEG:[edi].wExp,0 + mov EMSEG:[edi].bTAG,bTAG_ZERO + ret + +;*** +AtanTwoInf: +;Return pi/4 for +infinity divisor, 3*pi/4 for -infinity divisor. +;Result sign is same as dividend infinity. + or ch,ch ;Check divisor's sign + mov ecx,(PiExp-2) shl 16 + bTAG_VALID ;Exponent for pi/4 (MOV keeps the flags) + jns PiMant ;Store pi/4 + mov ecx,(ThreePiExp-2) shl 16 + bTAG_VALID ;Exponent for 3*pi/4 + mov ch,EMSEG:[edi].bSgn ;Get dividend's sign + mov ebx,XThreePiHi + mov esi,XThreePiMid + mov eax,XThreePiLo +;A jump through [TransRound] is only valid if the number is known not to +;underflow. Unmasked underflow requires [RoundMode] be set. + jmp EMSEG:[TransRound] + +;******************************************************************************* + +ExpSpcl: +;Tagged special + cmp cl,bTAG_DEN + jz ExpDenorm + cmp cl,bTAG_INF + mov al, cl + jnz SpclDestNotDen ;Check for Empty or NAN +;Have infinity, check its sign. +;Return -1 for -infinity, no change if +infinity + or ch,ch ;Check sign + jns ExpRet ;Just return the +infinity + mov EMSEG:[edi].lManLo,0 + mov EMSEG:[edi].lManHi,1 shl 31 + mov EMSEG:[edi].ExpSgn,bSign shl 8 + bTAG_SNGL ;-1.0 (exponent is zero) + ret + +ExpDenorm: +;NOTE(review): this MOV replaces the whole of [CURerr]; FixDenorm in +;emload.asm uses OR for the same flag. Confirm the overwrite is intended. + mov EMSEG:[CURerr],Denormal + test EMSEG:[CWmask],Denormal ;Is denormal exception masked?
+ jnz ExpCont ;Yes, continue +ExpRet: + ret + +EM_ENTRY eF2XM1 +eF2XM1: +;edi = [CURstk] + mov ecx,EMSEG:[edi].ExpSgn + cmp cl,bTAG_ZERO + jz ExpRet ;Return same zero + ja ExpSpcl ;Tag above bTAG_ZERO: special +ExpCont: + +;The input range specified for the function is (-1, +1). The polynomial +;used for this function is valid only over the range [0, +0.5], so range +;reduction is needed. Range reduction is based on the identity: +; +; 2^(a+b) = 2^a * 2^b +; +;1.0 or 0.5 can be added/subtracted from the argument to bring it into +;range. We calculate 2^x - 1 with a polynomial, and then adjust the +;result according to the amount added or subtracted, as shown in the table: +; +;Arg range Adj Polynomial result Required result, 2^x - 1 +; +; (-1, -0.5] +1 P = 2^(x+1) - 1 (P - 1)/2 +; +; (-0.5, 0) +0.5 P = 2^(x+0.5) - 1 P * sqrt(2)/2 + (sqrt(2)/2 - 1) +; +; (0, 0.5) 0 P = 2^x - 1 P +; +; [0.5, 1) -0.5 P = 2^(x-0.5) - 1 P * sqrt(2) + (sqrt(2)-1) +; +;Since the valid input range does not include +1.0 or -1.0, and zero is +;handled separately, the precision exception will always be set. + + mov EMSEG:[Result],edi + mov EMSEG:[RoundMode],offset PolyRound + mov EMSEG:[ZeroVector],offset PolyZero + push offset TransUnround ;Always exit through here + mov ebx,EMSEG:[edi].lManHi + mov esi,EMSEG:[edi].lManLo +;Check for small argument, so that x^2 does not underflow. Note that +;e^x = 1+x for small x, where small x means x + x^2/2 = x [or 1 + x/2 = 1], +;which happens when x < 2^-64, so 2^x - 1 = x * ln(2) for small x. +TinyExpArg equ -64 + cmp ecx,TinyExpArg shl 16 + jl TinyExp + cmp ecx,-1 shl 16 + bSign shl 8 ;See if positive, < 0.5 + jl ExpReduced +;Argument was not in range (0, 0.5), so we need some kind of reduction + or ecx,ecx ;Exp >= 0 means arg >= 1.0 --> too big +;CONSIDER: this returns through TransUnround which restores the rounding +;vectors, but it also randomly rounds the result because eax is not set.
+ jge ExpRet ;Give up if arg out of range +;We're going to need to add/subtract 1.0 or 0.5, so load up the constant +;ebp gets the address of the routine that undoes the reduction afterwards. + mov edx,1 shl 31 + xor edi,edi + mov eax,-1 shl 16 + bSign shl 8 ;edx:edi,eax = -0.5 + mov ebp,offset ExpReducedMinusHalf + or ch,ch ;If it's positive, must be [0.5, 1) + jns ExpReduction + xor ah,ah ;edx:edi,eax = +0.5 + mov ebp,offset ExpReducedPlusHalf + cmp ecx,eax ;See if abs(arg) >= 0.5 + jl ExpReduction ;No, adjust by .5 + xor eax,eax ;edx:edi,eax = 1.0 + mov ebp,offset ExpReducedPlusOne +ExpReduction: + call AddDoubleReg ;Argument now in range [0, 0.5] + cmp cl,bTAG_ZERO ;Did reduction result in zero? + jz ExpHalf ;If so, must have been exactly 0.5 + push ebp ;Address of reduction cleanup +ExpReduced: + mov edi,offset tExpPoly + call Eval2Poly +;2^x - 1 is approximated with 2 * x*P(x^2) / ( Q(x^2) - x*P(x^2) ) +;Q(x^2) is in registers, x*P(x^2) is at [[CURstk]] + mov edi,EMSEG:[CURstk] + mov dx,bSign shl 8 ;Subtract memory operand +;Note that Q() and P() have no roots over the input range +;(they will never be zero). + call AddDouble ;Q(x^2) - x*P(x^2) + sub ecx,1 shl 16 ;Divide by two + mov edi,EMSEG:[CURstk] + jmp DivDouble ;2 * x*P(x^2) / ( Q(x^2) - x*P(x^2) ) +;Returns to correct argument reduction correction routine or TransUnround + +TinyExp: +;Argument is very small (and was not reduced) + mov edx,cFLDLN2hi + mov edi,cFLDLN2lo + mov eax,cFLDLN2exp shl 16 +;This could underflow (but not big time) + jmp MulDoubleReg ;Returns to TransUnround + +ExpHalf: +;Argument of exactly 0.5 was reduced to zero. Just return result.
+ mov ebx,Sqrt2m1Hi + mov esi,Sqrt2m1Lo + mov eax,XSqrt2m1Lo + 1 shl 31 - 1 + mov ecx,Sqrt2m1Exp shl 16 + ret ;Exit through TransUnround + +ExpReducedPlusOne: +;Correct result is (P - 1)/2 + sub ecx,1 shl 16 ;Divide by two + mov edx,1 shl 31 + xor edi,edi + mov eax,-1 shl 16 + bSign shl 8 ;edx:edi,eax = -0.5 + jmp AddDoubleReg + +ExpReducedPlusHalf: +;Correct result is P * sqrt(2)/2 - (1 - sqrt(2)/2) + mov edx,Sqrt2Hi + mov edi,Sqrt2Lo +;Parenthesized because SHL binds tighter than binary minus in MASM; +;without parentheses this is Sqrt2exp - (1 shl 16). + mov eax,(Sqrt2exp-1) shl 16 ;sqrt(2)/2 + call MulDoubleReg + mov edx,TwoMinusSqrt2Hi + mov edi,TwoMinusSqrt2Lo + mov eax,(TwoMinusSqrt2Exp-1) shl 16 + bSign shl 8 ;(2-sqrt(2))/2 + jmp AddDoubleReg + +ExpReducedMinusHalf: +;Correct result is P * sqrt(2) + (sqrt(2)-1) + mov edx,Sqrt2Hi + mov edi,Sqrt2Lo + mov eax,Sqrt2exp shl 16 + call MulDoubleReg + mov edx,Sqrt2m1Hi + mov edi,Sqrt2m1Lo + mov eax,Sqrt2m1Exp shl 16 + jmp AddDoubleReg + +;******************************************************************************* + +;Dispatch table for log(x+1) +; +;One operand has been loaded into ecx:ebx:esi ("source"), the other is +;pointed to by edi ("dest"). +; +;Tag of source is shifted.
Tag values are as follows: + +.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero +.erre TAG_VALID eq 1 +.erre TAG_ZERO eq 2 +.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty + +;Any special case routines not found in this file are in emarith.asm + +tFyl2xp1Disp label dword ;Source (ST(0)) Dest (*[di] = ST(1)) + dd LogP1Double ;single single + dd LogP1Double ;single double + dd LogP1ZeroDest ;single zero + dd LogP1SpclDest ;single special + dd LogP1Double ;double single + dd LogP1Double ;double double + dd LogP1ZeroDest ;double zero + dd LogP1SpclDest ;double special + dd XorSourceSign ;zero single + dd XorSourceSign ;zero double + dd XorDestSign ;zero zero + dd LogP1SpclDest ;zero special + dd LogSpclSource ;special single + dd LogSpclSource ;special double + dd LogSpclSource ;special zero + dd TwoOpBothSpcl ;special special + dd LogTwoInf ;Two infinities + + +LogP1Double: +;st(0) mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7 +;[edi] points to st(1), where result is returned +; +;This instruction is defined only for x+1 in the range [1/sqrt(2), sqrt(2)] +;The approximation used (valid over exactly this range) is +; log2(x) = z * P(z^2) / Q(z^2), z = (x-1) / (x+1), which is +; log2(x+1) = r * P(r^2) / Q(r^2), r = x / (x+2) +; +;We're not too picky about this range check because the function is simply +;"undefined" if out of range--EXCEPT, we're supposed to check for -1 and +;signal Invalid if less, -infinity if equal. + or ecx,ecx ;abs(x) >= 1.0? + jge LogP1OutOfRange ;Valid range is approx [-0.3, +0.4] + mov EMSEG:[Result],edi + mov EMSEG:[RoundMode],offset PolyRound + mov EMSEG:[ZeroVector],offset PolyZero + mov eax,1 shl 16 ;Exponent of 1 for adding 2.0 + push offset TotalLog ;Return address for BasicLog +; jmp BasicLog ;Fall into BasicLog +;.erre BasicLog eq $ + +;BasicLog is used by eFYL2X and eFYL2XP1.
+;eax has exponent and sign to add 1.0 or 2.0 to argument +;ebx:esi,ecx has argument, non-zero, tag not set +;ST has argument to take log2 of, minus 1. (This is the actual argument +;of eFYL2XP1, or argument minus 1 of eFYL2X.) + +BasicLog: + mov edx,1 shl 31 + xor edi,edi ;edx:edi,eax = +1.0 or +2.0 + call AddDoubleReg ;Argument + 1.0 or 2.0 + mov edi,EMSEG:[CURstk] ;Point to x-1 + call DivDouble ;Compute (x-1) / (x+1) +;Result in registers is z = (x-1)/(x+1). For tiny z, ln(x) = 2*z, so +; log2(x) = 2 * log2(e) * z. Tiny z is such that z + z^3/3 = z. + cmp ecx,-32 shl 16 ;Smallest exponent to bother with + jl LogSkipPoly + mov edi,offset tLogPoly + call Eval2Poly + mov edi,EMSEG:[CURstk] ;Point to first result, r * P(r^2) + jmp DivDouble ;Compute r * P(r^2) / Q(r^2) + +LogSkipPoly: +;Multiply r by 2 * log2(e) + mov edx,Log2OfEHi + mov edi,Log2OfELo + mov eax,(Log2OfEexp+1) shl 16 ;Exponent + 1 supplies the factor of 2 + jmp MulDoubleReg + +LogP1OutOfRange: +;Input range isn't valid, so we can return anything we want--EXCEPT, for +;numbers < -1 we must signal Invalid Operation, and Divide By Zero for +;-1. Otherwise, we return an effective log of one by just leaving the +;second operand as the return value. +; +;Exponent in ecx >= 0 ( abs(x) >= 1 ) + or ch,ch ;Is it positive? + jns LogP1Ret ;If so, skip it + and ecx,0FFFFH shl 16 ;Look at exponent only: 0 for -1.0 + sub ebx,1 shl 31 ;Kill MSB + or ebx,esi + or ebx,ecx ;Zero only if exponent and rest of mantissa are zero + jnz ReturnIndefinite ;Must be < -1.0 + jmp DivideByMinusZero + +LogP1Ret: + ret + +;*** +LogP1ZeroDest: + or ch,ch ;Is it negative? + jns LogP1Ret ;If not, just leave it zero + or ecx,ecx ;abs(x) >= 1.0? + jl XorDestSign ;Flip sign of zero +;Argument is <= -1 + jmp ReturnIndefinite ;Have 0 * log( <=0 ) + +;*** +LogP1SpclDest: + mov al,EMSEG:[edi].bTag ;Pick up tag + cmp al,bTAG_INF ;Is argument infinity? + jnz SpclDest ;In emarith.asm +;Multiplying log(x+1) * infinity. +;If x > 0, return original infinity. +;If -1 <= x < 0, return infinity with sign flipped. +;If x < -1 or x == 0, invalid operation.
+ cmp cl,bTAG_ZERO + jz ReturnIndefinite + or ch,ch ;Is it positive? + jns LogP1Ret +;TEST clears OF, so JL/JG below branch on the sign of the exponent field. + test ecx,0FFFFH shl 16 ;Is exponent zero? + jl XorDestSign ;Exponent < 0 + jg ReturnIndefinite ;Exponent > 0 + sub ebx,1 shl 31 ;Kill MSB + or ebx,esi + jnz ReturnIndefinite ;Must be < -1.0 + jmp XorDestSign + +;*** +LogSpclSource: + cmp cl,bTAG_INF ;Is argument infinity? + jnz SpclSource ;in emarith.asm + or ch,ch ;Is it negative infinity? + js ReturnIndefinite + jmp MulByInf + +;*** +LogTwoInf: + or ch,ch ;Is it negative infinity? + js ReturnIndefinite + jmp XorDestSign + +;******************************************************************************* + +;Dispatch table for log(x) +; +;One operand has been loaded into ecx:ebx:esi ("source"), the other is +;pointed to by edi ("dest"). +; +;Tag of source is shifted. Tag values are as follows: + +.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero +.erre TAG_VALID eq 1 +.erre TAG_ZERO eq 2 +.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty + +;Any special case routines not found in this file are in emarith.asm + +tFyl2xDisp label dword ;Source (ST(0)) Dest (*[di] = ST(1)) + dd LogDouble ;single single + dd LogDouble ;single double + dd LogZeroDest ;single zero + dd LogSpclDest ;single special + dd LogDouble ;double single + dd LogDouble ;double double + dd LogZeroDest ;double zero + dd LogSpclDest ;double special + dd DivideByMinusZero ;zero single + dd DivideByMinusZero ;zero double + dd ReturnIndefinite ;zero zero + dd LogSpclDest ;zero special + dd LogSpclSource ;special single + dd LogSpclSource ;special double + dd LogSpclSource ;special zero + dd TwoOpBothSpcl ;special special + dd LogTwoInf ;Two infinities + + +LogDouble: +;st(0) mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7 +;[edi] points to st(1), where result is returned +; +;Must reduce the argument to the range [1/sqrt(2), sqrt(2)] + or ch,ch ;Is it positive?
+ js ReturnIndefinite ;Can't take log of negative number + mov EMSEG:[Result],edi + mov EMSEG:[RoundMode],offset PolyRound + mov EMSEG:[ZeroVector],offset PolyZero + shld eax,ecx,16 ;Save exponent in ax as int part of log2 + xor ecx,ecx ;Zero exponent: 1 <= x < 2 + cmp ebx,Sqrt2Hi ;x > sqrt(2)? + jb LogReduced + ja LogReduceOne + cmp esi,Sqrt2Lo + jb LogReduced +LogReduceOne: + sub ecx,1 shl 16 ;1/sqrt(2) < x < 1 + inc eax ;Integer part grows by one to compensate +LogReduced: + push eax ;Save integer part of log2 + mov ebp,ecx ;Save reduced exponent (tag is wrong!) + mov edx,1 shl 31 + mov eax,bSign shl 8 ;Exponent of 0, negative + xor edi,edi ;edx:edi,eax = -1.0 + call AddDoubleReg + cmp cl,bTAG_ZERO ;Was it exact power of two? + jz LogDone ;Skip log if power of two +;Save (x - 1), reload x with reduced exponent + mov edi,EMSEG:[CURstk] ;Point to original x again + xchg EMSEG:[edi].lManHi,ebx + xchg EMSEG:[edi].lManLo,esi + mov EMSEG:[edi].ExpSgn,ecx + mov ecx,ebp ;Get reduced exponent + xor eax,eax ;Exponent of 0, positive + call BasicLog +LogDone: + pop eax ;Get integer part back + cwde + or eax,eax ;Is it zero? + jz TotalLog +;Next 3 instructions take abs() of integer + cdq ;Extend sign through edx + xor eax,edx ;Complement... + sub eax,edx ; and increment if negative + bsr dx,ax ;Look for MSB to normalize integer +;Bit number in dx ranges from 0 to 15 + mov cl,dl + not cl ;Convert to shift count + shl eax,cl ;Normalize +.erre TexpBias eq 0 + rol edx,16 ;Move exponent high, sign low + or ebx,ebx ;Was log zero? + jz ExactPower + xchg edx,eax ;Exp/sign to eax, mantissa to edx + xor edi,edi ;Extend with zero + call AddDoubleReg +TotalLog: +;Registers could be zero if input was exactly 1.0 + cmp cl,bTAG_ZERO + jz ZeroLog +TotalLogNotZero: + mov edi,EMSEG:[Result] ;Point to second arg + push offset TransUnround + jmp MulDouble + +ExactPower: +;Arg was a power of two, so log is exact (but not zero).
+ mov ebx,eax ;Mantissa to ebx + mov ecx,edx ;Exponent to ecx + xor esi,esi ;Extend with zero +;Exponent of arg [= log2(arg)] is now normalized in ebx:esi,ecx +; +;The result log is exact, so we don't want TransUnround, which is designed +;to ensure the result is never exact. Instead we set the [RoundMode] +;vector to [TransRound] before the final multiply. + mov eax,EMSEG:[TransRound] + mov EMSEG:[RoundMode],eax + mov edi,EMSEG:[Result] ;Point to second arg + push offset RestoreRound ;Return addr. for MulDouble in emtrig.asm + jmp MulDouble + +ZeroLog: +;Undo the PolyRound/PolyZero redirection set up on entry, then store the result. + mov eax,EMSEG:[SavedRoundMode] + mov EMSEG:[RoundMode],eax + mov EMSEG:[ZeroVector],offset SaveResult + jmp SaveResult + +;*** +LogZeroDest: + or ch,ch ;Is it negative? + js ReturnIndefinite ;Can't take log of negative numbers +;See if log is + or - so we can get correct sign of zero + or ecx,ecx ;Is exponent >= 0? + jge LogRet ;If so, keep present zero sign +FlipDestSign: + not EMSEG:[edi].bSgn + ret + +;*** +LogSpclDest: + mov al,EMSEG:[edi].bTag ;Pick up tag + cmp al,bTAG_INF ;Is argument infinity? + jnz SpclDest ;In emarith.asm +;Multiplying log(x) * infinity. +;If x > 1, return original infinity. +;If 0 <= x < 1, return infinity with sign flipped. +;If x < 0 or x == 1, invalid operation. + cmp cl,bTAG_ZERO + jz FlipDestSign + or ch,ch ;Is it positive? + js ReturnIndefinite + test ecx,0FFFFH shl 16 ;Is exponent zero?
+ jg LogRet ;x > 1, just return infinity + jl FlipDestSign + sub ebx,1 shl 31 ;Kill MSB + or ebx,esi + jz ReturnIndefinite ;x == 1.0 +LogRet: + ret diff --git a/private/ntos/dll/i386/emload.asm b/private/ntos/dll/i386/emload.asm new file mode 100644 index 000000000..c2e68c561 --- /dev/null +++ b/private/ntos/dll/i386/emload.asm @@ -0,0 +1,416 @@ + subttl emload.asm - FLD and FILD instructions + page +;******************************************************************************* +;emload.asm - FLD and FILD instructions +; +; Microsoft Confidential +; +; Copyright (c) Microsoft Corporation 1991 +; All Rights Reserved +; +;Purpose: +; FLD and FILD instructions +;Inputs: +; edi = [CURstk] +; dseg:esi = pointer to memory operand +; +;Revision History: +; +; [] 09/05/91 TP Initial 32-bit version. +; +;******************************************************************************* + + + PrevStackWrap edi,LdStk ;Tied to PrevStackElem below + +;******* +EM_ENTRY eFLDreg +eFLDreg: +;******* +; edi = [CURstk] +; esi = pointer to st(i) from instruction field + + PrevStackElem edi,LdStk ;Point to receiving location + cmp EMSEG:[edi].bTag,bTAG_EMPTY ;Is it empty? + jnz FldErr + mov ecx,EMSEG:[esi].ExpSgn + cmp cl,bTAG_EMPTY +;NOTE(review): an empty source goes to FldErr, which sets C1 ("Signal +;overflow"). Confirm that is the intended status for an empty source. + jz FldErr + mov ebx,EMSEG:[esi].lManHi + mov esi,EMSEG:[esi].lManLo + mov EMSEG:[CURstk],edi + mov EMSEG:[edi].lManLo,esi + mov EMSEG:[edi].lManHi,ebx + mov EMSEG:[edi].ExpSgn,ecx + ret + + +;This is common code that stores a value into the stack after being loaded +;into registers by the appropriate routine. + + PrevStackWrap edi,Load ;Tied to PrevStackElem below + +FldCont: +;mantissa in ebx:esi, exp/sign in ecx +;edi = [CURstk] + PrevStackElem edi,Load ;Point to receiving location + cmp EMSEG:[edi].bTag,bTAG_EMPTY ;Is it empty? + jnz FldErr + cmp cl,bTAG_NAN ;Returning a NAN?
+ jz FldNAN +SaveStack: + mov EMSEG:[CURstk],edi + mov EMSEG:[edi].lManLo,esi + mov EMSEG:[edi].lManHi,ebx + mov EMSEG:[edi].ExpSgn,ecx + ret + +FldErr: + or EMSEG:[SWcc],C1 ;Signal overflow + mov EMSEG:[CURerr],StackFlag;Kills possible denormal exception +Unsupported: + call ReturnIndefinite ;in emarith.asm + jz FldExit ;Unmasked, do nothing + mov EMSEG:[CURstk],edi ;Update top of stack +FldExit: + ret + +FldNAN: +;Is it a signaling NAN? + test ebx,1 shl 30 ;Check for SNAN + jnz SaveStack ;If QNAN, just use it as result + or EMSEG:[CURerr],Invalid ;Flag the error + or ebx,1 shl 30 ;Make it into a QNAN + test EMSEG:[CWmask],Invalid ;Is it masked? + jnz SaveStack ;If so, update with masked response + ret + + +;**************** +;Load Single Real +;**************** + +EM_ENTRY eFLD32 +eFLD32: + push offset FldCont ;Return address + ;Fall into Load32Real +Load32Real: +;dseg:esi points to IEEE 32-bit real number +;On exit: +; mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl +;preserves edi. + + mov EMSEG:[PrevDataOff],esi ;Save operand pointer + mov ecx,dseg:[esi] ;Get number + mov ebx,ecx ;Save copy of mantissa + shl ebx,8 ;Normalize + shr ecx,7 ;Bring exponent down + and ecx,0FFH shl 16 ;Look at just exponent + mov ch,dseg:[esi+3] ;Get sign again (MOV keeps ZF from the AND) + jz short ZeroOrDenorm32 ;Exponent is zero + xor esi,esi ;Zero out the low bits + or ebx,1 shl 31 ;Set implied bit + cmp ecx,SexpMax shl 16 + jge NANorInf ;Max exp., must be NAN or Infinity + add ecx,(TexpBias-SexpBias) shl 16 ;Change to extended format bias + mov cl,bTAG_SNGL + ret + +ZeroOrDenorm32: +;Exponent is zero. Number is either zero or denormalized + xor esi,esi ;Zero out the low bits + and ebx,not (1 shl 31) ;Keep just mantissa + jnz Norm32 + mov cl,bTAG_ZERO + ret + +Norm32: + add ecx,(TexpBias-SexpBias+1-31) shl 16 ;Fix up bias + jmp FixDenorm + + +NANorInf: +;Shared by single and double real + and ecx,bSign shl 8 ;Save only sign in ch + or ecx,TexpMax shl 16 + bTAG_NAN ;Max exp.
+ cmp ebx,1 shl 31 ;Only 1 bit set means infinity + jnz @F + or esi,esi + jnz @F + mov cl,bTAG_INF +@@: + ret + +;**************** +;Load Double Real +;**************** + +EM_ENTRY eFLD64 +eFLD64: + push offset FldCont ;Return address + ;Fall into Load64Real +Load64Real: +;dseg:esi points to IEEE 64-bit real number +;On exit: +; mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl +;preserves edi. + + mov EMSEG:[PrevDataOff],esi ;Save operand pointer + mov ecx,dseg:[esi+4] ;Get sign, exp., and high mantissa + mov ebx,ecx ;Save copy of mantissa + shr ecx,4 ;Bring exponent down + and ecx,7FFH shl 16 ;Look at just exponent +;The two MOVs below keep ZF from the AND for the JZ that follows. + mov ch,dseg:[esi+7] ;Get sign again + mov esi,dseg:[esi] ;Get low 32 bits of op + jz short ZeroOrDenorm64 ;Exponent is zero + shld ebx,esi,31-20 + shl esi,31-20 ;Normalize + or ebx,1 shl 31 ;Set implied bit + cmp ecx,DexpMax shl 16 + jge NANorInf ;Max exp., must be NAN or Infinity + add ecx,(TexpBias-DexpBias) shl 16 ;Change to extended format bias +SetNormTag: + or esi,esi ;Any bits in low half? +.erre bTAG_VALID eq 1 +.erre bTAG_SNGL eq 0 + setnz cl ;if low half==0 then cl=0 else cl=1 + ret + +ZeroOrDenorm64: +;Exponent is zero. Number is either zero or denormalized + and ebx,0FFFFFH ;Keep just mantissa + jnz ShortNorm64 ;Are top 20 bits zero? + or esi,esi ;Are low 32 bits zero too? + jnz LongNorm64 + mov cl,bTAG_ZERO + ret + +LongNorm64: + xchg ebx,esi ;Shift up 32 bits + sub ecx,32 shl 16 ;Correct exponent +ShortNorm64: + add ecx,(TexpBias-DexpBias+12-31) shl 16 ;Fix up bias +FixDenorm: + or EMSEG:[CURerr],Denormal ;Set Denormal Exception + bsr edx,ebx ;Scan for MSB +;Bit number in edx ranges from 0 to 31 + mov cl,dl + not cl ;Convert bit number to shift count + shld ebx,esi,cl + shl esi,cl + shl edx,16 ;Move exp.
adjustment to high end + add ecx,edx ;Adjust exponent + jmp SetNormTag + + +;****************** +;Load Short Integer +;****************** + +EM_ENTRY eFILD16 +eFILD16: + push offset FldCont ;Return address + ;Fall into Load16Int +Load16Int: +;dseg:esi points to 16-bit integer +;On exit: +; mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl +;preserves edi. + + mov EMSEG:[PrevDataOff],esi ;Save operand pointer + mov ax,dseg:[esi] +NormInt16: + xor esi,esi ;Extend with zero + cwd ;extend sign through dx + xor ax,dx + sub ax,dx ;Take ABS() of integer + bsr cx,ax ;Find MSB + jz ZeroInt +;Bit number in cx ranges from 0 to 15 + not ecx ;Convert to shift count + shl eax,cl ;Normalize + not ecx ;Recover bit number (the exponent) +.erre TexpBias eq 0 + shl ecx,16 ;Move exponent to high half + mov ch,dh ;Set sign + mov ebx,eax ;Mantissa to ebx + mov cl,bTAG_SNGL + ret + +ZeroInt: + xor ebx,ebx + mov ecx,ebx + mov cl,bTAG_ZERO + ret + + +;****************** +;Load Long Integer +;****************** + +EM_ENTRY eFILD32 +eFILD32: + push offset FldCont ;Return address + ;Fall into Load32Int +Load32Int: +;dseg:esi points to 32-bit integer +;On exit: +; mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl +;preserves edi. + + mov EMSEG:[PrevDataOff],esi ;Save operand pointer + mov eax,dseg:[esi] + xor esi,esi ;Extend with zero + or eax,eax ;Is it zero? + jz ZeroInt + cdq ;extend sign through edx + xor eax,edx + sub eax,edx ;Take ABS() of integer + mov ebx,eax ;Mantissa to ebx +;BSR uses 3 clocks/bit, so speed it up by checking the top half +;This saves 36 clocks on 386 (42 on 486sx) +;Cost is 13 clocks on 386 if high word isn't zero (5 on 486sx) +.erre TexpBias eq 0 + xor eax,eax ;Initialize exponent + cmp ebx,0FFFFH ;Upper bits zero?
+ ja @F + shl ebx,16 + sub eax,16 +@@: + bsr ecx,ebx ;Find MSB + add eax,ecx ;Compute exponent + not cl ;Convert bit number to shift count + shl ebx,cl ;Normalize + shrd ecx,eax,16 ;Move exponent to high half of ecx + mov ch,dh ;Set sign + mov cl,bTAG_SNGL + ret + + +;***************** +;Load Quad Integer +;***************** + +EM_ENTRY eFILD64 +eFILD64: + mov EMSEG:[PrevDataOff],esi ;Save operand pointer + mov ebx,dseg:[esi+4] ;Get high 32 bits + mov eax,ebx ;Make copy of sign + mov esi,dseg:[esi] ;Get low 32 bits + mov ecx,ebx + or ecx,esi ;Is it zero? + jz ZeroQuad +NormQuadInt: +;Entry point from eFBLD +;eax bit 31 = sign +;ebx:esi = integer +;edi = [CURstk] +.erre TexpBias eq 0 + mov ax,32 ;Initialize exponent + or ebx,ebx ;Check sign + jz LongNormInt + jns FindBit + not ebx + neg esi ;CY set if non-zero + sbb ebx,-1 ;Add one if esi == 0 + jnz FindBit ;Check for high bits zero +LongNormInt: + xchg ebx,esi ;Normalize 32 bits + xor ax,ax ;Reduce exponent by 32 +FindBit: +;BSR uses 3 clocks/bit, so speed it up by checking the top half +;This saves 35 clocks on 386 (41 on 486sx) +;Cost is 11 clocks on 386 if high word isn't zero (4 on 486sx) + cmp ebx,0FFFFH ;Upper bits zero? + ja @F + shld ebx,esi,16 + shl esi,16 + sub eax,16 ;NOTE(review): when ax==0 this borrows into the sign half of eax; the add after BSR (bit number >= 16 here) appears to cancel it -- confirm +@@: + bsr ecx,ebx ;Find MSB + add eax,ecx ;Compute exponent + not cl ;Convert bit number to shift count + shld ebx,esi,cl ;Normalize + shl esi,cl + mov ecx,eax ;Move sign and exponent to ecx + rol ecx,16 ;Swap sign and exponent halves + or esi,esi ;Any bits in low half? +.erre bTAG_VALID eq 1 +.erre bTAG_SNGL eq 0 + setnz cl ;if low half==0 then cl=0 else cl=1 + jmp FldCont + +ZeroQuad: + mov cl,bTAG_ZERO + jmp FldCont + + +;**************** +;Load Temp Real +;**************** + + PrevStackWrap edi,Ld80 ;Tied to PrevStackElem below + +EM_ENTRY eFLD80 +eFLD80: +;This is not considered an "arithmetic" operation (like all the others are), +;so SNANs do NOT cause an exception. However, unsupported formats do.
+ mov EMSEG:[PrevDataOff],esi ;Save operand pointer + PrevStackElem edi,Ld80 ;Point to receiving location + cmp EMSEG:[edi].bTag,bTAG_EMPTY ;Is it empty? + jnz FldErr +LoadTempReal: + mov ebx,dseg:[esi+4] ;Get high half of mantissa + mov cx,dseg:[esi+8] ;Get exponent and sign + mov esi,dseg:[esi] ;Get low half of mantissa + mov eax,ecx + and ch,7FH ;Mask off sign bit + shl ecx,16 ;Move exponent to high end + mov ch,ah ;Restore sign + jz ZeroOrDenorm80 +;Check for unsupported format: unnormals (MSB not set) + or ebx,ebx + jns Unsupported + sub ecx,(IexpBias-TexpBias) shl 16 ;Correct the bias + cmp ecx,TexpMax shl 16 + jge NANorInf80 +SetupTag: + or esi,esi ;Any bits in low half? +.erre bTAG_VALID eq 1 +.erre bTAG_SNGL eq 0 + setnz cl ;if low half==0 then cl=0 else cl=1 + jmp SaveStack + +NANorInf80: + mov cl,bTAG_NAN + cmp ebx,1 shl 31 ;Only 1 bit set means infinity + jnz SaveStack + or esi,esi + jnz SaveStack + mov cl,bTAG_INF + jmp SaveStack + +ZeroOrDenorm80: +;Exponent is zero. Number is either zero or denormalized + or ebx,ebx + jnz ShortNorm80 ;Are top 32 bits zero? + or esi,esi ;Are low 32 bits zero too? + jnz LongNorm80 + mov cl,bTAG_ZERO + jmp SaveStack + +;This code accepts and works correctly with pseudo-denormals (MSB already set) +LongNorm80: + xchg ebx,esi ;Shift up 32 bits + sub ecx,32 shl 16 ;Correct exponent +ShortNorm80: + add ecx,(TexpBias-IexpBias+1-31) shl 16 ;Fix up bias + bsr edx,ebx ;Scan for MSB +;Bit number in edx ranges from 0 to 31 + mov cl,dl + not cl ;Convert bit number to shift count + shld ebx,esi,cl + shl esi,cl + shl edx,16 ;Move exp. 
adjustment to high end + add ecx,edx ;Adjust exponent + jmp SetUpTag diff --git a/private/ntos/dll/i386/emlsbcd.asm b/private/ntos/dll/i386/emlsbcd.asm new file mode 100644 index 000000000..f07d35b1e --- /dev/null +++ b/private/ntos/dll/i386/emlsbcd.asm @@ -0,0 +1,279 @@ + subttl emlsbcd.asm - FBSTP and FBLD instructions + page +;******************************************************************************* +;emlsbcd.asm - FBSTP and FBLD instructions +; +; Microsoft Confidential +; +; Copyright (c) Microsoft Corporation 1991 +; All Rights Reserved +; +;Purpose: +; FBSTP and FBLD instructions. +; +; These routines convert between 64-bit integer and 18-digit packed BCD +; format. They work by splitting the number being converted in half +; and converting the two halves separately. This works well because +; 9 decimal digits fit nicely within 30 binary bits, so conversion of +; each half is strictly a 32-bit operation. +; +;Inputs: +; edi = [CURstk] +; dseg:esi = pointer to memory operand +; +;Revision History: +; +; [] 09/05/91 TP Initial 32-bit version. +; +;******************************************************************************* + + +;****** +eFBLD: +;****** + mov eax,dseg:[esi+5] ;Get high 8 digits + or eax,eax ;Anything there? + jz HighDigitsZero + mov ecx,8 + call ReadDigits ;Convert first 8 digits to binary + mov eax,dseg:[esi+1] ;Get next 8 digits + xor edi,edi + shld edi,eax,4 ;Shift ninth digit into edi + imul ebx,10 + add edi,ebx ;Accumulate ninth digit +SecondNineDigits: + xor ebx,ebx ;In case eax==0 + shl eax,4 ;Keep digits left justified + jz LastTwoDigits + mov ecx,7 + call ReadDigits ;Convert next 7 digits to binary +LastTwoDigits: + mov al,dseg:[esi] ;Get last two digits + shl eax,24 ;Left justify + mov ecx,2 + call InDigitLoop ;Accumulate last two digits +;edi = binary value of high 9 digits +;ebx = binary value of low 9 digits + mov eax,1000000000 ;One billion: shift nine digits left + mul edi ;Left shift 9 digits. 9 cl.
if edi==0 + add ebx,eax ;Add in low digits + adc edx,0 +BcdReadyToNorm: +;edx:ebx = integer converted to binary + mov eax,dseg:[esi+6] ;Get sign to high bit of eax + mov esi,ebx + mov ebx,edx + mov edi,EMSEG:[CURstk] +;mantissa in ebx:esi, sign in high bit of eax +;edi = [CURstk] + jmp NormQuadInt ;in emload.asm + +HighDigitsZero: + mov eax,dseg:[esi+1] ;Get next 8 digits + or eax,eax ;Anything there? + jz CheckLastTwo + xor edi,edi + shld edi,eax,4 ;Shift ninth digit into edi + jmp SecondNineDigits + +CheckLastTwo: + mov bl,dseg:[esi] ;Get last two digits + or bl,bl + jz ZeroBCD + mov al,bl + shr al,4 ;Bring down upper digit + imul eax,10 + and ebx,0FH ;Keep lowest digit only + add ebx,eax + xor edx,edx + jmp BcdReadyToNorm + +ZeroBCD: + mov ecx,bTAG_ZERO ;Exponent is zero + mov ch,dseg:[esi+9] ;Get sign byte to ch + xor ebx,ebx + mov esi,ebx +;mantissa in ebx:esi, exp/sign in ecx +;edi = [CURstk] + jmp FldCont ;in emload.asm + + +;*** ReadDigits +; +;Inputs: +; eax = packed BCD digits, left justified, non-zero +; ecx = no. of digits, 7 or 8 +;Outputs: +; ebx = number + +SkipZeroDigits: + sub ecx,3 + shl eax,12 +ReadDigits: +;We start by scanning off leading zeros. This costs 16 cl./nybble in +;the ScanZero loop. To reduce this cost for many leading zeros, we +;check for three leading zeros at a time. Adding this test saves +;26 cl. for 3 leading zeros, 57 cl. for 6 leading zeros, at a cost +;of only 5 cl. if less than 3 zeros. We choose 3 at a time so we +;can repeat it once (there are never more than 7 zeros). + test eax,0FFF00000H ;Check first 3 nybbles for zero + jz SkipZeroDigits + xor ebx,ebx +ScanZero: +;Note that bsr is 3 cl/bit, or 12 cl/nybble. Add in the overhead and +;this loop of 16 cl/nybble is cheaper for the 1 - 3 digits it does. 
+ dec ecx + shld ebx,eax,4 ;Shift digit into ebx + rol eax,4 ;Left justify **Doesn't affect ZF!** + jz ScanZero ;Skip to next digit if zero + jecxz ReadDigitsX +InDigitLoop: +;eax = digits to convert, left justified +;ebx = result accumulation +;ecx = number of digits to convert + xor edx,edx + shld edx,eax,4 ;Shift digit into edx + shl eax,4 ;Keep digits left justified + imul ebx,10 ;Only 10 clocks on 386! + add ebx,edx ;Accumulate number + dec ecx + jnz InDigitLoop +ReadDigitsX: + ret + +;******************************************************************************* + +ChkInvalidBCD: + ja SetInvalidBCD + cmp edi,0A7640000H ;(1000000000*1000000000) and 0ffffffffh + jb ValidBCD +SetInvalidBCD: + mov EMSEG:[CURerr],Invalid +InvalidBCD: + test EMSEG:[CWmask],Invalid ;Is it masked? + jz ReadDigitsX ;No--leave memory unchanged +;Store Indefinite + mov dword ptr dseg:[esi],0 + mov dword ptr dseg:[esi+4],0 + mov word ptr dseg:[esi+8],-1 ;0FF00000000H for packed BCD indefinite + jmp PopStack ;in emstore.asm + +;****** +eFBSTP: +;****** + call RoundToInteger ;Get integer in ebx:edi, sign in ch + jc InvalidBCD + cmp ebx,0DE0B6B3H ;(1000000000*1000000000) shr 32 + jae ChkInvalidBCD +ValidBCD: + and ch,bSign + mov dseg:[esi+9],ch ;Fill in sign byte + mov edx,ebx + mov eax,edi ;Get number to edx:eax for division + mov ebx,1000000000 + div ebx ;Break into two 9-digit halves + xor ecx,ecx ;Initial digits + mov edi,eax ;Save quotient + mov eax,edx + or eax,eax + jz SaveLowBCD + call WriteDigits + shrd ecx,eax,4 ;Pack 8th digit + xor al,al + shl eax,20 ;Move digit in ah to high end +SaveLowBCD: + mov dseg:[esi],ecx ;Save low 8 digits + mov ecx,eax ;Get ready for next 8 digits + mov eax,edi + or eax,eax + jz ZeroHighBCD + call WriteDigits + shl ah,4 ;Move digit to upper nybble + or al,ah ;Combine last two digits +SaveHighBCD: + mov dseg:[esi+4],ecx ;Save lower 8 digits + mov dseg:[esi+8],al + jmp PopStack + +ZeroHighBCD: + shr ecx,28 ;Position 9th digit + jmp SaveHighBCD + + +;*** 
WriteDigits +; +;Inputs: +; eax = binary number < 1,000,000,000 and > 0 +; ecx = Zero or had one BCD digit left justified +;Purpose: +; Convert binary integer to BCD. +; +; The time required for the DIV instruction is dependent on operand +; size, at 6 + (no. of bits) clocks for 386. (In contrast, multiply +; by 10 as used in FBLD/ReadDigits above takes the same amount of +; time regardless of operand size--only 10 clocks.) +; +; The easy way to do this conversion would be to repeatedly do a +; 32-bit division by 10 (at 38 clocks/divide). Instead, the number +; is broken down so that mostly 8-bit division is used (only 14 clocks). +; AAM (17 clocks) is also used to save us from having to load the +; constant 10 and zero ah. AAM is faster than DIV on the 486sx. +; +;Outputs: +; ecx has seven more digits packed into it (from left) +; ah:al = most significant two digits (unpacked) +;esi,edi preserved + +WriteDigits: +;eax = binary number < 1,000,000,000 + cdq ;Zero edx + mov ebx,10000 + div ebx ;Break into 4-digit and 5-digit pieces + mov bl,100 + or edx,edx + jz ZeroLowDigits + xchg edx,eax ;Get 4-digit remainder to eax +;Compute low 4 digits +; 0 < eax < 10000 + div bl ;Get two 2-digit pieces. 14cl on 386 + mov bh,al ;Save high 2 digits + mov al,ah ;Get low digits + aam + shl ah,4 ;Move digit to upper nybble + or al,ah + shrd ecx,eax,8 + mov al,bh ;Get high 2 digits + aam + shl ah,4 ;Move digit to upper nybble + or al,ah + shrd ecx,eax,8 +;Compute high 5 digits + mov eax,edx ;5-digit quotient to eax + or eax,eax + jz ZeroHighDigits +ConvHigh5: + cdq ;Zero edx + shld edx,eax,16 ;Put quotient in dx:ax + xor bh,bh ;bx = 100 + div bx ;Get 2- and 3-digit pieces. 
22cl on 386 + xchg edx,eax ;Save high 3 digits, get low 2 digits + aam + shl ah,4 ;Move digit to upper nybble + or al,ah + shrd ecx,eax,8 + mov eax,edx ;Get high 3 digits + mov bl,10 + div bl + mov bl,ah ;Remainder is next digit + shrd ecx,ebx,4 + aam ;Get last two digits +;Last two digits in ah:al + ret + +ZeroLowDigits: + shr ecx,16 + jmp ConvHigh5 + +ZeroHighDigits: + shr ecx,12 + ret diff --git a/private/ntos/dll/i386/emlsenv.asm b/private/ntos/dll/i386/emlsenv.asm new file mode 100644 index 000000000..a3b725d9d --- /dev/null +++ b/private/ntos/dll/i386/emlsenv.asm @@ -0,0 +1,457 @@ + subttl emlsenv.asm - Emulator Save/Restore + page +;*** +;emlsenv.asm - Emulator Save/Restore +; +; +; Copyright (c) Microsoft Corporation 1991 +; +; All Rights Reserved +; +;Purpose: +; FLDCW, FSTCW, FSTSW, FSTENV, FLDENV, FSAVE, FRSTOR instructions +; +;Revision History: +; +; [] 09/05/91 TP Initial 32-bit version. +; +; +;******************************************************************************* + + +;When setting the control word, the [RoundMode] vector must be set +;according to the rounding and precision modes.
+ +tRoundMode label dword + irp RC,<near,down,up,chop> + irp PC,<24,24,53,64> + dd Round&&PC&&RC + endm + endm + + +EM_ENTRY eFLDCW +eFLDCW: +;Uses only eax and ebx + mov ax, dseg:[esi] ; Fetch control word from user memory +SetControlWord: + and ax,0F3FH ; Limit to valid values + mov EMSEG:[ControlWord], ax ; Store in the emulated control word + not al ;Flip mask bits for fast compare + and al,3FH ;Limit to valid mask bits + mov EMSEG:[ErrMask],al + and eax,(RoundControl + PrecisionControl) shl 8 +.erre RoundControl eq 1100B +.erre PrecisionControl eq 0011B + shr eax,6 ;Put PC and RC in bits 2-5 + mov ebx,tRoundMode[eax] ;Get correct RoundMode vector + mov EMSEG:[RoundMode],ebx + mov EMSEG:[SavedRoundMode],ebx + and eax,RoundControl shl (8-6) ;Mask off precision control + mov ebx,tRoundMode[eax+PC64 shl (8-6)];Get correct RoundMode vector + mov EMSEG:[TransRound],ebx ;Round mode w/o precision + ret + + +EM_ENTRY eFSTCW +eFSTCW: +;Uses only eax + mov ax, EMSEG:[ControlWord] ; Fetch user control word + mov dseg:[esi], ax ; Store into user memory + ret + + +EM_ENTRY eFSTSW +eFSTSW: +;Uses only eax and ebx + call GetStatusWord ; Fetch emulated Status word + mov dseg:[esi], ax ; Store into user memory + ret + + +eFSTSWax: +;Uses only eax and ebx + call GetStatusWord ; Fetch emulated Status word + mov [esp+4].regAX,ax + ret + + +EM_ENTRY eFDECSTP +eFDECSTP: +;edi = [CURstk] + cmp edi,BEGstk + jbe DecWrap + sub EMSEG:[CURstk],Reg87Len + ret + +DecWrap: + mov EMSEG:[CURstk],INITstk + ret + + +EM_ENTRY eFINCSTP +eFINCSTP: +;edi = [CURstk] + cmp edi,INITstk + jae IncWrap + add EMSEG:[CURstk],Reg87Len + ret + +IncWrap: + mov EMSEG:[CURstk],BEGstk + ret + + +eFCLEX: + mov EMSEG:[SWerr],0 + and [esp+4].OldLongStatus,0FFFF00FFH ; clear saved SWerr + ret + + +;*** eFSTENV - emulate FSTENV [address] +; +; ARGUMENTS +; dseg:esi = where to store environment +; +; +; DESCRIPTION +; This routine emulates an 80387 FSTENV (store environment) +; + +EM_ENTRY eFSTENV +eFSTENV: + mov 
ax,[esp+4].OldStatus + mov EMSEG:[StatusWord],ax +SaveEnv: + xor ax,ax + mov dseg:[esi.reserved1],ax + mov dseg:[esi.reserved2],ax + mov dseg:[esi.reserved3],ax + mov dseg:[esi.reserved4],ax + mov dseg:[esi.reserved5],ax + mov ax,EMSEG:[ControlWord] + mov dseg:[esi.E32_ControlWord],ax + call GetEMSEGStatusWord + mov dseg:[esi.E32_StatusWord],ax + call GetTagWord + mov dseg:[esi.E32_TagWord],ax + mov ax,cs + mov dseg:[esi.E32_CodeSeg],ax + mov ax,ss + mov dseg:[esi.E32_DataSeg],ax + mov eax,EMSEG:[PrevCodeOff] + mov dseg:[esi.E32_CodeOff],eax + mov eax,EMSEG:[PrevDataOff] + mov dseg:[esi.E32_DataOff],eax + mov EMSEG:[CWmask],03FH ;Set all mask bits + mov EMSEG:[ErrMask],0 + ret + + +;*** eFSAVE - emulate FSAVE [address] +; +; ARGUMENTS +; dseg:esi = where to store environment +; +; +; DESCRIPTION +; This routine emulates an 80387 FSAVE (store environment) +; Once the data is stored an finit is executed. +; +; REGISTERS +; destroys ALL. + +EM_ENTRY eFSAVE +eFSAVE: + mov ax,[esp+4].OldStatus + mov EMSEG:[StatusWord],ax + mov eax,[esp+4].OldCodeOff + mov EMSEG:[PrevCodeOff],eax + push offset eFINIT ; After fsave we must do a finit +SaveState: ; Enter here for debugger save state + call SaveEnv + add esi,size Env80x87_32 ;Skip over environment + mov ebp,NumLev ;Save entire stack + mov edi,EMSEG:[CURstk] +FsaveStoreLoop: + mov eax,EMSEG:[edi].ExpSgn + call StoreTempReal ;in emstore.asm + add esi,10 + + mov edi,EMSEG:[CURstk] + NextStackElem edi,FSave + mov EMSEG:[CURstk],edi + + dec ebp + jnz FsaveStoreLoop + ret + +WrapFSave: ; tied to NextStackElem above + mov edi, BEGstk + mov EMSEG:[CURstk],edi + dec ebp + jnz FsaveStoreLoop + ret + + +;*** eFRSTOR - emulate FRSTOR [address] +; +; ARGUMENTS +; dseg:esi = where to get the environment +; +; DESCRIPTION +; This routine emulates an 80387 FRSTOR (restore state) + + NextStackWrap edi,Frstor + +EM_ENTRY eFRSTOR +eFRSTOR: +;First we set up the status word so that [CURstk] is initialized. 
+;The floating-point registers are stored in logical ST(0) - ST(7) order, +;not physical register order. We don't do a full load of the environment +;because we're not ready to use the tag word yet. + + and [esp+4].[OldLongStatus], NOT(LongSavedFlags) ;clear saved codes, errs + mov ax, dseg:[esi.E32_StatusWord] + call SetEmStatusWord ;Initialize [CURstk] + add esi,size Env80x87_32 ;Skip over environment + +;Load of temp real has one difference from real math chip: it is an invalid +;operation to load an unsupported format. By ensuring the exception is +;masked, we will convert unsupported format to Indefinite. Note that the +;mask and [CURerr] will be completely restored by the FLDENV at the end. + + mov EMSEG:[CWmask],3FH ;Mask off invalid operation exception + mov edi,EMSEG:[CURstk] + mov ebp,NumLev +FrstorLoadLoop: + push esi + call LoadTempReal ;In emload.asm + pop esi + add esi,10 ;Point to next temp real + NextStackElem edi,Frstor + dec ebp + jnz FrstorLoadLoop + sub esi,NumLev*10+size Env80x87_32 ;Point to start of env. + jmp eFLDENV ;Continue in eFLDENV to load the environment + + +;*** eFLDENV - emulate FLDENV [address] +; +; ARGUMENTS +; dseg:esi = where to load the environment from +; +; This routine emulates an 80387 FLDENV (load environment) + +EM_ENTRY eFLDENV +eFLDENV: + and [esp+4].[OldLongStatus], NOT(LongSavedFlags) ;clear saved codes, errs + mov ax, dseg:[esi.E32_StatusWord] + call SetEmStatusWord ; set up status word + mov ax, dseg:[esi.E32_ControlWord] + call SetControlWord + mov ax, dseg:[esi.E32_TagWord] + call UseTagWord + mov eax, dseg:[esi.E32_CodeOff] + mov EMSEG:[PrevCodeOff], eax + mov eax, dseg:[esi.E32_DataOff] + mov EMSEG:[PrevDataOff], eax + ret + + +;*** GetTagWord - figures out what the tag word is from the numeric stack +; and returns the value of the tag word in ax. +; + +GetTagWord: + push esi + xor eax, eax + mov ecx, NumLev ; get tags for regs. 0, 7 - 1 + mov esi,INITstk +GetTagLoop: + mov bh, EMSEG:[esi.bTag] ; The top 2 bits of Tag are the X87 tag bits.
+ shld ax, bx, 2 + sub esi, Reg87Len + loop GetTagLoop + rol ax, 2 ; This moves Tag(0) into the low 2 bits + pop esi + ret + + +;*** UseTagWord - Set up tags using tag word from environment +; +; ARGUMENTS +; ax - should contain the tag word +; +; Destroys ax,bx,cx,dx,di + +UseTagWord: + ror ax, 2 ; mov Tag(0) into top bits of ax + mov edi,INITstk + mov ecx, NumLev +UseTagLoop: + mov dl,bTAG_EMPTY + cmp ah, 0c0h ;Is register to be tagged Empty? + jae SetTag ;Yes, go mark it + mov dl,EMSEG:[edi].bTag ;Get current tag + cmp dl,bTAG_EMPTY ;Is register currently Empty? + je SetTagNotEmpty ;If so, go figure out tag for it +SetTag: + mov EMSEG:[edi].bTag,dl +UseTagLoopCheck: + sub edi, Reg87Len + shl eax, 2 + loop UseTagLoop + ret + +SetTagEmpty: + mov EMSEG:[edi.bTag], bTAG_EMPTY + jmp UseTagLoopCheck + +SetTagNotEmpty: +;Register is currently tagged empty, but new tag word says it is not empty. +;Figure out a new tag for it. The rules are: +; +;1. Everything is either normalized or zero--unnormalized formats cannot +;get in. So if the high half mantissa is zero, the number is zero. +; +;2. Although the exponent bias is different, NANs and Infinities are in +;standard IEEE format - exponent is TexpMax, mantissa indicates NAN vs. +;infinity (mantissa for infinity is 800..000H). +; +;3. Denormals have an exponent less than TexpMin. +; +;4. If the low half of the mantissa is zero, it is tagged bTAG_SNGL +; +;5. Everything else is bTAG_VALID + + mov ebx,EMSEG:[edi].lManHi + mov dl,bTAG_ZERO ;Try zero first + or ebx,ebx ;Is mantissa zero? + jz SetTag + mov edx,EMSEG:[edi].ExpSgn + mov dl,bTAG_DEN + cmp edx,TexpMin shl 16 ;Is it denormal? + jl SetTag + cmp EMSEG:[edi].lManLo,0 ;Is low half zero? +.erre bTAG_VALID eq 1 +.erre bTAG_SNGL eq 0 + setnz dl ;if low half==0 then dl=0 else dl=1 + cmp edx,TexpMax shl 16 ;Is it NAN or Infinity? 
+ jl SetTag ;If not, it's valid +.erre (bTAG_VALID - bTAG_SNGL) shl TAG_SHIFT eq (bTAG_NAN - bTAG_INF) + shl dl,TAG_SHIFT + add dl,bTAG_INF - bTAG_SNGL +;If the low bits were zero we have just changed bTAG_SNGL to bTAG_INF +;If the low bits weren't zero, we changed bTAG_VALID to bTAG_NAN +;See if infinity is really possible: is high half 80..00H? + cmp ebx,1 shl 31 ;Is it infinity? + jz SetTag ;Store tag for infinity or NAN + mov dl,bTAG_NAN + jmp SetTag + + +;*** GetStatusWord - +; +; User status word returned in ax. +; Destroys ebx only. + +GetStatusWord: + mov eax, EMSEG:[CURstk] + sub eax, BEGstk + mov bl,Reg87Len + div bl ;al = ax / Reg87Len (ah = remainder) + inc eax ; adjust for emulator's stack layout + and eax, 7 ; eax is now the stack number + shl ax, 11 + or ax,[esp+8].OldStatus ; or in the rest of the status word. + ret + + +;*** GetEMSEGStatusWord - +; +; User status word returned in ax. +; Destroys ebx only. +; Uses status word in per-thread data area, otherwise +; identical to GetStatusWord + +EM_ENTRY eGetStatusWord +GetEMSEGStatusWord: + mov eax, EMSEG:[CURstk] + sub eax, BEGstk + mov bl,Reg87Len + div bl ;al = ax / Reg87Len (ah = remainder) + inc eax ; adjust for emulator's stack layout + and eax, 7 ; eax is now the stack number + shl ax, 11 + or ax, EMSEG:[StatusWord] ; or in the rest of the status word. + ret + + +;*** SetEmStatusWord - +; +; Given user status word in ax, set into emulator. +; Destroys eax and ebx (eax ends up holding the new [CURstk]). + + +SetEmStatusWord: + and ax,7F7FH + mov bx,ax + and bx,3FH ; set up CURerr in case user + mov EMSEG:[CURerr],bl ; wants to force an exception + mov ebx, eax + and ebx, not (7 shl 11) ; remove stack field.
+ mov EMSEG:[StatusWord], bx + + sub ah, 8 ; adjust for emulator's stack layout + and ah, 7 shl 3 + mov al, ah + shr ah, 1 + add al, ah ; stack field * 3 * 4 +.erre Reg87Len eq 12 + and eax, 255 ; eax is now 12*stack number + add eax, BEGstk + mov EMSEG:[CURstk], eax + ret + + +;*** _SaveEm87Context - save emulator state into the caller's buffer +; +; Stack argument at [ebp+8] = address of the save area (loaded into esi). +; Calls SaveState, then returns in eax: Em87Idle if [CURErr] has the +; Summary bit set, otherwise Em87Busy. +; SaveState uses ebp as its loop counter; ebp is restored by the pop below. +; Returns with a plain ret, so the argument is left for the caller to remove. + +public _SaveEm87Context +_SaveEm87Context PROC + + push ebp + mov ebp, esp + push ebx + push edi + push esi + mov esi, [ebp+8] + call SaveState + test EMSEG:[CURErr], Summary + jne RetSaveEmIdle + mov eax, Em87Busy + jmp RetSaveEm +RetSaveEmIdle: + mov eax, Em87Idle +RetSaveEm: + pop esi + pop edi + pop ebx + pop ebp + ret +_SaveEm87Context ENDP + + +;*** _RestoreEm87Context - reload emulator state from the caller's buffer +; +; Stack argument at [ebp+8] = address of the saved state (loaded into esi), +; which is handed to eFRSTOR. +; NOTE(review): eFRSTOR and eFLDENV begin by updating +; [esp+4].OldLongStatus, i.e. they address a frame just above their +; return address. On this call path that location starts at this +; routine's own pushed registers -- confirm this is safe. + +public _RestoreEm87Context +_RestoreEm87Context PROC + push ebp + mov ebp, esp + push ebx + push edi + push esi + mov esi, [ebp+8] + call eFRSTOR + pop esi + pop edi + pop ebx + pop ebp + ret +_RestoreEm87Context ENDP diff --git a/private/ntos/dll/i386/emround.asm b/private/ntos/dll/i386/emround.asm new file mode 100644 index 000000000..34704c5bd --- /dev/null +++ b/private/ntos/dll/i386/emround.asm @@ -0,0 +1,712 @@ + subttl emround.asm - Rounding and Precision Control and FRNDINT + page +;******************************************************************************* +;emround.asm - Rounding and Precision Control +; +; Microsoft Confidential +; +; Copyright (c) Microsoft Corporation 1991 +; All Rights Reserved +; +;Purpose: +; Rounding and precision control. The correct routine is jumped +; to through the [RoundMode] vector. +; +;Revision History: +; +; [] 09/05/91 TP Initial 32-bit version. +; 02/28/92 JWM Minor bug fix in NotNearLow +; +;******************************************************************************* + + +RndIntSpcl: + cmp cl,bTAG_INF + jz RndIntX ;Leave infinity unchanged + cmp cl,bTAG_DEN + jnz SpclDestNotDen ;Handle NAN & empty - in emarith.asm +;Handle denormal + mov EMSEG:[CURerr],Denormal + test EMSEG:[CWmask],Denormal ;Is it masked?
+ jnz NormRndInt ;If so, ignore denormalization +RndIntX: + ret + +;******** +EM_ENTRY eFRNDINT +eFRNDINT: +;******** +;edi points to top of stack + mov ecx,EMSEG:[edi].ExpSgn + cmp cl,bTAG_ZERO +.erre bTAG_VALID lt bTAG_ZERO +.erre bTAG_SNGL lt bTAG_ZERO + jz RndIntX + ja RndIntSpcl + cmp ecx,63 shl 16 ;Is it already integer? + jge RndIntX +NormRndInt: + mov ebx,EMSEG:[edi].lManHi + mov esi,EMSEG:[edi].lManLo + mov EMSEG:[Result],edi ;Save result pointer + xor eax,eax ;Extend mantissa + push offset SaveResult + jmp RoundToBit + +;******************************************************************************* + +ResultOverflow: +;mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl. +;We were all ready to save the rounded result, but the exponent turned out +;to be too large. + or EMSEG:[CURerr],Overflow + sub ecx,UnderBias shl 16 ;Unmasked response + test EMSEG:[CWmask],Overflow ;Is exception unmasked? + jz SaveResult ;If so, we're ready +;Produce masked overflow response + mov ebx,1 shl 31 ;Assume infinity + xor esi,esi + mov cl,bTAG_INF + mov al,EMSEG:[CWcntl] ;Get rounding control + mov ah,al + and ah,RCchop ;Rounding control only +;Return max value if RCup bit = 1 and -, or RCdown bit = 1 and + +;i.e., RCup & sign OR RCdown & not sign +.erre RCchop eq RCup + RCdown ;Always return max value +.erre RCnear eq 0 ;Never return max value + sar ch,7 ;Expand sign through whole byte +.erre (RCdown and bSign) eq 0 ;Don't want to change real sign + xor ch,RCdown ;Flip sign for RCdown bit + and ah,ch ;RCup & sign OR RCdown & not sign + jnz SaveMax + and ecx,0FFFFH + or ecx,TexpMax shl 16 + jmp SaveResult ;Save Infinity +SaveMax: +;Get max value for current precision + mov ebx,0FFFFFF00H ;Max value for 24 bits + and ecx,bSign shl 8 ;Preserve only sign + or ecx,(IexpMax-IexpBias-1) shl 16 + bTAG_VALID ;Set up max value + and al,PrecisionControl +.erre PC24 eq 0 + jz SaveResult ;Save 24-bit max value + dec esi ;esi == -1 + mov ebx,esi + cmp al,PC53 + jnz 
SaveResult ;Save 64-bit max value + mov esi,0FFFFF800H + jmp SaveResult ;Save 53-bit max value + +;******************************************************************************* +; +;64-bit rounding routines +; + +;*********** +Round64down: +;*********** + cmp ecx,(IexpMin-IexpBias+1) shl 16 ;Test for Underflow + jl RndDenorm64 + or eax,eax ;Exact result? + jz SaveValidResult + or EMSEG:[CURerr],Precision ;Set flag on inexact result +;Chop if positive, increase mantissa if negative + test ch,bSign + jz SaveValidResult ;Positive, so chop + jmp RoundUp64 ;Round up if negative + +RndDenorm64: + test EMSEG:[CWmask],Underflow ;Is exception unmasked? + jz RndSetUnder +Denormalize: +;We don't really store in denormalized format, but we need the number +;to be rounded as if we do. If the exponent were -IexpBias, we would +;lose 1 bit of precision; as it gets more negative, we lose more bits. +;We'll do this by adjusting the exponent so that the bits we want to +;keep look like integer bits, and performing round-to-integer. + add ecx,(IexpBias+62) shl 16 ;Adjust exponent so we're integer + call RoundToBit +;Set underflow exception if precision exception is set + mov al,EMSEG:[CURerr] + and al,Precision + ror al,Precision-Underflow ;Move Precision bit to Underflow pos. + or EMSEG:[CURerr],al ;Signal Underflow if inexact + cmp cl,bTAG_ZERO + jz SaveResult + sub ecx,(IexpBias+62) shl 16;Restore unbiased exponent + cmp ecx,TexpMin shl 16 ;Did we round out of denorm? + jae SaveResult + mov cl,bTAG_DEN + jmp SaveResult + +RndSetUnder: +;Underflow exception not masked. Adjust exponent and try again. + or EMSEG:[CURerr],Underflow + add ecx,UnderBias shl 16 + jmp EMSEG:[RoundMode] ;Try again with revised exponent + +;*********** +Round64near: +;*********** +;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7 + cmp ecx,TexpMin shl 16 ;Test for Underflow + jl RndDenorm64 + or eax,eax ;Exact result? 
+ jz short SaveValidResult + or EMSEG:[CURerr],Precision ;Set flag on inexact result + +;To perform "round even" when the round bit is set and the sticky bits +;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB +;is set, that will always force a round up (to even) if the round bit is +;set. If the LSB is zero, then the sticky bits remain zero and we always +;round down. This rounding rule is implemented by adding RoundBit-1 +;(7F..FFH), setting CY if round up. + + bt esi,0 ;Is mantissa even or odd? (set CY) + adc eax,(1 shl 31)-1 ;Sum LSB & sticky bits--CY if round up + jnc SaveValidResult +RoundUp64: + mov EMSEG:[SWcc],RoundUp + add esi,1 + adc ebx,0 + jc BumpExponent ;Overflowed, increment exponent + +SaveValidResult: ;A jump to here requires 9 clocks + or esi,esi ;Any bits in low half? +.erre bTAG_VALID eq 1 +.erre bTAG_SNGL eq 0 + setnz cl ;if low half==0 then cl=0 else cl=1 + cmp ecx,TexpMax shl 16 ;Test for overflow + jge ResultOverflow + +SaveResult: ;A jump to here requires 10 clocks +;mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl + mov edi,EMSEG:[Result] +SaveResultEdi: + mov EMSEG:[edi].lManLo,esi + mov EMSEG:[edi].lManHi,ebx +SaveExpSgn: + mov EMSEG:[edi].ExpSgn,ecx + ret + +;*********** +Round64up: +;*********** + cmp ecx,TexpMin shl 16 ;Test for Underflow + jl RndDenorm64 + or eax,eax ;Exact result? + jz short SaveValidResult + or EMSEG:[CURerr],Precision;Set flag on inexact result +;Chop if negative, increase mantissa if positive + cmp ch,bSign ;No CY iff sign bit is set + jc RoundUp64 ;Round up if positive + jmp short SaveValidResult + +;*********** +Round64chop: +;*********** + cmp ecx,TexpMin shl 16 ;Test for Underflow + jl RndDenorm64 + or eax,eax ;Exact result? 
+ jz short SaveValidResult + or EMSEG:[CURerr],Precision;Set flag on inexact result + jmp short SaveValidResult + +;******************************************************************************* +; +;53-bit rounding routines +; + +;*********** +Round53down: +;*********** + cmp ecx,TexpMin shl 16 ;Test for Underflow + jl RndDenorm53 + mov edx,esi ;Get low bits + and edx,(1 shl 11) - 1 ;Mask to last 11 bits + or edx,eax ;Throwing away any bits? + jz SaveValidResult + or EMSEG:[CURerr],Precision;Set flag on inexact result +;Chop if positive, increase mantissa if negative + and esi,not ((1 shl 11)-1) ;Mask off low 11 bits + test ch,bSign + jz SaveValidResult ;Positive, go chop + jmp RoundUp53 + +RndDenorm53: + test EMSEG:[CWmask],Underflow;Is exception unmasked? + jz RndSetUnder +;We don't really store in denormalized format, but we need the number +;to be rounded as if we do. If the exponent were -IexpBias, we would +;lose 1 bit of precision; as it gets more negative, we lose more bits. +;We'll do this by adjusting the exponent so that the bits we want to +;keep look like integer bits, and performing round-to-integer. + add ecx,(IexpBias+51) shl 16 ;Adjust exponent so we're integer + call RoundToBit +;Set underflow exception if precision exception is set + mov al,EMSEG:[CURerr] + and al,Precision + ror al,Precision-Underflow ;Move Precision bit to Underflow pos. + or EMSEG:[CURerr],al ;Signal Underflow if inexact + cmp cl,bTAG_ZERO + jz SaveResult + sub ecx,(IexpBias+51) shl 16;Restore unbiased exponent + cmp ecx,(IexpMin-IexpBias+1) shl 16 ;Did we round out of denorm? + jae SaveResult + mov cl,bTAG_DEN + jmp SaveResult + +;*********** +Round53near: +;*********** +;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7 + cmp ecx,TexpMin shl 16 ;Test for Underflow + jl RndDenorm53 + mov edx,esi ;Get low bits + and edx,(1 shl 11) - 1 ;Mask to last 11 bits + or edx,eax ;Throwing away any bits? 
+ jz SaveValidResult + or EMSEG:[CURerr],Precision;Set flag on inexact result + +;To perform "round even" when the round bit is set and the sticky bits +;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB +;is set, that will always force a round up (to even) if the round bit is +;set. If the LSB is zero, then the sticky bits remain zero and we always +;round down. + + mov edx,esi + and esi,not ((1 shl 11)-1) ;Mask off low 11 bits + test edx,1 shl 10 ;Is round bit set? + jz SaveValidResult + and edx,(3 shl 10)-1 ;Keep only sticky bits and LSB + or eax,edx ;Combine with other sticky bits + jz SaveValidResult +RoundUp53: + mov EMSEG:[SWcc],RoundUp + add esi,1 shl 11 ;Round + adc ebx,0 + jnc SaveValidResult +BumpExponent: + add ecx,1 shl 16 ;Mantissa overflowed, bump exponent + or ebx,1 shl 31 ;Set MSB + jmp SaveValidResult + +;*********** +Round53up: +;*********** + cmp ecx,TexpMin shl 16 ;Test for Underflow + jl RndDenorm53 + mov edx,esi ;Get low bits + and edx,(1 shl 11) - 1 ;Mask to last 11 bits + or edx,eax ;Throwing away any bits? + jz SaveValidResult + or EMSEG:[CURerr],Precision;Set flag on inexact result +;Chop if negative, increase mantissa if positive + and esi,not ((1 shl 11)-1) ;Mask off low 11 bits + test ch,bSign + jz RoundUp53 ;Round up if positive + jmp SaveValidResult + +;*********** +Round53chop: +;*********** + cmp ecx,TexpMin shl 16 ;Test for Underflow + jl RndDenorm53 + mov edx,esi ;Get low bits + and edx,(1 shl 11) - 1 ;Mask to last 11 bits + or edx,eax ;Throwing away any bits? 
+ jz SaveValidResult + or EMSEG:[CURerr],Precision;Set flag on inexact result + and esi,not ((1 shl 11)-1) ;Mask off low 11 bits + jmp SaveValidResult + +;******************************************************************************* +; +;24-bit rounding routines +; + +;*********** +Round24down: +;*********** + cmp ecx,TexpMin shl 16 ;Test for Underflow + jl RndDenorm24 + or eax,esi ;Low dword is just sticky bits + mov edx,ebx ;Get low bits + and edx,(1 shl 8) - 1 ;Mask to last 8 bits + or edx,eax ;Throwing away any bits? + jz SaveValidResult + or EMSEG:[CURerr],Precision;Set flag on inexact result +;Chop if positive, increase mantissa if negative + xor esi,esi + and ebx,not ((1 shl 8)-1) ;Mask off low 8 bits + test ch,bSign + jz SaveValidResult ;Chop if positive + jmp RoundUp24 + +RndDenorm24: + test EMSEG:[CWmask],Underflow;Is exception unmasked? + jz RndSetUnder +;We don't really store in denormalized format, but we need the number +;to be rounded as if we do. If the exponent were -IexpBias, we would +;lose 1 bit of precision; as it gets more negative, we lose more bits. +;We'll do this by adjusting the exponent so that the bits we want to +;keep look like integer bits, and performing round-to-integer. + add ecx,(IexpBias+22) shl 16 ;Adjust exponent so we're integer + call RoundToBit +;Set underflow exception if precision exception is set + mov al,EMSEG:[CURerr] + and al,Precision + ror al,Precision-Underflow ;Move Precision bit to Underflow pos. + or EMSEG:[CURerr],al ;Signal Underflow if inexact + cmp cl,bTAG_ZERO + jz SaveResult + sub ecx,(IexpBias+22) shl 16;Restore unbiased exponent + cmp ecx,(IexpMin-IexpBias+1) shl 16 ;Did we round out of denorm? 
+ jae SaveResult + mov cl,bTAG_DEN + jmp SaveResult + +;*********** +Round24near: +;*********** +;mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7 + cmp ecx,TexpMin shl 16 ;Test for Underflow + jl RndDenorm24 + or eax,esi ;Low dword is just sticky bits + mov edx,ebx ;Get low bits + and edx,(1 shl 8) - 1 ;Mask to last 8 bits + or edx,eax ;Throwing away any bits? + jz SaveValidResult + or EMSEG:[CURerr],Precision;Set flag on inexact result + xor esi,esi + +;To perform "round even" when the round bit is set and the sticky bits +;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB +;is set, that will always force a round up (to even) if the round bit is +;set. If the LSB is zero, then the sticky bits remain zero and we always +;round down. + + mov edx,ebx + and ebx,not ((1 shl 8)-1) ;Mask off low 8 bits + test dl,1 shl 7 ;Round bit set? + jz SaveValidResult + and edx,(3 shl 7)-1 ;Mask to LSB and sticky bits + or eax,edx ;Combine all sticky bits + jz SaveValidResult +RoundUp24: + mov EMSEG:[SWcc],RoundUp + add ebx,1 shl 8 + jnc SaveValidResult + jmp BumpExponent ;Overflowed, increment exponent + +;*********** +Round24up: +;*********** + cmp ecx,TexpMin shl 16 ;Test for Underflow + jl RndDenorm24 + or eax,esi ;Low dword is just sticky bits + mov edx,ebx ;Get low bits + and edx,(1 shl 8) - 1 ;Mask to last 8 bits + or edx,eax ;Throwing away any bits? + jz SaveValidResult + or EMSEG:[CURerr],Precision;Set flag on inexact result +;Chop if negative, increase mantissa if positive + xor esi,esi + and ebx,not ((1 shl 8)-1) ;Mask off low 8 bits + test ch,bSign + jz RoundUp24 ;Round up if positive + jmp SaveValidResult + +;*********** +Round24chop: +;*********** + cmp ecx,TexpMin shl 16 ;Test for Underflow + jl RndDenorm24 + or eax,esi ;Low dword is just sticky bits + mov edx,ebx ;Get low bits + and edx,(1 shl 8) - 1 ;Mask to last 8 bits + or edx,eax ;Throwing away any bits? 
+ jz SaveValidResult + or EMSEG:[CURerr],Precision;Set flag on inexact result + xor esi,esi + and ebx,not ((1 shl 8)-1) ;Mask off low 8 bits + jmp SaveValidResult + +;******************************************************************************* + +;*** RoundToInteger +; +;This routine is used by FISTP Int64 and BSTP. Unlike RoundToBit, this +;unnormalizes the number into a 64-bit integer. +; +;Inputs: +; edi = pointer to number to round in stack +;Outputs: +; CY set if invalid operation +; ebx:edi = rounded integer if CY clear +; ch = sign if CY clear +;Note: +; FIST/FISTP/BSTP exception rules are used: If the number is too big, +; Invalid Operation occurs. Denormals are ignored. +; +;esi preserved + +RoundSpcl64Int: + cmp cl,bTAG_DEN + jz NormRound64Int ;Ignore denormal + cmp cl,bTAG_EMPTY + jnz RoundInvalid ;All other specials are invalid + mov EMSEG:[CURerr],StackFlag+Invalid + stc ;Flag exception to caller + ret + +RoundInvalid: +;Overflow on integer store is invalid according to IEEE + mov EMSEG:[CURerr],Invalid + stc ;Flag exception to caller + ret + +RoundToInteger: + mov ebx,EMSEG:[edi].lManHi + mov ecx,EMSEG:[edi].ExpSgn + mov edi,EMSEG:[edi].lManLo +;mantissa in ebx:edi, exponent in high ecx, sign in ch bit 7, tag in cl + mov al,ch ;Save sign bit + cmp cl,bTAG_ZERO +.erre bTAG_VALID lt bTAG_ZERO +.erre bTAG_SNGL lt bTAG_ZERO + jz RoundIntX ;Just return zero + ja RoundSpcl64Int +NormRound64Int: + xor edx,edx + sar ecx,16 ;Bring exponent down + cmp ecx,-1 ;Is it less than 1? + jle Under64Int + cmp ecx,63 + jg RoundInvalid + sub ecx,63 + neg ecx ;cl = amount to shift right + mov ch,al ;Get sign out of al + xor eax,eax + cmp cl,32 ;Too big for one shift? + jl ShortShft64 +;32-bit shift right + xchg edx,edi + xchg ebx,edi ;ebx=0 now + shrd eax,edx,cl +;Max total shift is 63 bits, so we know that the LSB of eax is still zero. 
+;We can rotate this zero to the MSB so the sticky bits in eax can be combined +;with those in edx without affecting the rounding bit in the MSB of edx. + ror eax,1 ;MSB is now zero +ShortShft64: +;Shift count in cl is modulo-32 + shrd edx,edi,cl + shrd edi,ebx,cl + shr ebx,cl + or edx,eax ;Collapse sticky bits into one dword + jz RoundIntX ;No sticky or round bits, so don't round +;Result will not be exact--check rounding mode +Round64Int: + mov EMSEG:[CURerr],Precision;Set flag on inexact result + test EMSEG:[CWcntl],RoundControl ;Check rounding control bits +.erre RCnear eq 0 + jnz NotNearest64Int ;Not just round-to-nearest + +;To perform "round even" when the round bit is set and the sticky bits +;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB +;is set, that will always force a round up (to even) if the round bit is +;set. If the LSB is zero, then the sticky bits remain zero and we always +;round down. + + bt edi,0 ;Look at LSB (for round even) + adc edx,(1 shl 31)-1 ;CY set if round up + jnc RoundIntX + mov EMSEG:[SWcc],RoundUp + add edi,1 ;Round + adc ebx,0 + jc RoundInvalid +RoundIntX: + ret ;CY clear, no Invalid exception + +Shift64Round: + or edi,edi + setnz dl ;Set sticky bit in edx + xor edi,edi ;Mantissa is all zero + jmp Round64Int + +Under64Int: +;ZF set if exponent is -1 + xchg ebx,edx ;64-bit right shift + mov ch,al ;Restore sign to ch + jz Shift64Round ;Exp. is -1, could need to round up + xor edi,edi ;Mantissa is all zero + mov EMSEG:[CURerr],Precision;Set flag on inexact result +NotNearest64Int: +;We want to increase the magnitude if RCup and +, or RCdown and - + mov al,EMSEG:[CWcntl] ;Get rounding control +.erre (not RCup and RoundControl) eq RCdown + sar ch,7 ;Expand sign through whole byte + xor al,ch ;Flip round mode if - + and al,RoundControl + cmp al,RCup ;Rounding up? 
+ jnz RoundIntOk ;No, chop it + mov EMSEG:[SWcc],RoundUp + add edi,1 + adc ebx,0 + jc RoundInvalid +RoundIntOk: + clc + ret + +;******************************************************************************* + +;*** RoundToBit +; +;This is a relatively low performance routine used by FRNDINT and to +;generate internal-format denormals. It can round to any bit position. +; +;Inputs: +; mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7 +;Purpose: +; Round number to integer. Zero exponent means number is in the +; range [1,2), so only the MSB will survive (MSB-1 is round bit). +; Larger exponents keep more bits; 63 would mean no rounding. +;Outputs: +; mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl +; +;Does NOT detect overflow. + +NoSigBits: +;Exponent was negative: no integer part + and ecx,bSign shl 8 ;Zero exponent, preserve sign + mov cl,bTAG_ZERO + or EMSEG:[CURerr],Precision;Set flag on inexact result + test EMSEG:[CWcntl],RoundControl ;Check rounding control bits +.erre RCnear eq 0 + jnz NotNearNoSig ;Not just round-to-nearest + cmp edx,-1 ;Exponent of -1 ==> range [.5,1) + je HalfBitRound +RndIntToZero: + xor ebx,ebx + mov esi,ebx ;Just return zero + ret + +NotNearNoSig: +;We want to increase the magnitude if RCup and +, or RCdown and - + mov al,EMSEG:[CWcntl] ;Get rounding control + sar ch,7 ;Expand sign through whole byte + xor al,ch ;Flip rounding bits if negative + and al,RoundControl + cmp al,RCup ;Rounding up? + jnz RndIntToZero ;No, chop it +RndIntToOne: + mov ebx,1 shl 31 + xor esi,esi + mov cl,bTAG_SNGL + mov EMSEG:[SWcc],RoundUp + ret + +HalfBitRound: + add ebx,ebx ;Shift off MSB (round bit) + or ebx,esi + or ebx,eax + jnz RndIntToOne + ret ;Return zero + +;********** +RoundToBit: +;********** + mov edx,ecx ;Make copy of exponent + sar edx,16 ;Bring rounding exponent down + jl NoSigBits + mov cl,dl + cmp cl,32 ;Rounding in low word? 
+ jae RoundLow +;When cl = 31, the RoundBit is in the low half while the LSB is in the +;high half. We must preserve the RoundBit when we move it to eax. + xchg eax,esi ;Low half becomes sticky bits + or ah,al ;Preserve lowest bits in ah + add esi,-1 ;Set CY if any original sticky bits + sbb al,al ;Put original sticky bits in al + mov esi,ebx + xor ebx,ebx ;Shift mantissa right 32 bits +RoundLow: + mov edx,(1 shl 31) - 1 + shr edx,cl ;Make mask +;Note in the case of cl = 31, edx is now zero. + mov edi,esi + and edi,edx + or edi,eax ;Any bits being lost? + jz RndSetTag ;All done + inc edx ;Mask for LSB + or EMSEG:[CURerr],Precision;Set flag on inexact result + test EMSEG:[CWcntl],RoundControl ;Check rounding control bits +.erre RCnear eq 0 + jnz NotNearLow ;Not just round-to-nearest + mov edi,edx ;Save LSB mask + shr edi,1 ;Mask for round bit + jc SplitRound ;Round bit in eax? + test esi,edi ;Round bit set? + jz MaskOffLow + dec edi ;Mask for sticky bits + or edi,edx ;Sticky bits + LSB + and edi,esi + or edi,eax ;Any sticky bits set? + jz MaskOffLow +RoundUpThenMask: + mov EMSEG:[SWcc],RoundUp + add esi,edx ;Round up + adc ebx,0 + jc RoundBumpExp +MaskOffLow: + dec edx ;Mask for round & sticky bits + not edx + and esi,edx ;Zero out low bits +RndSetTag: + or ebx,ebx ;Is it normalized? + jns RoundedHighHalf + or esi,esi ;Any bits in low half? +.erre bTAG_VALID eq 1 +.erre bTAG_SNGL eq 0 + setnz cl ;if low half==0 then cl=0 else cl=1 + ret + +SplitRound: +;Rounding high half in esi on rounding bit in eax + bt esi,0 ;Look at LSB + adc eax,(1 shl 31) - 1 ;Set CY if round up + jc RoundUpThenMask + or ebx,ebx ;Will set ZF for jnz below +RoundedHighHalf: +;Rounding occurred in high half, which had been moved low. +;Move it back up high. +; +;ZF set here on content of ebx. If not zero, rounding high half in esi +;rippled forward into zero in ebx. 
+ mov cl,bTAG_SNGL + jnz RndIntNorm ;Present high half should be zero + xchg ebx,esi ;Shift left 32 bits + ret + +RndIntNorm: +;Rounded up high half of mantissa, which rolled over to 0. + add ecx,1 shl 16 ;Increase exponent + mov ebx,1 shl 31 ;Restore MSB + ret ;Tag already set to SNGL + +RoundBumpExp: +;Mantissa was FFFFF... and rolled over to 0 when we rounded + add ecx,1 shl 16 ;Increase exponent + mov ebx,1 shl 31 ;Restore MSB + jmp MaskOffLow + +NotNearLow: +;We want to increase the magnitude if RCup and +, or RCdown and - + mov al,EMSEG:[CWcntl] ;Get rounding control + sar ch,7 ;Expand sign through whole byte +.erre (not RCup and RoundControl) eq RCdown + xor al,ch ;Flip rounding bits if negative + and al,RoundControl + cmp al,RCup ;Rounding up? + jz RoundUpThenMask ;yes + jmp MaskOffLow ;No, chop it diff --git a/private/ntos/dll/i386/emsincos.asm b/private/ntos/dll/i386/emsincos.asm new file mode 100644 index 000000000..8cd3bc0ac --- /dev/null +++ b/private/ntos/dll/i386/emsincos.asm @@ -0,0 +1,571 @@ +; SCCSID = @(#)emsincos.asm 13.5 90/03/27 + page ,132 + subttl emsincos - fsin, fcos and fsincos +;*** +;emulator.asm - 80387 emulator +; +; IBM/Microsoft Confidential +; +; Copyright (c) IBM Corporation 1987, 1989 +; Copyright (c) Microsoft Corporation 1987, 1989 +; +; All Rights Reserved +; +;Purpose: +; Code for fsin, fcos and fsincos +; +;Revision History: +; See emulator.hst +; +;******************************************************************************* + +lab eFsincosStackOver + or SEL[CURerr], StackFlag or Invalid + test SEL[CWmask], Invalid + JSZ eFsincosStackOverRet + + mov SEL[rsi.lMan0], 0 ; st(0) = Ind + mov SEL[rsi.lMan1], 0c0000000h + mov SEL[rsi.wExp], 7fffh - IexpBias + mov SEL[rsi.bTag], bTAG_NAN + mov SEL[rsi.bFlags], bSign + + mov SEL[rdi.lMan0], 0 ; st(-1) = Ind + mov SEL[rdi.lMan1], 0c0000000h + mov SEL[rdi.wExp], 7fffh - IexpBias + mov SEL[rdi.bTag], bTAG_NAN + mov SEL[rdi.bFlags], bSign + + mov SEL[CURstk], rdi ; push stack +lab 
eFsincosStackOverRet + ret + + +lab eFSINCOS + mov esi, SEL[CURStk] ; esi = st(0) + mov edi, esi + PrevStackElem edi ; edi = st(-1) + + cmp SEL[edi.bTag], bTAG_EMPTY + JSNE eFsincosStackOver + + cmp SEL[esi.bTag], bTAG_NAN + JSNE eFsincosNotSNaN + + test SEL[esi.bMan7], 40h + JSNZ eFsincosNotSNaN + + test SEL[CWmask], Invalid + JSNZ eFsincosNotSNaN + + or SEL[CURerr], Invalid + ret + +lab eFsincosNotSNaN +ifdef NT386 + push eax + mov eax, dword ptr SEL[rsi] + mov dword ptr SEL[rdi], eax + mov eax, dword ptr SEL[rsi+4] + mov dword ptr SEL[rdi+4], eax + mov eax, dword ptr SEL[rsi+8] + mov dword ptr SEL[rdi+8], eax + add rsi, Reg87Len + add rdi, Reg87Len + pop eax +else + push ds ; Copy current stack into st(-1) + pop es + movsd + movsd + movsd +endif + + call eFSIN + PUSHST + call eFCOS + + ret + + +lab eFcosSpecial + mov esp, ebp + pop ebp + + mov SEL[RESULT], esi + + mov al, SEL[esi.bTag] + cmp al, bTAG_ZERO + JSNE eFcosInf + +lab eFcosRetOne + mov SEL[esi.lMan0], 0 + mov SEL[esi.lMan1], 080000000h + mov SEL[esi.wExp], 3fffh - IexpBias + mov SEL[esi.bFlags], 0 + mov SEL[esi.bTag], bTAG_VALID + ret + +lab eFcosInf + cmp al, bTAG_INF + JE RetIndInv + +lab eFcosNaN + jmp OneArgOpNaNRet + + +cProc eFCOS,<PLM,PUBLIC>,<> + + localT temp + localB SignFlag + +cBegin + mov esi, SEL[CURstk] + + cmp SEL[esi.bTag], bTAG_VALID + jne eFcosSpecial + + or SEL[CURerr], Precision + + and SEL[esi].bFlags, not bSign ; st(0) = fabs( st(0) ); + + call SinCosReduce ; Set ah to condition code. 
+ + add SEL[esi].wExp, IExpBias + + push SEL[esi].wExp + push SEL[esi].lMan1 + push SEL[esi].lMan0 + lea ecx, [temp] + push ecx + + mov bl, ah ; if octant 2, 3, 4, or 5 then final + and bl, bOCT2 or bOCT4 ; result must be negative + mov [SignFlag], bl + + test ah, bOCT1 or bOCT2 ; if octant is 1, 2, 5, 6 then must + jpo CosCallSin ; do sin() + + call __FASTLDCOS + jmp short CosCopyRes + +CosCallSin: + call __FASTLDSIN + +CosCopyRes: + mov eax, dword ptr [temp] + mov SEL[esi].lMan0, eax + mov eax, dword ptr [temp+4] + mov SEL[esi].lMan1, eax + + mov ax, word ptr [temp+8] + sub ax, IExpBias + mov SEL[esi].wExp, ax + + cmp [SignFlag], 0 + jpe CosDone + + or SEL[esi].bFlags, bSign ; Make result negative. +CosDone: + +cEnd + + + + + +lab eFsinSpecial + mov esp, ebp + pop ebp + + mov al, SEL[esi.bTag] + cmp al, bTAG_ZERO + JSNE eFsinInf + +lab eFsinZero + ret + +lab eFsinInf + cmp al, bTAG_INF + JE RetIndInv + +lab eFsinNaN + jmp OneArgOpNaNRet + + +cProc eFSIN,<PLM,PUBLIC>,<> + + localT temp + localB SignFlag + +cBegin + mov esi, SEL[CURstk] + + cmp SEL[esi.bTag], bTAG_VALID + jne eFsinSpecial + + or SEL[CURerr], Precision + + mov al, SEL[esi].bFlags + and SEL[esi].bFlags, not bSign + + shl al, 1 ; shift sign into carry. + sbb cl, cl ; set cl to -1 if argument is negative. + + push ecx + call SinCosReduce ; Set ah to condition code. 
+ pop ecx + + cmp SEL[esi].bTag, bTAG_ZERO + je SinDone + + add SEL[esi].wExp, IExpBias + + push SEL[esi].wExp + push SEL[esi].lMan1 + push SEL[esi].lMan0 + lea ebx, [temp] + push ebx + + mov bl, ah ; if octant 4, 5, 6 or 7 then final + and bl, bOCT4 ; result must be negative + + neg cl ; set cl to odd parity if arg was < 0.0 + xor bl, cl ; set bl to odd parity if result must be negative + + mov [SignFlag], bl + + test ah, bOCT1 or bOCT2 ; if octant is 1, 2, 5, 6 then must + jpo SinCallCos ; do cos() + + call __FASTLDSIN + jmp short SinCopyResult + +SinCallCos: + call __FASTLDCOS + +SinCopyResult: + mov eax, dword ptr [temp] + mov SEL[esi].lMan0, eax + mov eax, dword ptr [temp+4] + mov SEL[esi].lMan1, eax + + mov ax, word ptr [temp+8] + sub ax, IExpBias + mov SEL[esi].wExp, ax + + cmp [SignFlag], 0 + jpe SinDone + + or SEL[esi].bFlags, bSign ; Make result negative. +SinDone: + +cEnd + + + +lab SinCosReduce + mov SEL[TEMP1.bFlags], 0 ; TEMP1 = pi/4 + mov SEL[TEMP1.bTag], bTAG_VALID + mov SEL[TEMP1.wExp], 3ffeh-IExpBias + mov SEL[TEMP1.wMan3], 0c90fh + mov SEL[TEMP1.wMan2], 0daa2h + mov SEL[TEMP1.wMan1], 2168h + mov SEL[TEMP1.wMan0], 0c235h + +ifdef NT386 + mov edi, TEMP1 +else + mov edi, edataOFFSET TEMP1 +endif + + push esi + call InternFPREM ; rsi = st(0), rdi = st(0) + pop esi + + mov ah, SEL[SWcc] + + test ah, bOCT1 ; check for even octant + jz EvenOct ; yes + + add SEL[esi.wExp], IExpBias ; convert to true long double + + push ds + push esi + push cs + push ecodeOFFSET PIBY4 + push ds + push esi + push -1 + call __FASTLDADD ; st(0) = pi/4 - st(0) + mov ah, SEL[SWcc] + + sub SEL[esi.wExp], IExpBias ; convert to squirly emulator long double + +EvenOct: + retn + + + +labelW PIBY4 + dw 0c235h, 02168h, 0daa2h, 0c90fh, 3ffeh + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; BUGBUG tedm: NT masm can't handle floating-point constants ; +; because strtod and _strtold C-runtimes aren't ; +; there. 
So the constants below must be pre- ; +; assembled and defined as a byte stream. ; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +ifdef NOTDEF + +staticT FourByPI, +0.1273239544735162686151e+01 + +staticT SinP0, +0.7853981633974483096141845e+00 +staticT SinP1, -0.8074551218828078152025820e-01 +staticT SinP2, +0.2490394570192716275251900e-02 +staticT SinP3, -0.3657620418214640005290000e-04 +staticT SinP4, +0.3133616889173253480000000e-06 +staticT SinP5, -0.1757247417617080600000000e-08 +staticT SinP6, +0.6948152035052200000000000e-11 +staticT SinP7, -0.2022531292930000000000000e-13 + +staticT CosP0, +0.99999999999999999996415e+00 +staticT CosP1, -0.30842513753404245242414e+00 +staticT CosP2, +0.15854344243815410897540e-01 +staticT CosP3, -0.32599188692668755044000e-03 +staticT CosP4, +0.35908604458858195300000e-05 +staticT CosP5, -0.24611363826370050000000e-07 +staticT CosP6, +0.11500497024263000000000e-09 +staticT CosP7, -0.38577620372000000000000e-12 + +else + +staticB FourByPI, <02Ah,015h,044h,04Eh,06Eh,083h,0F9h,0A2h,0FFh,03Fh> + +staticB SinP0 , <035h,0C2h,068h,021h,0A2h,0DAh,00Fh,0C9h,0FEh,03Fh> +staticB SinP1 , <0DAh,095h,0F2h,02Dh,031h,0E7h,05Dh,0A5h,0FBh,0BFh> +staticB SinP2 , <0E9h,0C6h,056h,0ADh,03Bh,0E3h,035h,0A3h,0F6h,03Fh> +staticB SinP3 , <0D5h,0E7h,05Dh,015h,073h,066h,069h,099h,0F0h,0BFh> +staticB SinP4 , <0BCh,032h,069h,0E1h,042h,01Ah,03Ch,0A8h,0E9h,03Fh> +staticB SinP5 , <021h,077h,004h,05Fh,0A1h,0A5h,083h,0F1h,0E1h,0BFh> +staticB SinP6 , <0FCh,01Ah,0D1h,006h,0CCh,063h,077h,0F4h,0D9h,03Fh> +staticB SinP7 , <04Ah,003h,086h,040h,07Ch,065h,02Ch,0B6h,0D1h,0BFh> + +staticB CosP0 , <0FFh,0FFh,0FFh,0FFh,0FFh,0FFh,0FFh,0FFh,0FEh,03Fh> +staticB CosP1 , <02Fh,0F2h,02Eh,0F2h,04Dh,0E6h,0E9h,09Dh,0FDh,0BFh> +staticB CosP2 , <02Fh,04Eh,0D5h,0DAh,040h,0F8h,0E0h,081h,0F9h,03Fh> +staticB CosP3 , <09Dh,0DEh,06Ah,0E4h,0F1h,0E3h,0E9h,0AAh,0F3h,0BFh> +staticB CosP4 , <031h,01Eh,0F9h,081h,041h,083h,0FAh,0F0h,0ECh,03Fh> +staticB CosP5 , 
<076h,0B1h,000h,0A4h,01Eh,0F6h,068h,0D3h,0E5h,0BFh> +staticB CosP6 , <0D8h,005h,06Fh,08Ah,0EAh,00Ah,0E6h,0FCh,0DDh,03Fh> +staticB CosP7 , <003h,0D5h,00Ah,0ACh,0CCh,035h,02Ch,0D9h,0D5h,0BFh> + +endif + +cProc __FASTLDSIN,<PLM,PUBLIC>,<isi,idi> + + parmT x + parmI RetOff + + localT x2 + localT poly + localI count + +cBegin + + lea isi, [x] ; x = x * (4/PI) + push ss + push isi + + push ss + push isi + + mov iax, codeOFFSET FourByPI + push cs + push iax + + call __FASTLDMULT + + + lea idi, [x2] ; x2 = x * x + push ss + push idi + + push ss + push isi + + push ss + push isi + + call __FASTLDMULT + +if 0 + push ss + pop es + lea idi, [poly] + mov isi, codeOFFSET SinP7 + movsw + movsw + movsw + movsw + movsw +endif + mov eax, dword ptr [SinP7] ; poly = SinP7 + mov dword ptr [poly], eax + mov eax, dword ptr [SinP7+4] + mov dword ptr [poly+4], eax + mov ax, word ptr [SinP7+8] + mov word ptr [poly+8], ax + + lea isi, [poly] + mov idi, codeOFFSET SinP6 + + mov [count], 7 + +SinPolyLoop: + push ss + push isi ; poly = poly * x2 + + push ss + push isi + + lea iax, [x2] + push ss + push iax + + call __FASTLDMULT + + + push ss + push isi ; poly = poly + SinP[n] + + push ss + push isi + + push cs + push idi + + xor iax, iax + push iax + call __FASTLDADD + + sub idi, 10 + + dec [count] + jnz SinPolyLoop + + push ss + push [RetOff] ; return x * poly + + lea iax, [x] + push ss + push iax + + push ss + push isi + + call __FASTLDMULT + + mov iax, [RetOff] + mov idx, ss +cEnd + + + + +cProc __FASTLDCOS,<PLM,PUBLIC>,<isi,idi> + + parmT x + parmI RetOff + + localT x2 + localI count + +cBegin + + lea isi, [x] ; x = x * (4/PI) + push ss + push isi + + push ss + push isi + + mov iax, codeOFFSET FourByPI + push cs + push iax + + call __FASTLDMULT + + + lea idi, [x2] ; x2 = x * x + push ss + push idi + + push ss + push isi + + push ss + push isi + + call __FASTLDMULT + +if 0 + push ss ; (return) = CosP7 + pop es + mov idi, [RetOff] + mov isi, codeOFFSET CosP7 + movsw + movsw + movsw + movsw + 
movsw +endif + mov isi, [RetOff] + mov eax, dword ptr [CosP7] + mov dword ptr ss:[isi], eax + mov eax, dword ptr [CosP7+4] + mov dword ptr ss:[isi+4], eax + mov ax, word ptr [CosP7+8] + mov word ptr ss:[isi+8], ax + + mov idi, codeOFFSET CosP6 + + mov [count], 7 + +CosPolyLoop: + push ss + push isi ; (return) = (return) * x2 + + push ss + push isi + + lea iax, [x2] + push ss + push iax + + call __FASTLDMULT + + + push ss + push isi ; (return) = (return) + CosP[n] (idi walks CosP6 down, 10 bytes each) + + push ss + push isi + + push cs + push idi + + xor iax, iax + push iax + + call __FASTLDADD + + + sub idi, 10 + + dec [count] + jnz CosPolyLoop + + mov iax, isi + mov idx, ss +cEnd diff --git a/private/ntos/dll/i386/emstack.inc b/private/ntos/dll/i386/emstack.inc new file mode 100644 index 000000000..a60c03aff --- /dev/null +++ b/private/ntos/dll/i386/emstack.inc @@ -0,0 +1,72 @@ + subttl emstack.asm - Emulator Stack Management Macros + page +;*** +;emstack.asm - Emulator Stack Management Area +; +; Microsoft Confidential +; Copyright (c) Microsoft Corporation 1991 +; All Rights Reserved +; +;Purpose: +; Handles emulator stack. +; +;Revision History: +; +; [] 09/05/91 TP Initial 32-bit version. +; +;******************************************************************************* + + +;POPSTret: pops the stack and returns. Uses esi only when no register argument is given. 
+ +POPSTret macro reg + local stackwrap +IFB <reg> + mov esi,EMSEG:[CURstk] +_popreg equ esi +ELSE +_popreg equ reg +ENDIF + mov EMSEG:[_popreg].bTag,bTAG_EMPTY + NextStackElem _popreg,stackwrap + mov EMSEG:[CURstk],_popreg + ret + +Wrap&stackwrap: + mov EMSEG:[CURstk],BEGstk + ret + endm + +;NextStackElem: Given pST(0) = [CURstk] in reg, returns pST(1) +;Requires NextStackWrap macro with same arguments + +NextStackElem macro reg,stackwrap + cmp reg,INITstk ;JWM + jae Wrap&stackwrap + add reg,Reg87Len +Cont&stackwrap: + endm + +NextStackWrap macro reg,stackwrap +Wrap&stackwrap: + mov reg,BEGstk ;JWM + jmp Cont&stackwrap + endm + + +;PrevStackElem: Given pST(0) = [CURstk] in reg, returns new pST(0) +;after a push onto the stack. +;Requires PrevStackWrap macro with same arguments + +PrevStackElem macro reg,stackwrap + cmp reg,BEGstk ;JWM + jbe Wrap&stackwrap + sub reg,Reg87Len +Cont&stackwrap: + endm + +PrevStackWrap macro reg,stackwrap +Wrap&stackwrap: + mov reg,INITstk ;JWM + jmp Cont&stackwrap + endm diff --git a/private/ntos/dll/i386/emstore.asm b/private/ntos/dll/i386/emstore.asm new file mode 100644 index 000000000..aadeb6520 --- /dev/null +++ b/private/ntos/dll/i386/emstore.asm @@ -0,0 +1,803 @@ + subttl emstore.asm - FST, FSTP, FIST, FISTP instructions + page +;******************************************************************************* +;emstore.asm - FST, FSTP, FIST, FISTP instructions +; +; Microsoft Confidential +; +; Copyright (c) Microsoft Corporation 1991 +; All Rights Reserved +; +;Purpose: +; FST, FSTP, FIST, FISTP instructions +;Inputs: +; edi = [CURstk] +; dseg:esi = pointer to memory destination +; +;Revision History: +; +; [] 09/05/91 TP Initial 32-bit version. 
+; +;******************************************************************************* + + +;****** +EM_ENTRY eFSTP +eFSTP: +;****** +; edi = [CURstk] +; esi = pointer to st(i) from instruction field + + cmp EMSEG:[edi].bTag,bTAG_EMPTY + jz short efstp_StackError +;UNDONE: temporary hack to preserve condition codes + mov ax,[esp+4].OldStatus + mov EMSEG:[StatusWord],ax +;UNDONE: end of hack + +;A common use of this instruction is FSTP st(0) just to pop the stack. +;We check for this case and optimize it. + cmp esi,edi + jz short JustPop +;Copy the register + mov eax,EMSEG:[edi].ExpSgn + mov EMSEG:[esi].ExpSgn,eax + mov eax,EMSEG:[edi].lManHi + mov EMSEG:[esi].lManHi,eax + mov eax,EMSEG:[edi].lManLo + mov EMSEG:[esi].lManLo,eax +JustPop: + POPSTret edi + +efstp_StackError: + mov EMSEG:[CURerr],Invalid+StackFlag + ret + + +;****** +EM_ENTRY eFST +eFST: +;****** +; edi = [CURstk] +; esi = pointer to st(i) from instruction field + + cmp EMSEG:[edi].bTag,bTAG_EMPTY + jz StackError ;In emarith.asm +;Copy the register + mov eax,EMSEG:[edi].ExpSgn + mov EMSEG:[esi].ExpSgn,eax + mov eax,EMSEG:[edi].lManHi + mov EMSEG:[esi].lManHi,eax + mov eax,EMSEG:[edi].lManLo + mov EMSEG:[esi].lManLo,eax +DontPop: + ret + + +;Come here if the instruction wants to pop the stack + +PopStackChk: + jc DontPop ;Get unmasked error? +PopStack: + mov edi,EMSEG:[CURstk] + POPSTret edi + + +StoreSpcl64: + cmp cl,bTAG_DEN + jz Denorm64 +.erre bTAG_NAN lt bTAG_EMPTY +.erre bTAG_NAN gt bTAG_INF + cmp cl,bTAG_NAN + mov ecx,DexpMax shl 16 ;Insert special exponent for NAN/Inf. + jb StoreIEEE64 ;Go handle infinity + ja Empty64 +;Have a NAN. + test ebx,1 shl 30 ;Check for SNAN + jnz StoreIEEE64 ;Go store QNAN + or ebx,1 shl 30 ;Make SNAN into a QNAN + mov EMSEG:[CURerr],Invalid ;Flag the exception + test EMSEG:[CWmask],Invalid ;Is it masked? 
+ jnz StoreIEEE64 ;If so, update with masked response + stc ;Don't pop stack + ret + +Empty64: +;It's empty--signal invalid operation + mov EMSEG:[CURerr],StackFlag+Invalid + test EMSEG:[CWmask],Invalid ;Is it masked? + jz DoNothing64 ;No--leave memory unchanged +;Store Indefinite +;For FSTP, we go ahead and do the pop even though it's empty + mov dword ptr dseg:[esi],0 + mov dword ptr dseg:[esi+4],0FFF80000H ;64-bit IEEE indefinite + ret ;CY clear + +Denorm64: + mov EMSEG:[CURerr],Denormal + test EMSEG:[CWmask],Denormal ;Is it masked? + jnz NormStore64 ;If so, ignore denormalization +DoNothing64: + stc ;Don't pop stack + ret + +;***************** +;Store Double Real +;***************** + +EM_ENTRY eFSTP64 +eFSTP64: + push offset PopStackChk ;Return here after store + +EM_ENTRY eFST64 +eFST64: + mov EMSEG:[PrevDataOff],esi ;Save operand pointer + mov ebx,EMSEG:[edi].lManHi + mov ecx,EMSEG:[edi].ExpSgn + mov edi,EMSEG:[edi].lManLo +;mantissa in ebx:edi, exponent in high ecx, sign in ch bit 7, tag in cl +;memory destination is dseg:esi + mov al,ch ;Save sign bit + cmp cl,bTAG_ZERO +.erre bTAG_VALID lt bTAG_ZERO +.erre bTAG_SNGL lt bTAG_ZERO + jz short SignAndStore64 ;Just set sign and exit + ja StoreSpcl64 +NormStore64: +;Note that we could have a denormal exception at this point. +;Thus any additional exceptions must OR into [CURerr], not MOV. + xor cx,cx + add ecx,(DexpBias-TexpBias) shl 16 ;Correct bias + jl short Under64 + cmp ecx,DexpMax shl 16 ;Exponent too big? + jge Over64 + test edi,(1 shl 11) - 1 ;Any bits to round? 
+ jz short StoreIEEE64 + +Round64: + or EMSEG:[CURerr],Precision ;Set flag on inexact result + test EMSEG:[CWcntl],RoundControl ;Check rounding control bits +.erre RCnear eq 0 + jnz NotNearest64 ;Not just round-to-nearest + test edi,1 shl 10 ;Check rounding bit + jz short StoreIEEE64 ;If zero, don't round up + test edi,(3 shl 10)-1 ;Test LSB and sticky bits + jnz RoundUp64b + +StoreIEEE64: +;Only the exponent field (high word of ecx) decides underflow. The low +;byte may carry the marker (cl = 1) left by the denormalizing code in +;Under64; testing all of ecx would let that marker hide a zero exponent, +;skip Under64, and leak into the stored mantissa via the shl/or below. + test ecx,0FFFF0000h ;now that value is rounded, + je short Under64 ;check exponent for underflow + +StoreIEEE64Continue: +;NOTE(review): if rounding a denormal carries into bit 31, the MSB clear +;below appears to drop it instead of producing exponent 1 -- confirm. + and ebx,not (1 shl 31) ;Clear MSB--it's implied in IEEE64 + shrd edi,ebx,11 + shr ebx,11 ;Move mantissa down + shl ecx,4 ;Exponent up to position + or ebx,ecx ;Combine exponent +SignAndStore64: + and al,bSign ;Just sign bit + shl eax,24 ;Sign to MSB + or ebx,eax ;Combine sign + mov dseg:[esi],edi + mov dseg:[esi+4],ebx +;CY clear indicate no error + ret + +SetUnderflow: + or EMSEG:[CURerr],Underflow ;Unmasked underflow--do nothing +DoNothing: + stc ;Indicate nothing was done + ret + +Under64: +;cl is 0 on first entry and 1 once the value has been denormalized. + dec cl ; Is cl == 1 (already denormalized)? + jz short StoreIEEE64Continue ; Yes, we've already been here; ecx is now 0 + + test EMSEG:[CWmask],Underflow ;Is underflow masked? + jz SetUnderflow ;No, do nothing more +;Produce masked underflow response +;Note that the underflow exception does not occur if the number can be +;represented exactly as a denormal. + + sar ecx,16 ;Bring exponent down + cmp ecx,DexpMin-52 ;Allow for shift down to rounding bit + jl BigUnder64 ;Too small, just make it zero +.erre DexpMin eq 0 + neg ecx ;Use as shift count + inc ecx ;Shift by at least one + xor edx,edx ;Place for sticky bits + cmp cl,32 ;Long shift? 
+ jb ShortDenorm + neg edi ;CY set if non-zero + sbb edx,edx ;-1 if bits shifted off, else zero + mov edi,ebx + xor ebx,ebx ;32-bit right shift +ShortDenorm: +;Shift count is modulo-32 + shrd edx,edi,cl + shrd edi,ebx,cl + shr ebx,cl + cmp edx,1 ;CY set if zero, else clear + sbb edx,edx ;Zero if bits shifted off, else -1 + inc edx ;1 if bits shifted off, else zero + or edi,edx ;Collapse sticky bits into edi + + mov ecx, 1 ;Biased exponent is zero, put 1 into CL (noticed by Under64) + test edi,(1 shl 11) - 1 ;Any bits to round? + jz StoreIEEE64 ;If not, no exception + or EMSEG:[CURerr],Underflow + jmp Round64 + +Over64: + test EMSEG:[CWmask],Overflow ;Is overflow masked? + jz SetOverflow ;No, do nothing more +;Produce masked overflow response + or EMSEG:[CURerr],Overflow+Precision + mov ebx,DexpMax shl 20 + xor edi,edi ;ebx:edi = positive infinity + mov ah,EMSEG:[CWcntl] ;Get rounding control +;Return max value if RCup bit = 1 and -, or RCdown bit = 1 and + +;i.e., RCup & sign OR RCdown & not sign +.erre RCchop eq RCup + RCdown ;Always return max value +.erre RCnear eq 0 ;Never return max value + sar al,7 ;Expand sign through whole byte +.erre (RCdown and bSign) eq 0 ;Don't want to change real sign + xor al,RCdown ;Flip sign for RCdown bit + and ah,al ;RCup & sign OR RCdown & not sign + test ah,RoundControl ;Look only at RC bits + jz SignAndStore64 ;Return infinity + dec ebx + dec edi ;Max value == infinity-1 + jmp SignAndStore64 + +SetOverflow: + or EMSEG:[CURerr],Overflow + stc ;Indicate nothing was done + ret + +BigUnder64: + or EMSEG:[CURerr],Underflow+Precision + xor ebx,ebx + mov edi,ebx ;Set it to zero + mov ecx,ebx ;Including exponent +NotNearest64: +;We want to increase the magnitude if RCup and +, or RCdown and - + mov ah,EMSEG:[CWcntl] ;Get rounding control + sar al,7 ;Expand sign through whole byte +.erre (not RCup and RoundControl) eq RCdown + xor ah,al ;Flip rounding bits if negative + and ah,RoundControl + cmp ah,RCup + jnz StoreIEEE64 ;No, chop it + 
+RoundUp64b: + mov EMSEG:[SWcc],RoundUp + add edi,1 shl 11 ;Round up + adc ebx,0 + jnc StoreIEEE64 + + add ecx,1 shl 16 ;Mantissa overflowed, bump exponent + cmp ecx,DexpMax shl 16 ;Exponent too big? + jge Over64 + jmp StoreIEEE64 + +;******************************************************************************* + +;StoreSpcl32: single-precision store of a special (non-VALID, non-ZERO) tag. +;Denormals go to Denorm32, infinity is stored with the maximum exponent, +;NANs are quieted (Invalid raised for SNAN), and an empty register gets +;the 32-bit stack-fault response in Empty32. +StoreSpcl32: + cmp cl,bTAG_DEN + jz Denorm32 +.erre bTAG_NAN lt bTAG_EMPTY +.erre bTAG_NAN gt bTAG_INF + cmp cl,bTAG_NAN + mov ecx,SexpMax shl 16 ;Insert special exponent + jb StoreIEEE32 +;Must use the 32-bit handler here: Empty64 writes the 8-byte 64-bit +;indefinite, which would overrun a 4-byte destination. + ja Empty32 +;Have a NAN. + test ebx,1 shl 30 ;Check for SNAN + jnz StoreIEEE32 ;Go store QNAN + or ebx,1 shl 30 ;Make SNAN into a QNAN + mov EMSEG:[CURerr],Invalid ;Flag the exception + test EMSEG:[CWmask],Invalid ;Is it masked? + jnz StoreIEEE32 ;If so, update with masked response + stc ;Don't pop stack + ret + +Empty32: +;It's empty--signal invalid operation + mov EMSEG:[CURerr],StackFlag+Invalid + test EMSEG:[CWmask],Invalid ;Is it masked? + jz DoNothing32 ;No--leave memory unchanged +;Store Indefinite +;For FSTP, we go ahead and do the pop even though it's empty + mov dword ptr dseg:[esi],0FFC00000H ;32-bit IEEE indefinite + ret ;CY clear + +Denorm32: + mov EMSEG:[CURerr],Denormal + test EMSEG:[CWmask],Denormal ;Is it masked? + jnz NormStore32 ;If so, ignore denormalization +DoNothing32: + stc ;Don't pop stack + ret + +;***************** +;Store Single Real +;***************** + +EM_ENTRY eFSTP32 +eFSTP32: + push offset PopStackChk ;Return here after store + +EM_ENTRY eFST32 +eFST32: + mov EMSEG:[PrevDataOff],esi ;Save operand pointer + mov ebx,EMSEG:[edi].lManHi + mov ecx,EMSEG:[edi].ExpSgn + mov edi,EMSEG:[edi].lManLo +;mantissa in ebx:edi, exponent in high ecx, sign in ch bit 7, tag in cl +;memory destination is dseg:esi + mov al,ch ;Save sign bit + cmp cl,bTAG_ZERO +.erre bTAG_VALID lt bTAG_ZERO +.erre bTAG_SNGL lt bTAG_ZERO + jz SignAndStore32 ;Just set sign and exit + ja StoreSpcl32 +NormStore32: +;Note that we could have a denormal exception at this point. 
;Thus any additional exceptions must OR into [CURerr], not MOV.
;
;Body of the 32-bit real store for a normal source:
;ebx:edi = mantissa (integer bit at ebx bit 31), high ecx = exponent,
;al = sign byte, dseg:esi = destination.
        xor     cx,cx                   ;Leave only the exponent in ecx
        add     ecx,(SexpBias-TexpBias) shl 16 ;Correct bias
        jle     Under32
        cmp     ecx,SexpMax shl 16      ;Exponent too big?
        jge     Over32
;See if we need to round
        mov     edx,ebx                 ;Get low bits
        and     edx,(1 shl 8) - 1       ;Mask to last 8 bits
        or      edx,edi                 ;Throwing away any bits?
        jz      StoreIEEE32
;Result will not be exact--check rounding mode
Round32:
        or      EMSEG:[CURerr],Precision ;Set flag on inexact result
        test    EMSEG:[CWcntl],RoundControl ;Check rounding control bits
.erre   RCnear eq 0
        jnz     NotNearest32            ;Not just round-to-nearest
        test    bl,1 shl 7              ;Round bit set?
        jz      StoreIEEE32
        mov     edx,ebx
        and     edx,(3 shl 7)-1         ;Mask to LSB and sticky bits
        or      edx,edi                 ;Combine with remaining sticky bits
        jz      StoreIEEE32             ;Exactly halfway and even: round down
        mov     EMSEG:[SWcc],RoundUp
        add     ebx,1 shl 8             ;Round up
        jc      AddOneExp32
StoreIEEE32:
;ebx = mantissa with the integer bit position at bit 31,
;ecx = biased exponent shl 16 (low word is zero on every path here).
;A normal number drops its integer bit, which is implied in IEEE32.
;A denormal (biased exponent zero) has no implied bit.  If rounding carried
;into bit 31 the value has become the smallest normal; keeping the bit lets
;it land in bit 23 after the shift, giving exponent field 1 and a zero
;fraction.  Clearing it unconditionally stored such values as zero.
        test    ecx,ecx                 ;Biased exponent zero?
        jz      PackIEEE32              ;Yes, keep any carry into bit 31
        and     ebx,not (1 shl 31)      ;Clear MSB--it's implied in IEEE32
PackIEEE32:
        shr     ebx,8                   ;Move mantissa down
        shl     ecx,7                   ;Exponent up to position
        or      ebx,ecx                 ;Combine exponent
SignAndStore32:
        and     al,bSign                ;Just sign bit
        shl     eax,24                  ;Sign to MSB
        or      ebx,eax                 ;Combine sign (also clears CY)
        mov     dseg:[esi],ebx
;CY clear indicates no error
        ret

Under32:
        test    EMSEG:[CWmask],Underflow ;Is underflow masked?
        jz      SetUnderflow            ;No, do nothing more
;Produce masked underflow response
;Note that the underflow exception does not occur if the number can be
;represented exactly as a denormal.
        sar     ecx,16                  ;Bring exponent down
        cmp     ecx,SexpMin-23          ;Allow for shift down to rounding bit
        jl      BigUnder32              ;Too small, just make it zero
.erre   SexpMin eq 0
        neg     ecx                     ;Use as shift count
        inc     ecx                     ;Shift by at least one
        xor     edx,edx                 ;Place for sticky bits
        shrd    edx,ebx,cl
        shr     ebx,cl
        xor     ecx,ecx                 ;Biased exponent is zero
        or      edi,edx                 ;Combine sticky bits
        mov     edx,ebx                 ;Get low bits
        and     edx,(1 shl 8) - 1       ;Mask to last 8 bits
        or      edx,edi                 ;Throwing away any bits?
+ jz StoreIEEE32 + or EMSEG:[CURerr],Underflow + jmp Round32 + +AddOneExp32: + add ecx,1 shl 16 ;Mantissa overflowed, bump exponent + cmp ecx,SexpMax shl 16 ;Exponent too big? + jl StoreIEEE32 +Over32: + test EMSEG:[CWmask],Overflow ;Is overflow masked? + jz SetOverflow ;No, do nothing more +;Produce masked overflow response + or EMSEG:[CURerr],Overflow+Precision + mov ebx,SexpMax shl 23 + mov ah,EMSEG:[CWcntl] ;Get rounding control +;Return max value if RCup bit = 1 and -, or RCdown bit = 1 and + +;i.e., RCup & sign OR RCdown & not sign +.erre RCchop eq RCup + RCdown ;Always return max value +.erre RCnear eq 0 ;Never return max value + sar al,7 ;Expand sign through whole byte +.erre (RCdown and bSign) eq 0 ;Don't want to change real sign + xor al,RCdown ;Flip sign for RCdown bit + and ah,al ;RCup & sign OR RCdown & not sign + test ah,RoundControl ;Look only at RC bits + jz SignAndStore32 ;Return infinity + dec ebx ;Max value == infinity-1 + jmp SignAndStore32 + +BigUnder32: + or EMSEG:[CURerr],Underflow+Precision + xor ebx,ebx ;Set it to zero + xor ecx,ecx ;Exponent too +NotNearest32: +;We want to increase the magnitude if RCup and +, or RCdown and - + mov ah,EMSEG:[CWcntl] ;Get rounding control + sar al,7 ;Expand sign through whole byte +.erre (not RCup and RoundControl) eq RCdown + xor ah,al ;Flip rounding bits if negative + and ah,RoundControl + cmp ah,RCup + jnz StoreIEEE32 ;No, chop it + mov EMSEG:[SWcc],RoundUp + add ebx,1 shl 8 ;Round up + jnc StoreIEEE32 + jmp AddOneExp32 + +;******************************************************************************* + +StoreSpcl32Int: + cmp cl,bTAG_DEN + jz NormStore32Int ;Ignore denormal + cmp cl,bTAG_EMPTY + jnz Over32Int ;All other specials are invalid + mov EMSEG:[CURerr],StackFlag+Invalid + jmp Invalid32Int + +DoNothing32Int: + stc ;Don't pop stack + ret + +CheckMax32: + ja Over32Int + test al,bSign ;Is it negative? 
+ jnz Store32Int ;If so, answer is OK +Over32Int: +;Overflow on integer store is invalid according to IEEE + mov EMSEG:[CURerr],Invalid ;Must remove precision exception +Invalid32Int: + test EMSEG:[CWmask],Invalid ;Is it masked? + jz DoNothing32Int ;No--leave memory unchanged +;Store Indefinite +;For FSTP, we go ahead and do the pop even though it's empty + mov dword ptr dseg:[esi],80000000H ;32-bit integer indefinite + ret ;CY clear + +;****************** +;Store Long Integer +;****************** + +EM_ENTRY eFISTP32 +eFISTP32: + push offset PopStackChk ;Return here after store + +EM_ENTRY eFIST32 +eFIST32: + mov EMSEG:[PrevDataOff],esi ;Save operand pointer + mov ebx,EMSEG:[edi].lManHi + mov ecx,EMSEG:[edi].ExpSgn + mov edi,EMSEG:[edi].lManLo +;mantissa in ebx:edi, exponent in high ecx, sign in ch bit 7, tag in cl +;memory destination is dseg:esi + mov al,ch ;Save sign bit + cmp cl,bTAG_ZERO +.erre bTAG_VALID lt bTAG_ZERO +.erre bTAG_SNGL lt bTAG_ZERO + jz Store32Int ;Just store zero and exit + ja StoreSpcl32Int +NormStore32Int: + xor edx,edx + sar ecx,16 ;Bring exponent down + cmp ecx,-1 ;Is it less than 1? + jle Under32Int + cmp ecx,31 + jg Over32Int + sub ecx,31 + neg ecx ;cl = amount to shift right + shrd edx,edi,cl + shrd edi,ebx,cl ;Collect round and sticky bits + shr ebx,cl ;Align integer +;See if we need to round + mov ecx,edi + or ecx,edx ;Throwing away any bits? + jz StoreIEEE32Int +;Result will not be exact--check rounding mode +Round32Int: + mov EMSEG:[CURerr],Precision ;Set flag on inexact result + test EMSEG:[CWcntl],RoundControl ;Check rounding control bits +.erre RCnear eq 0 + jnz NotNearest32Int ;Not just round-to-nearest + +;To perform "round even" when the round bit is set and the sticky bits +;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB +;is set, that will always force a round up (to even) if the round bit is +;set. If the LSB is zero, then the sticky bits remain zero and we always +;round down. 
;Round-to-nearest-even step of the 32-bit integer store (continues the
;Round32Int path).  ebx = integer magnitude, edi:edx = round and sticky bits.

        bt      ebx,0                   ;Look at LSB (for round even)
        adc     edx,-1                  ;CY set if sticky bits <>0
        adc     edi,(1 shl 31)-1        ;CY set if round up
        jnc     StoreIEEE32Int
        mov     EMSEG:[SWcc],RoundUp
        inc     ebx
        jz      Over32Int               ;Wrapped to zero: magnitude is 2^32
StoreIEEE32Int:
        cmp     ebx,1 shl 31            ;Check for max value
        jae     CheckMax32
SignAndStore32Int:
        shl     eax,24                  ;Sign to MSB
        cdq                             ;Extend sign through edx
        xor     ebx,edx                 ;Complement
        sub     ebx,edx                 ; and increment if negative
        clc
Store32Int:
        mov     dseg:[esi],ebx
;CY clear indicates no error
        ret

Under32Int:
;Exponent is -1 or less, so the magnitude is below 1.
;ZF set if exponent is -1 (flags are still those of the compare with -1;
;xchg does not change them).
        xchg    edx,edi                 ;32-bit right shift
        xchg    edi,ebx                 ;ebx = 0 now
        jz      Round32Int              ;If exponent was -1, ready to round
        mov     EMSEG:[CURerr],Precision ;Set flag on inexact result
NotNearest32Int:
;We want to increase the magnitude if RCup and +, or RCdown and -
        mov     ah,EMSEG:[CWcntl]       ;Get rounding control
        sar     al,7                    ;Expand sign through whole byte
.erre   (not RCup and RoundControl) eq RCdown
        xor     ah,al                   ;Flip rounding bits if negative
        and     ah,RoundControl
        cmp     ah,RCup                 ;Rounding up?
        jnz     StoreIEEE32Int          ;No, chop it
        mov     EMSEG:[SWcc],RoundUp
        inc     ebx
;INC does not modify CF, so the carry flag cannot report the wrap from
;0FFFFFFFFH to zero (CF here is still the clear carry of the equal compare
;above).  Test ZF instead, as the round-to-nearest path does.
        jnz     StoreIEEE32Int
        jmp     Over32Int               ;Magnitude became 2^32: invalid

;*******************************************************************************

StoreSpcl16Int:
;Special-tag dispatch for the 16-bit integer store.  cl = tag.
        cmp     cl,bTAG_DEN
        jz      NormStore16Int          ;Ignore denormal
        cmp     cl,bTAG_EMPTY
        jnz     Over16Int               ;All other specials are invalid
        mov     EMSEG:[CURerr],StackFlag+Invalid
        jmp     Invalid16Int

DoNothing16Int:
        stc                             ;Don't pop stack
        ret

CheckMax16:
;Entered with the flags of the compare of ebx against 1 shl 15.
        ja      Over16Int
        test    al,bSign                ;Is it negative?
        jnz     Store16Int              ;If so, answer is OK
Over16Int:
;Overflow on integer store is invalid according to IEEE
        mov     EMSEG:[CURerr],Invalid
Invalid16Int:
        test    EMSEG:[CWmask],Invalid  ;Is it masked?
+ jz DoNothing16Int ;No--leave memory unchanged +;Store Indefinite +;For FSTP, we go ahead and do the pop even though it's empty + mov word ptr dseg:[esi],8000H ;16-bit integer indefinite + ret ;CY clear + +;******************* +;Store Short Integer +;******************* + +EM_ENTRY eFISTP16 +eFISTP16: + push offset PopStackChk ;Return here after store + +EM_ENTRY eFIST16 +eFIST16: + mov EMSEG:[PrevDataOff],esi ;Save operand pointer + mov ebx,EMSEG:[edi].lManHi + mov ecx,EMSEG:[edi].ExpSgn + mov edi,EMSEG:[edi].lManLo +;mantissa in ebx:edi, exponent in high ecx, sign in ch bit 7, tag in cl +;memory destination is dseg:esi + mov al,ch ;Save sign bit + cmp cl,bTAG_ZERO +.erre bTAG_VALID lt bTAG_ZERO +.erre bTAG_SNGL lt bTAG_ZERO + jz Store16Int ;Just store zero and exit + ja StoreSpcl16Int +NormStore16Int: + xor edx,edx + sar ecx,16 ;Bring exponent down + cmp ecx,-1 ;Is it less than 1? + jle Under16Int + cmp ecx,15 + jg Over16Int + sub ecx,31 + neg ecx ;cl = amount to shift right + shrd edx,edi,cl + shrd edi,ebx,cl ;Collect round and sticky bits + shr ebx,cl ;Align integer +;See if we need to round + mov ecx,edi + or ecx,edx ;Throwing away any bits? + jz StoreIEEE16Int +;Result will not be exact--check rounding mode +Round16Int: + mov EMSEG:[CURerr],Precision ;Set flag on inexact result + test EMSEG:[CWcntl],RoundControl ;Check rounding control bits +.erre RCnear eq 0 + jnz NotNearest16Int ;Not just round-to-nearest + +;To perform "round even" when the round bit is set and the sticky bits +;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB +;is set, that will always force a round up (to even) if the round bit is +;set. If the LSB is zero, then the sticky bits remain zero and we always +;round down. 
;Round-to-nearest-even step of the 16-bit integer store (continues the
;Round16Int path).  ebx = integer magnitude, edi:edx = round and sticky bits.

        bt      ebx,0                   ;Look at LSB (for round even)
        adc     edx,-1                  ;CY set if sticky bits <>0
        adc     edi,(1 shl 31)-1        ;CY set if round up
        jnc     StoreIEEE16Int
        mov     EMSEG:[SWcc],RoundUp
        inc     ebx
StoreIEEE16Int:
        cmp     ebx,1 shl 15            ;Check for max value
        jae     CheckMax16
SignAndStore16Int:
        shl     eax,24                  ;Sign to MSB
        cdq                             ;Extend sign through edx
        xor     ebx,edx                 ;Complement
        sub     ebx,edx                 ; and increment if negative
        clc
Store16Int:
        mov     dseg:[esi],bx
;CY clear indicates no error
        ret

Under16Int:
;Exponent is -1 or less, so the magnitude is below 1.
;ZF set if exponent is -1
        xchg    edx,edi                 ;32-bit right shift
        xchg    edi,ebx                 ;ebx = 0 now
        jz      Round16Int              ;If exponent was -1, ready to round
        mov     EMSEG:[CURerr],Precision ;Set flag on inexact result
NotNearest16Int:
;We want to increase the magnitude if RCup and +, or RCdown and -
        mov     ah,EMSEG:[CWcntl]       ;Get rounding control
        sar     al,7                    ;Expand sign through whole byte
.erre   (not RCup and RoundControl) eq RCdown
        xor     ah,al                   ;Flip rounding bits if negative
        and     ah,RoundControl
        cmp     ah,RCup                 ;Rounding up?
        jnz     StoreIEEE16Int          ;No, chop it
        mov     EMSEG:[SWcc],RoundUp
        inc     ebx
;INC leaves CF alone, so test ZF for wraparound.  The shift for a 16-bit
;store leaves ebx far below 0FFFFFFFFH, so the range check at StoreIEEE16Int
;is what actually catches overflow.
        jnz     StoreIEEE16Int
        jmp     Over16Int

;*******************************************************************************

;******************
;Store Quad Integer
;******************
;
;Inputs:
;       edi = pointer to the source stack element
;       dseg:esi = 64-bit memory destination
;RoundToInteger is defined elsewhere.  As used here it returns CY set for a
;source that cannot be converted, otherwise the rounded magnitude in ebx:edi
;with the sign in ch bit 7.

EM_ENTRY eFISTP64
eFISTP64:
        mov     EMSEG:[PrevDataOff],esi ;Save operand pointer
        call    RoundToInteger
        jc      Invalid64Int
;Have integer in ebx:edi
;Sign in ch
        cmp     ebx,1 shl 31            ;Check for max value
        jae     CheckMax64
        or      ch,ch                   ;Check sign
        jns     Store64Int
;64-bit negation
        not     ebx
        neg     edi
        sbb     ebx,-1
Store64Int:
        mov     dseg:[esi],edi
        mov     dseg:[esi+4],ebx
        jmp     PopStack

CheckMax64:
;High dword is at least 80000000H, so the magnitude is at least 2^63.  The
;only representable case is exactly -2^63: high dword equal, low dword zero,
;sign negative.  Its magnitude bit pattern is already the two's complement
;answer, so it is stored without negation.
        ja      Over64Int               ;Flags still from cmp ebx,1 shl 31
        test    edi,edi                 ;Low bits set: magnitude exceeds 2^63
        jnz     Over64Int
        test    ch,bSign                ;Is it negative? (sign is in ch here)
        jnz     Store64Int              ;If so, answer is OK
Over64Int:
;Overflow on integer store is invalid according to IEEE
        mov     EMSEG:[CURerr],Invalid
Invalid64Int:
        test    EMSEG:[CWmask],Invalid  ;Is it masked?
+ jz DoNothing80 ;No--leave memory unchanged +;Store Indefinite +;For FSTP, we go ahead and do the pop even though it's empty + mov dword ptr dseg:[esi],0 + mov dword ptr dseg:[esi+4],80000000H ;64-bit integer indefinite + jmp PopStack + +;******************************************************************************* + +Empty80: +;It's empty--signal invalid operation + mov EMSEG:[CURerr],StackFlag+Invalid + test EMSEG:[CWmask],Invalid ;Is it masked? + jz DoNothing80 ;No--leave memory unchanged +;Store Indefinite +;For FSTP, we go ahead and do the pop even though it's empty + mov dword ptr dseg:[esi],0 + mov dword ptr dseg:[esi+4],0C0000000H + mov word ptr dseg:[esi+8],0FFFFH ;80-bit IEEE indefinite + jmp PopStack + +DoNothing80: + ret + +;*************** +;Store Temp Real +;*************** + +EM_ENTRY eFSTP80 +eFSTP80: + mov EMSEG:[PrevDataOff],esi ;Save operand pointer + mov eax,EMSEG:[edi].ExpSgn + cmp al,bTAG_EMPTY + jz Empty80 + + push offset PopStack + +StoreTempReal: + mov ebx,EMSEG:[edi].lManHi + mov edi,EMSEG:[edi].lManLo +;mantissa in ebx:edi, exponent in high eax, sign in ah bit 7, tag in al +;memory destination is dseg:esi + mov ecx,eax ;get copy of sign and tag + shr ecx,16 ;Bring exponent down + cmp al,bTAG_ZERO + jz StoreIEEE80 ;Skip bias if zero + add ecx,IexpBias-TexpBias ;Correct bias + cmp al,bTAG_DEN + jz Denorm80 +StoreIEEE80: + and eax,bSign shl 8 + or ecx,eax ;Combine sign with exponent + mov dseg:[esi],edi + mov dseg:[esi+4],ebx + mov dseg:[esi+8],cx + +; jmp PopStack + ret + +Denorm80: +;Must change it to a denormal + dec ecx + neg ecx ;Use as shift count + cmp cl,32 ;Long shift? 
;Continues Denorm80: the flags are those of the compare of cl with 32.
        jae     LongDenorm
        shrd    edi,ebx,cl
        shr     ebx,cl
        xor     ecx,ecx                 ;Exponent is zero
        jmp     StoreIEEE80

LongDenorm:
;edi must be zero if we have 32 bits to shift
        xchg    ebx,edi                 ;32-bit right shift
        shr     edi,cl                  ;shift count is modulo-32
        xor     ecx,ecx                 ;Exponent is zero
        jmp     StoreIEEE80
diff --git a/private/ntos/dll/i386/emtrig.asm b/private/ntos/dll/i386/emtrig.asm
new file mode 100644
index 000000000..53d981aca
--- /dev/null
+++ b/private/ntos/dll/i386/emtrig.asm
@@ -0,0 +1,863 @@
        subttl  emtrig.asm - Trig functions sine, cosine, tangent
        page
;*******************************************************************************
; Copyright (c) Microsoft Corporation 1991
; All Rights Reserved
;
;emtrig.asm - Trig functions sine, cosine, tangent
; by Tim Paterson
;
;Purpose:
; FCOS, FPTAN, FSIN, FSINCOS instructions
;Inputs:
; edi = [CURstk]
;
;Revision History:
;
; [] 09/05/91 TP Initial 32-bit version.
;
;*******************************************************************************


;XPi is the 66-bit value of Pi from the Intel manual
XPiHi           equ     0C90FDAA2H
XPiMid          equ     02168C234H
XPiLo           equ     0C0000000H      ;Extension of pi
PiOver4exp      equ     -1              ;Pi/4 ~= 3/4, so exponent is -1

TinyAngleExp    equ     -32             ;Smallest angle we bother with
MaxAngleExp     equ     63              ;Angle that's too big

Trig1Result:
;Trig function reduction routine used by functions returning 1 value
;(FSIN and FCOS)
;edi = [CURstk] = argument pointer
;Argument has already been checked for zero.
;ZF = (tag == bTAG_ZERO)
;The caller's compare against bTAG_ZERO is still in the flags, so CY means
;the tag is below bTAG_ZERO and the argument goes straight to reduction.
        jb      TrigPrem
;Tagged special
        mov     al,EMSEG:[edi].bTAG
        cmp     al,bTAG_DEN
        jz      TrigDenorm
;Discard our return address so the exits below return to the caller's
;caller.  This must adjust the full 32-bit ESP: a 16-bit "add sp,4" changes
;only the low word and wraps without carry when the return address sits in
;the last dword below a 64K boundary.
        add     esp,4                   ;Don't return to caller
        cmp     al,bTAG_INF
        jnz     SpclDestNotDen          ;Check for Empty or NAN
        mov     EMSEG:[SWcc],C2         ;Can't reduce infinity
        jmp     ReturnIndefinite

TrigDenorm:
        mov     EMSEG:[CURerr],Denormal
        test    EMSEG:[CWmask],Denormal ;Is denormal exception masked?
;Continues TrigDenorm: the flags are those of the test of the Denormal mask.
;
;Several exits below discard the return address pushed by the call into the
;reduction routine so that "ret" goes back to the caller's caller.  Each one
;adjusts the full 32-bit ESP.  A 16-bit "add sp,4" changes only the low word
;and wraps without carry when the return address sits in the last dword
;below a 64K boundary.
        jnz     TrigPrem                ;Yes, continue
        add     esp,4                   ;Don't return to caller
TrigRet:
        ret


Trig2Inf:
        mov     EMSEG:[SWcc],C2         ;Can't reduce infinity
        jmp     Trig2Indefinite

Trig2StackOver:
        mov     EMSEG:[SWcc],C1         ;Signal overflow
Trig2StackUnder:
        mov     EMSEG:[CURerr],Invalid+StackFlag
Trig2Indefinite:
        add     esp,4                   ;Don't return to caller
        call    ReturnIndefinite
        jz      TrigRet                 ;Unmasked, don't change registers
;Produce masked response
        mov     EMSEG:[CURstk],esi      ;Push stack
        mov     edi,esi
        jmp     ReturnIndefinite

Trig2Special:
        cmp     al,bTAG_DEN
        jz      TrigDenorm
        cmp     al,bTAG_INF
        jz      Trig2Inf
;Must be a NAN
        add     esp,4                   ;Don't return to caller
        call    DestNAN
        jz      TrigRet                 ;Unmasked, don't change registers
;Produce masked response: copy the element at edi into the new top at esi
        mov     EMSEG:[CURstk],esi      ;Push stack
        mov     eax,EMSEG:[edi].ExpSgn
        mov     EMSEG:[esi].ExpSgn,eax
        mov     eax,EMSEG:[edi].lManHi
        mov     EMSEG:[esi].lManHi,eax
        mov     eax,EMSEG:[edi].lManLo
        mov     EMSEG:[esi].lManLo,eax
        ret

Trig2Zero:
        add     esp,4                   ;Don't return to caller
        mov     EMSEG:[CURstk],esi
        mov     edi,esi
;Amazing coincidence: both FSINCOS and FPTAN return the same result for
;a zero argument:
; FSINCOS returns ST(0) = cos(0) = 1, ST(1) = sin(0) = 0.
; FPTAN returns ST(0) = 1 always, ST(1) = tan(0) = 0.
;Return zero has same sign as argument zero, so we don't need to touch
;it -- just push +1.0.
        jmp     ReturnOne

TrigOutOfRange:
        mov     EMSEG:[SWcc],C2         ;Signal argument not reduced
        add     esp,4                   ;Don't return to caller
        ret

PrevStackWrap   esi,Trig2               ;Tied to PrevStackElem below

Trig2Result:
;Trig function reduction routine used by functions returning 2 values
;(FSINCOS and FPTAN)
;edi = [CURstk] = argument pointer
        mov     esi,edi
        PrevStackElem   esi,Trig2       ;esi points to second result location
        mov     al,EMSEG:[edi].bTAG     ;Get tag
        cmp     al,bTAG_EMPTY           ;Stack underflow if empty
        jz      Trig2StackUnder
        cmp     EMSEG:[esi].bTAG,bTAG_EMPTY ;Stack overflow if not empty
        jnz     Trig2StackOver
        cmp     al,bTAG_ZERO            ;Is it Special?
+ ja Trig2Special + jz Trig2Zero +;Fall into TrigPrem + +;**** +;TrigPrem +; +;This routine reduces an angle in radians to the range [0, pi/4]. +;Angles in odd-numbered octants have been subtracted from pi/4. +;It uses a 66-bit value for pi, as required by the 387. +;TrigPrem uses the same two-stage algorithm as FPREM (see +;emfprem.asm). However, it is limited to an argument < 2^63. +; +;Inputs: +; edi = [CURstk] +;Outputs: +; ebx:esi = remainder, normalized +; high ecx = exponent, cl = tag +; al = octant +; edi = [CURstk] + +TrigPrem: + mov EMSEG:[Result],edi + mov eax,EMSEG:[edi].lManLo + mov edx,EMSEG:[edi].lManHi + movsx ebx,EMSEG:[edi].wExp + cmp ebx,MaxAngleExp + jge TrigOutOfRange + xor edi,edi ;Extend dividend + xor esi,esi ;Quotient, in case we skip stage 1 +.erre PiOver4exp eq -1 + inc ebx ;Subtract exponent of pi/4 + jl ExitTrigPrem ;If dividend is smaller, return it. +;We now know that 0 <= ExpDif < 64, so it fits in bl. + cmp bl,31 ;Do we need to do stage 1? + jl FitPi ;No, start stage 2 + +;FPREM stage 1 +; +;Exponent difference is at least 31. Use 32-bit division to compute +;quotient and exact remainder, reducing exponent difference by 31. +; +;edx:eax = dividend +;ebx = exponent difference + +;Shift dividend right one bit to be sure DIV instruction won't overflow +;This means we'll be reducing the exponent difference by 31, not 32 + xor ebp,ebp ;Dividend extension + shrd ebp,eax,1 + shrd eax,edx,1 + shr edx,1 + + sub bl,31 ;Exponent reduced + mov ecx,XPiHi + div ecx ;Guess a quotient "digit" + +;Check out our guess. +;Currently, remainder in edx = (high dividend) - (quotient * high pi). +;(High dividend is the upper 64 bits--ebp has 1 bit.) The definition +;of remainder is (all dividend) - (quotient * all pi). So if we +;subtract (quotient * low pi) from edx:ebp, we'll get the true +;remainder. If it's negative, our guess was too big. 
+ + mov esi,eax ;Save quotient + mov ecx,edx ;Save remainder + +;The pi/4 we use has two bits set below the first 64 bits. This means +;we must add another 3/4 of the quotient into the amount to subtract, +;which we'll compute by rounding the low 32 bits up 1, then subtracting +;1/4 of quotient. But since we're computing the amount to subtract from +;the remainder, we'll add the 1/4 of the quotient to the remainder instead +;of subtracting it from the amount to subtract. + +.erre XPiLo eq (3 shl 30) + mov eax,XPiMid+1 + mul esi ;Quotient * low pi +;Note that ebp is either 0 or 800...00H + shr ebp,30 ;Move down to low end + shld ebp,esi,30 ;Move back up, adding 1/4 of quotient + mov edi,esi ;Another copy of quotient + shl edi,30 ;Keep last two bits +;edx:eax has amount to subtract to get correct remainder from ecx:ebp:edi + sub ebp,eax + sbb ecx,edx ;Subtract from remainder + mov eax,ebp + mov edx,ecx ;Remainder back to edx:eax:edi + jnc TrigPremNorm ;Was quotient OK? +TrigCorrect: + dec esi ;Quotient was too big + add edi,XPiLo + adc eax,XPiMid ;Add divisor back into remainder + adc edx,XPiHi + jnc TrigCorrect ;Repeat if quotient is still too big + jmp TrigPremNorm + +;FPREM stage 2 +; +;Exponent difference is less than 32. Use restoring long division to +;compute quotient bits until exponent difference is zero. Note that we +;often get more than one bit/loop: BSR is used to scan off leading +;zeros each time around. Since the divisor is normalized, we can +;instantly compute a zero quotient bit for each leading zero bit. + +TrigPremLoop: +;edx:eax:edi = dividend (remainder) minus pi/4 +;esi = quotient +;ebx = exponent difference +; +;If D is current dividend and p is pi/4, then we have edx:eax:edi = D - p, +;which is negative. We want 2*D - p, which is positive. +;2*D - p = 2*(D - p) + p. 
+ add edi,edi ;2*(D - p) + adc eax,eax + adc edx,edx + + add edi,XPiLo ;2*(D-p) + p = 2*D - p + adc eax,XPiMid + adc edx,XPiHi + + add esi,esi ;Double quotient too + dec ebx ;Decrement exponent difference +PiFit: + inc esi +TrigPremNorm: + bsr ecx,edx ;Find first 1 bit + jz TrigPremZero + not cl + and cl,1FH ;Convert bit no. to shift count + sub ebx,ecx ;Reduce exponent difference + jl TrigTooFar + shld edx,eax,cl + shld eax,edi,cl + shl edi,cl ;Finish normalize shift + shl esi,cl ;Shift quotient +FitPi: +;Dividend could be larger or smaller than divisor + sub edi,XPiLo + sbb eax,XPiMid + sbb edx,XPiHi + jnc PiFit +;Couldn't subtract pi/2 from dividend. +;edx:eax:edi = dividend - pi/4, which is negative + or ebx,ebx ;Is exponent difference zero? + jg TrigPremLoop +;If quotient (octant number) is odd, we have subtracted an odd number of +;pi/4's. However, simple angle reductions work in multiples of pi/2. +;We will keep the extra pi/4 we just subtracted if the octant was odd. +;This will give a result range of [-pi/4, pi/4]. + test esi,1 ;Is octant odd? + jz EvenOctant +NegPremResult: +;-pi/4 < dividend < 0. Negate this since we use sign-magnitude representation. + not edx ;96-bit negation + not eax + neg edi + sbb eax,-1 + sbb edx,-1 +;May need to normalize + bsr ecx,edx + jz TrigNorm32 + lea ebx,[ebx+ecx-31] ;Fix up exponent for normalization + not cl ;Convert bit no. to shift count +TrigShortNorm: + shld edx,eax,cl + shld eax,edi,cl + shl edi,cl ;Finish normalize shift +RoundPrem: +;Must round 66-bit result to 64 bits. +;To perform "round even" when the round bit is set and the sticky bits +;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB +;is set, that will always force a round up (to even) if the round bit is +;set. If the LSB is zero, then the sticky bits remain zero and we always +;round down. This rounding rule is implemented by adding RoundBit-1 +;(7F..FFH), setting CY if round up. + bt eax,0 ;Is mantissa even or odd? 
(set CY) + adc edi,(1 shl 31)-1 ;Sum LSB & sticky bits--CY if round up + adc eax,0 + adc edx,0 +ExitTrigPrem: +;edx:eax = remainder, normalized +;esi = quotient +;ebx = exponent difference, zero or less +.erre PiOver4exp eq -1 + dec ebx ;True exponent +.erre bTAG_SNGL eq 0 + shrd ecx,ebx,16 ;Exponent to high ecx + mov ebx,edx ;High mant. to ebx + xchg esi,eax ;Low mant. to esi, octant to eax + or esi,esi ;Any bits in low half? +.erre bTAG_VALID eq 1 +.erre bTAG_SNGL eq 0 + setnz cl ;if low half==0 then cl=0 else cl=1 + mov edi,EMSEG:[CURstk] + test EMSEG:[edi].bSgn,bSign ;Was angle negative? + jnz FlipOct ;Yes, flip octant over + ret + +FlipOct: +;Angle was negative. Subtract octant from 7. + neg al + add al,7 + ret + +EvenOctant: +;Restore dividend + add edi,XPiLo + adc eax,XPiMid + adc edx,XPiHi + jmp RoundPrem + +TrigTooFar: +;Exponent difference in ebx went negative when reduced by shift count in ecx. +;We need a quotient corresponding to exponent difference of zero. + add ecx,ebx ;Compute previous exponent difference + shl esi,cl ;Fix up quotient + sub ecx,ebx ;Restore shift count + test esi,1 ;Is octant odd? + jz TrigShortNorm ;No, go normalize + xor ebx,ebx ;Restore old exponent difference (zero) +SubPiOver4: +;We are here if exponent difference was zero and octant is odd. +;As noted above, we need to reduce the angle by a multiple of pi/2, +;not pi/4. We will subtract one more pi/4, which will make the +;result range [-pi/4, pi/4]. + sub edi,XPiLo + sbb eax,XPiMid + sbb edx,XPiHi + jmp NegPremResult + +TrigPremZero: +;High dword of remainder is all zero, so we've reduced exponent difference +;by 32 bits and overshot. We need a quotient corresponding to exponent +;difference of zero, so we just shift it by the original difference. Then +;we need to normalize the rest of the remainder. + mov ecx,ebx ;Get exponent difference + shl esi,cl ;Fix up quotient + test esi,1 ;Is octant odd? 
+ jnz SubPiOver4 ;Yes, go subtract another pi/4 +TrigNorm32: + bsr ecx,eax + jz TinyTrig + lea ebx,[ebx+ecx-31-32] ;Fix up exponent for normalization + mov edx,eax + mov eax,edi ;Shift left by 32 bits + not cl ;Convert bit no. to shift count + shld edx,eax,cl ;Normalize remainder + shl eax,cl + jmp ExitTrigPrem + +TinyTrig: +;Upper 64 bits of remainder are all zero. We are assured that the extended +;remainder is never zero, though. + mov edx,edi ;Shift left 64 bits + bsr ecx,edi + lea ebx,[ebx+ecx-31-64] ;Fix up exponent for normalization + not cl ;Convert bit no. to shift count + shl edx,cl ;Normalize + jmp ExitTrigPrem + +;******************************************************************************* + +EM_ENTRY eFCOS +eFCOS: + and [esp].[OldLongStatus+4],NOT(C2 SHL 16) ;clear C2 + cmp EMSEG:[edi].bTAG,bTAG_ZERO + jz ReturnOne + call Trig1Result +;ebx:esi,ecx = reduced argument +;eax = octant + mov ch,80H ;Assume negative + test al,110B ;Negative in octants 2 - 5 + jpo @F ;Occurs when 1 of these bits are set + xor ch,ch ;Actually positve +@@: + test al,011B ;Look for octants 0,3,4,7 + jpo TakeSine ;Use sine if not +TakeCosine: + cmp ecx,TinyAngleExp shl 16 ;Is angle really small? + jl CosReturnOne ;cos(x) = 1 for tiny x +CosNotTiny: + mov edi,offset tCosPoly +;Note that argument needs to be saved in ArgTemp (by EvalPolySetup) in case +;we were called from eFSINCOS and we'll need the arg for the sine. Argument +;is not needed for cosine, however (just its square). + call EvalPolySetup ;In emftran.asm + mov ch,EMSEG:[ArgTemp].bSgn ;Get sign we already figured out +TransUnround: +;The last operation performed a simple round nearest, without setting the +;C1 status bit if round up occured. We reverse this last rounding now +;so we can do the user's selected rounding mode. We also ensure that +;the answer is never exact. 
+ sub eax,(1 shl 31)-1 ;Sum LSB & sticky bits--CY if round up + jz UnroundExact ;Answer looks exact, but it's not + sbb esi,0 + sbb ebx,0 + jns PolyDropExponent ;We had rounded up exponent too +FinalTransRound: +;A jump through [TransRound] is only valid if the number is known not to +;underflow. Unmasked underflow requires [RoundMode] be set. + mov edx,EMSEG:[TransRound] + mov EMSEG:[RoundMode],edx + call edx ;Perform user's rounding +RestoreRound: +;Restore rounding vectors + mov EMSEG:[ZeroVector],offset SaveResult + mov eax,EMSEG:[SavedRoundMode] + mov EMSEG:[RoundMode],eax + ret + +UnroundExact: + inc eax ;Let's say our answer is a bit small + jmp FinalTransRound + +PolyDropExponent: + sub ecx,1 shl 16 ;Decrement exponent + or ebx,1 shl 31 ;Set MSB + jmp FinalTransRound + + +SinRet: + ret + +SaveTinySin: +;Argument in ebx:esi,ecx is small enough so that sin(x) = x, which happens +;when x - x^3/6 = x [or 1 - x^2/6 = 1]. Note that the infinitely precise +;result is slightly less than the argument. To get the correct answer for +;any rounding mode, we decrement the argument and set up for rounding. + mov eax,-1 ;Set up rounding bits + sub esi,1 + sbb ebx,0 ;Drop mantissa by one + js FinalTransRound ;Still normalized? +;mantissa must have been 800..000H, set it to 0FFF...FFFH and drop exponent + mov ebx,eax ;ebx = -1 + sub ecx,1 shl 16 ;Drop exponent by one + jmp FinalTransRound + + +EM_ENTRY eFSIN +eFSIN: + and [esp].[OldLongStatus+4],NOT(C2 SHL 16) ;clear C2 + cmp EMSEG:[edi].bTAG,bTAG_ZERO + jz SinRet ;Return zero for zero argument + call Trig1Result + mov ch,al + shl ch,7-2 ;Move bit 2 to bit 7 as sign bit +ReducedSine: +;ebx:esi,ecx = reduced argument +;ch = correct sign +;eax = octant + test al,011B ;Look for octants 0,3,4,7 + jpo TakeCosine ;Use cosine if not +TakeSine: + cmp ecx,TinyAngleExp shl 16 ;Is angle really small? + jl SaveTinySin ;sin(x) = x for tiny x + +;The polynomial for sine is sin(x) = x * P(x^2). 
However, the degree zero +;coefficient of P() is 1, so P() = R() + 1, where R() has no degree zero +;term. Thus sin(x) = x * [R(x^2) + 1] = x * R(x^2) + x. +; +;What's important here is that adding 1 to R(x^2) can blow away a lot of +;precision just before we do that last multiply by x. Note that x < pi/4 < 1, +;so that x^2 is often << 1. The precision is lost when R(x^2) is shifted +;right to align its binary point with 1.0. This can cause a loss of at +;least 1 bit of precision after the final multiply by x in addition to +;rounding errors. +; +;To avoid this precision loss, we use the alternate form given above, +;sin(x) = x * R(x^2) + x. Instead of adding 1.0 and multiplying by x, +;we multiply by x and add x--exactly the same level of difficulty. But +;the mulitply has all of R(x^2)'s precision available. +; +;Because the polynomial R() has no zero-degree term, we give EvalPoly +;one degree less (so we don't have to add zero as the last term). +;Then we have to multiply once more by x^2 since we left the loop early. + +SineNotTiny: + mov edi,offset tSinPoly + call EvalPolySetup ;In emftran.asm +SineFinish: + +ifdef NT386 + mov edi,YFloatTemp +else + mov edi,offset edata:FloatTemp +endif + call PolyMulDouble ;Last coefficient in R(x^2) + +ifdef NT386 + mov edi,YArgTemp ;Point to original x +else + mov edi,offset edata:ArgTemp ;Point to original x +endif + + call PolyMulDouble ;Compute x * R(x^2) + +ifdef NT386 + mov edi,YArgTemp ;Point to original x +else + mov edi,offset edata:ArgTemp ;Point to original x +endif + + push offset TransUnround + jmp PolyAddDouble ;Compute x * R(x^2) + x + + +EM_ENTRY eFPTAN +eFPTAN: + and [esp].[OldLongStatus+4],NOT(C2 SHL 16) ;clear C2 + call Trig2Result + push offset TanPushOne ; Push 1.0 when we're all done +;ebx:esi,ecx = reduced argument +;eax = octant + mov ch,al + shl ch,7-1 ;Move bit 1 to bit 7 as sign bit +;Note that ch bit 6 now has even/odd octant, which we'll need when we're +;done to see if we should take reciprocal. 
;Tangent path: tiny reduced angles skip the polynomial evaluation.
        cmp     ecx,TinyAngleExp shl 16 ;Is angle really small?
        jl      TinyTan
        mov     edi,offset tTanPoly
        call    Eval2Poly               ;In emftran.asm
        mov     edi,EMSEG:[CURstk]      ;Point to first result
        push    offset TransUnround     ;Return address of divide
        test    EMSEG:[ArgTemp].bSgn,0C0H ;Check low 2 bits of octant
;Given the reduced input range, the result can never overflow or underflow.
;It must then be safe to assume neither operand is zero.
        jpe     DivDouble               ;Tan() octants 0,3,4,7
        jmp     DivrDouble              ;CoTan()

TinyTan:
        test    ch,0C0H                 ;Check low 2 bits of octant
        jpe     SaveTinySin             ;Octants 0,3,4,7: tan(x) = x for tiny x
;Need reciprocal of reduced argument
        mov     edi,esi
        mov     esi,ebx                 ;Mantissa in esi:edi
        mov     ebx,ecx                 ;ExpSgn to ebx
        mov     edx,1 shl 31            ;Load 1.0
        xor     eax,eax
.erre   TexpBias eq 0
        xor     ecx,ecx                 ;Sign and exponent are zero
;dividend mantissa in edx:eax, exponent in high ecx, sign in ch bit 7
;divisor mantissa in esi:edi, exponent in high ebx, sign in bh bit 7
        push    offset TransUnround     ;Return address of divide
;Note that this can never overflow, because the reduced argument is never
;smaller than about 2^-65.
        jmp     DivDoubleReg


        PrevStackWrap   edi,Tan         ;Tied to PrevStackElem below

TanPushOne:
        PrevStackElem   edi,Tan         ;edi points to second result location
        mov     EMSEG:[CURstk],edi
ReturnOne:
;Store 1.0 at [edi]: mantissa 8000_0000:0000_0000, unbiased exponent 0.
        mov     EMSEG:[edi].lManLo,0
        mov     EMSEG:[edi].lManHi,1 shl 31
        mov     EMSEG:[edi].ExpSgn,(0-TexpBias) shl 16 + bTAG_SNGL
        ret


        PrevStackWrap   edi,SinCos      ;Tied to PrevStackElem below

;eFSINCOS
;
;Produces two results.  The first is written at the current [CURstk]; the
;second goes to the element selected by PrevStackElem, which then becomes
;the new [CURstk].  After Trig2Result the code below treats al as the octant
;and ebx:esi / high ecx as the reduced argument.

eFSINCOS:
        and     [esp].[OldLongStatus+4],NOT(C2 SHL 16)  ;clear C2
        call    Trig2Result
;Figure out signs
        mov     ch,al                   ;Start with sign of sine
        shl     ch,7-2                  ;Move bit 2 to bit 7 as sign bit
        mov     ah,80H                  ;Assume sign of cosine is negative
        test    al,110B                 ;Negative in octants 2 - 5
        jpo     @F                      ;Occurs when 1 of these bits are set
        xor     ah,ah                   ;Actually positive
@@:
;ch = sign of sine
;ah = sign of cosine
        cmp     ecx,TinyAngleExp shl 16 ;Is angle really small?
        jl      TinySinCos
        push    eax                     ;Save octant and sign of cosine
        call    ReducedSine             ;On exit, edi = [CURstk]
        pop     eax
;The Sin() function restored the rounding vectors to normal.  Set them back.
        mov     EMSEG:[RoundMode],offset PolyRound
        mov     EMSEG:[ZeroVector],offset PolyZero
        PrevStackElem   edi,SinCos      ;edi points to second result location
        mov     EMSEG:[CURstk],edi
        mov     EMSEG:[Result],edi
;Load x^2 back into registers
        mov     ecx,EMSEG:[FloatTemp].ExpSgn
        mov     ebx,EMSEG:[FloatTemp].lManHi
        mov     esi,EMSEG:[FloatTemp].lManLo
        mov     EMSEG:[ArgTemp].bSgn,ah ;Save sign
        test    al,011B                 ;Look for octants 0,3,4,7
        jpo     FastSine                ;Use sine if not
        mov     edi,offset tCosPoly
        call    EvalPoly                ;In emftran.asm
        mov     ch,EMSEG:[ArgTemp].bSgn ;Get sign we already figured out
        jmp     TransUnround

FastSine:
;Tail-call EvalPoly with SineFinish pushed as its return address.
        mov     edi,offset tSinPoly
        push    offset SineFinish
        jmp     EvalPoly                ;In emftran.asm

TinySinCos:
;ch = sign of sine
;ah = sign of cosine
;ebx:esi,high ecx = reduced argument
;edi = [CURstk]
        test    al,011B                 ;Look for octants 0,3,4,7
        jpo     TinyCosSin              ;Take cosine first if not
        push    eax
        call    SaveTinySin             ;For sine, arg is result
        pop     ecx
;edi = [CURstk]
;ch = sign of cosine
;Set cosine to 1.0
        PrevStackElem   edi,TinySinCos  ;edi points to second result location
        mov     EMSEG:[CURstk],edi
        mov     EMSEG:[Result],edi
CosReturnOne:
;Cosine is nearly equal to 1.0.  Put in next smaller value and round it.
        mov     ebx,-1
        mov     esi,ebx                 ;Set mantissa to -1
        mov     eax,ebx                 ;Set up rounding bits
.erre   TexpBias eq 0
        and     ecx,bSign shl 8         ;Keep only sign
        sub     ecx,1 shl 16            ;Exponent of -1
;A jump through [TransRound] is only valid if the number is known not to
;underflow.  Unmasked underflow requires [RoundMode] be set.
        jmp     EMSEG:[TransRound]

        PrevStackWrap   edi,TinySinCos

        PrevStackWrap   edi,TinyCosSin

TinyCosSin:
;Sine is nearly 1.0, cosine is argument
;
;ch = sign of sine
;ah = sign of cosine
;ebx:esi,high ecx = reduced argument
;edi = [CURstk]
        xchg    ah,ch                   ;Cosine sign to ch, sine sign to ah
        push    edi                     ;Save place for sine
        PrevStackElem   edi,TinyCosSin  ;edi points to second result location
        mov     EMSEG:[CURstk],edi
        mov     EMSEG:[Result],edi
        push    eax
        call    SaveTinySin             ;For cosine (here), arg is result
        pop     ecx
;ch = sign of sine
        pop     EMSEG:[Result]          ;Set up location for sine
        jmp     CosReturnOne

;*******************************************************************************

;********************* Polynomial Coefficients *********************

;These polynomial coefficients were all taken from "Computer Approximations"
;by J.F. Hart (reprinted 1978 w/corrections).  All calculations and
;conversions to hexadecimal were done with a character-string calculator
;written in Visual Basic with precision set to 30 digits.  Once the constants
;were typed into this file, all transfers were done with cut-and-paste
;operations to and from the calculator to help eliminate any typographical
;errors.


tCosPoly        label   word

;These constants are derived from Hart #3824: cos(x) = P(x^2),
;accurate to 19.45 digits over interval [0, pi/4].  The original
;constants in Hart required that the argument x be divided by pi/4.
;These constants have been scaled so this is no longer required.
;Scaling is done by multiplying the constant by a power of 4/pi.
;The power is given in the table.
+ + dd 7 ;Degree seven + +; Original Hart constant power Scaled constant +; +;-0.38577 62037 2 E-12 14 -0.113521232057839395845871741043E-10 +;Hex value: 0.C7B56AF786699CF1BD13FD290 HFFDC + dq 0C7B56AF786699CF2H + dw (bSign shl 8)+bTAG_VALID,0FFDCH-1 + +;+0.11500 49702 4263 E-9 12 +0.208755551456778828747793797596E-8 +;Hex value: 0.8F74AA3CCE49E68D6F5444A18 HFFE4 + dq 08F74AA3CCE49E68DH + dw bTAG_VALID,0FFE4H-1 + +;-0.24611 36382 63700 5 E-7 10 -0.275573128656960822243472872247E-6 +;Hex value: 0.93F27B7F10CC8A1703EFC8A04 HFFEB + dq 093F27B7F10CC8A17H + dw (bSign shl 8)+bTAG_VALID,0FFEBH-1 + +;+0.35908 60445 88581 953 E-5 8 +0.248015872828994630247806807317E-4 +;Hex value: 0.D00D00CD6BB3ECD17E10D5830 HFFF1 + dq 0D00D00CD6BB3ECD1H + dw bTAG_VALID,0FFF1H-1 + +;-0.32599 18869 26687 55044 E-3 6 -0.138888888888589604343951947246E-2 +;Hex value: 0.B60B60B609B165894CFE522AC HFFF7 + dq 0B60B60B609B16589H + dw (bSign shl 8)+bTAG_VALID,0FFF7H-1 + +;+0.15854 34424 38154 10897 54 E-1 4 +0.416666666666664302573692446873E-1 +;Hex value: 0.AAAAAAAAAAA99A1AF53042B08 HFFFC + dq 0AAAAAAAAAAA99A1BH + dw bTAG_VALID,0FFFCH-1 + +;-0.30842 51375 34042 45242 414 E0 2 -0.499999999999999992843582920899E0 +;Hex value: 0.FFFFFFFFFFFFFEF7F98D3BFA8 HFFFF + dq 0FFFFFFFFFFFFFEF8H + dw (bSign shl 8)+bTAG_VALID,0FFFFH-1 + +;+0.99999 99999 99999 99996 415 E0 0 (no change) +;Hex value 0.FFFFFFFFFFFFFFFF56B402618 H0 + dq 0FFFFFFFFFFFFFFFFH + dw bTAG_VALID,00H-1 + + +tSinPoly label word + +;These constants are derived from Hart #3044: sin(x) = x * P(x^2), +;accurate to 20.73 digits over interval [0, pi/4]. The original +;constants in Hart required that the argument x be divided by pi/4. +;These constants have been scaled so this is no longer required. +;Scaling is done by multiplying the constant by a power of 4/pi. +;The power is given in the table. + + dd 7-1 ;Degree seven, but the last coefficient + ;is 1.0 and is not listed here. 
+ +; Original Hart constant power Scaled constant +; +;-0.20225 31292 93 E-13 15 -0.757786788401271156262125540409E-12 +;Hex value: 0.D54C4AF2B524F0F2D6411C90A HFFD8 + dq 0D54C4AF2B524F0F3H + dw (bSign shl 8)+bTAG_VALID,0FFD8H-1 + +;+0.69481 52035 0522 E-11 13 +0.160583476232246065559545749398E-9 +;Hex value: 0.B0903AF085DA66030F16E43BC HFFE0 + dq 0B0903AF085DA6603H + dw bTAG_VALID,0FFE0H-1 + +;-0.17572 47417 61708 06 E-8 11 -0.250521047382673309542092418731E-7 +;Hex value: 0.D73229320D2AF05971AC96FF4 HFFE7 + dq 0D73229320D2AF059H + dw (bSign shl 8)+bTAG_VALID,0FFE7H-1 + +;+0.31336 16889 17325 348 E-6 9 +0.275573192133901687156480447942E-5 +;Hex value: 0.B8EF1D2984D2FBA28A9CC9DEE HFFEE + dq 0B8EF1D2984D2FBA3H + dw bTAG_VALID,0FFEEH-1 + +;-0.36576 20418 21464 00052 9 E-4 7 -0.198412698412531058609618529749E-3 +;Hex value: 0.D00D00D00C3FDDD7916E5CB28 HFFF4 + dq 0D00D00D00C3FDDD8H + dw (bSign shl 8)+bTAG_VALID,0FFF4H-1 + +;+0.24903 94570 19271 62752 519 E-2 5 +0.83333333333333203341753387264E-2 +;Hex value: 0.8888888888884C95D619A0343 HFFFA + dq 08888888888884C96H + dw bTAG_VALID,0FFFAH-1 + +;-0.80745 51218 82807 81520 2582 E-1 3 -0.166666666666666666281276062229E0 +;Hex value: 0.AAAAAAAAAAAAAA8E3AD80EAB8 HFFFE + dq 0AAAAAAAAAAAAAA8EH + dw (bSign shl 8)+bTAG_VALID,0FFFEH-1 + +;+0.78539 81633 97448 30961 41845 E0 1 +0.99999999999999999999812025812E0 +;Hex value: 0.FFFFFFFFFFFFFFFFF71F88110 H0 +; dq 8000000000000000H ;This constant of 1.0 omitted here. +; dw bTAG_VALID,0 ; It is handled in code. + + +tTanPoly label word + +;These constants are derived from Hart #4286: tan(x) = x * P(x^2) / Q(x^2), +;accurate to 19.94 digits over interval [0, pi/4]. The original +;constants in Hart required that the argument x be divided by pi/4. +;These constants have been scaled so this is no longer required. +;Scaling is done by multiplying the constant by the same power of 4/pi +;as the power of x the constant is used on. 
However, the highest +;degree coefficient of Q() is 1, and after scaling this way it would +;become (4/pi)^8. In order to keep this coefficient equal to one, +;we scale everything again by (pi/4)^8. This scaling is partially +;canceled by the original scaling by powers of 4/pi, and the net +;resulting power of pi/4 is given in the table. + + + dd 3 ;First poly is degree 3 + +; Original Hart constant power Scaled constant +; +;-.45649 31943 86656 31873 96113 7 E2 1 -35.8528916474714232910463077546 +;Hex value: 0.8F695C6D93AF6F97B6E022AB3 H6 + dq 08F695C6D93AF6F98H + dw (bSign shl 8)+bTAG_VALID,06H-1 + +;+.14189 85425 27617 78388 00394 831 E5 3 +6874.60229709782436592720603503 +;Hex value: 0.D6D4D181240D0D08C88DF4AA6 HD + dq 0D6D4D181240D0D09H + dw bTAG_VALID,0DH-1 + +;-.89588 84400 67680 41087 29639 541 E6 5 -267733.884797157298951145495276 +;Hex value: 0.82BABC504220C62B1D0722684 H13 + dq 082BABC504220C62BH + dw (bSign shl 8)+bTAG_VALID,013H-1 + +;+.10888 60043 72816 87521 38857 983 E8 7 +2007248.9111748838841548144685 +;Hex value: 0.F506874A160EB9C0994AADD6A H15 + dq 0F506874A160EB9C1H + dw bTAG_VALID,015H-1 + + + + dd 4 ;Second poly is degree 4 +;NOTE: Eval2Poly assumes the first coefficient is 1.0, so it is omitted + +; Original Hart constant power Scaled constant +; +;-.10146 56190 25288 53387 54401 947 E4 2 -625.890950057027419879480354834 +;Hex value: 0.9C790553635355A95241A5324 HA + dq 09C790553635355A9H + dw (bSign shl 8)+bTAG_VALID,0AH-1 + +;+.13538 27128 05119 09382 89294 872 E6 4 +51513.6992033752080924797647367 +;Hex value: 0.C939B2FEFE0DC585E649870FE H10 + dq 0C939B2FEFE0DC586H + dw bTAG_VALID,010H-1 + +;-.39913 09518 03516 51504 43427 94 E7 6 -936816.855188785264866481436899 +;Hex value: 0.E4B70DAEDA6F89E5A7CE626FA H14 + dq 0E4B70DAEDA6F89E6H + dw (bSign shl 8)+bTAG_VALID,014H-1 + +;+.13863 79666 35676 29165 33913 361 E8 8 +2007248.91117488388417770850458 +;Hex value: 0.F506874A160EB9C0CCD8313BC H15 + dq 0F506874A160EB9C1H + dw bTAG_VALID,015H-1 diff 
--git a/private/ntos/dll/i386/emulator.asm b/private/ntos/dll/i386/emulator.asm new file mode 100644 index 000000000..73a2c1c36 --- /dev/null +++ b/private/ntos/dll/i386/emulator.asm @@ -0,0 +1,267 @@ + page 78,132 + title emulator - 80387 emulator for flat 32-bit OS +;******************************************************************************* +; Copyright (c) Microsoft Corporation 1991 +; All Rights Reserved +; +;emulator.asm - 80387 emulator +; by Tim Paterson +; +;Revision History: +; +; [] 09/05/91 TP Initial 32-bit version. +; [] 11/13/92 JWM Bug fixes for esp-indexed addressing, handling of denormals. +; [] 01/18/93 JWM Bug fixes for preservation of condition & error codes. +; +;******************************************************************************* + + .386p + .387 + .model flat,Pascal + option oldstructs ;JWM + +;******************************************************************************* +; +; Define segments. +; +;******************************************************************************* + + +;These equates give access to the program that's using floating point. +dseg equ ss ;Segment of program's data +cseg equ es ;Segment of program's code + +edata segment dword public 'FAR_DATA' +edata ends + +ecode segment dword public 'CODE' +ecode ends + + +assume cs:ecode + +ifdef NT386 +assume ds:nothing +assume fs:edata +else +assume ds:edata +assume fs:nothing +endif + +assume es:nothing +assume gs:nothing +assume ss:nothing + +ifdef NT386 + include ks386.inc + include nt386npx.inc + include callconv.inc + include ..\..\vdm\i386\vdmtb.inc +endif ; NT386 + +;******************************************************************************* +; +; List external functions. 
;
;*******************************************************************************

ifdef NT386
        EXTRNP  _NtRaiseException,3
        EXTRNP  _RtlRaiseStatus,1
        EXTRNP  _ZwRaiseException,3
        EXTRNP  _NpxNpSkipInstruction,1
endif           ; NT386

ifdef _DOS32EXT
        extern  _SelKrnGetEmulData:NEAR
        extern  DOS32RAISEEXCEPTION:NEAR
endif           ; _DOS32EXT

ifdef _CRUISER
        extern  DOS32IRAISEEXCEPTION:near
endif           ; CRUISER


;*******************************************************************************
;
;   Segment override macro (for NT)
;
;*******************************************************************************

;EMSEG is the segment register used for every emulator data reference:
;FS on NT, DS otherwise.
ifdef NT386
    EMSEG   EQU     FS
else
    EMSEG   EQU     DS
endif

;;*******************************************************************************
;;
;;   Include some more macros and constants.
;;
;;*******************************************************************************
;
        include em387.inc
        include emstack.inc             ; stack management macros
;**************************************************************************
;**************************************************************************
;**************************************************************************
subttl  emulator.asm - Emulator Task DATA Segment
page
;*********************************************************************;
;                                                                     ;
;                 Emulator Task DATA Segment                          ;
;                                                                     ;
;*********************************************************************;

edata   segment

ifdef NT386
        db      size EmulatorTebData dup (?)    ; Make space for variables
else            ; ifdef NT386

Numlev          equ     8               ; Number of stack registers

InitControlWord equ     37FH            ; Default - Round near,
                                        ; 64 bits, all exceptions masked

RoundMode       dd      ?               ;Address of rounding routine
SavedRoundMode  dd      ?               ;For restoring RoundMode
ZeroVector      dd      ?               ;Address of sum-to-zero routine
TransRound      dd      ?               ;Round mode w/o precision
Result          dd      ?               ;Result pointer

PrevCodeOff     dd      ?
PrevDataOff     dd      ?

;(See note below on 'Emulator stack area')
CURstk          dd      ?
XBEGstk         db      (Numlev-1)*Reg87Len dup(?)      ;Allocate register 1 - 7

;NOTE(review): XINITstk and XENDstk are referenced below, but no labels with
;those names are defined in this segment as shown (only registers 1 - 7 are
;allocated at XBEGstk).  Confirm where the register 0 slot and the end-of-stack
;label come from before building the non-NT386 configuration.
BEGstk          EQU     offset edata:XBEGstk
INITstk         EQU     offset edata:XINITstk
ENDstk          EQU     offset edata:XENDstk

FloatTemp       db      Reg87Len dup(?)
ArgTemp         db      Reg87Len dup(?)

public  Trap7Handler
Trap7Handler    dd      0

;We're DWORD aligned at this point

LongStatusWord  label   dword           ;Combined Einstall, CURerr, StatusWord
.erre   Einstall eq $
.erre   StatusWord eq $+1
.erre   CURerr eq $+3

Einstall        db      0               ; Emulator installed flag

StatusWord      label   word
    SWerr       db      ?               ; Initially no exceptions (sticky flags)
CurErrCond      label   word            ; Combined error and condition codes
    SWcc        db      ?               ; Condition codes from various operations

    CURerr      db      ?               ; initially 8087 exception flags clear
                                        ; this is the internal flag reset after
                                        ; each operation to detect per instruction
                                        ; errors

LongControlWord label   dword           ;Combined ControlWord and ErrMask
.erre   ControlWord eq $
.erre   ErrMask eq $+2

ControlWord     label   word
    CWmask      db      ?               ; exception masks
    CWcntl      db      ?               ; arithmetic control flags

    ErrMask     db      ?
    dummy       db      ?

endif           ; ifdef NT386 else

;*******************************************************************************
;
;   Emulator stack area
;
;The top of stack pointer CURstk is initialized to the last register
;in the list; on a real 8087, this corresponds to hardware register 0.
;The stack grows toward lower addresses, so the first push (which is
;hardware register 7) is stored into the second-to-last slot.  This gives
;the following relationship between hardware registers and memory
;locations:
;
;   BEGstk -->  |    reg 1    |  (lowest memory address)
;               |    reg 2    |
;               |    reg 3    |
;               |    reg 4    |
;               |    reg 5    |
;               |    reg 6    |
;               |    reg 7    |
;               |    reg 0    |  <-- Initial top of stack (empty)
;   ENDstk -->
;
;This means that the wrap-around case on decrementing CURstk will not
;occur until the last (8th) item is pushed.
;
;Note that the physical register numbers are only used in regard to
;the tag word.  All other operations are relative to the current top.


edata   ends

subttl  emulator.asm
page
;*********************************************************************;
;                                                                     ;
;                 Start of Code Segment                               ;
;                                                                     ;
;*********************************************************************;


ecode   segment

        public  __fpemulatorbegin
__fpemulatorbegin equ $                 ; emulator really starts here

;The emulator is assembled as one unit: each piece is textually included here.
        include emfinit.asm
        include emerror.asm             ; error handler
        include emdisp.asm              ; dispatch tables

        include emf386.asm              ; Flat 386 emulation entry
        include emdecode.asm            ; instruction decoder

        include emarith.asm             ; arithmetic dispatcher
        include emfadd.asm              ; add and subtract
        include emfmul.asm              ; multiply
        include emfdiv.asm              ; division
        include emround.asm             ; rounding
        include emload.asm              ; load memory operands
        include emstore.asm             ; store memory operands
        include emfmisc.asm             ; miscellaneous instructions
        include emfcom.asm              ; compare
        include emfconst.asm            ; constant loading
        include emlsbcd.asm             ; packed BCD conversion
        include emxtract.asm            ; xtract and scale
        include emfprem.asm             ; partial remainder
        include emtrig.asm              ; trig instructions
        include emftran.asm             ; transcendentals
        include emlsenv.asm
        include emfsqrt.asm             ; square root
ifndef NT386
        include emccall.asm
endif

;These entry points all share a single RET, i.e. they do nothing.
UNUSED:
eFSETPM:
eFNOP:
eFENI:
eFDISI:
        ret                             ;Return to EMLFINISH


        public  __fpemulatorend
__fpemulatorend equ $                   ; emulator ends here

ecode   ends
END
diff --git a/private/ntos/dll/i386/emxtract.asm b/private/ntos/dll/i386/emxtract.asm
new file mode 100644
index 000000000..ac8f7a6d1
--- /dev/null
+++ b/private/ntos/dll/i386/emxtract.asm
@@ -0,0 +1,309 @@
        subttl  emxtract - FXTRACT and FSCALE instructions
        page
;*******************************************************************************
;emxtract - FXTRACT and FSCALE instructions
;
;        Microsoft Confidential
;
;        Copyright (c) Microsoft Corporation 1991
;        All Rights Reserved
;
;Inputs:
;       edi = [CURstk]
;
;Revision History:
;
;       []      09/05/91  TP    Initial 32-bit version.
;
;*******************************************************************************


XtractStackOver:
        mov     EMSEG:[SWcc],C1         ;Flag stack overflow
XtractEmpty:
;Result is two Indefinites (if exception masked)
        call    StackError              ;Put first indefinite at [edi] = ST(0)
        jz      XtractExit              ;Error was unmasked--just exit
        mov     EMSEG:[CURstk],edi
;Copy the indefinite from [edi] to [esi] so both results match
        mov     eax,EMSEG:[edi].ExpSgn
        mov     EMSEG:[esi].ExpSgn,eax
        mov     eax,EMSEG:[edi].lManHi
        mov     EMSEG:[esi].lManHi,eax
        mov     eax,EMSEG:[edi].lManLo
        mov     EMSEG:[esi].lManLo,eax
        ret

        PrevStackWrap   edi,Xtract

;eFXTRACT
;
;On entry edi = [CURstk] (the operand).  The operand slot is kept in esi and
;a new top-of-stack slot is selected in edi.  In the normal case the new
;ST(0) receives the operand's mantissa with a zero exponent, and the operand
;slot (then ST(1)) is overwritten with the exponent converted by NormInt16.

EM_ENTRY eFXTRACT
eFXTRACT:
;edi = [CURstk]
        mov     esi,edi                 ;Save current ST
        PrevStackElem   edi,Xtract
;edi = ST(0)
;esi = ST(1) (operand)
        mov     eax,EMSEG:[esi].ExpSgn
;Exception priority requires reporting stack underflow (i.e., using an EMPTY)
;before stack overflow (i.e., no place for result).  Yes, both can happen
;together if they've screwed with the stack! (ST empty when ST(-1) isn't).
        cmp     al,bTAG_EMPTY           ;Is operand empty?
        jz      XtractEmpty
        cmp     EMSEG:[edi].bTag,bTAG_EMPTY     ;Is there an empty spot?
        jnz     XtractStackOver
        cmp     al,bTAG_ZERO            ;Is it special?
        jae     XtractSpclOrZero
XtractNormal:
        mov     EMSEG:[CURstk],edi
.erre   TexpBias eq 0
        movzx   ebx,ax                  ;Zero exponent
;Save mantissa in ST(0)
        mov     EMSEG:[edi].ExpSgn,ebx
        mov     ebx,EMSEG:[esi].lManHi
        mov     EMSEG:[edi].lManHi,ebx
        mov     ebx,EMSEG:[esi].lManLo
        mov     EMSEG:[edi].lManLo,ebx
        mov     edi,esi                 ;Save ST(1) pointer in edi
        shr     eax,16                  ;Move exponent down
        call    NormInt16               ;in emload.asm
;mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7, tag in cl
        mov     EMSEG:[edi].lManLo,esi
        mov     EMSEG:[edi].lManHi,ebx
        mov     EMSEG:[edi].ExpSgn,ecx
XtractExit:
        ret

XtractSpcl:
        cmp     al,bTAG_INF
        jz      XtractInf
        cmp     al,bTAG_NAN
        jz      XtractNAN
;Must be denormal.  Change tag to VALID or SNGL.
        cmp     EMSEG:[esi].lManLo,0    ;Any bits in low half?
.erre   bTAG_VALID eq 1
.erre   bTAG_SNGL eq 0
        setnz   al                      ;if low half==0 then al=0 else al=1
        mov     EMSEG:[CURerr],Denormal
        test    EMSEG:[CWmask],Denormal ;Is it masked?
        jnz     XtractNormal            ;If so, ignore denormalization
        ret

XtractSpclOrZero:
;Flags are still those of "cmp al,bTAG_ZERO" at the dispatch above.
        ja      XtractSpcl
;Operand is zero.  Result is ST(0) = 0 (same sign), ST(1) = -infinity
        mov     EMSEG:[CURerr],ZeroDivide
        test    EMSEG:[CWmask],ZeroDivide       ;Exception masked?
        jz      XtractExit
        mov     EMSEG:[CURstk],edi
        mov     EMSEG:[edi].ExpSgn,eax
        mov     eax,EMSEG:[esi].lManHi
        mov     EMSEG:[edi].lManHi,eax
        mov     eax,EMSEG:[esi].lManLo
        mov     EMSEG:[edi].lManLo,eax
        mov     EMSEG:[esi].ExpSgn,(IexpMax-IexpBias+TexpBias) shl 16 + bSign shl 8 + bTAG_INF
        mov     EMSEG:[esi].bMan7,80H   ;Change zero to infinity
        ret

XtractInf:
;Result is ST(0) = infinity (same sign), ST(1) = +infinity
        mov     EMSEG:[esi].bSgn,0      ;Ensure ST(1) is positive
XtractQNAN:
;Copy the operand to the new ST(0); eax still holds the ExpSgn loaded at entry
        mov     EMSEG:[CURstk],edi
        mov     EMSEG:[edi].ExpSgn,eax
        mov     eax,EMSEG:[esi].lManHi
        mov     EMSEG:[edi].lManHi,eax
        mov     eax,EMSEG:[esi].lManLo
        mov     EMSEG:[edi].lManLo,eax
        ret

XtractNAN:
;Result is two QNANs, signal Invalid Operation if SNAN
        test    EMSEG:[esi].bMan7,40H   ;Is it SNAN?
        jnz     XtractQNAN
        mov     EMSEG:[CURerr],Invalid
        test    EMSEG:[CWmask],Invalid
        jz      XtractExit
        or      EMSEG:[esi].bMan7,40H   ;Change to QNAN
        jmp     XtractQNAN

;*******************************************************************************
;
;FSCALE instruction

;Actual instruction entry point is in emarith.asm

;Dispatch table for scale
;
;One operand has been loaded into ecx:ebx:esi ("source"), the other is
;pointed to by edi ("dest").
;
;Tag of source is shifted.  Tag values are as follows:

.erre   TAG_SNGL        eq      0       ;SINGLE: low 32 bits are zero
.erre   TAG_VALID       eq      1
.erre   TAG_ZERO        eq      2
.erre   TAG_SPCL        eq      3       ;NAN, Infinity, Denormal, Empty

;Any special case routines not found in this file are in emarith.asm

tFscaleDisp     label   dword           ;Source (reg)   Dest (*[di] = ST)
        dd      ScaleDouble             ;single         single
        dd      ScaleDouble             ;single         double
        dd      ScaleX                  ;single         zero
        dd      ScaleSpclDest           ;single         special
        dd      ScaleDouble             ;double         single
        dd      ScaleDouble             ;double         double
        dd      ScaleX                  ;double         zero
        dd      ScaleSpclDest           ;double         special
        dd      ScaleX                  ;zero           single
        dd      ScaleX                  ;zero           double
        dd      ScaleX                  ;zero           zero
        dd      ScaleSpclDest           ;zero           special
        dd      ScaleSpclSource         ;special        single
        dd      ScaleSpclSource         ;special        double
        dd      ScaleSpclSource         ;special        zero
        dd      TwoOpBothSpcl           ;special        special
        dd      ScaleTwoInf             ;Two infinities


;The unmasked response to overflow and underflow with FSCALE is complicated
;by the extreme range it can generate.  Normally, the exponent is biased
;by 24,576 in the appropriate direction to bring it back into range.
;This may not be enough, however.  If it isn't, a result of infinity
;(with the correct sign) is returned for overflow, regardless of the
;rounding mode.  For underflow, zero (with the correct sign) is returned,
;even if it could be represented as a denormal.  This may be the only
;operation in which the unmasked response destroys the operands beyond
;recovery.

BigScale:
;Scale factor is much too big.  Just shift mantissa right two bits to get
;MSB out of sign bit and ensure no overflow when we add.
        mov     cl,2                    ;Always shift 2 bits if it's big
        jmp     ScaleCont

ScaleDouble:
;ebx:esi = ST(1) mantissa
;ecx = ST(1) sign in bit 15, exponent in high half
;edi = pointer to ST(0)
        rol     ecx,16                  ;Bring exponent down, sign to top
        or      ch,ch                   ;Check sign of exponent
        js      ScaleX                  ;No work if less than zero
        cmp     cx,30                   ;Scale factor exceed 30 bits?
;ScaleDouble, continued: flags come from the "cmp cx,30" immediately above.
        jge     BigScale
        not     cl                      ;cl = amount to shift right (mod 32)
ScaleCont:
        shr     ebx,cl                  ;ebx = exponent adjustment for ST(0)
;Use two's complement if negative (complement and increment)
        mov     eax,ecx
        cdq                             ;Extend sign through edx
        xor     ebx,edx                 ;Complement if negative
        sub     ebx,edx                 ;Increment if negative
;Scale exponent
        movsx   eax,EMSEG:[edi].wExp    ;Get exponent to adjust
        add     eax,ebx                 ;Can't overflow
        cmp     eax,IexpMax-IexpBias    ;Within normal range?
        jge     ScaleOverflow
        cmp     eax,IexpMin-IexpBias
        jle     ScaleUnderflow
SaveScaledExp:
;Result fits within normal range
        mov     EMSEG:[edi].wExp,ax     ;Update exponent of ST(0)
ScaleX:
        ret

ScaleOverflow:
;eax = exponent that's too big
        mov     EMSEG:[CURerr],Overflow
        test    EMSEG:[CWmask],Overflow ;Is exception unmasked?
        jz      UnmaskedScaleOvfl
;Produce masked overflow response
;NOTE(review): when reached from ScaleDouble, ecx has not been reloaded: it
;still holds the rotated ST(1) value (exponent in cx with cl modified, sign
;in bit 31).  The sign test on ch and the ExpSgn stored via SaveScaleMax on
;the "max value" path therefore do not obviously describe ST(0).  Confirm
;against emarith.asm / emround.asm whether ecx should be loaded with the
;result's sign and maximum exponent here.
        mov     al,EMSEG:[CWcntl]       ;Get rounding control
        mov     ah,al
;Return max value if RCup bit = 1 and -, or RCdown bit = 1 and +
;i.e., RCup & sign  OR  RCdown & not sign
.erre   RCchop eq RCup + RCdown         ;Always return max value
.erre   RCnear eq 0                     ;Never return max value
        sar     ch,7                    ;Expand sign through whole byte
.erre   (RCdown and bSign) eq 0         ;Don't want to change real sign
        xor     ch,RCdown               ;Flip sign for RCdown bit
        and     ah,ch                   ;RCup & sign  OR  RCdown & not sign
        jz      ScaleToInfinity         ;Save Infinity
;Get max value
        sub     ecx,1 shl 16            ;Drop exponent by 1
        xor     esi,esi
        dec     esi                     ;esi == -1
        mov     ebx,esi
SaveScaleMax:
;Store mantissa ebx:esi and ExpSgn ecx into the destination at [edi]
        mov     EMSEG:[edi].lManLo,esi
        mov     EMSEG:[edi].lManHi,ebx
        mov     EMSEG:[edi].ExpSgn,ecx
        ret

UnmaskedScaleOvfl:
        sub     eax,UnderBias           ;Unmasked response
        cmp     eax,IexpMax-IexpBias    ;Within normal range now?
        jl      SaveScaledExp           ;Use exponent biased by 24K
ScaleToInfinity:
        mov     ebx,1 shl 31
        xor     esi,esi
        mov     ecx,(IexpMax-IexpBias+TexpBias) shl 16 + bTAG_INF
        mov     ch,EMSEG:[edi].bSgn     ;Give it same sign
        jmp     SaveScaleMax            ;Use infinity

ScaleUnderflow:
;eax = exponent that's too small
        test    EMSEG:[CWmask],Underflow        ;Is exception unmasked?
        jz      ScaleSetUnder
        cmp     eax,-32768              ;Does exponent fit in 16 bits?
        jg      @F
        mov     ax,-32768               ;Clamp to most negative 16-bit value
@@:
;Set up for denormalizer
        mov     ebx,EMSEG:[edi].lManHi
        mov     esi,EMSEG:[edi].lManLo
        shrd    ecx,eax,16              ;Move exponent to high end of ecx
        mov     ch,EMSEG:[edi].bSgn     ;Keep sign
        xor     eax,eax                 ;No sticky bits
        mov     EMSEG:[Result],edi
        jmp     Denormalize             ;In emround.asm

ScaleSetUnder:
;Underflow exception not masked.  Adjust exponent and try again.
        mov     EMSEG:[CURerr],Underflow
        add     eax,UnderBias           ;Unmasked response
        cmp     eax,IexpMin-IexpBias    ;Within normal range now?
        jg      SaveScaledExp           ;Use exponent biased by 24K
;Still out of range: fall through to a signed zero.  [CURerr] already holds
;Underflow from the store at the top of ScaleSetUnder.
ScaleToZero:
        mov     ecx,bTAG_ZERO
        mov     ch,EMSEG:[edi].bSgn     ;Give it same sign
        xor     ebx,ebx
        mov     esi,ebx
        jmp     SaveScaleMax            ;Set to zero

;***
ScaleSpclDest:
        mov     al,EMSEG:[edi].bTag     ;Pick up tag
        cmp     al,bTAG_INF             ;Scaling infinity?
        jz      ScaleRet                ;No change if so
        jmp     SpclDest                ;In emarith.asm

ScaleRet:
        ret

;***
ScaleSpclSource:
        cmp     cl,bTAG_INF             ;Scaling by infinity?
        jnz     SpclSource              ;in emarith.asm
        or      ch,ch                   ;Scaling by -infinity?
        js      ScaleToZero
        cmp     EMSEG:[edi].bTag,bTAG_ZERO      ;Zero scaled by +infinity?
        jnz     ScaleToInfinity
        jmp     ReturnIndefinite        ;Invalid operation

;***
ScaleTwoInf:
        or      ch,ch                   ;Scaling by +infinity?
        jns     ScaleRet                ;All done then
;Scaling infinity by -infinity
        jmp     ReturnIndefinite        ;Invalid operation
diff --git a/private/ntos/dll/i386/ldrthunk.asm b/private/ntos/dll/i386/ldrthunk.asm
new file mode 100644
index 000000000..617c0ef00
--- /dev/null
+++ b/private/ntos/dll/i386/ldrthunk.asm
@@ -0,0 +1,89 @@
        title   "LdrInitializeThunk"
;++
;
; Copyright (c) 1989  Microsoft Corporation
;
; Module Name:
;
;    ldrthunk.s
;
; Abstract:
;
;    This module implements the thunk for the LdrpInitialize APC routine.
;
; Author:
;
;    Steven R. Wood (stevewo) 27-Apr-1990
;
; Environment:
;
;    Any mode.
;
; Revision History:
;
;--

.386p
        .xlist
include ks386.inc
include callconv.inc                    ; calling convention macros
        .list

        EXTRNP  _LdrpInitialize,3

_TEXT   SEGMENT DWORD PUBLIC 'CODE'
        ASSUME  DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING

        page , 132

;++
;
; VOID
; LdrInitializeThunk(
;    IN PVOID NormalContext,
;    IN PVOID SystemArgument1,
;    IN PVOID SystemArgument2
;    )
;
; Routine Description:
;
;    This function computes a pointer to the context record on the stack
;    and jumps to the LdrpInitialize function with that pointer as its
;    parameter.
;
;    The incoming NormalContext slot at [esp + 4] is overwritten with the
;    address of [esp + 16], so LdrpInitialize sees that address as its
;    first argument.  Control is transferred with JMP, so LdrpInitialize
;    returns directly to this thunk's caller.
;
; Arguments:
;
;    NormalContext - User Mode APC context parameter (ignored).
;
;    SystemArgument1 - User Mode APC system argument 1 (ignored).
;
;    SystemArgument2 - User Mode APC system argument 2 (ignored).
;
;    NOTE(review): cPublicProc declares 4 argument slots although only the
;    three above are documented; the fourth slot at [esp + 16] is presumably
;    the start of the context record itself -- confirm against the caller.
;
; Return Value:
;
;    None.
;
;--

cPublicProc _LdrInitializeThunk , 4

NormalContext   equ [esp + 4]
SystemArgument1 equ [esp + 8]
SystemArgument2 equ [esp + 12]
Context         equ [esp + 16]

        lea     eax,Context             ; Calculate address of context record
        mov     NormalContext,eax       ; Pass as first parameter to LdrpInitialize
if DEVL
        xor     ebp,ebp                 ; Mark end of frame pointer list
endif
IFDEF STD_CALL
        jmp     _LdrpInitialize@12      ; LdrpInitialize
ELSE
        jmp     _LdrpInitialize         ; LdrpInitialize
ENDIF

stdENDP _LdrInitializeThunk

_TEXT   ends
        end
diff --git a/private/ntos/dll/i386/npxemltr.def b/private/ntos/dll/i386/npxemltr.def
new file mode 100644
index 000000000..0ddc11dbc
--- /dev/null
+++ b/private/ntos/dll/i386/npxemltr.def
@@ -0,0 +1,17 @@
; SCCSID = @(#)npxemltr.def 13.2 89/11/15
;
; IBM/Microsoft Confidential
;
; Copyright (c) IBM Corporation 1987, 1989
; Copyright (c) Microsoft Corporation 1987, 1989
;
; All Rights Reserved
LIBRARY npxemltr

DESCRIPTION 'OS/2 V2.0 80387 emulator.
Version 6.00.001' + +DATA NONE + +EXPORTS + NPXNPHandler + NPXFPDataSize diff --git a/private/ntos/dll/i386/npxnp.c b/private/ntos/dll/i386/npxnp.c new file mode 100644 index 000000000..745709033 --- /dev/null +++ b/private/ntos/dll/i386/npxnp.c @@ -0,0 +1,204 @@ +/*++ + +Copyright (c) 1989 Microsoft Corporation + +Module Name: + + npxnp.c + +Abstract: + + This module contains support for non-Flat mode NPX faults when + the application has it's CR0_EM bit clear. + +Author: + + Ken Reneris (kenr) 8-Dec-1994 + +Environment: + + User Mode only + +Revision History: + +--*/ + + +#include "csrdll.h" + +static UCHAR MOD16[] = { 0, 1, 2, 0 }; +static UCHAR MOD32[] = { 0, 1, 4, 0 }; + +UCHAR +NpxNpReadCSEip ( + IN PCONTEXT Context + ) +#pragma warning(disable:4035) +{ + _asm { + push es + mov ecx, Context + mov eax, [ecx] CONTEXT.SegCs + mov es, ax + mov eax, [ecx] CONTEXT.Eip + inc dword ptr [ecx] CONTEXT.Eip ; Advance EIP + mov al, es:[eax] + pop es + } +} +#pragma warning(default:4035) + + +VOID +NpxNpSkipInstruction ( + IN PCONTEXT Context + ) +/*++ + +Routine Description: + + This functions gains control when the system has no installed + NPX support, but the thread has cleared it's EM bit in CR0. + + The purpose of this function is to move the instruction + pointer forward over the current NPX instruction. + +Enviroment: + + 16:16 mode + +Arguments: + +Return Value: + +--*/ +{ + BOOLEAN fPrefix; + UCHAR ibyte, Mod, rm; + UCHAR Address32Bits; + ULONG CallerCs; + + Address32Bits = 0; // assume called from 16:16 + + // + // Lookup and determine callers default mode + // + + CallerCs = Context->SegCs; + _asm { + mov eax, CallerCs + lar eax, eax + test eax, 400000h + jz short IsDefault16Bit + + mov Address32Bits, 1 + +IsDefault16Bit: + } + + // + // No sense in using a try-except since we are not on the + // correct stack. 
A fault here could occur if the start + // of an NPX instruction is near the end of a selector, and the + // end of the instruction is past the selectors end. This + // would kill the app anyway. + // + + // + // Read any instruction prefixes + // + + fPrefix = TRUE; + while (fPrefix) { + ibyte = NpxNpReadCSEip(Context); + + switch (ibyte) { + case 0x2e: // cs override, skip it + case 0x36: // ss override, skip it + case 0x3e: // ds override, skip it + case 0x26: // es override, skip it + case 0x64: // fs override, skip it + case 0x65: // gs override, skip it + case 0x66: // operand size override, skip it + break; + + case 0x67: + // address size override + Address32Bits ^= 1; + break; + + default: + fPrefix = FALSE; + break; + } + } + + // + // Handle first byte of NPX instruction + // + + if (ibyte == 0x9b) { + + // + // FWait instruction - single byte opcode - all done + // + + return; + } + + if (ibyte < 0xD8 || ibyte > 0xDF) { + + // + // Not an ESC instruction + // + +#if DBG + DbgPrint ("P5_FPU_PATCH: 16: Not NPX ESC instruction\n"); +#endif + return; + } + + // + // Get ModR/M byte for NPX opcode + // + + ibyte = NpxNpReadCSEip(Context); + + if (ibyte > 0xbf) { + // + // Outside of ModR/M range for addressing, all done + // + + return; + } + + Mod = ibyte >> 6; + rm = ibyte & 0x7; + if (Address32Bits) { + Context->Eip += MOD32 [Mod]; + if (Mod == 0 && rm == 5) { + // disp 32 + Context->Eip += 4; + } + + // + // If SIB byte, read it + // + + if (rm == 4) { + ibyte = NpxNpReadCSEip(Context); + + if (Mod == 0 && (ibyte & 7) == 5) { + // disp 32 + Context->Eip += 4; + } + } + + } else { + Context->Eip += MOD16 [Mod]; + if (Mod == 0 && rm == 6) { + // disp 16 + Context->Eip += 2; + } + } +} diff --git a/private/ntos/dll/i386/nt386npx.inc b/private/ntos/dll/i386/nt386npx.inc new file mode 100644 index 000000000..b1095bc75 --- /dev/null +++ b/private/ntos/dll/i386/nt386npx.inc @@ -0,0 +1,40 @@ +NPX_CONTEXT_FULL EQU 00001000Fh ;/ Full context + +ContextFlags equ 
CsContextFlags +ctx_env equ CsFloatSave +ctx_stack equ CsFloatSave+FpRegisterArea ;need to change this puppy +ctx_Cr0NpxState equ CsFloatSave+FpCr0NpxState +ctx_SegGs equ CsSegGs +ctx_SegFs equ CsSegFs +ctx_SegEs equ CsSegEs +ctx_SegDs equ CsSegDs +ctx_RegEdi equ CsEdi +ctx_RegEsi equ CsEsi +ctx_RegEbp equ CsEbp +ctx_RegEbx equ CsEbx +ctx_RegEdx equ CsEdx +ctx_RegEcx equ CsEcx +ctx_RegEax equ CsEax +ctx_RegEip equ CsEip +ctx_SegCs equ CsSegCs +ctx_EFlags equ CsEflags +ctx_RegEsp equ CsEsp +ctx_SegSs equ CsSegSs + +XCPT_FLOAT_INVALID_OPERATION EQU STATUS_FLOAT_INVALID_OPERATION +XCPT_FLOAT_DENORMAL_OPERAND EQU STATUS_FLOAT_DENORMAL_OPERAND +XCPT_FLOAT_DIVIDE_BY_ZERO EQU STATUS_FLOAT_DIVIDE_BY_ZERO +XCPT_FLOAT_OVERFLOW EQU STATUS_FLOAT_OVERFLOW +XCPT_FLOAT_UNDERFLOW EQU STATUS_FLOAT_UNDERFLOW +XCPT_FLOAT_INEXACT_RESULT EQU STATUS_FLOAT_INEXACT_RESULT +XCPT_FLOAT_STACK_CHECK EQU STATUS_FLOAT_STACK_CHECK + +ExceptionNum equ ErExceptionCode +FHandlerFlags equ ErExceptionFlags +NestedExceptionReportRecord equ ErExceptionRecord +ExceptionAddress equ ErExceptionAddress +CParameters equ ErNumberParameters + + +Em87Busy equ 1 +Em87Idle equ 0 diff --git a/private/ntos/dll/i386/ntnapntr.asm b/private/ntos/dll/i386/ntnapntr.asm new file mode 100644 index 000000000..3c1c425f3 --- /dev/null +++ b/private/ntos/dll/i386/ntnapntr.asm @@ -0,0 +1,449 @@ +;++ +; +; Copyright (c) 1991 Microsoft Corporation +; +; Module Name: +; +; ntnap.asm +; +; Abstract: +; +; This module implements the system service dispatch procedure. +; It also creates a "profile" of each service by counting and +; timing calls. +; +; Author: +; +; Russ Blake (russbl) 22-Apr-1991 +; +; Environment: +; +; User or kernel mode. 
+; +; Revision History: +; +;-- + +include ks386.inc +include callconv.inc ; calling convention macros +include mac386.inc +include ntnap.inc + +.386 + +EXTRN _NapDllInit:near +EXTRN _NapRecordInfo:near + +; +; Locals addressed off ebp by _NapCalibrate and _NapProfileDispatch: +; NapStart - counter value read before the call (8 bytes reserved) +; NapEnd - counter value read after the call (8 bytes reserved) +; NapServiceNum - service number saved on entry (4 bytes) +; NapLocalSize covers all three: 8 + 8 + 4 = 20 bytes. +; + +NapStart equ [ebp - 08h] +NapEnd equ [ebp - 010h] +NapServiceNum equ [ebp - 014h] + +NapLocalSize equ 4 * 5 + +NapCalSrvNum equ 0FFFFFFFFh + +;++ +; +; Routine Description: +; +; This routine is called to save registers during API profiling. +; The objective is to preserve the caller's environment +; while timing takes place and, once, while dll initialization +; takes place. This routine saves registers on the stack to +; permit recursive calls. +; +; There should be a matching call to NapRestoreRegs to restore +; the registers. +; +; Arguments: +; +; All registers. +; +; Return Value: +; +; None. All registers are preserved on the stack. +; +; Note: eax itself is not reloaded before returning; on return it +; holds the caller's return address. The copy of eax saved on the +; stack is the caller's original value. +; +;-- + + +.386p + +_TEXT SEGMENT DWORD USE32 PUBLIC 'CODE' + ASSUME CS:FLAT, DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING + +cPublicProc _NapSaveRegs + + ; + ; This is how the stack looks like upon entering this routine: + ; + ; ---+----+----+----+----+---- + ; | Return Address | + ; ---+----+----+----+----+---- + ; esp+ esp+ + ; 0 4 + ; + ; + ; -> popping makes esp go -> + ; <- pushing makes esp go <- + ; + + push ebp + mov ebp,esp ; Remember where we are during this stuff + ; ebp = Original esp - 4 + push eax + push ebx + push ecx + push edx + push esi + push edi + pushfd + push ds + push es + push ss + push fs + push gs + + mov eax,[ebp+4] ; Grab Return Address (eax was saved above) + push eax ; Put Return Address on Stack + mov ebp,[ebp+0] ; Restore original ebp + + ; + ; This is how the stack looks like just before executing RET: + ; + ; + ; +----+----+----+----+----+----+----+----+----+----+----+----+ + ; | Return Address | g s | f s | + ; +----+----+----+----+----+----+----+----+----+----+----+----+ + ; esp+ + ; 0 + ; + ; +----+----+----+----+----+----+----+----+----+----+----+----+ + ; | s s | e s | d s | + ;
+----+----+----+----+----+----+----+----+----+----+----+----+ + ; esp+ + ; c + ; + ; +----+----+----+----+----+----+----+----+----+----+----+----+ + ; | eflags | edi | esi | + ; +----+----+----+----+----+----+----+----+----+----+----+----+ + ; +----+----+----+----+----+----+----+----+----+----+----+----+ + ; | edx | ecx | ebx | + ; +----+----+----+----+----+----+----+----+----+----+----+----+ + ; +----+----+----+----+----+----+----+----+----+----+----+----+---- + ; | eax | original ebp | Return Address | + ; +----+----+----+----+----+----+----+----+----+----+----+----+---- + ; was + ; ebp+ + ; 0 + ; + + stdRET _NapSaveRegs + +stdENDP _NapSaveRegs + +cPublicProc _NapRestoreRegs,,near + + ; + ; Undoes _NapSaveRegs: pops the segment registers, eflags and general + ; registers pushed there, then returns to the caller. ebp is the one + ; exception: the caller's current ebp is stored over the saved slot, + ; so ebp comes back unchanged rather than as saved by _NapSaveRegs. + ; + ; This is how the stack looks like upon entering this routine: + ; + ; + ; +----+----+----+----+----+----+----+----+----+----+----+----+ + ; | Return Address | g s | f s | + ; +----+----+----+----+----+----+----+----+----+----+----+----+ + ; esp+ + ; 0 + ; + ; +----+----+----+----+----+----+----+----+----+----+----+----+ + ; | s s | e s | d s | + ; +----+----+----+----+----+----+----+----+----+----+----+----+ + ; esp+ + ; c + ; + ; +----+----+----+----+----+----+----+----+----+----+----+----+ + ; | eflags | edi | esi | + ; +----+----+----+----+----+----+----+----+----+----+----+----+ + ; esp+ + ; 18 + ; + ; +----+----+----+----+----+----+----+----+----+----+----+----+ + ; | edx | ecx | ebx | + ; +----+----+----+----+----+----+----+----+----+----+----+----+ + ; esp+ + ; 24 + ; + ; +----+----+----+----+----+----+----+----+----+----+----+----+---- + ; | eax | original ebp | Return Address | + ; +----+----+----+----+----+----+----+----+----+----+----+----+---- + ; esp+ esp+ esp+ + ; 30 34 38 + ; + pop eax ; Get Return Address + push ebp ; Save a temporary copy of the caller's current BP + mov ebp,esp ; BP = esp on entry, so the offsets below match the diagram + + mov [ebp+038h],eax ; Put Return Address on Stack + pop eax ; Get the caller's current BP back + mov [ebp+034h],eax ; Put it in the original BP place (overwrites the saved ebp) + + pop gs + pop fs + pop ss + pop es
+ pop ds + popfd + pop edi + pop esi + pop edx + pop ecx + pop ebx + pop eax + pop ebp + + stdRET _NapRestoreRegs + +stdENDP _NapRestoreRegs + + +;++ +; +; Routine Description: +; +; This routine is called by the initialization code in the +; Nt Api Profiler to calibrate the cost of profiling. +; It simulates the overhead of a profiled call to a system +; service, but carefully avoids doing any of the normal +; work associated with such a call. +; +; NOTE: This routine's code should exactly parallel that of +; _NapProfileDispatch, except for any operation normally +; (i.e., when not profiling) executed to call a system service. +; This amounts to an "int 2Eh" in the middle of the routine. +; +; Arguments: +; +; eax - Ignored on entry. The routine itself loads NapCalSrvNum +; (-1) as the service number, since it cannot be passed +; in eax from a C caller. The routine +; _NapRecordInfo notes this value and discards +; the call. +; +; edx - Pointer to the parameters to the Service; ignored by +; this routine. +; +; Return Value: +; +; None. +; +;-- + + +cPublicProc _NapCalibrate , ,near + + + push ebp ; Locals: the value of + mov ebp, esp ; the perf counter before and + sub esp, NapLocalSize ; after the API call + + mov eax, NapCalSrvNum ; special routine number + mov NapServicenum, eax ; is used for calibration + ; can't be passed in eax from + ; C routine, so load it here + ; save the service routine number + + + stdCall _NapSaveRegs ; save register state so call to + ; get counter does not destroy them + + stdCall _NapDllInit ; initialize dll if necessary + +; Now call NtQueryPerformanceCounter to get the starting count; +; Store this locally + + push 0 ; don't need frequency: pass 0 + lea eax, NapStart ; (eax) = pointer to counter + push eax ; pass pointer to counter + mov eax, NapCounterServiceNumber + lea edx, [esp] ; (edx) -> arguments + int 2Eh ; get the current counter value + add esp, 08h ; remove counter parameters + +; Restore caller's registers + + stdCall _NapRestoreRegs + +; We're just calibrating the
overhead, so we don't call the system +; service here. + +; Save registers so we can complete the profile accounting. + + stdCall _NapSaveRegs + +; Now get the ending counter. + + push 0 ; don't need frequency: pass 0 + lea eax, NapEnd ; (eax) = pointer to counter + push eax ; pass pointer to counter + mov eax, NapCounterServiceNumber + lea edx, [esp] ; (edx) -> arguments + int 2Eh ; get the current counter value + add esp, 08h ; remove counter parameters + +; Compute the time for this call and increment the number of calls. + + lea eax, NapEnd ; pointer to start/end counters + ; (NapEnd is the lower address; NapStart follows it) + ; ID of this routine + stdCall _NapRecordInfo, <NapServiceNum, eax> + + stdCall _NapRestoreRegs + ; restore caller's registers + leave ; we needed this for pseudo locals + stdRET _NapCalibrate +stdENDP _NapCalibrate + + +;++ +; +; Routine Description: +; +; This routine is called by the USRSTUBS_ENTRY1 MACRO in the +; services.prf to carry out profiling on an Nt system api call. +; +; Arguments: +; +; eax - Service Number of the routine being called. This number +; is assigned by genprof.c from the table in services.tab. +; +; edx - Pointer to the parameters to the Service. +; +; Return Value: +; +; Whatever the system service returns.
+; +;-- + + + +cPublicProc _NapProfileDispatch , ,near + + ; + ; Keep this body in step with _NapCalibrate, which is meant to + ; parallel it apart from the profiled "INT 2Eh" below. + ; + + push ebp ; Locals: the value of + mov ebp, esp ; the perf counter before and + sub esp, NapLocalSize ; after the API call + + mov NapServicenum, eax + ; save the service routine number + + stdCall _NapSaveRegs ; save register state so call to + ; get counter does not destroy them + + stdCall _NapDllInit ; initialize dll if necessary + +; Now call NtQueryPerformanceCounter to get the starting count; +; Store this locally + + push 0 ; don't need frequency: pass 0 + lea eax, NapStart ; (eax) = pointer to counter + push eax ; pass pointer to counter + mov eax, NapCounterServiceNumber + lea edx, [esp] ; (edx) -> arguments + int 2Eh ; get the current counter value + add esp, 08h ; remove counter parameters + +; Restore caller's registers + + stdCall _NapRestoreRegs + + ; eax and edx again hold the caller's service number and + ; argument pointer, as saved by _NapSaveRegs above. + + INT 2Eh ; invoke system service + +; Save registers so we can complete the profile accounting. +; The eax saved here is the value left by the service call; the final +; _NapRestoreRegs below puts it back before returning. + + stdCall _NapSaveRegs + +; Now get the ending counter. + + push 0 ; don't need frequency: pass 0 + lea eax, NapEnd ; (eax) = pointer to counter + push eax ; pass pointer to counter + mov eax, NapCounterServiceNumber + lea edx, [esp] ; (edx) -> arguments + int 2Eh ; get the current counter value + add esp, 08h ; remove counter parameters + +; Compute the time for this call and increment the number of calls. + + lea eax, NapEnd ; pointer to start/end counters + ; ID of this routine + stdCall _NapRecordInfo, <NapServiceNum, eax> + + stdCall _NapRestoreRegs + ; restore caller's registers + leave ; we needed this for pseudo locals + stdRET _NapProfileDispatch +stdENDP _NapProfileDispatch + +;++ +; +; Routine Description: +; +; This routine is called to get the spin lock associated with +; a particular api. It prevents the simultaneous update +; from multiple threads in this or other processors of the +; profiling data for the api. +; +; Arguments: +; +; SpinLockAddr - address of the spin lock within the data +; for the api being updated.
+; +; Return Value: +; +; None. +; +;-- + + +cPublicProc _NapAcquireSpinLock , ,near + + ; + ; Spins until bit 0 of the lock dword is observed clear and has been + ; set by this caller. eax is saved and restored around the loop. + ; + ; NOTE(review): one stack argument is read here but no argument count + ; is given to cPublicProc; presumably the caller removes the argument. + ; Confirm against callconv.inc and the C prototype. + ; + + push eax ; preserve eax; the argument is now at [esp+8] + mov eax, [esp+8] ; get address of lock +WaitForLock: + lock bts dword ptr [eax], 0 ; test and set the spinlock (CF = previous bit 0) + jc SHORT WaitForLock ; spinlock owned: retry at WaitForLock + pop eax + + stdRET _NapAcquireSpinLock + +stdENDP _NapAcquireSpinLock + + +;++ +; +; Routine Description: +; +; This routine is called to release the spin lock associated with +; a particular api. +; +; Arguments: +; +; SpinLockAddr - address of the spin lock within the data +; for the api being updated. +; +; Return Value: +; +; None. +; +;-- + + +cPublicProc _NapReleaseSpinLock , ,near + + push eax ; preserve eax; the argument is now at [esp+8] + mov eax, [esp+8] ; get address of lock + lock btr dword ptr [eax], 0 ; release spinlock (clear bit 0) + pop eax + stdRET _NapReleaseSpinLock + +stdENDP _NapReleaseSpinLock + + +_TEXT ends + + end diff --git a/private/ntos/dll/i386/sources b/private/ntos/dll/i386/sources new file mode 100644 index 000000000..5e5cd357a --- /dev/null +++ b/private/ntos/dll/i386/sources @@ -0,0 +1,14 @@ +386_DLLLIBOBJECTS=$(BASEDIR)\public\sdk\lib\i386\exsup.lib + +i386_SOURCES=..\i386\critsect.asm \ + ..\i386\ldrthunk.asm \ + ..\i386\emulator.asm \ + ..\i386\npxnp.c \ + i386\usrstubs.asm + +ASM_DEFINES=-DNT386 + +CONDITIONAL_INCLUDES=\ + $(CONDITIONAL_INCLUDES) \ + emccall.asm\ + except32.inc |