summaryrefslogblamecommitdiffstats
path: root/private/ntos/dll/i386/emfprem.asm
blob: 3cb8670bb8fa411495b18f0dbc4b935af465b561 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407






















































































































































































































































































































































































































                                                                                
	subttl emfprem.asm - FPREM and FPREM1 instructions
	page
;*******************************************************************************
;emfprem.asm - FPREM and FPREM1 instructions
;	by Tim Paterson
;
;        Microsoft Confidential
;
;	 Copyright (c) Microsoft Corporation 1991
;	 All Rights Reserved
;
;Inputs:
;	edi = [CURstk]
;	ST(1) loaded into ebx:esi & ecx
;
;Revision History:
;
; []	09/05/91  TP	Initial 32-bit version.
;
;*******************************************************************************

;Dispatch table for remainder
;
;One operand has been loaded into ecx:ebx:esi ("source"), the other is
;pointed to by edi ("dest").
;
;Tag of source is shifted.  Tag values are as follows:

.erre   TAG_SNGL        eq      0       ;SINGLE: low 32 bits are zero
.erre   TAG_VALID       eq      1
.erre   TAG_ZERO        eq      2
.erre   TAG_SPCL        eq      3       ;NAN, Infinity, Denormal, Empty

;Any special case routines not found in this file are in emarith.asm
;
;The rows are grouped by source tag (four rows per source tag, in the tag
;order asserted above); within a group the dest tag selects the row.
;"single" and "double" in the row comments correspond to TAG_SNGL and
;TAG_VALID.  A 17th entry follows the 16 tag combinations.

					;Divisor	Dividend
tFpremDisp	label	dword		;Source(ST(1))	Dest (ST(0))
	dd	PremDouble		;single		single
	dd	PremDouble		;single		double
	dd	PremX			;single		zero
	dd	PremSpclDest		;single		special
	dd	PremDouble		;double		single
	dd	PremDouble		;double		double
	dd	PremX			;double		zero
	dd	PremSpclDest		;double		special
	dd	ReturnIndefinite	;zero		single
	dd	ReturnIndefinite	;zero		double
	dd	ReturnIndefinite	;zero		zero
	dd	PremSpclDest		;zero		special
	dd	PremSpclSource		;special	single
	dd	PremSpclSource		;special	double
	dd	PremSpclSource		;special	zero
	dd	TwoOpBothSpcl		;special	special
	dd	ReturnIndefinite	;Two infinities
					;  NOTE(review): presumably selected by the
					;  both-special handling in emarith.asm --
					;  confirm against TwoOpBothSpcl


;***
;PremSpclDone - return point for the special-case routines.
;
;Discards the dword on top of the stack (per the original comment, the
;return address used by the normal, non-special path) and returns to the
;address beneath it.
;
;The stack holds 32-bit return addresses, so the full 32-bit stack pointer
;is adjusted.  Adjusting only SP would leave ESP wrong whenever the low
;word wraps (SP = 0FFFCh).
PremSpclDone:
	add	esp,4			;Clean off return address for normal
	ret

;***
;PremSpclDest - dividend (dest, pointed to by edi) has a special tag.
;
;An infinite dividend is an invalid operation and yields indefinite.
;Every other special dest is handed to the generic SpclDest handler, with
;al still holding the dest tag that was just loaded.
PremSpclDest:
	mov	al,EMSEG:[edi].bTag		;Pick up tag
	cmp	al,bTAG_INF		;Dividing infinity?
	jz	ReturnIndefinite	;Invalid operation if so
	jmp	SpclDest		;In emarith.asm

;***
;PremSpclSource - divisor (source) has a special tag, dividend does not.
;
;cl is compared against bTAG_INF (presumably cl holds the source tag at
;this point -- confirm against the loader).  Anything other than infinity
;goes to the generic SpclSource handler.  Dividing by infinity falls
;through to PremX.
PremSpclSource:
	cmp	cl,bTAG_INF		;Dividing by infinity?
	jnz	SpclSource		;in emarith.asm
;PremX is also a direct target of tFpremDisp (zero dividend with a
;single/double divisor).
PremX:
;Return Dest unchanged, quotient = 0
	mov     EMSEG:[SWcc],0		;All condition codes clear: quotient bits = 0
	ret
;*******************************************************************************

;Map quotient bits to condition codes
;
;Q0..Q2 name the condition-code flag that reports each of the three low
;quotient bits (Q0 = least significant).  MapQuo is indexed by the low
;three bits of the quotient (SavePremResult masks ebp with 7 before the
;lookup) and yields the byte stored in SWcc.

Q0	equ	C1
Q1	equ	C3
Q2	equ	C0

MapQuo	label	byte
	db	0			;quotient bits 000
	db	Q0			;001
	db	Q1			;010
	db	Q1+Q0			;011
	db	Q2			;100
	db	Q2+Q0			;101
	db	Q2+Q1			;110
	db	Q2+Q1+Q0		;111

Prem1Cont:

;FPREM1 continuation.  Converts the FPREM-style remainder produced by
;PremDouble into the round-to-nearest remainder, then falls into PremCont
;to store the result.
;
;On entry:
;edx:eax = remainder, normalized
;ebx:esi = divisor
;ebp = quotient
;edi = exponent difference, zero or less
;ecx = 0 (positive sign)
;
;On fall-through to PremCont:
;edx:eax = remainder, normalized
;ebp = quotient (incremented if the remainder was negated)
;edi = exponent difference
;ch = bSign if the remainder was negated
;
;At this point, 0 <= remainder < divisor.  However, for FPREM1 we need
; -divisor/2 <= remainder <= divisor/2.  If remainder = divisor/2, whether
;we choose + or - is dependent on whichever gives us an even quotient
;(the usual IEEE rounding rule).  Quotient must be incremented if we
;use negative remainder.
;
;The exponent difference in edi is a signed quantity, but the mantissa
;halves are unsigned magnitudes, so they are compared with the unsigned
;conditions (jb/ja).  A signed compare of the low halves gives the wrong
;answer whenever their top bits differ.

	cmp	edi,-1
	jl	PremCont		;Remainder < divisor/2
	jg	NegRemainExp0		;Remainder > divisor/2
;Exponent is -1
	cmp	edx,ebx
	jb	PremCont		;Remainder < divisor/2
	ja	NegRemain		;Remainder > divisor/2
	cmp	eax,esi
	jb	PremCont		;Remainder < divisor/2
	ja	NegRemain		;Remainder > divisor/2
;Remainder = divisor/2.  Ensure quotient is even
	test	ebp,1			;Even?
	jz	PremCont
NegRemain:
;Theoretically we subtract divisor from remainder once more, leaving us
;with a negative remainder.  But since we use sign/magnitude representation,
;we want the abs() of that with sign bit set--so subtract remainder from
;(larger) divisor.  Note that exponent difference is -1, so we must align
;binary points first.  The bit shifted out of ebx by the doubling is
;recovered by the borrow of the subtraction below, because the true
;difference fits in 64 bits.
	add	esi,esi
	adc	ebx,ebx			;Double divisor to align binary points
NegRemainExp0:
	sub	esi,eax
	sbb	ebx,edx			;Subtract remainder
	mov	eax,esi
	mov	edx,ebx			;Result in edx:eax
	inc	ebp			;Increase quotient
;Must normalize result of subtraction.  ecx is used as scratch for the
;bit index, so the sign is loaded into ch only after normalization.
	bsr	ecx,edx			;Look for 1 bit
	jnz	@F
	sub	edi,32
	xchg	edx,eax			;Shift left 32 bits
	bsr	ecx,edx
@@:
	lea     edi,[edi+ecx-31]        ;Fix up exponent for normalization
	not     cl			;Low 5 bits = 31 - bit index = shift count
	shld	edx,eax,cl
	shl	eax,cl
	mov	ch,bSign		;Flip sign of remainder

PremCont:
;Store the remainder into the current stack element and report the low
;three quotient bits in SWcc.
;
;edx:eax = remainder, normalized
;ebp = quotient
;edi = exponent difference, zero or less
;ch = sign
	or	eax,eax			;Low bits zero?
.erre	bTAG_VALID eq 1
.erre	bTAG_SNGL eq 0
	setnz   cl                      ;if low half==0 then cl=0 else cl=1
					;  i.e. cl = bTAG_SNGL or bTAG_VALID
	mov	esi,EMSEG:[CURstk]
	mov     ebx,esi
	NextStackElem   ebx,Prem	;presumably ebx -> ST(1), the divisor
	add	di,EMSEG:[ebx].wExp		;Compute result exponent
	cmp	di,IexpMin-IexpBias
	jle	PremUnderflow		;Exponent at or below the minimum
SavePremResult:
	mov	EMSEG:[esi].lManLo,eax
	xor	EMSEG:[esi].bSgn,ch	;Flip stored sign if ch holds the sign bit
	mov	EMSEG:[esi].lManHi,edx
	and	ebp,7			;Keep last 3 bits of quotient only
					;  and give write buffers a break
	mov	EMSEG:[esi].wExp,di
	mov	EMSEG:[esi].bTag,cl
	mov	al,MapQuo[ebp]		;Get cond. codes for this quotient
	mov	EMSEG:[SWcc],al
	ret

	NextStackWrap   ebx,Prem        ;Tied to NextStackElem above

PremUnderflow:
;Result exponent is too small.  If the underflow exception is masked, the
;result is tagged as a denormal; otherwise the exponent is rebiased and
;the exception is recorded.  Both paths rejoin SavePremResult.
	test	EMSEG:[CWmask],Underflow	;Is exception unmasked?
	jz	UnmaskedPremUnder
	mov	cl,bTAG_DEN
	jmp	SavePremResult

UnmaskedPremUnder:
	add	edi,UnderBias		;Additional exp. bias for unmasked resp.
	or	EMSEG:[CURerr],Underflow
	jmp	SavePremResult

;*******************************************************************************

PremDouble:
;Main remainder routine for two non-zero, non-special operands.
;
;On entry:
;edi = [CURstk]
;ebx:esi = ST(1) mantissa, ecx = ExpSgn
;
;On the normal exits (ExitPremLoop, FitDivisor, PremHighZero):
;edx:eax = remainder, normalized
;ebp = quotient
;edi = exponent difference, zero or less
;ecx = 0 (positive sign)
;An exactly zero remainder leaves through ExactPrem instead.

;The stack holds 32-bit return addresses, so the full 32-bit stack pointer
;is adjusted.  Adjusting only SP would leave ESP wrong whenever the low
;word wraps (SP = 0FFFCh).
	add	esp,4			;Clean off return address for special
	mov	eax,EMSEG:[edi].lManLo
	mov	edx,EMSEG:[edi].lManHi
	movsx	edi,EMSEG:[edi].wExp
	xor	ebp,ebp			;Quotient, in case we skip stage 1
	sar	ecx,16			;Bring exponent down
	sub	edi,ecx			;Get exponent difference
	jl	ExitPremLoop		;If dividend is smaller, return it.

;FPREM is performed in two stages.  The first stage is used only if the
;exponent difference is greater than 31.  It reduces the exponent difference
;by 32, and repeats until the difference is less than 32.  Note that
;unlike the hardware FPREM instruction, we are not limited to reducing
;the exponent by only 63--we just keep looping until it's done.
;
;The second stage performs ordinary 1-bit-at-a-time long division.
;It stops when the exponent difference is zero, meaning we have an
;integer quotient and the final remainder.
;
;edx:eax = dividend
;ebx:esi = divisor
;edi = exponent difference
;ebp = 0 (initial quotient)

	cmp	edi,32			;Do we need to do stage 1?
	jl	FitDivisor		;No, start stage 2

;FPREM stage 1
;
;Exponent difference is at least 32.  Use 32-bit division to compute
;quotient and exact remainder, reducing exponent difference by 32.

;DIV instruction will overflow if dividend >= divisor.  In this case,
;subtract divisor from dividend to ensure no overflow.  This will change
;the quotient, but that doesn't matter because we only need the last
;3 bits of the quotient (and we're about to calculate 32 quotient bits).
;This subtraction will not affect the remainder.

	sub	eax,esi
	sbb	edx,ebx
	jnc	FpremReduce32		;Was dividend big?
	add	eax,esi			;Restore dividend, it was smaller
	adc	edx,ebx
;Falls through to FpremReduce32 with dividend < divisor

;Division algorithm from Knuth vol. 2, p. 237, using 32-bit "digits":
;Guess a quotient digit by dividing two MSDs of dividend by the MSD of
;divisor.  If divisor is >= 1/2 the radix (radix = 2^32 in this case), then
;this guess will be no more than 2 larger than the correct value of that
;quotient digit (and never smaller).  Divisor meets magnitude condition
;because it's normalized.
;
;This loop typically takes 117 clocks.

;edx:eax = dividend
;ebx:esi = divisor
;edi = exponent difference
;ebp = quotient (zero)

FpremReduce32:
;Precondition: dividend (edx:eax) < divisor (ebx:esi).  Every path that
;reaches this label must guarantee it.  Even so, it is still possible that
;high dividend == high divisor, which will cause the DIV instruction
;to overflow.
	cmp	edx,ebx			;Will DIV instruction overflow?
	jae	PremOvfl
	div	ebx			;Guess a quotient "digit"

;Currently, remainder in edx = dividend - (quotient * high half divisor).
;The definition of remainder is dividend - (quotient * all divisor).  So
;if we subtract (quotient * low half divisor) from edx, we'll get
;the true remainder.  If it's negative, our guess was too big.

	mov	ebp,eax			;Save quotient
	mov	ecx,edx			;Save remainder
	mul	esi			;Quotient * low half divisor
	neg	eax			;Subtract from dividend extended with 0
	sbb	ecx,edx			;Subtract from remainder
	mov	edx,ecx			;Remainder back to edx:eax
	jnc	HavPremQuo		;Was quotient OK?
FpremCorrect:
	dec	ebp			;Quotient was too big
	add	eax,esi			;Add divisor back into remainder
	adc	edx,ebx
	jnc	FpremCorrect		;Repeat if quotient is still too big
HavPremQuo:
	sub	edi,32			;Exponent reduced
	cmp	edi,32			;Exponent difference within 31?
	jl	PremNormalize		;Do it a bit a time
	or	edx,edx			;Check for zero remainder
	jnz	FpremReduce32		;Remainder < divisor, precondition holds
	or	eax,eax			;Remainder 0?
	jz	ExactPrem
	xchg	edx,eax			;Shift left 32 bits (eax is now zero)
	sub	edi,32			;Another 32 bits reduced
	cmp	edi,32
	jl	@F			;Under 32 bits left, finish in stage 2
;The shifted remainder can now be >= divisor (old low half > high half of
;divisor), which would break the precondition of FpremReduce32.  As on
;first entry to stage 1, subtract the divisor once if it fits.  This only
;alters quotient bits that are about to be shifted out, never the remainder.
	sub	eax,esi
	sbb	edx,ebx
	jnc	FpremReduce32		;Divisor fit, dividend now < divisor
	add	eax,esi			;Restore dividend, it was smaller
	adc	edx,ebx
	jmp	FpremReduce32
@@:
	xor	ebp,ebp			;No quotient bits are valid
	jmp	PremNormalize

PremOvfl:
;edx:eax = dividend
;ebx:esi = divisor
;On exit, ebp = second quotient "digit"
;
;Come here if divide instruction would overflow. This must mean that edx == ebx,
;i.e., the high halves of the dividend and divisor are equal. Assume a result
;of 2^32-1, thus remainder = dividend - ( divisor * (2^32-1) )
; = dividend - divisor * 2^32 + divisor. Since the high halves of the dividend
;and divisor are equal, dividend - divisor * 2^32 can be computed by
;subtracting only the low halves. When adding divisor (in ebx) to this, note
;that edx == ebx, and we want the result in edx anyway.
;
;Note also that since dividend < divisor, the
;dividend - divisor * 2^32 calculation must always be negative. Thus the
;addition of divisor back to it should generate a carry if it goes positive.
;
;This derivation is only valid when the caller guarantees
;dividend < divisor, so that the "jae" that leads here can only mean
;edx == ebx.  It produces a wrong remainder if entered with edx > ebx.

	mov	ebp,-1			;Max quotient digit
	sub	eax,esi			;Calculate correct remainder
	add	edx,eax			;Should set CY if quotient fit
	mov	eax,esi			;edx:eax has new remainder
	jc	HavPremQuo		;Remainder was positive
	jmp	FpremCorrect		;Quotient digit too big, correct it

ExactPrem:
;Remainder is exactly zero.  Store a zero (mantissa 0, exponent 0, tag
;bTAG_ZERO) into the current stack element, leaving its sign byte
;untouched, and return past the pending continuation address.
;
;eax = 0
;
;NOTE(review): this path does not write SWcc and discards the quotient in
;ebp.  Presumably the skipped return address is PremCont/Prem1Cont, so
;SavePremResult never runs.  Confirm that the caller leaves the condition
;codes in the intended state for an exact result.
	mov	esi,EMSEG:[CURstk]
	mov	EMSEG:[esi].lManLo,eax
	mov	EMSEG:[esi].lManHi,eax
;The stack holds 32-bit return addresses, so the full 32-bit stack pointer
;is adjusted.  Adjusting only SP would leave ESP wrong whenever the low
;word wraps (SP = 0FFFCh).
	add	esp,4			;Clean off first return address
	mov	EMSEG:[esi].wExp,ax
	mov	EMSEG:[esi].bTag,bTAG_ZERO
	ret


;FPREM stage 2
;
;Exponent difference is less than 32.  Use restoring long division to
;compute quotient bits until exponent difference is zero.  Note that we
;often get more than one bit/loop:  BSR is used to scan off leading
;zeros each time around.  Since the divisor is normalized, we can
;instantly compute a zero quotient bit for each leading zero bit.
;
;For reductions of 1 to 31 bits per loop, this loop requires 41 or 59 clocks
;plus 3 clocks/bit (BSR time).  If we had to use this for 32-bit reductions
;(without stage 1), we could expect (50+6)*16 = 896 clocks typ (2 bits/loop)
;instead of the 112 required by stage 1!
;
;Entry points: FitDivisor (from PremDouble when stage 1 is skipped) and
;PremNormalize (from stage 1).  FpremLoop and DivisorFit are reached only
;from FitDivisor.

FpremLoop:
;edx:eax = dividend (remainder) minus divisor
;ebx:esi = divisor
;ebp = quotient
;edi = exponent difference, less than 32
;
;If R is current remainder and d is divisor, then we have edx:eax = R - d,
;which is negative.  We want 2*R - d, which is positive.
;2*R - d = 2*(R - d) + d.
	add	eax,eax			;2*(R - d)
	adc	edx,edx
	add	eax,esi			;2*(R-d) + d = 2*R - d
	adc	edx,ebx	
	add	ebp,ebp			;Double quotient too
	dec	edi			;Decrement exponent difference
DivisorFit:
	inc	ebp			;Count one in quotient
PremNormalize:
	bsr	ecx,edx			;Find first 1 bit
	jz	PremHighZero		;High half is zero
	not     cl
	and     cl,1FH                  ;Convert bit no. to shift count
	shld	edx,eax,cl		;Normalize
	shl	eax,cl
	sub	edi,ecx			;Reduce exponent difference
	jl	PremTooFar		;Shifted past exponent difference of zero
	shl	ebp,cl			;Shift quotient
FitDivisor:
;Dividend could be larger or smaller than divisor
	sub	eax,esi
	sbb	edx,ebx
	jnc	DivisorFit
;Couldn't subtract divisor from dividend.
	or	edi,edi			;Is exponent difference zero or less?
	jg	FpremLoop
;Done: undo the trial subtraction and return the remainder.
	add	eax,esi			;Restore dividend
	adc	edx,ebx
	xor	ecx,ecx			;Sign is positive
	ret

PremTooFar:
;Exponent difference in edi went negative when reduced by shift count in ecx.
;We need a quotient corresponding to exponent difference of zero.
;Adding the (negative) new difference to the shift count yields the
;difference as it was before normalization, which is how far the quotient
;must be shifted.  edi itself stays negative for the caller.
	add	ecx,edi			;Restore exponent difference
	shl	ebp,cl			;Fix up quotient
ExitPremLoop:
;Also entered directly from PremDouble when the dividend's exponent is
;smaller than the divisor's (ebp = 0, dividend returned unchanged).
;
;edx:eax = remainder, normalized
;ebp = quotient
;edi = exponent difference, zero or less
	xor	ecx,ecx			;Sign is positive
	ret

PremHighZero:
;High half of remainder is all zero, so we've reduced exponent difference
;by 32 bits and overshot.  We need a quotient corresponding to exponent
;difference of zero, so we just shift it by the original difference.  Then
;we need to normalize the low half remainder.
;
;On exit (non-zero remainder):
;edx:eax = remainder, normalized (eax = 0)
;ebp = quotient
;edi = exponent difference, negative
;ecx = 0 (positive sign)
	mov	ecx,edi
	shl	ebp,cl			;Fix up quotient
	bsr	ecx,eax
	jz	ExactPrem		;Low half zero too: remainder is exactly 0
	lea     edi,[edi+ecx-63]        ;Fix up exponent for normalization
	xchg	eax,edx			;Shift by 32 bits
	not     cl			;Low 5 bits = 31 - bit index = shift count
        shl     edx,cl                  ;Normalize remainder
        xor     ecx,ecx                 ;Sign is positive
        ret