1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
|
subttl emftran.asm - Transcendental instructions
page
;*******************************************************************************
; Copyright (c) Microsoft Corporation 1991
; All Rights Reserved
;
;emftran.asm - Transcendental instructions
; by Tim Paterson
;
;Purpose:
; F2XM1, FPATAN, FYL2X, FYL2XP1 instructions
;Inputs:
; edi = [CURstk]
;
;Revision History:
;
; [] 09/05/91 TP Initial 32-bit version.
;
;*******************************************************************************
;********************* Polynomial Coefficients *********************
;These polynomial coefficients were all taken from "Computer Approximations"
;by J.F. Hart (reprinted 1978 w/corrections). All calculations and
;conversions to hexadecimal were done with a character-string calculator
;written in Visual Basic with precision set to 30 digits. Once the constants
;were typed into this file, all transfers were done with cut-and-paste
;operations to and from the calculator to help eliminate any typographical
;errors.
tAtanPoly label word
;These constants are from Hart #5056: atan(x) = x * P(x^2) / Q(x^2),
;accurate to 20.78 digits over interval [0, tan(pi/12)].
dd 4 ;P() is degree four
; Hart constant
;
;+.16241 70218 72227 96595 08 E0
;Hex value: 0.A650A5D5050DE43A2C25A8C00 HFFFE
dq 0A650A5D5050DE43AH
dw bTAG_VALID,0FFFEH-1
;+.65293 76545 29069 63960 675 E1
;Hex value: 0.D0F0A714A9604993AC4AC49A0 H3
dq 0D0F0A714A9604994H
dw bTAG_VALID,03H-1
;+.39072 57269 45281 71734 92684 E2
;Hex value: 0.9C4A507F16530AC3CDDEFA3DE H6
dq 09C4A507F16530AC4H
dw bTAG_VALID,06H-1
;+.72468 55912 17450 17145 90416 9 E2
;Hex value: 0.90EFE6FB30465042CF089D1310 H7
dq 090EFE6FB30465043H
dw bTAG_VALID,07H-1
;+.41066 29181 34876 24224 77349 62 E2
;Hex value: 0.A443E2004BB000B84A5154D44 H6
dq 0A443E2004BB000B8H
dw bTAG_VALID,06H-1
dd 4 ;Q() is degree four
; Hart constant
;
;+.15023 99905 56978 85827 4928 E2
;Hex value: 0.F0624CD575B782643AFB912D0 H4
dq 0F0624CD575B78264H
dw bTAG_VALID,04H-1
;+.59578 42201 83554 49303 22456 E2
;Hex value: 0.EE504DDC907DEAEB7D7473B82 H6
dq 0EE504DDC907DEAEBH
dw bTAG_VALID,06H-1
;+.86157 32305 95742 25062 42472 E2
;Hex value: 0.AC508CA5E78E504AB2032E864 H7
dq 0AC508CA5E78E504BH
dw bTAG_VALID,07H-1
;+.41066 29181 34876 24224 84140 84 E2
;Hex value: 0.A443E2004BB000B84F542813C H6
dq 0A443E2004BB000B8H
dw bTAG_VALID,06H-1
;tan(pi/12) = tan(15 deg.) = 2 - sqrt(3)
;= 0.26794 91924 31122 70647 25536 58494 12763 ;From Hart appendix
;Hex value: 0.8930A2F4F66AB189B517A51F2 HFFFF
Tan15Hi equ 08930A2F4H
Tan15Lo equ 0F66AB18AH
Tan15exp equ 0FFFFH-1
;1/tan(pi/6) = sqrt(3) = 1.73205 08075 68877 29352 74463 41505 87236 ;From Hart appendix
;Hex value: 0.DDB3D742C265539D92BA16B8 H1
Sqrt3Hi equ 0DDB3D742H
Sqrt3Lo equ 0C265539EH
Sqrt3exp equ 01H-1
;pi = +3.14159265358979323846264338328
;Hex value: 0.C90FDAA22168C234C4C6628B8 H2
PiHi equ 0C90FDAA2H
PiLo equ 02168C235H
PiExp equ 02H-1
;3*pi = +9.42477796076937971538793014984
;Hex value: 0.96CBE3F9990E91A79394C9E890 H4
XThreePiHi equ 096CBE3F9H
XThreePiMid equ 0990E91A7H
XThreePiLo equ 090000000H
ThreePiExp equ 04H-1
;This is a table of multiples of pi/6. It is used to adjust the
;final result angle after atan(). Derived from Hart appendix
;pi/180 = 0.01745 32925 19943 29576 92369 07684 88612
;
;When the reduced argument for atan() is very small, these correction
;constants simply become the result. These constants have all been
;rounded to nearest, but the user may have selected a different rounding
;mode. The tag byte is not needed for these constants, so its space
;is used to indicate if it was rounded. To determine if a constant
;was rounded, 7FH is subtracted from this flag; CY set means it was
;rounded up.
RoundedUp equ 040H
RoundedDown equ 0C0H
tAtanPiFrac label dword
;pi/2 = +1.57079632679489661923132169163
;Hex value: 0.C90FDAA22168C234C4C6628B0 H1
dq 0C90FDAA22168C235H
dw RoundedUp,01H-1
;2*pi/3 = +2.09439510239319549230842892218
;Hex value: 0.860A91C16B9B2C232DD997078 H2
dq 0860A91C16B9B2C23H
dw RoundedDown,02H-1
;none
dd 0,0,0
;pi/6 = +0.523598775598298873077107230544E0
;Hex value: 0.860A91C16B9B2C232DD99707A H0
dq 0860A91C16B9B2C23H
dw RoundedDown,00H-1
;pi/2 = +1.57079632679489661923132169163
;Hex value: 0.C90FDAA22168C234C4C6628B0 H1
dq 0C90FDAA22168C235H
dw RoundedUp,01H-1
;pi/3 = +1.04719755119659774615421446109
;Hex value: 0.860A91C16B9B2C232DD997078 H1
dq 0860A91C16B9B2C23H
dw RoundedDown,01H-1
;pi = +3.14159265358979323846264338328
;Hex value: 0.C90FDAA22168C234C4C6628B8 H2
dq 0C90FDAA22168C235H
dw RoundedUp,02H-1
;5*pi/6 = +2.61799387799149436538553615272
;Hex value: 0.A78D3631C681F72BF94FFCC96 H2
dq 0A78D3631C681F72CH
dw RoundedUp,02H-1
;*********************
tExpPoly label word
;These constants are from Hart #1324: 2^x - 1 =
; 2 * x * P(x^2) / ( Q(x^2) - x * P(x^2) )
;accurate to 21.54 digits over interval [0, 0.5].
dd 2 ;P() is degree two
; Hart constant
;
;+.60613 30790 74800 42574 84896 07 E2
;Hex value: 0.F27406FCF405189818F68BB78 H6
dq 0F27406FCF4051898H
dw bTAG_VALID,06H-1
;+.30285 61978 21164 59206 24269 927 E5
;Hex value: 0.EC9B3D5414E1AD0852E432A18 HF
dq 0EC9B3D5414E1AD08H
dw bTAG_VALID,0FH-1
;+.20802 83036 50596 27128 55955 242 E7
;Hex value: 0.FDF0D84AC3A35FAF89A690CC4 H15
dq 0FDF0D84AC3A35FB0H
dw bTAG_VALID,015H-1
dd 3 ;Q() is degree three. First
;coefficient is 1.0 and is not listed.
; Hart constant
;
;+.17492 20769 51057 14558 99141 717 E4
;Hex value: 0.DAA7108B387B776F212ECFBEC HB
dq 0DAA7108B387B776FH
dw bTAG_VALID,0BH-1
;+.32770 95471 93281 18053 40200 719 E6
;Hex value: 0.A003B1829B7BE85CC81BD5309 H13
dq 0A003B1829B7BE85DH
dw bTAG_VALID,013H-1
;+.60024 28040 82517 36653 36946 908 E7
;Hex value: 0.B72DF814E709837E066855BDD H17
dq 0B72DF814E709837EH
dw bTAG_VALID,017H-1
;sqrt(2) = 1.41421 35623 73095 04880 16887 24209 69808 ;From Hart appendix
;Hex value: 0.B504F333F9DE6484597D89B30 H1
Sqrt2Hi equ 0B504F333H
Sqrt2Lo equ 0F9DE6484H
Sqrt2Exp equ 01H-1
;sqrt(2) - 1 = +0.4142135623730950488016887242E0
;Hex value: 0.D413CCCFE779921165F626CC4 HFFFF
Sqrt2m1Hi equ 0D413CCCFH
Sqrt2m1Lo equ 0E7799211H
XSqrt2m1Lo equ 060000000H
Sqrt2m1Exp equ 0FFFFH-1
;2 - sqrt(2) = +0.5857864376269049511983112758E0
;Hex value: 0.95F619980C4336F74D04EC9A0 H0
TwoMinusSqrt2Hi equ 095F61998H
TwoMinusSqrt2Lo equ 00C4336F7H
TwoMinusSqrt2Exp equ 00H-1
;*********************
tLogPoly label dword
;These constants are derived from Hart #2355: log2(x) = z * P(z^2) / Q(z^2),
; z = (x+1) / (x-1) accurate to 19.74 digits over interval
;[1/sqrt(2), sqrt(2)]. The original Hart coefficients were for log10();
;the P() coefficients have been scaled by log2(10) to compute log2().
;
;log2(10) = 3.32192 80948 87362 34787 03194 29489 39017 ;From Hart appendix
dd 3 ;P() is degree three
; Original Hart constant Scaled value
;
;+.18287 59212 09199 9337 E0 +0.607500660543248917834110566373E0
;Hex value: 0.9B8529CD54E72022A12BAEC53 H0
dq 09B8529CD54E72023H
dw bTAG_VALID,00H-1
;-.41855 96001 31266 20633 E1 -13.9042489506087332809657007634
;Hex value: 0.DE77CDBF64E8C53F0DCD458D0 H4
dq 0DE77CDBF64E8C53FH
dw bSign shl 8 + bTAG_VALID,04H-1
;+.13444 58152 27503 62236 E2 +44.6619330844279438866067340334
;Hex value: 0.B2A5D1C95708A0C9FE50F6F97 H6
dq 0B2A5D1C95708A0CAH
dw bTAG_VALID,06H-1
;-.10429 11213 72526 69497 44122 E2 -34.6447606134704282123622236943
;Hex value: 0.8A943C20526AE439A98B30F6A H6
dq 08A943C20526AE43AH
dw bSign shl 8 + bTAG_VALID,06H-1
dd 3 ;Q() is degree three. First
;coefficient is 1.0 and is not listed.
; Hart constant
;
;-.89111 09060 90270 85654 E1
;Hex value: 0.8E93E7183AA998D74F45CDFF0 H4
dq 08E93E7183AA998D7H
dw bSign shl 8 + bTAG_VALID,04H-1
;+.19480 96618 79809 36524 155 E2
;Hex value: 0.9BD904CCFEE118D4BEF319716 H5
dq 09BD904CCFEE118D5H
dw bTAG_VALID,05H-1
;-.12006 95907 02006 34243 4218 E2
;Hex value: 0.C01C811D2EC1B5806304B1858 H4
dq 0C01C811D2EC1B580H
dw bSign shl 8 + bTAG_VALID,04H-1
;Log2(e) = 1.44269 50408 88963 40735 99246 81001 89213 ;From Hart appendix
;Hex value: 0.B8AA3B295C17F0BBBE87FED04 H1
Log2OfEHi equ 0B8AA3B29H
Log2OfELo equ 05C17F0BCH
Log2OfEexp equ 01H-1
;********************* Generic polynomial evaluation *********************
;
;EvalPoly, EvalPolyAdd, EvalPolySetup, Eval2Poly
;
;Inputs:
; ebx:esi,ecx = floating point number, internal format
; edi = pointer to polynomial degree and coefficients
;Outputs:
; result in ebx:esi,ecx
; edi incremented to start of last coefficient in list
;
;EvalPoly is the basic polynomial evaluator, using Horner's rule. The
;polynomial pointer in edi points to a list: the first dword in the list
;is the degree of the polynomial (n); it is followed by the n+1
;coefficients in internal (12-byte) format. The argment for EvalPoly
;must be stored in the static FloatTemp in addition to being in
;registers.
;
;EvalPolyAdd is an alternate entry point into the middle of EvalPoly.
;It is used when the first coefficient is 1.0, so it skips the first
;multiplication. It requires that the degree of the polynomial be
;already loaded into ebp.
;
;EvalPolySetup store a copy of the argument in the static ArgTemp,
;and stores the square of the argument in the static FloatTemp.
;Then it falls into EvalPoly to evaluate the polynomial on the square.
;
;Eval2Poly evaluate two polynomials on its argument. The first
;polynomial is x * P(x^2), and its result is left at [[CURstk]].
;The second polynomial is Q(x^2), and its result is left in registers.
;The most significant coefficient of Q() is 1.
;
;Polynomial evaluation uses a slight variation on the standard add
;and multiply routines. PolyAddDouble and PolyMulDouble both check
;to see if the argument in registers (the current accumulation) is
;zero. The argument pointed to by edi is a coefficient and is never
;zero.
;
;In addition, the [RoundMode] and [ZeroVector] vectors are "trapped",
;i.e., redirected to special handlers for polynomial evaluation.
;[RoundMode] ordinarily points to the routine that handles the
;the current rounding mode and precision control; however, during
;polynomial evaluation, we always want full precision and round
;nearest. The normal rounding routines also store their result
;at [[Result]], but we want the result left in registers.
;[ZeroVector] exists solely so polynomial evaluation can trap
;when AddDouble results of zero. The normal response is to store
;a zero at [[Result]], but we need the zero left in registers.
;PolyRound and PolyZero handle these traps.
EvalPolySetup:
;Save x in ArgTemp
mov EMSEG:[ArgTemp].ExpSgn,ecx
mov EMSEG:[ArgTemp].lManHi,ebx
mov EMSEG:[ArgTemp].lManLo,esi
mov EMSEG:[RoundMode],offset PolyRound
mov EMSEG:[ZeroVector],offset PolyZero
push edi ;Save pointer to polynomials
;op1 mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
mov edx,ebx
mov edi,esi
mov eax,ecx
;op2 mantissa in edx:edi, exponent in high eax, sign in ah bit 7
call MulDoubleReg ;Compute x^2
;Save x^2 in FloatTemp
mov EMSEG:[FloatTemp].ExpSgn,ecx
mov EMSEG:[FloatTemp].lManHi,ebx
mov EMSEG:[FloatTemp].lManLo,esi
pop edi
EvalPoly:
;ebx:esi,ecx = arg to evaluate, also in FloatTemp
;edi = pointer to degree and list of coefficients.
push edi
mov eax,cs:[edi+4].ExpSgn
mov edx,cs:[edi+4].lManHi
mov edi,cs:[edi+4].lManLo
call MulDoubleReg ;Multiply arg by first coef.
pop edi
mov ebp,cs:[edi] ;Get polynomial degree
add edi,4+Reg87Len ;Point to second coefficient
jmp EvalPolyAdd
PolyLoop:
push ebp ;Save loop count
ifdef NT386
mov edi,YFloatTemp
else
mov edi,offset edata:FloatTemp
endif
call PolyMulDouble
pop ebp
pop edi
add di,Reg87Len
EvalPolyAdd:
push edi
mov eax,cs:[edi].ExpSgn
mov edx,cs:[edi].lManHi
mov edi,cs:[edi].lManLo
cmp cl,bTAG_ZERO ;Adding to zero?
jz AddToZero
call AddDoubleReg ;ebp preserved
ContPolyLoop:
dec ebp
jnz PolyLoop
pop edi
ret
AddToZero:
;Number in registers is zero, so just return value from memory.
mov ecx,eax
mov ebx,edx
mov esi,edi
jmp ContPolyLoop
Eval2Poly:
call EvalPolySetup
push edi
ifdef NT386
mov edi,YArgTemp
else
mov edi,offset edata:ArgTemp
endif
call PolyMulDouble ;Multiply first result by argument
pop edi
;Save result of first polynomial at [[CURstk]]
mov edx,EMSEG:[CURstk]
mov EMSEG:[edx].ExpSgn,ecx
mov EMSEG:[edx].lManHi,ebx
mov EMSEG:[edx].lManLo,esi
;Load x^2 back into registers
mov ecx,EMSEG:[FloatTemp].ExpSgn
mov ebx,EMSEG:[FloatTemp].lManHi
mov esi,EMSEG:[FloatTemp].lManLo
;Start second polynomial evaluation
add edi,4+Reg87Len ;Point to coefficient
mov ebp,cs:[edi-4] ;Get polynomial degree
jmp EvalPolyAdd
PolyRound:
;This routine handles all rounding during polynomial evaluation.
;It performs 64-but round nearest, with result left in registers.
;
;Inputs:
; mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
;Outputs:
; same, plus tag in cl.
;
;To perform "round even" when the round bit is set and the sticky bits
;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB
;is set, that will always force a round up (to even) if the round bit is
;set. If the LSB is zero, then the sticky bits remain zero and we always
;round down. This rounding rule is implemented by adding RoundBit-1
;(7F..FFH), setting CY if round up.
;
;This routine needs to be reversible in case we're at the last step
;in the polynomial and final rounding uses a different rounding mode.
;We do this by copying the LSB of esi into al. While the rounding is
;reversible, you can't tell if the answer was exact.
mov edx,esi
and dl,1 ;Look at LSB
or al,dl ;Set LSB as sticky bit
add eax,(1 shl 31)-1 ;Sum LSB & sticky bits--CY if round up
adc esi,0
adc ebx,0
jc PolyBumpExponent ;Overflowed, increment exponent
or esi,esi ;Any bits in low half?
.erre bTAG_VALID eq 1
.erre bTAG_SNGL eq 0
setnz cl ;if low half==0 then cl=0 else cl=1
ret
PolyBumpExponent:
add ecx,1 shl 16 ;Mantissa overflowed, bump exponent
or ebx,1 shl 31 ;Set MSB
mov cl,bTAG_SNGL
PolyZero:
;Enter here when result is zero
ret
;*******************************************************************************
;FPATAN instruction
;Actual instruction entry point is in emarith.asm
tFpatanDisp label dword ;Source (ST(0)) Dest (*[di] = ST(1))
dd AtanDouble ;single single
dd AtanDouble ;single double
dd AtanZeroDest ;single zero
dd AtanSpclDest ;single special
dd AtanDouble ;double single
dd AtanDouble ;double double
dd AtanZeroDest ;double zero
dd AtanSpclDest ;double special
dd AtanZeroSource ;zero single
dd AtanZeroSource ;zero double
dd AtanZeroDest ;zero zero
dd AtanSpclDest ;zero special
dd AtanSpclSource ;special single
dd AtanSpclSource ;special double
dd AtanSpclSource ;special zero
dd TwoOpBothSpcl ;special special
dd AtanTwoInf ;Two infinites
;Compute atan( st(1)/st(0) ). Neither st(0) or st(1) are zero or
;infinity at this point.
;
;Argument reduction starts by dividing the smaller by the larger,
;ensuring that the result x is <= 1. The absolute value of the quotient
;is used and the quadrant is fixed up later. If x = st(0)/st(1), then
;the final atan result is subtracted from pi/2 (and normalized for the
;correct range of -pi to +pi).
;
;The range of x is further reduced using the formulas:
; t = (x - k) / (1 + kx)
; atan(x) = atan(k) + atan(t)
;
;Given that x <= 1, if we choose k = tan(pi/6) = 1/sqrt(3), then we
;are assured that t <= tan(pi/12) = 2 - sqrt(3), and
;for x >= tan(pi/12) = 2 - sqrt(3), t >= -tan(pi/12).
;Thus we can always reduce the argument to abs(t) <= tan(pi/12).
;
;Since k = 1/sqrt(3), it is convenient to multiply the numerator
;and denominator of t by 1/k, which gives
;t = (x/k - 1) / (1/k + x) = ( x*sqrt(3) - 1 ) / ( sqrt(3) + x ).
;This is the form found in Cody and Waite and in previous versions
;of the emulator. It requires one each add, subtract, multiply, and
;divide.
;
;Hart has derived a simpler version of this formula:
;t = 1/k - (1/k^2 + 1) / (1/k + x) = sqrt(3) - 4 / ( sqrt(3) + x ).
;Note that this computation requires one each add, subtract, and
;divide, but no multiply.
;st(0) mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
;[edi] points to st(1), where result is returned
AtanDouble:
mov EMSEG:[Result],edi
mov EMSEG:[RoundMode],offset PolyRound
mov EMSEG:[ZeroVector],offset PolyZero
mov ah,EMSEG:[edi].bSgn ;Sign of result
mov al,ch ;Affects quadrant of result
and al,bSign ;Zero other bits, used as flags
push eax ;Save flag
;First figure out which is larger
push offset AtanQuo ;Return address for DivDouble
shld edx,ecx,16 ;Get exponent to ax
cmp dx,EMSEG:[edi].wExp ;Compare exponents
jl DivrDoubleSetFlag ;ST(0) is smaller, make it dividend
jg DivDouble ; ...is bigger, make it divisor
;Exponents are equal, compare mantissas
cmp ebx,EMSEG:[edi].lManHi
jb DivrDoubleSetFlag ;ST(0) is smaller, make it dividend
ja DivDouble ; ...is bigger, make it divisor
cmp esi,EMSEG:[edi].lManLo
jbe DivrDoubleSetFlag ;ST(0) is smaller, make it dividend
jmp DivDouble
TinyAtan:
;Come here if the angle was reduced to zero, or the divide resulted in
;unmasked underflow so that the quotient exponent was biased.
;Note that an angle of zero means reduction was performed, and the
;result will be corrected to a non-zero value.
mov dl,[esp] ;Get flag byte
or dl,dl ;No correction needed?
jz AtanSetSign ;Just return result of divide
and EMSEG:[CURerr],not Underflow
;Angle in registers is too small to affect correction amount. Just
;load up correction angle instead of adding it in.
add dl,40H ;Change flags for correction lookup
shr dl,5-2 ;Now in bits 2,3,4
and edx,7 shl 2
mov ebx,[edx+2*edx+tAtanPiFrac].lManHi
mov esi,[edx+2*edx+tAtanPiFrac].lManLo
mov ecx,[edx+2*edx+tAtanPiFrac].ExpSgn
shrd eax,ecx,8 ;Copy rounding flag to high eax
jmp AtanSetSign
AtanQuo:
;Return here after divide. Underflow flag is set only for "big underflow",
;meaning the (15-bit) exponent couldn't even be kept in 16 bits. This can
;only happen dividing a denormal by one of the largest numbers.
;
;Rounded mantissa in ebx:esi:eax, exp/sign in high ecx
test EMSEG:[CURerr],Underflow;Did we underflow?
jnz TinyAtan
;Now compare quotient in ebx:esi,ecx with tan(pi/12) = 2 - sqrt(3)
xor cx,cx ;Use absolute value
cmp ecx,Tan15exp shl 16
jg AtnNeedReduce
jl AtnReduced
cmp ebx,Tan15Hi
ja AtnNeedReduce
jb AtnReduced
cmp esi,Tan15Lo
jbe AtnReduced
AtnNeedReduce:
or byte ptr [esp],20H ;Note reduction in flags on stack
;Compute t = sqrt(3) - 4 / ( sqrt(3) + x ).
mov eax,Sqrt3exp shl 16
mov edx,Sqrt3Hi
mov edi,Sqrt3Lo
call AddDoubleReg ;x + sqrt(3)
mov edi,esi
mov esi,ebx ;Mantissa in esi:edi
mov ebx,ecx ;ExpSgn to ebx
mov ecx,(2+TexpBias) shl 16
mov edx,1 shl 31
xor eax,eax ;edx:edi,eax = 4.0
;dividend mantissa in edx:eax, exponent in high ecx, sign in ch bit 7
;divisor mantissa in esi:edi, exponent in high ebx, sign in bh bit 7
call DivDoubleReg ;4 / ( x + sqrt(3) )
not ch ;Flip sign
mov eax,Sqrt3exp shl 16
mov edx,Sqrt3Hi
mov edi,Sqrt3Lo
call AddDoubleReg ;sqrt(3) - 4 / ( x + sqrt(3) )
;Result in ebx:esi,ecx could be very small (or zero) if arg was near tan(pi/6).
cmp cl,bTAG_ZERO
jz TinyAtan
AtnReduced:
;If angle is small, skip the polynomial. atan(x) = x when x - x^3/3 = x
;[or 1 - x^2/3 = 1], which happens when x < 2^-32. This prevents underflow
;in computing x^2.
TinyAtanArg equ -32
cmp ecx,TinyAtanArg shl 16
jl AtanCorrection
mov edi,offset tAtanPoly
call Eval2Poly
mov edi,EMSEG:[CURstk] ;Point to first result
call DivDouble ;x * P(x^2) / Q(x^2)
AtanCorrection:
;Rounded mantissa in ebx:esi:eax, exp/sign in high ecx
;
;Correct sign and add fraction of pi to account for various angle reductions:
;
; flag bit indicates correction
;----------------------------------------------------
; 5 arg > tan(pi/12) add pi/6
; 6 st(1) > st(0) sub from pi/2
; 7 st(0) < 0 sub from pi
;
;This results in the following correction for the result R:
;
;bit 7 6 5 correction
;---------------------------
; 0 0 0 none
; 0 0 1 pi/6 + R
; 0 1 0 pi/2 - R
; 0 1 1 pi/3 - R
; 1 0 0 pi - R
; 1 0 1 5*pi/6 - R
; 1 1 0 pi/2 + R
; 1 1 1 2*pi/3 + R
mov dl,[esp] ;Get flag byte
or dl,dl ;No correction needed?
jz AtanSetSign
add dl,40H ;Set bit 7 for all -R cases
;This changes the meaning of the flag bits to the following:
;
;bit 7 6 5 correction
;---------------------------
; 0 0 0 pi/2 + R
; 0 0 1 2*pi/3 + R
; 0 1 0 none
; 0 1 1 pi/6 + R
; 1 0 0 pi/2 - R
; 1 0 1 pi/3 - R
; 1 1 0 pi - R
; 1 1 1 5*pi/6 - R
xor ch,dl ;Flip sign bit in cases 4 - 7
shr dl,5-2 ;Now in bits 2,3,4
and edx,7 shl 2
mov eax,[edx+2*edx+tAtanPiFrac].ExpSgn
mov edi,[edx+2*edx+tAtanPiFrac].lManLo
mov edx,[edx+2*edx+tAtanPiFrac].lManHi
call AddDoubleReg ;Add in correction angle
AtanSetSign:
pop edx ;Get flags again
mov ch,dh ;Set sign to original ST(1)
;Rounded mantissa in ebx:esi:eax, exp/sign in ecx
jmp TransUnround
;***
AtanSpclDest:
mov al,EMSEG:[edi].bTag ;Pick up tag
; cmp cl,bTAG_INF ;Is argument infinity?
cmp al,bTAG_INF ;Is argument infinity?
jnz SpclDest ;In emarith.asm
AtanZeroSource:
;Dividend is infinity or divisor is zero. Return pi/2 with
;same sign as dividend.
mov ecx,(PiExp-1) shl 16 + bTAG_VALID ;Exponent for pi/2
PiMant:
;For storing multiples of pi. Exponent/tag is in ecx.
mov ch,EMSEG:[edi].bSgn ;Get dividend's sign
mov ebx,XPiHi
mov esi,XPiMid
mov eax,XPiLo
;A jump through [TransRound] is only valid if the number is known not to
;underflow. Unmasked underflow requires [RoundMode] be set.
jmp EMSEG:[TransRound]
;***
AtanSpclSource:
cmp cl,bTAG_INF ;Scaling by infinity?
jnz SpclSource ;in emarith.asm
AtanZeroDest:
;Divisor is infinity or dividend is zero. Return zero for +divisor,
;pi for -divisor. Result sign is same is dividend.
or ch,ch ;Check divisor's sign
mov ecx,PiExp shl 16 + bTAG_VALID ;Exponent for pi
js PiMant ;Store pi
;Result is zero
mov EMSEG:[edi].lManHi,0
mov EMSEG:[edi].lManLo,0
mov EMSEG:[edi].wExp,0
mov EMSEG:[edi].bTAG,bTAG_ZERO
ret
;***
AtanTwoInf:
;Return pi/4 for +infinity divisor, 3*pi/4 for -infinity divisor.
;Result sign is same is dividend infinity.
or ch,ch ;Check divisor's sign
mov ecx,(PiExp-2) shl 16 + bTAG_VALID ;Exponent for pi/4
jns PiMant ;Store pi/4
mov ecx,(ThreePiExp-2) shl 16 + bTAG_VALID ;Exponent for 3*pi/4
mov ch,EMSEG:[edi].bSgn ;Get dividend's sign
mov ebx,XThreePiHi
mov esi,XThreePiMid
mov eax,XThreePiLo
;A jump through [TransRound] is only valid if the number is known not to
;underflow. Unmasked underflow requires [RoundMode] be set.
jmp EMSEG:[TransRound]
;*******************************************************************************
ExpSpcl:
;Tagged special
cmp cl,bTAG_DEN
jz ExpDenorm
cmp cl,bTAG_INF
mov al, cl
jnz SpclDestNotDen ;Check for Empty or NAN
;Have infinity, check its sign.
;Return -1 for -infinity, no change if +infinity
or ch,ch ;Check sign
jns ExpRet ;Just return the +inifinity
mov EMSEG:[edi].lManLo,0
mov EMSEG:[edi].lManHi,1 shl 31
mov EMSEG:[edi].ExpSgn,bSign shl 8 + bTAG_SNGL ;-1.0 (exponent is zero)
ret
ExpDenorm:
mov EMSEG:[CURerr],Denormal
test EMSEG:[CWmask],Denormal ;Is denormal exception masked?
jnz ExpCont ;Yes, continue
ExpRet:
ret
EM_ENTRY eF2XM1
eF2XM1:
;edi = [CURstk]
mov ecx,EMSEG:[edi].ExpSgn
cmp cl,bTAG_ZERO
jz ExpRet ;Return same zero
ja ExpSpcl
ExpCont:
;The input range specified for the function is (-1, +1). The polynomial
;used for this function is valid only over the range [0, +0.5], so range
;reduction is needed. Range reduction is based on the identity:
;
; 2^(a+b) = 2^a * 2^b
;
;1.0 or 0.5 can be added/subtracted from the argument to bring it into
;range. We calculate 2^x - 1 with a polynomial, and then adjust the
;result according to the amount added or subtracted, as shown in the table:
;
;Arg range Adj Polynomial result Required result, 2^x - 1
;
; (-1, -0.5] +1 P = 2^(x+1) - 1 (P - 1)/2
;
; (-0.5, 0) +0.5 P = 2^(x+0.5) - 1 P * sqrt(2)/2 + (sqrt(2)/2 - 1)
;
; (0, 0.5) 0 P = 2^x - 1 P
;
; [0.5, 1) -0.5 P = 2^(x-0.5) - 1 P * sqrt(2) + (sqrt(2)-1)
;
;Since the valid input range does not include +1.0 or -1.0, and zero is
;handled separately, the precision exception will always be set.
mov EMSEG:[Result],edi
mov EMSEG:[RoundMode],offset PolyRound
mov EMSEG:[ZeroVector],offset PolyZero
push offset TransUnround ;Always exit through here
mov ebx,EMSEG:[edi].lManHi
mov esi,EMSEG:[edi].lManLo
;Check for small argument, so that x^2 does not underflow. Note that
;e^x = 1+x for small x, where small x means x + x^2/2 = x [or 1 + x/2 = 1],
;which happens when x < 2^-64, so 2^x - 1 = x * ln(2) for small x.
TinyExpArg equ -64
cmp ecx,TinyExpArg shl 16
jl TinyExp
cmp ecx,-1 shl 16 + bSign shl 8 ;See if positive, < 0.5
jl ExpReduced
;Argument was not in range (0, 0.5), so we need some kind of reduction
or ecx,ecx ;Exp >= 0 means arg >= 1.0 --> too big
;CONSIDER: this returns through TransUnround which restores the rounding
;vectors, but it also randomly rounds the result becase eax is not set.
jge ExpRet ;Give up if arg out of range
;We're going to need to add/subtract 1.0 or 0.5, so load up the constant
mov edx,1 shl 31
xor edi,edi
mov eax,-1 shl 16 + bSign shl 8 ;edx:edi,eax = -0.5
mov ebp,offset ExpReducedMinusHalf
or ch,ch ;If it's positive, must be [0.5, 1)
jns ExpReduction
xor ah,ah ;edx:edi,eax = +0.5
mov ebp,offset ExpReducedPlusHalf
cmp ecx,eax ;See if abs(arg) >= 0.5
jl ExpReduction ;No, adjust by .5
xor eax,eax ;edx:edi,eax = 1.0
mov ebp,offset ExpReducedPlusOne
ExpReduction:
call AddDoubleReg ;Argument now in range [0, 0.5]
cmp cl,bTAG_ZERO ;Did reduction result in zero?
jz ExpHalf ;If so, must have been exactly 0.5
push ebp ;Address of reduction cleanup
ExpReduced:
mov edi,offset tExpPoly
call Eval2Poly
;2^x - 1 is approximated with 2 * x*P(x^2) / ( Q(x^2) - x*P(x^2) )
;Q(x^2) is in registers, P(x^2) is at [[CURstk]]
mov edi,EMSEG:[CURstk]
mov dx,bSign shl 8 ;Subtract memory operand
;Note that Q() and P() have no roots over the input range
;(they will never be zero).
call AddDouble ;Q(x^2) - x*P(x^2)
sub ecx,1 shl 16 ;Divide by two
mov edi,EMSEG:[CURstk]
jmp DivDouble ;2 * x*P(x^2) / ( Q(x^2) - x*P(x^2) )
;Returns to correct argument reduction correction routine or TransUnround
TinyExp:
;Exponent is very small (and was not reduced)
mov edx,cFLDLN2hi
mov edi,cFLDLN2lo
mov eax,cFLDLN2exp shl 16
;This could underflow (but not big time)
jmp MulDoubleReg ;Returns to TransUnround
ExpHalf:
;Argument of exactly 0.5 was reduced to zero. Just return result.
mov ebx,Sqrt2m1Hi
mov esi,Sqrt2m1Lo
mov eax,XSqrt2m1Lo + 1 shl 31 - 1
mov ecx,Sqrt2m1Exp shl 16
ret ;Exit through TransUnround
ExpReducedPlusOne:
;Correct result is (P - 1)/2
sub ecx,1 shl 16 ;Divide by two
mov edx,1 shl 31
xor edi,edi
mov eax,-1 shl 16 + bSign shl 8 ;edx:edi,eax = -0.5
jmp AddDoubleReg
ExpReducedPlusHalf:
;Correct result is P * sqrt(2)/2 - (1 - sqrt(2)/2)
mov edx,Sqrt2Hi
mov edi,Sqrt2Lo
mov eax,Sqrt2exp-1 shl 16 ;sqrt(2)/2
call MulDoubleReg
mov edx,TwoMinusSqrt2Hi
mov edi,TwoMinusSqrt2Lo
mov eax,(TwoMinusSqrt2Exp-1) shl 16 + bSign shl 8 ;(2-sqrt(2))/2
jmp AddDoubleReg
ExpReducedMinusHalf:
;Correct result is P * sqrt(2) + (sqrt(2)-1)
mov edx,Sqrt2Hi
mov edi,Sqrt2Lo
mov eax,Sqrt2exp shl 16
call MulDoubleReg
mov edx,Sqrt2m1Hi
mov edi,Sqrt2m1Lo
mov eax,Sqrt2m1Exp shl 16
jmp AddDoubleReg
;*******************************************************************************
;Dispatch table for log(x+1)
;
;One operand has been loaded into ecx:ebx:esi ("source"), the other is
;pointed to by edi ("dest").
;
;Tag of source is shifted. Tag values are as follows:
.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero
.erre TAG_VALID eq 1
.erre TAG_ZERO eq 2
.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty
;Any special case routines not found in this file are in emarith.asm
tFyl2xp1Disp label dword ;Source (ST(0)) Dest (*[di] = ST(1))
dd LogP1Double ;single single
dd LogP1Double ;single double
dd LogP1ZeroDest ;single zero
dd LogP1SpclDest ;single special
dd LogP1Double ;double single
dd LogP1Double ;double double
dd LogP1ZeroDest ;double zero
dd LogP1SpclDest ;double special
dd XorSourceSign ;zero single
dd XorSourceSign ;zero double
dd XorDestSign ;zero zero
dd LogP1SpclDest ;zero special
dd LogSpclSource ;special single
dd LogSpclSource ;special double
dd LogSpclSource ;special zero
dd TwoOpBothSpcl ;special special
dd LogTwoInf ;Two infinites
LogP1Double:
;st(0) mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
;[edi] points to st(1), where result is returned
;
;This instruction is defined only for x+1 in the range [1/sqrt(2), sqrt(2)]
;The approximation used (valid over exactly this range) is
; log2(x) = z * P(z^2) / Q(z^2), z = (x-1) / (x+1), which is
; log2(x+1) = r * P(r^2) / Q(r^2), r = x / (x+2)
;
;We're not too picky about this range check because the function is simply
;"undefined" if out of range--EXCEPT, we're supposed to check for -1 and
;signal Invalid if less, -infinity if equal.
or ecx,ecx ;abs(x) >= 1.0?
jge LogP1OutOfRange ;Valid range is approx [-0.3, +0.4]
mov EMSEG:[Result],edi
mov EMSEG:[RoundMode],offset PolyRound
mov EMSEG:[ZeroVector],offset PolyZero
mov eax,1 shl 16 ;Exponent of 1 for adding 2.0
push offset TotalLog ;Return address for BasicLog
; jmp BasicLog ;Fall into BasicLog
;.erre BasicLog eq $
;BasicLog is used by eFYL2X and eFYL2XP1.
;eax has exponent and sign to add 1.0 or 2.0 to argument
;ebx:esi,ecx has argument, non-zero, tag not set
;ST has argument to take log2 of, minus 1. (This is the actual argument
;of eFYL2XP1, or argument minus 1 of eFYL2X.)
BasicLog:
mov edx,1 shl 31
xor edi,edi ;edx:edi,eax = +1.0 or +2.0
call AddDoubleReg
mov edi,EMSEG:[CURstk] ;Point to x-1
call DivDouble ;Compute (x-1) / (x+1)
;Result in registers is z = (x-1)/(x+1). For tiny z, ln(x) = 2*z, so
; log2(x) = 2 * log2(e) * z. Tiny z is such that z + z^3/3 = z.
cmp ecx,-32 shl 16 ;Smallest exponent to bother with
jl LogSkipPoly
mov edi,offset tLogPoly
call Eval2Poly
mov edi,EMSEG:[CURstk] ;Point to first result, r * P(r^2)
jmp DivDouble ;Compute r * P(r^2) / Q(r^2)
LogSkipPoly:
;Multiply r by 2 * log2(e)
mov edx,Log2OfEHi
mov edi,Log2OfELo
mov eax,(Log2OfEexp+1) shl 16
jmp MulDoubleReg
LogP1OutOfRange:
;Input range isn't valid, so we can return anything we want--EXCEPT, for
;numbers < -1 we must signal Invalid Operation, and Divide By Zero for
;-1. Otherwise, we return an effective log of one by just leaving the
;second operand as the return value.
;
;Exponent in ecx >= 0 ( abs(x) >= 1 )
or ch,ch ;Is it positive?
jns LogP1Ret ;If so, skip it
and ecx,0FFFFH shl 16 ;Look at exponent only: 0 for -1.0
sub ebx,1 shl 31 ;Kill MSB
or ebx,esi
or ebx,ecx
jnz ReturnIndefinite ;Must be < -1.0
jmp DivideByMinusZero
LogP1Ret:
ret
;***
LogP1ZeroDest:
or ch,ch ;Is it negative?
jns LogP1Ret ;If not, just leave it zero
or ecx,ecx ;abs(x) >= 1.0?
jl XorDestSign ;Flip sign of zero
;Argument is <= -1
jmp ReturnIndefinite ;Have 0 * log( <=0 )
;***
LogP1SpclDest:
mov al,EMSEG:[edi].bTag ;Pick up tag
cmp al,bTAG_INF ;Is argument infinity?
jnz SpclDest ;In emarith.asm
;Multiplying log(x+1) * infinity.
;If x > 0, return original infinity.
;If -1 <= x < 0, return infinity with sign flipped.
;If x < -1 or x == 0, invalid operation.
cmp cl,bTAG_ZERO
jz ReturnIndefinite
or ch,ch ;Is it positive?
jns LogP1Ret
test ecx,0FFFFH shl 16 ;Is exponent zero?
jl XorDestSign
jg ReturnIndefinite
sub ebx,1 shl 31 ;Kill MSB
or ebx,esi
jnz ReturnIndefinite ;Must be < -1.0
jmp XorDestSign
;***
LogSpclSource:
cmp cl,bTAG_INF ;Is argument infinity?
jnz SpclSource ;in emarith.asm
or ch,ch ;Is it negative infinity?
js ReturnIndefinite
jmp MulByInf
;***
LogTwoInf:
or ch,ch ;Is it negative infinity?
js ReturnIndefinite
jmp XorDestSign
;*******************************************************************************
;Dispatch table for log(x)
;
;One operand has been loaded into ecx:ebx:esi ("source"), the other is
;pointed to by edi ("dest").
;
;Tag of source is shifted. Tag values are as follows:
.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero
.erre TAG_VALID eq 1
.erre TAG_ZERO eq 2
.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty
;Any special case routines not found in this file are in emarith.asm
tFyl2xDisp label dword ;Source (ST(0)) Dest (*[di] = ST(1))
dd LogDouble ;single single
dd LogDouble ;single double
dd LogZeroDest ;single zero
dd LogSpclDest ;single special
dd LogDouble ;double single
dd LogDouble ;double double
dd LogZeroDest ;double zero
dd LogSpclDest ;double special
dd DivideByMinusZero ;zero single
dd DivideByMinusZero ;zero double
dd ReturnIndefinite ;zero zero
dd LogSpclDest ;zero special
dd LogSpclSource ;special single
dd LogSpclSource ;special double
dd LogSpclSource ;special zero
dd TwoOpBothSpcl ;special special
dd LogTwoInf ;Two infinites
LogDouble:
;st(0) mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
;[edi] points to st(1), where result is returned
;
;Must reduce the argument to the range [1/sqrt(2), sqrt(2)]
or ch,ch ;Is it positive?
js ReturnIndefinite ;Can't take log of negative number
mov EMSEG:[Result],edi
mov EMSEG:[RoundMode],offset PolyRound
mov EMSEG:[ZeroVector],offset PolyZero
shld eax,ecx,16 ;Save exponent in ax as int part of log2
xor ecx,ecx ;Zero exponent: 1 <= x < 2
cmp ebx,Sqrt2Hi ;x > sqrt(2)?
jb LogReduced
ja LogReduceOne
cmp esi,Sqrt2Lo
jb LogReduced
LogReduceOne:
sub ecx,1 shl 16 ;1/sqrt(2) < x < 1
inc eax
LogReduced:
push eax ;Save integer part of log2
mov ebp,ecx ;Save reduced exponent (tag is wrong!)
mov edx,1 shl 31
mov eax,bSign shl 8 ;Exponent of 0, negaitve
xor edi,edi ;edx:edi,eax = -1.0
call AddDoubleReg
cmp cl,bTAG_ZERO ;Was it exact power of two?
jz LogDone ;Skip log if power of two
;Save (x - 1), reload x with reduced exponent
mov edi,EMSEG:[CURstk] ;Point to original x again
xchg EMSEG:[edi].lManHi,ebx
xchg EMSEG:[edi].lManLo,esi
mov EMSEG:[edi].ExpSgn,ecx
mov ecx,ebp ;Get reduced exponent
xor eax,eax ;Exponent of 0, positive
call BasicLog
LogDone:
pop eax ;Get integer part back
cwde
or eax,eax ;Is it zero?
jz TotalLog
;Next 3 instructions take abs() of integer
cdq ;Extend sign through edx
xor eax,edx ;Complement...
sub eax,edx ; and increment if negative
bsr dx,ax ;Look for MSB to normalize integer
;Bit number in dx ranges from 0 to 15
mov cl,dl
not cl ;Convert to shift count
shl eax,cl ;Normalize
.erre TexpBias eq 0
rol edx,16 ;Move exponent high, sign low
or ebx,ebx ;Was log zero?
jz ExactPower
xchg edx,eax ;Exp/sign to eax, mantissa to edx
xor edi,edi ;Extend with zero
call AddDoubleReg
TotalLog:
;Registers could be zero if input was exactly 1.0
cmp cl,bTAG_ZERO
jz ZeroLog
TotalLogNotZero:
mov edi,EMSEG:[Result] ;Point to second arg
push offset TransUnround
jmp MulDouble
ExactPower:
;Arg was a power of two, so log is exact (but not zero).
mov ebx,eax ;Mantissa to ebx
mov ecx,edx ;Exponent to ecx
xor esi,esi ;Extend with zero
;Exponent of arg [= log2(arg)] is now normalized in ebx:esi,ecx
;
;The result log is exact, so we don't want TransUnround, which is designed
;to ensure the result is never exact. Instead we set the [RoundMode]
;vector to [TransRound] before the final multiply.
mov eax,EMSEG:[TransRound]
mov EMSEG:[RoundMode],eax
mov edi,EMSEG:[Result] ;Point to second arg
push offset RestoreRound ;Return addr. for MulDouble in emtrig.asm
jmp MulDouble
ZeroLog:
mov eax,EMSEG:[SavedRoundMode]
mov EMSEG:[RoundMode],eax
mov EMSEG:[ZeroVector],offset SaveResult
jmp SaveResult
;***
LogZeroDest:
or ch,ch ;Is it negative?
js ReturnIndefinite ;Can't take log of negative numbers
;See if log is + or - so we can get correct sign of zero
or ecx,ecx ;Is exponent >= 0?
jge LogRet ;If so, keep present zero sign
FlipDestSign:
not EMSEG:[edi].bSgn
ret
;***
LogSpclDest:
mov al,EMSEG:[edi].bTag ;Pick up tag
cmp al,bTAG_INF ;Is argument infinity?
jnz SpclDest ;In emarith.asm
;Multiplying log(x) * infinity.
;If x > 1, return original infinity.
;If 0 <= x < 1, return infinity with sign flipped.
;If x < 0 or x == 1, invalid operation.
cmp cl,bTAG_ZERO
jz FlipDestSign
or ch,ch ;Is it positive?
js ReturnIndefinite
test ecx,0FFFFH shl 16 ;Is exponent zero?
jg LogRet ;x > 1, just return infinity
jl FlipDestSign
sub ebx,1 shl 31 ;Kill MSB
or ebx,esi
jz ReturnIndefinite ;x == 1.0
LogRet:
ret
|