1 files changed, 1089 insertions, 0 deletions
diff --git a/private/ntos/rtl/ppc/largeint.s b/private/ntos/rtl/ppc/largeint.s
new file mode 100644
index 000000000..c59584fcf
--- /dev/null
+++ b/private/ntos/rtl/ppc/largeint.s
@@ -0,0 +1,1089 @@
+//      TITLE("Large Integer Arithmetic")
+//++
+//
+// Copyright (c) 1993  IBM Corporation
+//
+// Module Name:
+//
+//    largeint.s
+//
+// Abstract:
+//
+//    This module implements routines for performing extended integer
+//    arithmtic.
+//
+// Author:
+//
+//    David N. Cutler (davec) 18-Apr-1990
+//    Converted to PowerPC by Walt Daniels and Norman Cohen Aug 93
+//       (from MIPS based code)
+//
+// References:
+//    See PowerPC Architecture book Appendix E.2 for 64-bit shifts
+//    See "Hacker's Delight", Hank Warren, Nov. 91 for fancy divides
+//
+// Environment:
+//
+//    Any mode.
+//
+// Revision History:
+//    Fixed RtlExtendedLargeIntegerDivide       (Steve Johns) 18-Feb-94
+//       - if divisor >= 2^16 && dividend >= 2^32, quotient incorrect
+//       - also, removed 6 uncessary occurrences of CMPI
+//
+//--
+
+#include "ksppc.h"
+
+
+
+//++
+//
+// LARGE_INTEGER
+// RtlLargeIntegerAdd (
+//    IN LARGE_INTEGER Addend1,
+//    IN LARGE_INTEGER Addend2
+//    )
+//
+// Routine Description:
+//
+//    This function adds a signed large integer to a signed large integer and
+//    returns the signed large integer result.
+//
+// Arguments:
+//
+//    Addend1 (r.5, r.6) - Supplies the first addend value.
+//
+//    Addend2 (r.7, r.8) - Supplies the second addend value.
+//
+// Return Value:
+//
+//    The large integer result is stored at the address supplied by r.3.
+//
+//--
+
+        LEAF_ENTRY(RtlLargeIntegerAdd)
+
+        addc    r.5,r.5,r.7             // add low parts of large integer
+        adde    r.6,r.6,r.8             // add high parts with carry
+        stw     r.5,0(r.3)              // store low 32-bits
+        stw     r.6,4(r.3)              // store high 32-bits
+        LEAF_EXIT(RtlLargeIntegerAdd)   // return
+
+
+
+//++
+//
+// LARGE_INTEGER
+// RtlConvertLongToLargeInteger (
+//     IN LONG SignedInteger
+//     )
+//
+// Routine Description:
+//
+//     This function converts the a signed integer to a signed large integer
+//     and returns the result.
+//
+// Arguments:
+//
+//     SignedInteger (r.4) - Supplies the value to convert.
+//
+// Return Value:
+//
+//    The large integer result is stored at the address supplied by r.3.
+//
+//--
+
+        LEAF_ENTRY(RtlConvertLongToLargeInteger)
+
+        srawi   r.5,r.4,31              // compute high part of result
+        stw     r.4,0(r.3)              // store low 32-bits
+        stw     r.5,4(r.3)              // store high 32-bits
+        LEAF_EXIT(RtlConvertLongToLargeInteger)  // return
+
+
+
+//++
+//
+// LARGE_INTEGER
+// RtlConvertUlongToLargeInteger (
+//     IN LONG UnsignedInteger
+//     )
+//
+// Routine Description:
+//
+//     This function converts the an unsigned integer to a signed large
+//     integer and returns the result.
+//
+// Arguments:
+//
+//     UnsignedInteger (r.4) - Supplies the value to convert.
+//
+// Return Value:
+//
+//    The large integer result is stored at the address supplied by r.3.
+//
+//--
+
+        LEAF_ENTRY(RtlConvertUlongToLargeInteger)
+
+        li      r.5,0                   // clear high part
+        stw     r.4,0(r.3)              // store low 32-bits
+        stw     r.5,4(r.3)              // store high 32-bits
+        LEAF_EXIT(RtlConvertUlongToLargeInteger) // return
+
+
+
+//++
+//
+// LARGE_INTEGER
+// RtlEnlargedIntegerMultiply (
+//    IN LONG Multiplicand,
+//    IN LONG Multiplier
+//    )
+//
+// Routine Description:
+//
+//    This function multiplies a signed integer by an signed integer and
+//    returns a signed large integer result.
+//
+// Arguments:
+//
+//    Multiplicand (r.4) - Supplies the multiplicand value.
+//
+//    Multiplier (r.5) - Supplies the multiplier value.
+//
+// Return Value:
+//
+//    The large integer result is stored at the address supplied by r.3.
+//
+//--
+
+        LEAF_ENTRY(RtlEnlargedIntegerMultiply)
+
+        mullw   r.6,r.4,r.5             // keep low 32-bits of result
+        mulhw   r.7,r.4,r.5             // keep high 32-bits of result
+        stw     r.6,0(r.3)              // store low 32-bits
+        stw     r.7,4(r.3)              // store high 32-bits
+        LEAF_EXIT(RtlEnlargedIntegerMultiply) // return
+
+
+
+//++
+//
+// LARGE_INTEGER
+// RtlEnlargedUnsignedMultiply (
+//    IN ULONG Multiplicand,
+//    IN ULONG Multiplier
+//    )
+//
+// Routine Description:
+//
+//    This function multiplies an unsigned integer by an unsigned integer
+//    and returns a signed large integer result.
+//
+// Arguments:
+//
+//    Multiplicand (r.4) - Supplies the multiplicand value.
+//
+//    Multiplier (r.5) - Supplies the multiplier value.
+//
+// Return Value:
+//
+//    The large integer result is stored at the address supplied by r.3.
+//
+//--
+
+        LEAF_ENTRY(RtlEnlargedUnsignedMultiply)
+
+        mullw   r.6,r.4,r.5             // keep low-32-bits
+        mulhwu  r.7,r.4,r.5             // keep high 32-bits
+        stw     r.6,0(r.3)              // store low 32-bits
+        stw     r.7,4(r.3)              // store high 32-bits
+        LEAF_EXIT(RtlEnlargedUnsignedMultiply) // return
+
+
+
+//++
+//
+// ULONG
+// RtlEnlargedUnsignedDivide (
+//    IN ULARGE_INTEGER Dividend,
+//    IN ULONG Divisor,
+//    IN PULONG Remainder.
+//    )
+//
+// Routine Description:
+//
+//    This function divides an unsigned large integer by an unsigned long
+//    and returns the resultant quotient and optionally the remainder.
+//
+//    N.B. It is assumed that no overflow will occur.
+//
+// Arguments:
+//
+//    Dividend (r.3, r.4) - Supplies the dividend value.
+//       (High-order bits in r.4, low-order bits in r.3)
+//
+//    Divisor (r.5) - Supplies the divisor value.
+//
+//    Remainder (r.6) - Supplies an optional pointer to a variable that
+//        receives the remainder. Ptr is null if not needed.
+//
+// Return Value:
+//
+//    The unsigned long integer quotient is returned as the function value.
+//
+//--
+
+        LEAF_ENTRY(RtlEnlargedUnsignedDivide)
+
+        cmplw   r.4,r.5
+        bge     overflow                // catch overflow or division by 0
+        cmplwi  r.4,0                   // test high part for 0
+        beq     only_32_bits            // 32-bit division suffices
+// Normalize:  Shift divisor and dividend left to get rid of leading zeroes
+// in the divisor.  Since r.4 < r.5, only zeroes are shifted out of the
+// dividend.
+        cntlzw  r.7,r.5                // number of bits to shift (N)
+        slw     r.5,r.5,r.7            // shift divisor
+        slw     r.4,r.4,r.7            // shift upper part of divisor
+        subfic  r.9,r.7,32             // 32-N
+        srw     r.9,r.3,r.9            // leftmost N bits of r.3, slid right
+        or      r.4,r.4,r.9            //   and inserted into low end of r.4
+        slw     r.3,r.3,r.7            // shift lower part of divisor
+// Estimate high-order halfword of quotient.  If the dividend is
+// A0 A1 A2 A3 and the divisor is B0 B1  (where each Ai or Bi is a halfword),
+// then the estimate is A0 A1 0000 divided by B0 0000, or A0 A1 divided by B0.
+// (r.4 holds A0 A1, r.3 holds A2 A3, and r.5 holds B0 B1.)
+// The estimate may be too high because it does not account for B1; in rare
+// cases, the estimate will not even fit in a halfword.  High estimates are
+// corrected for later.
+        srwi    r.8,r.5,16             // r.8 <- B0
+        divwu   r.12,r.4,r.8           // r.12 <- floor([A0 A1]/B0)
+// Subtract partial quotient times divisor from dividend: If Q0 is the quotient
+// computed above, this means that Q0 0000 times B0 B1 is subtracted from
+// A0 A1 A2 A3.  We compute Q0 times B0 B1 and then shift the two-word
+// product left 16 bits.
+        mullw   r.9,r.12,r.5           // low word of Q0 times B0 B1
+        mulhwu  r.10,r.12,r.5          // high word of Q0 times B0 B1
+        slwi    r.10,r.10,16           // shift high word left 16 bits
+        inslwi  r.10,r.9,16,16         // move 16 bits from left of low word to
+                                       //   right of high word
+        slwi    r.9,r.9,16             // shift low word left 16 bits
+        subfc   r.3,r.9,r.3            // low word of difference
+        subfe   r.4,r.10,r.4           // high word of difference
+// If the estimate for Q0 was too high, the difference will be negative.
+// While A0 A1 A2 A3 is negative, repeatedly add B0 B1 0000 to A0 A1 A2 A3
+// and decrement Q0 by one to correct for the overestimate.
+        cmpwi   r.4,0                  // A0 A1 A2 A3 is negative iff A0 A1 is
+        bge     Q0_okay                // no correction needed
+        inslwi  r.10,r.5,16,16         // high word of B0 B1 0000 (= 0000 B0)
+        slwi    r.9,r.5,16             // low word of B0 B1 0000 (= B1 0000)
+adjust_Q0:
+        addc   r.3,r.3,r.9             // add B0 B1 0000 to A0 A1 A2 A3 (low)
+        adde   r.4,r.4,r.10            // add B0 B1 0000 to A0 A1 A2 A3 (high)
+        cmpwi  r.4,0                   // Is A0 A1 A2 A3 now nonnegative?
+        addi   r.12,r.12,-1            // decrement Q0
+        blt    adjust_Q0               // if A0 A1 A2 A3 still negative, repeat
+Q0_okay:
+// Estimate low-order halfword of quotient.  A0 is necessarily 0000 at this
+// point, so if the remaining part of the dividend is A0 A1 A2 A3 then the
+// estimate is A1 A2 0000 divided by B0 0000, or A1 A2 divided by B0.
+// (r.4 holds A0 A1, r.3 holds A2 A3, and r.8 holds B0.)
+        slwi    r.9,r.4,16             // r.9 <- A1 0000
+        inslwi  r.9,r.3,16,16          // r.9 <- A1 A2
+        divwu   r.11,r.9,r.8           // r.11 <- floor([A1 A2]/B0)
+// Subtract partial quotient times divisor from remaining part of dividend:
+// If Q1 is the quotient computed above, this means
+// that Q1 times B0 B1 is subtracted from A0 A1 A2 A3.  We compute
+        mullw   r.9,r.11,r.5           // low word of Q1 times B0 B1
+        mulhwu  r.10,r.11,r.5          // high word of Q1 times B0 B1
+        subfc   r.3,r.9,r.3            // low word of difference
+        subfe   r.4,r.10,r.4           // high word of difference
+// If the estimate for Q1 was too high, the difference will be negative.
+// While A0 A1 A2 A3 is negative, repeatedly add B0 B1 to A0 A1 A2 A3
+// and decrement Q1 by one to correct for the overestimate.
+        cmpwi   r.4,0                  // A0 A1 A2 A3 is negative iff A0 A1 is
+        bge     Q1_okay                // no correction needed
+adjust_Q1:
+        addc   r.3,r.3,r.5             // add B0 B1 to A0 A1 A2 A3 (low)
+        addze  r.4,r.4                 // add B0 B1 to A0 A1 A2 A3 (high)
+        cmpwi  r.4,0                   // Is A0 A1 A2 A3 now nonnegative?
+        addi   r.11,r.11,-1            // decrement Q1
+        blt    adjust_Q1               // if A0 A1 A2 A3 still negative, repeat
+Q1_okay:
+// Build the results.  The desired quotient is Q0 Q1.
+// The desired remainder is obtained by shifting A2 A3 right by the number
+// of bits by which the dividend and divisor were shifted left in the
+// normalization step.  The number of bits shifted is still in r.7.
+        cmplwi r.6,0                   // remainder needed?
+        bne    rem1                    // if so, go compute it
+        slwi   r.3,r.12,16             // r.3 <- Q0 0000
+        or     r.3,r.3,r.11            // r.3 <- Q0 Q1
+        blr
+rem1:
+        srw    r.8,r.3,r.7             // remainder <- [A2 A3] >> (r.7)
+        slwi   r.3,r.12,16             // r.3 <- Q0 0000
+        stw    r.8,0(r.6)              // store remainder
+        or     r.3,r.3,r.11            // r.3 <- Q0 Q1
+        blr
+//
+// End of normal case
+//
+// The case of a 32-bit dividend:
+only_32_bits:
+        cmplwi  r.6,0                   // remainder needed?
+        bne     rem2                    // if so, go compute quotient+remainder
+        divwu   r.3,r.3,r.5             // result <- dividend/divisor
+        blr
+rem2:
+        divwu   r.7,r.3,r.5             // quotient <- dividend / divisor
+        mullw   r.8,r.7,r.5             // r.8 <- quotient * divisor
+        subf    r.8,r.8,r.3             // remainder<-dividend-quotient*divisor
+        mr      r.3,r.7                 // result <- quotient
+        stw     r.8,0(r.6)              // store remainder
+        blr
+// The error cases:
+overflow:
+        twi     6,r.5,0                 // trap if divide by zero
+        twi     0x1b,r.5,0              // trap on overflow
+
+        LEAF_EXIT(RtlEnlargedUnsignedDivide)
+
+
+//++
+//
+// ULARGE_INTEGER
+// RtlExtendedLargeIntegerDivide (
+//    IN ULARGE_INTEGER Dividend,
+//    IN ULONG Divisor,
+//    IN PULONG Remainder.
+//    )
+//
+// Routine Description:
+//
+//    This function divides an unsigned large integer by an unsigned long
+//    and returns the resultant quotient and optionally the remainder.
+//
+// Arguments:
+//
+//    Dividend (r.5, r.6) - Supplies the dividend value.
+//
+//    Divisor (r.7) - Supplies the divisor value.
+//
+//    Remainder (r.8)- Supplies an optional pointer to a variable
+//      that receives the remainder.
+//
+// Return Value:
+//
+//    The large integer result is stored at the address supplied by r.3.
+//
+//--
+
+        LEAF_ENTRY(RtlExtendedLargeIntegerDivide)
+
+        cmplwi  r.7,0                   // zero divisor?
+        beq     div_zero_s              // if so, branch to error exit
+        cmpwi   r.6,0                   // check sign of dividend high word
+        bne     big_dividend
+
+// The high-order word of the dividend is zero, so 32-bit unsigned division
+// can be used.
+        li      r.12,0                  // upper word of quotient is zero
+        stw     r.12,4(r.3)             // store upper word of quotient
+        divwu   r.11,r.5,r.7            // compute lower word of quotient
+        stw     r.11,0(r.3)             // store lower word of quotient
+        cmplwi  r.8,0                   // remainder needed?
+        beqlr                           // if not, return
+        mullw   r.10,r.11,r.7           // quotient * divisor
+        subf    r.9,r.10,r.5            // dividend - quotient * divisor
+        stw     r.9,0(r.8)              // store remainder
+        blr                             // return
+
+big_dividend:
+        srwi.   r.0,r.7,16              // upper 16 bits of divisor
+        bne     long_division           // if not, must use long division
+
+// The divisor is only one 16-bit digit long, so use short division:
+        srwi    r.0,r.6,16              // first 16-bit digit of dividend
+        divwu   r.4,r.0,r.7             // first 16-bit digit of quotient
+        mullw   r.10,r.4,r.7            // amount to subtract for remainder
+        subf    r.9,r.10,r.0            // remainder from first digit
+        insrwi  r.6,r.9,16,0            // combine rmndr with 2nd digit of dvdnd
+        divwu   r.12,r.6,r.7            // second digit of quotient
+        insrwi  r.12,r.4,16,0           // high two quotient digits in one word
+        mullw   r.10,r.12,r.7           // amount to subtract for remainder
+        subf    r.9,r.10,r.6            // remainder from second digit
+        srwi    r.0,r.5,16              // third digit of dividend
+        insrwi  r.0,r.9,16,0            // combine rmndr with 3rd digit of dvdnd
+        divwu   r.4,r.0,r.7             // third digit of quotient
+        mullw   r.10,r.4,r.7            // amount to subtract for remainder
+        subf    r.9,r.10,r.0            // remainder from third digit
+        insrwi  r.5,r.9,16,0            // combine rmndr with 4th digit of dvdnd
+        divwu   r.11,r.5,r.7            // fourth digit of quotient
+        mullw   r.10,r.11,r.7           // amount to subtract for remainder
+        insrwi  r.11,r.4,16,0           // low two quotient digits in one word
+        subf    r.9,r.10,r.5            // remainder from fourth digit
+        b       store_results
+
+long_division:
+// Since the divisor is more than one 16-bit digit long, the quotient will
+// be of the form 0x0000 Q2 Q3 Q4, where each of Q2, Q3, and Q4 is a 16-bit
+// digit.
+//
+// Normalize the divisor and dividend so that the high-order bit of the
+// divisor is 1.  This normalization must be undone after the division to
+// compute the remainder.  Let U1 U2 U3 U4 be the 16-bit digits of the
+// unnormalized dividend.  Each digit Ui consists of an S-bit high-order part
+// UiH and a (16-S)-bit low-order part UiL, where S is the number of leading
+// zeroes in the divisor.  Thus R6 holds U1H U1L U2H U2L and R5 holds
+// U3H U3L U4H U4L.  Let N0 N1 N2 N3 N4 be the 16-bit digits of the  normalized
+// dividend: N0 = U1H, N1 = U1L U2H, N2 = U2L U3H, N3 = U3L U4H, N4 = U4L 0...0.
+// Let D1 D2 be the normalized divisor.
+        cntlzw  r.0,r.7                 // number of bits to shift left (S)
+        subfic  r.12,r.0,16             // 16-S
+        subfic  r.11,r.0,32             // 32-S
+        srw     r.9,r.6,r.12            // U1H U1L U2H = N0 N1
+        srw     r.4,r.5,r.11            // U3H
+        slw     r.10,r.6,r.0            // U1L U2H U2L 0...0
+        or      r.6,r.4,r.10            // U1L U2H U2L U3H = N1 N2
+        slw     r.4,r.5,r.0             // U3L U4H U4L 0...0 = N3 N4
+        slw     r.7,r.7,r.0             // normalized divisor (D1 D2)
+        srwi    r.11,r.7,16             // D1
+// Set Q2 = [N0 N1 N2] / [D1 D2].  Start by guessing Q2 = [N0 N1] / D1, then
+// adjust if necessary.  This guess will occasionally be one too high and
+// very rarely two too high, but never higher than that and never too low.
+// (See Theorem B in Section 4.3.1 of Knuth, Vol. II, pp. 256-7.)
+// Let C N0' N1' N2' be the partial remainder [N0 N1 N2] - Q2 * [D1 D2].
+        divwu   r.12,r.9,r.11           // guess Q2 = [N0 N1] / D1
+        srwi    r.10,r.9,16             // 0x0000 N0
+        mullw   r.5,r.12,r.7            // low word of Q2 * [D1 D2]
+        subfc   r.9,r.5,r.6             // low word of C N0' N1' N2' (N1' N2')
+        mulhwu  r.5,r.12,r.7            // high word of Q2 * [D1 D2]
+        subfe.  r.10,r.5,r.10           // high word of C N0' N1' N2' (C N0')
+        bge     Q2_okay                 // if difference is >= 0, Q2 was not too high
+adjust_Q2:
+        addc    r.9,r.9,r.7             // low word of [C N0' N1' N2']+[D1 D2]
+        addze.  r.10,r.10               // high word of [C N0' N1' N2']+[D1 D2]
+        addi    r.12,r.12,-1            // Q2 - 1
+        blt     adjust_Q2               // try again if still negative
+Q2_okay:
+// At this point 0 <= [C N0' N1' N2'] < [D1 D2], so [C N0'] = 0x00000000.
+// r.12 holds the upper word of the quotient, 0x0000 Q2.
+// Set Q3 = [N1' N2' N3] / [D1 D2] by guessing and adjusting as above.
+// Let [N0" N1" N2" N3"] be the partial remainder [N1' N2' N3] - Q3 * [D1 D2].
+        divwu   r.6,r.9,r.11            // guess Q3 = [N1' N2'] / D1
+        srwi    r.10,r.9,16             // 0x0000 N1'
+        slwi    r.9,r.9,16              // N2' 0x0000
+        inslwi  r.9,r.4,16,16           // N2' N3
+        mullw   r.5,r.6,r.7             // low word of Q3 * [D1 D2]
+        subfc   r.9,r.5,r.9             // N2" N3"
+        mulhwu  r.5,r.6,r.7             // high word of Q3 * [D1 D2]
+        subfe.  r.10,r.5,r.10           // N0" N1"
+        bge     Q3_okay                 // if difference >= 0, Q3 was not too high
+adjust_Q3:
+        addc    r.9,r.9,r.7             // low word of [N0" N1" N2" N3"]+[D1 D2]
+        addze.  r.10,r.10               // high word [N0" N1" N2" N3"]+[D1 D2]
+        addi    r.6,r.6,-1              // Q3 - 1
+        blt     adjust_Q3               // try again if difference still negative
+Q3_okay:
+// At this point 0 <= [N0" N1" N2" N3"] < [D1 D2], so [N0" N1"] = 0x00000000.
+// Set Q4 = [N2" N3" N4] / [D1 D2] by guessing and adjusting as above.
+// Let [R1 R2 R3 R4] be the partial remainder [N2" N3" N4] - Q4 * [D1 D2].
+        divwu   r.11,r.9,r.11           // guess Q4 = [N2" N3"] / D1
+        insrwi  r.4,r.9,16,0            // N3" N4
+        srwi    r.10,r.9,16             // 0x0000 N2"
+        mullw   r.5,r.11,r.7            // low word of Q4 * [D1 D2]
+        subfc   r.9,r.5,r.4             // R3 R4
+        mulhwu  r.5,r.11,r.7            // high word of Q4 * [D1 D2]
+        subfe.  r.10,r.5,r.10           // R1 R2
+        bge     Q4_okay                 // if difference < 0, Q4 was not too high
+adjust_Q4:
+        addc    r.9,r.9,r.7             // low word of [R1 R2 R3 R4]+[D1 D2]
+        addze.  r.10,r.10               // high word of [R1 R2 R3 R4]+[D1 D2]
+        addi    r.11,r.11,-1            // Q4 - 1
+        blt     adjust_Q4               // try again if partial remainder still negative
+Q4_okay:
+// At this point 0 <= [R1 R2 R3 R4] < [D1 D2], so [R3 R4] is the remainder.
+        insrwi  r.11,r.6,16,0           // low word of quotient: Q3 Q4
+        srw     r.9,r.9,r.0             // unnormalize remainder
+
+store_results:
+        stw     r.11,0(r.3)             // store low word of quotient
+        stw     r.12,4(r.3)             // store high word of quotient
+        cmplwi  r.8,0                   // remainder needed?
+        beqlr                           // if not, return
+        stw     r.9,0(r.8)              // store remainder
+        blr                             // return
+
+div_zero_s:
+        twi     6,r.7,0                 // Trap on divide by zero
+
+        LEAF_EXIT(RtlExtendedLargeIntegerDivide)
+
+
+//++
+//
+// LARGE_INTEGER
+// RtlExtendedMagicDivide (
+//    IN LARGE_INTEGER Dividend,
+//    IN ULARGE_INTEGER MagicDivisor,
+//    IN CCHAR ShiftCount
+//    )
+//
+// Routine Description:
+//
+//    This function divides a signed large integer by an unsigned large integer
+//    and returns the signed large integer result. The division is performed
+//    using reciprocal multiplication of a signed large integer value by an
+//    unsigned large integer fraction which represents the most significant
+//    64-bits of the reciprocal divisor rounded up in its least significant bit
+//    and normalized with respect to bit 63. A shift count is also provided
+//    which is used to truncate the fractional bits from the result value.
+//    The value returned is the most significant 64 bits of the product
+//    Dividend*MagicDivisor, shifted right ShiftCount bits.
+//
+// Arguments:
+//
+//    Dividend (r.5, r.6) - Supplies the dividend value.
+//
+//    MagicDivisor (r.7, r.8) - Supplies the magic divisor value
+//       which is a 64-bit multiplicative reciprocal.
+//
+//    Shiftcount (r.9) - Supplies the right shift adjustment value,
+//       assumed to be in the range 0 to 63.
+//
+// Return Value:
+//
+//    The large integer result is stored at the address supplied by r.3.
+//
+//--
+// Let Dividend = A B and MagicDivisor = C D, where each of A, B, C, and D is
+// a 32-bit word.  Then Dividend*MagicDivisor is a 128-bit product, computed
+// as follows:
+//                                                A              B
+//                                   x            C              D
+//            ==========================================================
+//                                          high_word(B*D) low_word(B*D)
+//                           high_word(A*D)  low_word(A*D)
+//                           high_word(B*C)  low_word(B*C)
+//            high_word(A*C)  low_word(A*C)
+//            ==========================================================
+//                 P1             P2            P3              P4
+//
+// Since the return value is [P1 P2] >> Shift_Count, P3 and P4 need not be
+// computed, but the carry out of the P3 column must be computed to compute P2.
+
+        LEAF_ENTRY(RtlExtendedMagicDivide)
+
+// If the dividend is negative, negate it and record the fact by setting
+// cr7 to LT.
+        crclr   4*cr.7+0                // clear cr.7 LT bit
+        cmpwi   r.6,0                   // is high-order word of divisor < 0?
+        bge     divisor_nonnegative     // if not, we are ready to compute
+        crset   4*cr.7+0                // set cr.7 LT bit to mark negation
+        subfic  r.5,r.5,0               // negate lower half of dividend
+        subfze  r.6,r.6                 // negate upper half of dividend
+divisor_nonnegative:
+// To avoid pipeline delays, produce partial products first, in the order they
+// will be consumed by the addc and addze instructions below.
+        mulhwu  r.11,r.6,r.7            // high(A*D)
+        mulhwu  r.0,r.5,r.8             // high(B*C)
+        mulhwu  r.12,r.6,r.8            // high(A*C)
+        mullw   r.4,r.6,r.8             // low(A*C)
+        mulhwu  r.10,r.5,r.7            // high(B*D)
+        mullw   r.6,r.6,r.7             // low(A*D)
+        mullw   r.7,r.5,r.8             // low(B*C)
+// Now combine the partial products, forming P1 in r.12, P2 in r.11, P3 in r.10:
+        addc    r.11,r.11,r.0           // high(A*D)+high(B*C)
+        addze   r.12,r.12               // high(A*C)+partial carry
+        addc    r.11,r.11,r.4           // high(A*D)+high(B*C)+low(A*C)
+        addze   r.12,r.12               // high(A*C)+partial carry
+        addc    r.10,r.10,r.6           // high(B*D)+low(A*D)
+        addze   r.11,r.11               // hi(A*D)+hi(B*C)+low(A*C)+part. carry
+        addze   r.12,r.12               // high(A*C)+partial carry
+        addc    r.10,r.10,r.7           // high(B*D)+low(A*D)+low(B*C) = P3
+        addze   r.11,r.11               // hi(A*D)+hi(B*C)+low(A*C)+carry = P2
+        addze   r.12,r.12               // high(A*C)+carry = P1
+// Shift the 64-bit value whose high half is in r.12 and whose low half is in
+// r.11 right by (r.7) bits.  The sequence below depends on the fact that
+// shift amounts are interpreted mod 64.  In particular, a shift amount of
+// -N bits, where 0 < N <= 32, specifies a shift of 64-N bits, where
+// 32 <= 64-N < 64, and a shift of 32 or more bits always yields zero.
+// Let s = (r.9) be the amount to shift right.  The sequence below behaves
+// differently when s<32, when s=32, and when s>32.  When s<32, we view the
+// 64-bit value to be shifted as follows:
+//
+//    |             r.12             |             r.11             |
+//    +-----------------+------------+-----------------+------------+
+//    |        T        |     U      |        V        |     W      |
+//    +-----------------+------------+-----------------+------------+
+//    |<- (32-s) bits ->|<- s bits ->|<- (32-s) bits ->|<- s bits ->|
+//
+// When s >= 32, we view the 64-bit value to be shifted as follows:
+//
+//    |             r.12             |             r.11             |
+//    +-----------------+------------+------------------------------+
+//    |        X        |     Y      |              Z               |
+//    +-----------------+------------+------------------------------+
+//    |                 |<- (s-32) ->|<------------ 32 ------------>|
+//    |<- (64-s) bits ->|<--------------- s bits ------------------>|
+//
+                                // When s < 32: | When s = 32: | When s > 32:
+                                // -------------+--------------+-------------
+        subfic  r.7,r.9,32      // 32-s (> 0)   |   0          | 32-s (< 0)
+        addi    r.8,r.9,-32     // s-32 (< 0)   |   0          | s-32 (> 0)
+        srw     r.11,r.11,r.9   // 0...0 V      |   0          | 0
+        slw     r.4,r.12,r.7    // U 0...0      |   X (= X Y)  | 0
+        or      r.11,r.11,r.4   // U V          |   X          | 0
+        srw     r.4,r.12,r.8    // 0            |   X          | 0...0 X
+        or      r.11,r.11,r.4   // U V          |   X          | 0...0 X
+        srw     r.12,r.12,r.9   // 0...0 T      |   0          | 0
+
+// If the original dividend was negated, we now negate the result:
+        bnl     cr.7,sign_is_right
+        subfic  r.11,r.11,0             // negate low word
+        subfze  r.12,r.12               // negate high word
+sign_is_right:
+// Store the result:
+        stw     r.11,0(r.3)             // low word of result
+        stw     r.12,4(r.3)             // high word of result
+        blr                             // return
+
+        LEAF_EXIT(RtlExtendedMagicDivide)
+
+
+//++
+//
+// ULARGE_INTEGER
+// RtlLargeIntegerDivide (
+//    IN ULARGE_INTEGER Dividend,
+//    IN ULARGE_INTEGER Divisor,
+//    IN PLARGE_INTEGER Remainder.
+//    )
+//
+// Routine Description:
+//
+//    This function divides an unsigned large integer by an unsigned large
+//    integer and returns the resultant quotient and optionally the remainder.
+//
+// Arguments:
+//
+//    Dividend (r.5, r.6) - Supplies the dividend value.
+//
+//    Divisor (r.7, r.8) - Supplies the divisor value.
+//
+//    Remainder (r.9)- Supplies an optional pointer to a variable
+//      that receives the remainder.
+//
+// Return Value:
+//
+//    The large integer result is stored at the address supplied by r.3.
+//
+//--
+
+        LEAF_ENTRY(RtlLargeIntegerDivide)
+
+        or.     r.0,r.7,r.8             // combine low and high parts of divisor
+        beq     div0                    // if 0, then attempted division by zero
+        li      r.0,64                  // set loop count
+        mtctr   r.0                     // in the count register
+        li      r.10,0                  // clear partial remainder
+        li      r.11,0                  //
+
+// Invariants for the following loop:
+//    1. (q<<(CTR)*Divisor) + [r.11 r.10 r.6 r.5]>>(64-(CTR)) = Dividend
+//    2. [r.11 r.10] < Divisor
+// where q is the rightmost (64-(CTR)) bits of [r.6 r.5]
+// Initially, (CTR)=64 and [r.11 r.10] = 0,, so q=0 and
+//    [r.11 r.10 r.6 r.5]>>(64-(CTR)) = [0 0 r.6 r.5]>>0 = [r.6 r.5], reducing
+//    the loop invariants to:
+//       1. [r.6 r.5] = Dividend
+//       2. 0 < Divisor
+// At the end of the loop, (CTR)=0, so q=[r.6 r.5] and the loop invariants
+//    reduce to:
+//       1. [r.6 r.5]*Divisor + [r.11 r.10] = Dividend
+//       2. [r.11 r.10] < Divisor
+//    That is, [r.6 r.5] holds the quotient and [r.11 r.10] holds the remainder.
+// During execution of the loop, [r.11 r.10] holds the partial remainder, the
+//    leftmost (CTR) bits of [r.6 r.5] hold the bits of the dividend not yet
+//    appended to the partial remainder, and the remaining bits of [r.6 r.5]
+//    hold the leftmost (64-(CTR)) bits of the quotient.
+
+divl:
+// Shift the 128-bit quantity [r.11 r.10 r.6 r.5] left one bit.  This has the
+// effect of dropping the leftmost bit of the partial remainder (necessarily
+// zero), "bringing down" the next bit of the dividend to the right end of the
+// partial remainder, and shifting the partial quotient left one bit.
+        slwi    r.11,r.11,1             // shift r.11 left one bit
+        inslwi  r.11,r.10,1,31          // left bit of r.10 to right bit of r.11
+        slwi    r.10,r.10,1             // shift r.10 left one bit
+        inslwi  r.10,r.6,1,31           // left bit of r.6 to right bit of r.10
+        slwi    r.6,r.6,1               // shift r.6 left one bit
+        inslwi  r.6,r.5,1,31            // left bit of r.5 to right bit of r.6
+        slwi    r.5,r.5,1               // shift r.5 left one bit
+// If [r.11 r.10] >= Divisor, increment the quotient and subtract the divisor
+// from the partial remainder:
+        cmplw   cr.0,r.11,r.8           // high(partial_rem) vs. high(divisor)
+        cmplw   cr.1,r.10,r.7           // low(partial_rem) vs. low(divisor)
+        bgt     cr.0,PR_greater         // high(part_rem) > high(divisor)
+        blt     cr.0,endl               // high(part_rem) < high(divisor)
+        blt     cr.1,endl               // highs =, low(part_rem) < low(divisor)
+
+PR_greater:
+        ori     r.5,r.5,1               // increment shifted quotient
+        subfc   r.10,r.7,r.10           // low(part_rem-divisor)
+        subfe   r.11,r.8,r.11           // high(part_rem-divisor)
+
+endl:   bdnz    divl                    // decrement CTR and loop
+
+        cmplwi  r.9,0                   // remainder requested?
+        beq     norem                   // no remainder
+        stw     r.10,0(r.9)             // store low part of remainder
+        stw     r.11,4(r.9)             // store high part of remainder
+norem:  stw     r.5,0(r.3)              // store low part of quotient
+        stw     r.6,4(r.3)              // store high part of quotient
+        blr
+
+div0:
+        twi     6,r.0,0                 // Trap on divide by zero
+
+        LEAF_EXIT(RtlLargeIntegerDivide) //
+
+
+
+//++
+//
+// LARGE_INTEGER
+// Rtl     ExtendedIntegerMultiply (
+//    IN LARGE_INTEGER Multiplicand,
+//    IN LONG Multiplier
+//    )
+//
+// Routine Description:
+//
+//    This function multiplies a signed large integer by a signed integer and
+//    returns the signed large integer result.
+//
+// Arguments:
+//
+//    Multiplicand (r.5, r.6) - Supplies the multiplicand value.
+//
+//    Multiplier (r.7) - Supplies the multiplier value.
+//
+// Return Value:
+//
+//    The large integer result is stored at the address supplied by r.3.
+//
+//--
+
+        LEAF_ENTRY(RtlExtendedIntegerMultiply)
+
+// If A B is the multiplicand and C is the multiplier (where A, B, and C are
+// each 32 bits long), the product is computed as follows:
+//
+//                           A         B
+//      *                              C
+//         =============================
+//                   high(B*C)  low(B*C)
+//      +  high(A*C)  low(A*C)
+//         =============================
+//                P1        P2        P3
+//
+// Since the high-order bit of B is not a sign bit but the high-order bit of
+// C is, we have no multiplication instruction appropriate for computing
+// high(B*C) directly.  Instead, we negate any negative operand before
+// doing the multiplication, multiply using unsigned arithmetic, and then
+// negate the product if we had negated exactly one operand.  If P1 is
+// nonzero before negating the product, the multiplication has overflowed.
+// We use the LT bit of cr.7 to track whether exactly one operand has
+// been negated.
+
+        crclr   4*cr.7+0                // clear LT bit of cr.7
+        cmpwi   r.6,0                   // test sign of multiplicand
+        bge     multiplicand_adjusted   // if nonnegative, proceed
+        subfic  r.5,r.5,0               // negate low part of multiplicand
+        subfze  r.6,r.6                 // negate high part of multiplicand
+        crset   4*cr.7+0                // set LT bit of cr.7
+multiplicand_adjusted:
+        cmpwi   r.7,0                   // test sign of multiplier
+        bge     multiplier_adjusted     // if nonnegative, proceed
+        neg     r.7,r.7                 // negate multiplier
+        crnot   4*cr.7+0,4*cr.7+0       // invert LT bit of cr.7
+multiplier_adjusted:
+        mulhwu  r.9,r.5,r.7             // high(B*C)
+        mullw   r.10,r.6,r.7            // low(A*C)
+        mulhwu  r.11,r.6,r.7            // high(A*C)
+        mullw   r.8,r.5,r.7             // P3 = low(B*C)
+        addc    r.9,r.9,r.10            // P2 = high(B*C)+low(A*C)
+        addze   r.11,r.11               // P1 = high(A*C)+[carry out of P2]
+        cmpwi   r.11,0                  // check for overflow
+        bne     mull_over
+        bnl     cr.7,product_adjusted   // was exactly one operand negated?
+        subfic  r.8,r.8,0               // negate low part of product
+        subfze  r.9,r.9                 // negate high part of product
+product_adjusted:
+        stw     r.8,0(r.3)              // store low word of product
+        stw     r.9,4(r.3)              // store high word of product
+        blr
+mull_over:
+        twi     0x1b,r.11,0             // Trap on overflow
+
+        LEAF_EXIT(RtlExtendedIntegerMultiply)
+
+
+
+//++
+//
+// LARGE_INTEGER
+// RtlLargeIntegerNegate (
+//    IN LARGE_INTEGER Subtrahend
+//    )
+//
+// Routine Description:
+//
+//    This function negates a signed large integer and returns the signed
+//    large integer result.
+//
+// Arguments:
+//
+//    Subtrahend (r.5, r.6) - Supplies the subtrahend value.
+//
+// Return Value:
+//
+//    The large integer result is stored at the address supplied by r.3.
+//
+//--
+
+        LEAF_ENTRY(RtlLargeIntegerNegate)
+
+        subfic  r.5,r.5,0               // double precision subtract from 0
+        subfze  r.6,r.6
+        stw     r.5,0(r.3)              // store low part of result
+        stw     r.6,4(r.3)              // store high part of result
+        LEAF_EXIT(RtlLargeIntegerNegate) // return
+
+
+
+//++
+//
+// LARGE_INTEGER
+// RtlLargeIntegerSubtract (
+//    IN LARGE_INTEGER Minuend,
+//    IN LARGE_INTEGER Subtrahend
+//    )
+//
+// Routine Description:
+//
+//    This function subtracts a signed large integer from a signed large
+//    integer and returns the signed large integer result.
+//
+// Arguments:
+//
+//    Minuend (r.5, r.6) - Supplies the minuend value.
+//
+//    Subtrahend (r.7, r.8) - Supplies the subtrahend value.
+//
+// Return Value:
+//
+//    The large integer result is stored at the address supplied by r.3.
+//
+//--
+
+        LEAF_ENTRY(RtlLargeIntegerSubtract)
+
+        subfc   r.5,r.7,r.5             // double precision subtract
+        subfe   r.6,r.8,r.6
+        stw     r.5,0(r.3)              // store low part of result
+        stw     r.6,4(r.3)              // store high part of result
+        LEAF_EXIT(RtlLargeIntegerSubtract) // return
+
+
+
+//++
+//
+// LARGE_INTEGER
+// RtlLargeIntegerShiftLeft (
+//    IN LARGE_INTEGER LargeInteger,
+//    IN CCHAR ShiftCount
+//    )
+//
+// Routine Description:
+//
+//    This function shifts a signed large integer left by an unsigned
+//    integer modulo 64 and returns the shifted signed large integer
+//    result.
+//
+//    N.B. No test is made for significant bits shifted out of the result.
+//
+// Arguments:
+//
+//    LargeInteger (r.5, r.6) - Supplies the large integer to be shifted.
+//
+//    ShiftCount (r.7) - Supplies the left shift count.
+//
+// Return Value:
+//
+//    The large integer result is stored at the address supplied by r.3.
+//
+//--
+
+        LEAF_ENTRY(RtlLargeIntegerShiftLeft)
+
+        andi.   r.7,r.7,0x3f            // mod 64 of shift count
+        subfic  r.8,r.7,32
+        slw     r.6,r.6,r.7
+        srw     r.0,r.5,r.8
+        or      r.6,r.6,r.0
+        addic   r.8,r.7,-32
+        slw     r.0,r.5,r.8
+        or      r.6,r.6,r.0
+        slw     r.5,r.5,r.7
+        stw     r.5,0(r.3)              // store low result
+        stw     r.6,4(r.3)              // store high result
+        LEAF_EXIT(RtlLargeIntegerShiftLeft)
+
+
+
+//++
+//
+// LARGE_INTEGER
+// RtlLargeIntegerShiftRight (
+//    IN LARGE_INTEGER LargeInteger,
+//    IN CCHAR ShiftCount
+//    )
+//
+// Routine Description:
+//
+//    This function shifts an unsigned large integer right by an unsigned
+//    integer modulo 64 and returns the shifted unsigned large integer
+//    result.
+//
+// Arguments:
+//
+//    LargeInteger (r.5, r.6) - Supplies the large integer to be shifted.
+//
+//    ShiftCount (r.7) - Supplies the right shift count.
+//
+// Return Value:
+//
+//    The large integer result is stored at the address supplied by r.3.
+//
+//--
+
+        LEAF_ENTRY(RtlLargeIntegerShiftRight)
+
+        andi.   r.7,r.7,0x3f            // mod 64 of shift count
+
+// The sequence below depends on the fact that shift amounts are interpreted
+// mod 64.  In particular, a shift amount of -N bits, where 0 < N <= 32,
+// specifies a shift of 64-N bits, where 32 <= 64-N < 64, and a shift of 32 or
+// more bits always yields zero.  Let s = (r.7) be the amount to shift right.
+// The sequence below behaves differently when s<32, when s=32, and when s>32.
+// When s<32, we view the 64-bit value to be shifted as follows:
+//
+//
+//    |             r.6              |             r.5              |
+//    +-----------------+------------+-----------------+------------+
+//    |        T        |     U      |        V        |     W      |
+//    +-----------------+------------+-----------------+------------+
+//    |<- (32-s) bits ->|<- s bits ->|<- (32-s) bits ->|<- s bits ->|
+//
+// When s >= 32, we view the 64-bit value to be shifted as follows:
+//
+//    |             r.6              |             r.5              |
+//    +-----------------+------------+------------------------------+
+//    |        X        |     Y      |              Z               |
+//    +-----------------+------------+------------------------------+
+//    |                 |<- (s-32) ->|<------------ 32 ------------>|
+//    |<- (64-s) bits ->|<--------------- s bits ------------------>|
+//
+                                // When s < 32: | When s = 32: | When s > 32:
+                                // -------------+--------------+-------------
+        subfic  r.8,r.7,32      // 32-s (> 0)   |   0          | 32-s (< 0)
+        addi    r.9,r.7,-32     // s-32 (< 0)   |   0          | s-32 (> 0)
+        srw     r.5,r.5,r.7     // 0...0 V      |   0          | 0
+        slw     r.0,r.6,r.8     // U 0...0      |   X (= X Y)  | 0
+        or      r.5,r.5,r.0     // U V          |   X          | 0
+        srw     r.0,r.6,r.9     // 0            |   X          | 0...0 X
+        or      r.5,r.5,r.0     // U V          |   X          | 0...0 X
+        srw     r.6,r.6,r.7     // 0...0 T      |   0          | 0
+
+        stw     r.5,0(r.3)      // store low result
+        stw     r.6,4(r.3)      // store high result
+        LEAF_EXIT(RtlLargeIntegerShiftRight)
+
+
+
+//++
+//
+// LARGE_INTEGER
+// RtlLargeIntegerArithmeticShift (
+//    IN LARGE_INTEGER LargeInteger,
+//    IN CCHAR ShiftCount
+//    )
+//
+// Routine Description:
+//
+//    This function shifts a signed large integer right by an unsigned
+//    integer modulo 64 and returns the shifted signed large integer
+//    result.
+//
+// Arguments:
+//
+//    LargeInteger (r.5, r.6) - Supplies the large integer to be shifted.
+//
+//    ShiftCount (r.7) - Supplies the right shift count.
+//
+// Return Value:
+//
+//    The large integer result is stored at the address supplied by r.3.
+//
+//--
+
+        LEAF_ENTRY(RtlLargeIntegerArithmeticShift)
+
+        andi.   r.7,r.7,0x3f            // mod 64 of shift count
+
+// The sequence below depends on the fact that shift amounts are interpreted
+// mod 64.  In particular, a shift amount of -N bits, where 0 < N <= 32,
+// specifies a shift of 64-N bits, where 32 <= 64-N < 64, and an arithmetic
+// shift of 32 or more bits always yields 32 copies of the original sign bit.
+// Let s = (r.7) be the amount to shift right.  The sequence below behaves
+// differently when s<=32 and when s>32.  When s<=32, we view the 64-bit value
+// to be shifted as follows:
+//
+//
+//    |             r.6              |             r.5              |
+//    +-----------------+------------+-----------------+------------+
+//    |        T        |     U      |        V        |     W      |
+//    +-----------------+------------+-----------------+------------+
+//    |<- (32-s) bits ->|<- s bits ->|<- (32-s) bits ->|<- s bits ->|
+//
+// When s > 32, we view the 64-bit value to be shifted as follows:
+//
+//    |             r.6              |             r.5              |
+//    +-----------------+------------+------------------------------+
+//    |        X        |     Y      |              Z               |
+//    +-----------------+------------+------------------------------+
+//    |                 |<- (s-32) ->|<------------ 32 ------------>|
+//    |<- (64-s) bits ->|<--------------- s bits ------------------>|
+//
+
+                              // When s <= 32:         | When s > 32:
+                              // ----------------------+----------------------
+        subfic  r.8,r.7,32    // 32-s (>= 0)           | 32-s (< 0)
+        srw     r.5,r.5,r.7   // 0...0 V               | 0
+        slw     r.0,r.6,r.8   // U 0...0               | 0
+        or      r.5,r.5,r.0   // U V                   | 0
+        addic.  r.9,r.7,-32   // s-32 (<= 0)           | s-32 (> 0)
+        sraw    r.10,r.6,r.9  // sign(TU)...sign(TU)   | sign(XY)...sign(XY) X
+        ble     more_than_32  // (CR0 set by addic. two instructions earlier.)
+        mr      r.5,r.10      // [instruction skipped] | sign(XY)...sign(XY) X
+more_than_32:                 //                       |
+        sraw    r.6,r.6,r.7   // sign(T)...sign(T) T   | sign(XY)...sign(XY)
+
+        stw     r.5,0(r.3)    // store low result
+        stw     r.6,4(r.3)    // store high result
+        LEAF_EXIT(RtlLargeIntegerArithmeticShift)