diff options
author | Adam <you@example.com> | 2020-05-17 05:51:50 +0200 |
---|---|---|
committer | Adam <you@example.com> | 2020-05-17 05:51:50 +0200 |
commit | e611b132f9b8abe35b362e5870b74bce94a1e58e (patch) | |
tree | a5781d2ec0e085eeca33cf350cf878f2efea6fe5 /private/ntos/tdi/tcpip/tcp/alpha/xsum.s | |
download | NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.gz NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.bz2 NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.lz NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.xz NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.zst NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.zip |
Diffstat (limited to 'private/ntos/tdi/tcpip/tcp/alpha/xsum.s')
-rw-r--r-- | private/ntos/tdi/tcpip/tcp/alpha/xsum.s | 271 |
1 file changed, 271 insertions, 0 deletions
//      TITLE("Compute Checksum")
//++
//
// Copyright (c) 1994  Microsoft Corporation
//
// Module Name:
//
//    xsum.s
//
// Abstract:
//
//    This module implements a function to compute the checksum of a buffer.
//
//    Target: DEC Alpha AXP. The checksum computed is the Internet
//    (ones-complement) checksum: 64-bit wraparound sums with the carry
//    folded back in after every add, finally folded down to 16 bits.
//
// Author:
//
//    John Vert (jvert) 11-Jul-1994
//
// Environment:
//
// Revision History:
//
//--

#include "ksalpha.h"

        SBTTL("Compute Checksum")
//++
//
// ULONG
// tcpxsum (
//    IN ULONG Checksum,
//    IN PUSHORT Source,
//    IN ULONG Length
//    )
//
// Routine Description:
//
//    This function computes the checksum of the specified buffer.
//
// Arguments:
//
//    Checksum (a0) - Supplies the initial checksum value, in low 32 bits.
//
//    Source (a1) - Supplies a pointer to the checksum buffer; may be
//        arbitrarily (byte) aligned.
//
//    Length (a2) - Supplies the length of the buffer in bytes.
//        NOTE(review): the original header said "in words", but the code
//        decrements a2 by 1 per leading byte and masks it with "and a2, 7",
//        so it is clearly a byte count.
//
// Return Value:
//
//    The computed checksum is returned as the function value.
//
// Register usage (internal):
//
//    v0 - running 64-bit checksum accumulator
//    t0/t1 - alternating quadword data registers in the unrolled loop
//    t2 - scratch / carry bit from cmpult
//    t4 - count of whole quadwords' worth of bytes remaining
//    t6 - saved original buffer address (low bit tested later to decide
//         whether the final result must be byte-swapped)
//
//--

        LEAF_ENTRY(tcpxsum)
        zap     a0, 0xf0, a0            // clear high half of a0 (keep 32-bit checksum)
        bis     a1, zero, t6            // save initial buffer address (alignment test at 60:)
        bis     zero, zero, v0          // clear accumulated checksum

//
// Check if the buffer is quadword aligned.
//
// If the buffer is not quadword aligned, then add the leading bytes to the
// checksum until it is.
//
        ldq_u   t0, 0(a1)               // get containing quadword of first part
        blbc    a1, 10f                 // if low bit clear, word aligned already
        beq     a2, 65f                 // if zero bytes, don't do anything
        extbl   t0, a1, t1              // get leading (odd-address) byte
        sll     t1, 8, v0               // place it high; compensated by byte swap at 60:
        addq    a1, 1, a1               // increment buffer to first full word
        subq    a2, 1, a2               // decrement byte count

10:
        and     a1, 6, t2               // check if buffer quadword aligned
        beq     t2, 20f                 // if eq, quadword aligned
        extql   t0, t2, t0              // extract the leading bytes up to the qword boundary
        and     a1, 7, t3               // compute bytes summed:
        subq    zero, t3, t3            //   t3 = 8 - (a1 & 7)
        addq    t3, 8, t3               //   = bytes from a1 to next qword boundary
        addq    a1, 8, a1               // advance buffer address to next qword
        bic     a1, 7, a1               //   and round down to the qword boundary
        subq    a2, t3, t2              // remaining bytes after the leading fragment
        blt     t2, 55f                 // if ltz, buffer ends inside this qword; use residual code

        addq    v0, t0, v0              // add bytes to partial checksum
        cmpult  v0, t0, t1              // generate carry (1 if 64-bit add wrapped)
        addq    t1, v0, v0              // add carry back into checksum (end-around carry)

        bis     t2, zero, a2            // reduce count of bytes to checksum
        beq     t2, 60f                 // if eq, no more bytes

20:
//
// Compute the checksum in 64-byte blocks.
//
        bic     a2, 7, t4               // subtract out residual (sub-quadword) bytes
        beq     t4, 40f                 // if eq, no quadwords to checksum
        subq    zero, t4, t2            // compute negative of byte count
        and     t2, 15 << 2, t3         // compute bytes in first (partial) iteration
        ldq     t0, 0(a1)               // get first quadword to checksum
        beq     t3, 35f                 // if eq, full 64-byte block
        subq    a1, t3, a1              // bias buffer address by offset
        bic     t4, 64-1, t4            // subtract out bytes in first iteration
        lda     t2, 30f                 // get base address of code vector
        addl    t3, t3, t3              // scale byte offset to code offset
                                        // (each 8 data bytes = 16 bytes of code below)
        addq    t3, t2, t2              // compute code vector entry address
        bis     t0, zero, t1            // copy first quadword to checksum
        jmp     (t2)                    // dispatch into the middle of the vector
                                        // (Duff's-device style partial first block)

30:
//
// The following code vector computes the checksum of a 64-byte block.
// noreorder: the jmp above targets fixed 4-instruction (16-byte) strides,
// so the assembler must not reschedule these instructions.
//
.set noreorder
        ldq     t1, 8(a1)
        addq    v0, t0, v0
        cmpult  v0, t0, t2              // carry from the add
        addq    v0, t2, v0              // fold carry back in

        ldq     t0, 16(a1)
        addq    v0, t1, v0
        cmpult  v0, t1, t2
        addq    v0, t2, v0

        ldq     t1, 24(a1)
        addq    v0, t0, v0
        cmpult  v0, t0, t2
        addq    v0, t2, v0

        ldq     t0, 32(a1)
        addq    v0, t1, v0
        cmpult  v0, t1, t2
        addq    v0, t2, v0

        ldq     t1, 40(a1)
        addq    v0, t0, v0
        cmpult  v0, t0, t2
        addq    v0, t2, v0

        ldq     t0, 48(a1)
        addq    v0, t1, v0
        cmpult  v0, t1, t2
        addq    v0, t2, v0

        ldq     t1, 56(a1)
        addq    v0, t0, v0
        cmpult  v0, t0, t2
        addq    v0, t2, v0

        addq    a1, 64, a1              // advance to the next 64-byte block
        addq    v0, t1, v0
        cmpult  v0, t1, t2
        addq    v0, t2, v0
.set reorder

        beq     t4, 40f                 // if zero, end of block

35:
        ldq     t0, 0(a1)
//
// The following loop is allowed to be reordered by the assembler for
// optimal scheduling. It is never branched into, so instruction layout
// does not matter here (unlike the 30: vector above).
//
        subq    t4, 64, t4              // reduce byte count by one full block

        ldq     t1, 8(a1)
        addq    v0, t0, v0
        cmpult  v0, t0, t2
        addq    v0, t2, v0

        ldq     t0, 16(a1)
        addq    v0, t1, v0
        cmpult  v0, t1, t2
        addq    v0, t2, v0

        ldq     t1, 24(a1)
        addq    v0, t0, v0
        cmpult  v0, t0, t2
        addq    v0, t2, v0

        ldq     t0, 32(a1)
        addq    v0, t1, v0
        cmpult  v0, t1, t2
        addq    v0, t2, v0

        ldq     t1, 40(a1)
        addq    v0, t0, v0
        cmpult  v0, t0, t2
        addq    v0, t2, v0

        ldq     t0, 48(a1)
        addq    v0, t1, v0
        cmpult  v0, t1, t2
        addq    v0, t2, v0

        ldq     t1, 56(a1)
        addq    v0, t0, v0
        cmpult  v0, t0, t2
        addq    v0, t2, v0

        addq    a1, 64, a1
        addq    v0, t1, v0
        cmpult  v0, t1, t2
        addq    v0, t2, v0

        bne     t4, 35b                 // if ne zero, not end of block

40:
//
// Check for any remaining bytes.
//
        and     a2, 7, a2               // isolate residual bytes
        beq     a2, 60f                 // if eq, no residual bytes
50:
//
// Checksum remaining bytes.
//
// The technique we use here is to load the final quadword, then
// zero out the bytes that are not included.
//
        ldq     t0, 0(a1)               // get quadword surrounding remainder
55:
        ornot   zero, zero, t1          // get all-ones mask
        sll     t1, a2, t2              // byte mask: low a2 bits clear, rest set
        zap     t0, t2, t0              // zero out bytes past end of buffer
        addq    v0, t0, v0              // add quadword to partial checksum
        cmpult  v0, t0, t1              // generate carry
        addq    t1, v0, v0              // add carry back into checksum
60:
//
// Byte swap the 64-bit checksum if the start of the buffer was not word
// aligned (the leading byte was pre-shifted at entry to match this swap).
//
        blbc    t6, 65f                 // skip swap if original address was even
        zap     v0, 0xAA, t0            // isolate even bytes
        sll     t0, 8, t0               // shift even bytes into odd positions
        srl     v0, 8, t1               // shift odd bytes into even positions
        zap     t1, 0xAA, t1            // isolate odd bytes
        bis     t0, t1, v0              // merge bytes back together

65:
//
// Add computed checksum to original checksum, and fold the 64-bit
// result down to 16 bits.
//
        addq    v0, a0, v0              // add computed checksum to original
        cmpult  v0, a0, t0              // generate carry
        addq    v0, t0, v0              // add carry back into checksum

//
// Swap the longwords in order to sum two longwords and their carry in one add.
//
        sll     v0, 32, t0              // shift low longword into high
        srl     v0, 32, t1              // shift high longword into low
        bis     t1, t0, t5              // merge back together

        addq    v0, t5, t0              // produce sum + carry in high longword
        srl     t0, 32, t1              // shift back down to low half
//
// Swap the words in order to sum two words and their carry in one add.
//
        sll     t1, 16, t2              // shift high word into low
        srl     t1, 16, t3              // shift low word into high
        bis     t2, t3, t4              // merge back together
        addq    t4, t1, t2              // produce sum and carry in high word
        extwl   t2, 2, v0               // extract 16-bit result
        ret     zero, (ra)              // return

        .end    tcpxsum