Diffstat
-rw-r--r--  private/ntos/tdi/tcpip/tcp/ppc/xsum.s  257
1 files changed, 257 insertions, 0 deletions
diff --git a/private/ntos/tdi/tcpip/tcp/ppc/xsum.s b/private/ntos/tdi/tcpip/tcp/ppc/xsum.s
new file mode 100644
index 000000000..831ef6b53
--- /dev/null
+++ b/private/ntos/tdi/tcpip/tcp/ppc/xsum.s
@@ -0,0 +1,257 @@
+// TITLE("Compute Checksum")
+//++
+//
+// Copyright (c) 1994 IBM Corporation
+//
+// Module Name:
+//
+// xsum.s
+//
+// Abstract:
+//
+// This module implements a function to compute the checksum of a buffer.
+//
+// Author:
+//
+// David N. Cutler (davec) 27-Jan-1992
+//
+// Environment:
+//
+// User mode.
+//
+// Revision History:
+//
+// Michael W. Thomas 02/14/94 Converted from MIPS
+// Peter L. Johnston 07/19/94 Updated for Daytona Lvl 734 and
+// optimized for PowerPC.
+//
+//--
+
+#include "ksppc.h"
+
+ SBTTL("Compute Checksum")
+//++
+//
+// ULONG
+// tcpxsum (
+// IN ULONG Checksum,
+// IN PUCHAR Source,
+// IN ULONG Length
+// )
+//
+// Routine Description:
+//
+// This function computes the checksum of the specified buffer.
+//
+// N.B. The checksum is the 16 bit checksum of the 16 bit aligned
+// buffer. If the buffer is not 16 bit aligned, the first byte is
+// moved to the high order position so that it is added to the correct half.
+//
+// Arguments:
+//
+// Checksum (r3) - Supplies the initial checksum value.
+//
+// Source (r4) - Supplies a pointer to the checksum buffer.
+//
+// Length (r5) - Supplies the length of the buffer in bytes.
+//
+// Return Value:
+//
+// The computed checksum is returned as the function value.
+//
+//--
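+
+// In rough C terms (a sketch of the steps, not a literal translation), the
+// code below computes:
+//
+//     sum = 0;
+//     if (Source is odd aligned) sum = first byte << 8;   // align to 16 bits
+//     if (remaining length is odd) sum += last byte;
+//     sum += each aligned 16/32 bit word, accumulating carries;
+//     fold sum and carries down to 16 bits;
+//     if (Source was odd aligned) swap the bytes in each half of sum;
+//     return the 16 bit fold of (Checksum + sum);
+//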
+
+ LEAF_ENTRY(tcpxsum)
+
+ cmpwi r.5, 0 // check if bytes to checksum
+ mtcrf 0x01, r.4 // set up for alignment check
+ li r.6, 0 // initialize partial checksum
+ beqlr- // return if no bytes to checksum
+
+ andi. r.7, r.5, 1 // check if length is even
+ crmove 7, 31 // remember original alignment
+ bf 31, evenalign // jif 16 bit aligned
+
+//
+// Initialize the checksum to the first byte shifted up a byte.
+//
+ lbz r.6, 0(r.4) // get first byte of buffer
+ subi r.5, r.5, 1 // reduce count of bytes to checksum
+ cmpwi cr.6, r.5, 0 // check if done
+ crnot eq, eq // invert odd/even length check
+ addi r.4, r.4, 1 // advance buffer address
+ mtcrf 0x01, r.4 // reset 32 bit alignment check
+ slwi r.6, r.6, 8 // shift byte up in computed checksum
+ // max current checksum is 0x0ff00
+ beq cr.6, combine // jif no more bytes to checksum
+
+evenalign:
+
+//
+// Check if the length of the buffer is an even number of bytes.
+//
+// If the buffer is not an even number of bytes, add the last byte to the
+// computed checksum.
+//
+
+ beq evenlength
+ subic. r.5, r.5, 1 // reduce count of bytes to checksum
+ lbzx r.7, r.4, r.5 // get last byte from buffer
+ add r.6, r.6, r.7 // add last byte to computed checksum
+ // max current checksum is 0x0ffff
+ beq combine // jif no more bytes in buffer
+
+evenlength:
+
+//
+// Check if the buffer is 4 byte aligned. If it is not, add the first
+// 2 byte word into the checksum so that the remaining buffer is 4 byte aligned.
+//
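+// CR bit 30 was loaded from bit 30 of the buffer address by the earlier
+// mtcrf 0x01, r.4 instruction, so it is clear when the address is already
+// a multiple of 4.
+//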
+
+ bf 30, fourbytealigned // jif 4 byte aligned
+
+ lhz r.7, 0(r.4) // get 2 byte word
+ subic. r.5, r.5, 2 // reduce length
+ addi r.4, r.4, 2 // bump address
+ add r.6, r.6, r.7 // add 2 bytes to computed checksum
+ // max current checksum is 0x1fffe
+ beq combine // jif no more bytes to checksum
+
+//
+// Attempt to sum the remainder of the buffer in sets of 32 bytes. This
+// should achieve 2 bytes per clock on the 601 and 603, and 3.2 bytes per clock
+// on the 604. (A separate implementation will be required to take advantage
+// of 64 bit loads on the 620).
+//
+
+fourbytealigned:
+
+ srwi. r.7, r.5, 5 // get count of 32 byte sets
+ mtcrf 0x03, r.5 // break length into blocks for the
+ // various run lengths.
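+ // (CR bits 27..30 now hold the 16, 8,
+ // 4 and 2 byte length bits tested by
+ // try16/try8/try4/try2 below)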
+ subi r.4, r.4, 4 // adjust buffer address for lwzu
+ mtctr r.7 // set count of 32 byte sets
+ addic r.6, r.6, 0 // clear carry bit
+ beq try16 // jif no 32 byte sets
+
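+// The adde chain below accumulates the carry out of each 32 bit add in
+// XER[CA] (cleared by the addic above), so the running total is effectively
+// a 33 bit sum; the pending carry is folded back in at fold below.
+//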
+do32: lwz r.8, 4(r.4) // get 1st 4 bytes in set
+ lwz r.9, 8(r.4) // get 2nd 4
+ adde r.6, r.6, r.8 // add 1st 4 to checksum
+ lwz r.10, 12(r.4) // get 3rd 4
+ adde r.6, r.6, r.9 // add 2nd 4
+ lwz r.11, 16(r.4) // get 4th 4
+ adde r.6, r.6, r.10 // add 3rd 4
+ lwz r.8, 20(r.4) // get 5th 4
+ adde r.6, r.6, r.11 // add 4th 4
+ lwz r.9, 24(r.4) // get 6th 4
+ adde r.6, r.6, r.8 // add 5th 4
+ lwz r.10, 28(r.4) // get 7th 4
+ adde r.6, r.6, r.9 // add 6th 4
+ lwzu r.11, 32(r.4) // get 8th 4 and update address
+ adde r.6, r.6, r.10 // add 7th 4
+ adde r.6, r.6, r.11 // add 8th 4
+ bdnz do32
+
+try16: bf 27, try8 // jif no 16 byte block
+
+ lwz r.8, 4(r.4) // get 1st 4
+ lwz r.9, 8(r.4) // get 2nd 4
+ adde r.6, r.6, r.8 // add 1st 4
+ lwz r.10, 12(r.4) // get 3rd 4
+ adde r.6, r.6, r.9 // add 2nd 4
+ lwzu r.11, 16(r.4) // get 4th 4 and update address
+ adde r.6, r.6, r.10 // add 3rd 4
+ adde r.6, r.6, r.11 // add 4th 4
+
+try8: bf 28, try4 // jif no 8 byte block
+ lwz r.8, 4(r.4) // get 1st 4
+ lwzu r.9, 8(r.4) // get 2nd 4 and update address
+ adde r.6, r.6, r.8 // add 1st 4
+ adde r.6, r.6, r.9 // add 2nd 4
+
+try4: bf 29, try2 // jif no 4 byte block
+ lwzu r.8, 4(r.4) // get 4 bytes and update address
+ adde r.6, r.6, r.8 // add 4 bytes to checksum
+
+try2: bf 30, fold // jif no 2 byte block
+
+//
+// At this point, r.4 points at the last 4 byte block processed (or 4 bytes
+// below the remaining data if no 4 byte blocks were processed), so the last
+// two bytes are loaded from offset 4 and added into the checksum with carry.
+//
+ lhz r.8, 4(r.4) // get last two bytes
+ adde r.6, r.6, r.8 // add last two bytes
+
+//
+// Collapse the 33 bit sum (1 carry bit, 32 bits in r.6) into a 17 bit checksum.
+//
+
+fold: rlwinm r.7, r.6, 16, 0xffff // get 16 most significant bits (upper)
+ rlwinm r.6, r.6, 0, 0xffff // get least significant 16 bits (lower)
+ adde r.6, r.6, r.7 // upper + lower + carry
+ // max current checksum is 0x1ffff
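+ // ie: sum = (sum >> 16) +
+ // (sum & 0xffff) + carry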
+
+//
+// Combine input checksum and partial checksum.
+//
+// If the input buffer was only byte aligned, swap the bytes within each half
+// of the computed checksum before combining it with the input checksum.
+//
+
+combine:
+
+ bf 7, waseven // jif original alignment was 16 bit
+
+//
+// Swap bytes within upper and lower halves.
+// eg: AA BB CC DD becomes BB AA DD CC
+//
+// As the current maximum partial checksum is 0x1ffff don't worry about AA.
+// ie: want BB 00 DD CC
+//
+
+ rlwimi r.6, r.6, 16, 0xff000000 // r.6 = CC BB CC DD
+ rlwinm r.6, r.6, 8, 0xff00ffff // r.6 = BB 00 DD CC
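+ // net effect, since AA is 0:
+ // sum = ((sum << 8) & 0xff00ff00) |
+ // ((sum >> 8) & 0x00ff00ff)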
+
+waseven:
+
+ add r.3, r.3, r.6 // combine checksums
+ // max current checksum is 0x101fffe
+ rotlwi r.4, r.3, 16 // swap checksum words
+ add r.3, r.3, r.4 // add words with carry into high word
+ srwi r.3, r.3, 16 // extract final checksum
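+ // ie: fold the two halves, plus any
+ // carry between them, into 16 bits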
+
+ LEAF_EXIT(tcpxsum)
+
+ .debug$S
+ .ualong 1
+
+ .uashort 15
+ .uashort 0x9 # S_OBJNAME
+ .ualong 0
+ .byte 8, "xsum.obj"
+
+ .uashort 24
+ .uashort 0x1 # S_COMPILE
+ .byte 0x42 # Target processor = PPC 604
+ .byte 3 # Language = ASM
+ .byte 0
+ .byte 0
+ .byte 17, "PowerPC Assembler"
+
+ .uashort 43
+ .uashort 0x205 # S_GPROC32
+ .ualong 0
+ .ualong 0
+ .ualong 0
+ .ualong tcpxsum.end-..tcpxsum
+ .ualong 0
+ .ualong tcpxsum.end-..tcpxsum
+ .ualong [secoff]..tcpxsum
+ .uashort [secnum]..tcpxsum
+ .uashort 0x1000
+ .byte 0x00
+ .byte 7, "tcpxsum"
+
+ .uashort 2, 0x6 # S_END