summaryrefslogtreecommitdiffstats
path: root/private/ntos/tdi/tcpip/tcp/i386/xsum.asm
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--private/ntos/tdi/tcpip/tcp/i386/xsum.asm259
1 files changed, 259 insertions, 0 deletions
diff --git a/private/ntos/tdi/tcpip/tcp/i386/xsum.asm b/private/ntos/tdi/tcpip/tcp/i386/xsum.asm
new file mode 100644
index 000000000..7cb03bce2
--- /dev/null
+++ b/private/ntos/tdi/tcpip/tcp/i386/xsum.asm
@@ -0,0 +1,259 @@
+ title "Compute Checksum"
+
+;/*++
+;
+; Copyright (c) 1992 Microsoft Corporation
+;
+; Module Name:
+;
+; cksy.asm
+;
+; Abstract:
+;
+; This module implements a function to compute the checksum of a buffer.
+;
+; Author:
+;
+; David N. Cutler (davec) 27-Jan-1992
+;
+; Revision History:
+;
+; Who When What
+; -------- -------- ----------------------------------------------
+; mikeab 01-22-94 Pentium optimization
+;
+; Environment:
+;
+; Any mode.
+;
+; Revision History:
+;
+;--*/
+
+LOOP_UNROLLING_BITS equ 5 ; log2 of LOOP_UNROLLING; used as a shift count on the dword count
+LOOP_UNROLLING equ (1 SHL LOOP_UNROLLING_BITS) ; number of adc/mov pairs emitted for the unrolled loop (32)
+
+ .386 ; enable 80386 instructions (32-bit registers are used throughout)
+ .model small,c
+
+ assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
+ assume fs:nothing,gs:nothing
+
+ .xlist ; keep the include files out of the listing
+ include callconv.inc ; presumably supplies cPublicProc/stdRET/stdENDP -- confirm
+ include ks386.inc
+ .list ; resume listing
+
+ .code
+
+;++
+;
+; ULONG
+; tcpxsum(
+; IN ULONG cksum,
+; IN PUCHAR buf,
+; IN ULONG len
+; )
+;
+; Routine Description:
+;
+; This function computes the checksum of the specified buffer.
+;
+; Arguments:
+;
+; cksum - Supplies the initial checksum value, in 16-bit form,
+; with the high word set to 0.
+;
+; buf - Supplies a pointer to the buffer to checksum.
+;
+; len - Supplies the length of the buffer in bytes.
+;
+; Return Value:
+;
+; The computed checksum, folded to 16 bits (high word zero) and combined with
+; the initial checksum using end-around carry, is returned as the function value.
+;
+;--
+
+cksum equ 12 ; [esp+12] after the two entry pushes: initial checksum
+buf equ 16 ; [esp+16] after the two entry pushes: source address
+len equ 20 ; [esp+20] after the two entry pushes: length in bytes (not words)
+
+to_checksum_last_word: ; relay for the "jz short" taken when there are no dwords; ECX is already pushed
+ jmp checksum_last_word ; full-range jmp (target presumably beyond short-jump reach past the unrolled loop)
+
+to_checksum_done: ; relay for the "jz short" early exits, all taken before ECX is pushed
+ jmp checksum_done
+
+to_checksum_dword_loop_done: ; relay for the "jz short" taken when the preloaded dword was the only one
+ jmp checksum_dword_loop_done
+
+cPublicProc tcpxsum,3
+; Entry point for tcpxsum(cksum, buf, len); arguments are read from the stack.
+ push ebx ; save nonvolatile register
+ push esi ; save nonvolatile register
+; EAX = running sum, EDX = dword being added, ECX = count, ESI = read pointer.
+ mov ecx,[esp + len] ; get length in bytes
+ sub eax,eax ; clear computed checksum
+ test ecx,ecx ; any bytes to checksum at all?
+ jz short to_checksum_done ; no bytes: only the initial checksum is combined
+
+;
+; If the buffer does not start on a word boundary, load its first byte into
+; AH (the high byte of the still-zero sum) and continue from the next byte.
+; The sum is then byte-swapped; checksum_done rotates it back with ror ax,8.
+
+ mov esi,[esp + buf] ; get source address
+ sub edx,edx ; set up to load word into EDX below
+ test esi,1 ; check if buffer word aligned
+ jz short checksum_word_aligned ; if zf, buffer word aligned
+ mov ah,[esi] ; get first byte (we know we'll have
+ ; to swap at the end)
+ inc esi ; increment buffer address
+ dec ecx ; decrement number of bytes
+ jz short to_checksum_done ; if zf set, no more bytes
+
+;
+; If an odd number of bytes remains, load the last byte of the buffer into
+; AL now, so that everything after this point works in whole words.
+;
+
+checksum_word_aligned: ;
+ shr ecx,1 ; convert to word count; CF = 1 if a byte is left over
+ jnc short checksum_start ; if nc, even number of bytes
+ mov al,[esi+ecx*2] ; AL = last byte (mov leaves ZF from the shr intact)
+ jz short to_checksum_done ; ZF from the shr: word count is 0, no more bytes
+
+;
+; Compute checksum in large blocks of dwords, with one partial word up front if
+; necessary to get dword alignment, and another partial word at the end if
+; needed.
+;
+
+;
+; Compute checksum on the leading word, if that's necessary to get dword
+; alignment.
+;
+
+checksum_start: ;
+ test esi,02h ; check if source dword aligned
+ jz short checksum_dword_aligned ; source is already dword aligned
+ mov dx,[esi] ; get first word to checksum
+ add esi,2 ; update source address
+ add eax,edx ; update partial checksum
+ ; (no carry is possible, because EAX
+ ; and EDX are both 16-bit values)
+ dec ecx ; count off this word (zero case gets
+ ; picked up below)
+
+;
+; Checksum as many words as possible by processing a dword at a time.
+;
+
+checksum_dword_aligned:
+ push ecx ; so we can tell if there's a trailing
+ ; word later
+ shr ecx,1 ; # of dwords to checksum
+ jz short to_checksum_last_word ; no dwords to checksum
+; Preload one dword. The add below rewrites CF (left by the shr above); dec keeps it.
+ mov edx,[esi] ; preload the first dword
+ add esi,4 ; point to the next dword
+ dec ecx ; count off the dword we just loaded
+ jz short to_checksum_dword_loop_done
+ ; skip the loop if that was the only
+ ; dword (CF is still the add's carry-out)
+ mov ebx,ecx ; EBX = # of dwords left to checksum
+ add ecx,LOOP_UNROLLING-1 ; round up loop count
+ shr ecx,LOOP_UNROLLING_BITS ; convert from dword count to unrolled
+ ; loop count
+ and ebx,LOOP_UNROLLING-1 ; # of partial dwords to do in first
+ ; loop
+ jz short checksum_dword_loop ; special-case when no partial loop,
+ ; because fixup below doesn't work
+ ; in that case (the and above has
+ ; cleared the carry flag, as required
+ ; at loop entry)
+ lea esi,[esi+ebx*4-(LOOP_UNROLLING*4)]
+ ; adjust buffer pointer back to
+ ; compensate for hardwired displacement
+ ; at loop entry point
+ ; ***doesn't change carry flag***
+ jmp loop_entry[ebx*4] ; enter the loop to do the first,
+ ; partial iteration, after which we can
+ ; just do 64-word blocks
+ ; ***doesn't change carry flag***
+
+checksum_dword_loop: ; top of the unrolled loop (same address as loop_entry_0)
+
+DEFLAB macro pre,suf ; defines a label whose name is the two arguments joined
+pre&suf:
+ endm
+; Emit LOOP_UNROLLING adc/mov pairs, each labelled loop_entry_<byte offset>.
+TEMP=0
+ REPT LOOP_UNROLLING
+ deflab loop_entry_,%TEMP
+ adc eax,edx ; add the previously loaded dword plus the pending carry
+ mov edx,[esi + TEMP] ; load the next dword (mov leaves CF alone)
+TEMP=TEMP+4
+ ENDM
+
+checksum_dword_loop_end:
+
+ lea esi,[esi + LOOP_UNROLLING * 4] ; update source address
+ ; ***doesn't change carry flag***
+ dec ecx ; count off unrolled loop iteration
+ ; ***doesn't change carry flag***
+ jnz checksum_dword_loop ; do more blocks (CF still holds the last adc's carry)
+
+checksum_dword_loop_done label proc
+ adc eax,edx ; finish dword checksum
+ mov edx,0 ; zero EDX with mov so CF from the adc above survives
+ adc eax,edx ; fold that last carry back into the sum
+
+;
+; Compute checksum on the trailing word, if there is one.
+; The word count saved by "push ecx" at checksum_dword_aligned is popped here;
+; its low bit tells whether one word is left over after the dwords.
+;
+
+checksum_last_word label proc ; "proc" so not scoped to function
+ pop ecx ; get back word count
+ test ecx,1 ; is there a trailing word?
+ jz short checksum_done ; no trailing word
+ add ax,[esi] ; add in the trailing word
+ adc eax,0 ; wrap the carry out of AX back into the sum
+
+checksum_done label proc ; "proc" so not scoped to function
+ mov ecx,eax ; fold the checksum to 16 bits
+ ror ecx,16 ; ECX = EAX with its two halves exchanged
+ add eax,ecx ; high word = low + high + carry out of the low-word add
+ mov ebx,[esp + buf] ; reload the original buffer address (ESI has advanced)
+ shr eax,16 ; keep the folded 16-bit sum; high word is now 0
+ test ebx,1 ; check if buffer word aligned
+ jz short checksum_combine ; if zf set, buffer word aligned
+ ror ax,8 ; byte aligned--swap bytes back
+checksum_combine label proc ; "proc" so not scoped to function
+ add ax,word ptr [esp + cksum] ; combine checksums
+ pop esi ; restore nonvolatile register
+ adc eax,0 ; add back the carry from the 16-bit combine (pop left CF intact)
+ pop ebx ; restore nonvolatile register
+ stdRET tcpxsum
+; Jump table for "jmp loop_entry[ebx*4]": entry k (1..LOOP_UNROLLING-1) is the
+; address of loop_entry_<(LOOP_UNROLLING-k)*4>, so k adc/mov pairs execute.
+REFLAB macro pre,suf ; emits a dword holding the address of the label pre<suf>
+ dd pre&suf
+ endm
+
+ align 4
+loop_entry label dword
+ dd 0 ; index 0 is never used: EBX = 0 takes the jz to checksum_dword_loop
+TEMP=LOOP_UNROLLING*4
+ REPT LOOP_UNROLLING-1
+TEMP=TEMP-4
+ reflab loop_entry_,%TEMP
+ ENDM
+
+stdENDP tcpxsum
+
+ end
+ \ No newline at end of file