diff options
Diffstat (limited to '')
-rw-r--r-- | private/ntos/tdi/tcpip/tcp/i386/xsum.asm | 259 |
1 files changed, 259 insertions, 0 deletions
diff --git a/private/ntos/tdi/tcpip/tcp/i386/xsum.asm b/private/ntos/tdi/tcpip/tcp/i386/xsum.asm new file mode 100644 index 000000000..7cb03bce2 --- /dev/null +++ b/private/ntos/tdi/tcpip/tcp/i386/xsum.asm @@ -0,0 +1,259 @@ + title "Compute Checksum" + +;/*++ +; +; Copyright (c) 1992 Microsoft Corporation +; +; Module Name: +; +; xsum.asm +; +; Abstract: +; +; This module implements a function to compute the checksum of a buffer. +; +; Author: +; +; David N. Cutler (davec) 27-Jan-1992 +; +; Revision History: +; +; Who When What +; -------- -------- ---------------------------------------------- +; mikeab 01-22-94 Pentium optimization +; +; Environment: +; +; Any mode. +; +; Revision History: +; +;--*/ + +LOOP_UNROLLING_BITS equ 5 ; log2 of dwords per unrolled loop pass +LOOP_UNROLLING equ (1 SHL LOOP_UNROLLING_BITS) ; dwords per unrolled loop pass + + .386 + .model small,c + + assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT + assume fs:nothing,gs:nothing + + .xlist + include callconv.inc + include ks386.inc + .list + + .code + +;++ +; +; ULONG +; tcpxsum( +; IN ULONG cksum, +; IN PUCHAR buf, +; IN ULONG len +; ) +; +; Routine Description: +; +; This function computes the checksum of the specified buffer. +; +; Arguments: +; +; cksum - Supplies the initial checksum value, in 16-bit form, +; with the high word set to 0. +; +; buf - Supplies a pointer to the buffer to checksum. +; +; len - Supplies the length of the buffer in bytes. +; +; Return Value: +; +; The computed checksum, folded to 16 bits with end-around carry and added to +; the low word of the initial checksum, is returned as the function value. 
+; +;-- + +cksum equ 12 ; stack offset to initial checksum (ESP-relative, after the two pushes below) +buf equ 16 ; stack offset to source address +len equ 20 ; stack offset to length in bytes + +to_checksum_last_word: ; relay for the short conditional jumps below + jmp checksum_last_word + +to_checksum_done: ; relay for the short conditional jumps below + jmp checksum_done + +to_checksum_dword_loop_done: ; relay for the short conditional jump below + jmp checksum_dword_loop_done + +cPublicProc tcpxsum,3 + + push ebx ; save nonvolatile register + push esi ; save nonvolatile register + + mov ecx,[esp + len] ; get length in bytes + sub eax,eax ; clear computed checksum + test ecx,ecx ; any bytes to checksum at all? + jz short to_checksum_done ; no bytes to checksum + +; +; If the checksum buffer is not word aligned, then initialize the high byte +; of the computed checksum with the first byte of the buffer. +; + + mov esi,[esp + buf] ; get source address + sub edx,edx ; set up to load word into EDX below + test esi,1 ; check if buffer word aligned + jz short checksum_word_aligned ; if zf, buffer word aligned + mov ah,[esi] ; get first byte (we know we'll have + ; to swap at the end) + inc esi ; increment buffer address + dec ecx ; decrement number of bytes + jz short to_checksum_done ; if zf set, no more bytes + +; +; If the remaining length is not an even number of bytes, then initialize +; the low byte of the computed checksum with the last byte of the buffer. +; + +checksum_word_aligned: ; ESI is word aligned, ECX = bytes left + shr ecx,1 ; convert to word count + jnc short checksum_start ; if nc, even number of bytes + mov al,[esi+ecx*2] ; initialize the computed checksum + jz short to_checksum_done ; if zf set (still from the SHR; MOV leaves flags intact), no more bytes + +; +; Compute checksum in large blocks of dwords, with one partial word up front if +; necessary to get dword alignment, and another partial word at the end if +; needed. +; + +; +; Compute checksum on the leading word, if that's necessary to get dword +; alignment. 
+; + +checksum_start: ; ECX = # of whole words left to checksum + test esi,02h ; check if source dword aligned + jz short checksum_dword_aligned ; source is already dword aligned + mov dx,[esi] ; get first word to checksum + add esi,2 ; update source address + add eax,edx ; update partial checksum + ; (no carry is possible, because EAX + ; and EDX are both 16-bit values) + dec ecx ; count off this word (zero case gets + ; picked up below) + +; +; Checksum as many words as possible by processing a dword at a time. +; + +checksum_dword_aligned: + push ecx ; so we can tell if there's a trailing + ; word later + shr ecx,1 ; # of dwords to checksum + jz short to_checksum_last_word ; no dwords to checksum + + mov edx,[esi] ; preload the first dword + add esi,4 ; point to the next dword + dec ecx ; count off the dword we just loaded + jz short to_checksum_dword_loop_done + ; skip the loop if that was the only + ; dword + mov ebx,ecx ; EBX = # of dwords left to checksum + add ecx,LOOP_UNROLLING-1 ; round up loop count + shr ecx,LOOP_UNROLLING_BITS ; convert from dword count to unrolled + ; loop count + and ebx,LOOP_UNROLLING-1 ; # of partial dwords to do in first + ; loop + jz short checksum_dword_loop ; special-case when no partial loop, + ; because fixup below doesn't work + ; in that case (carry flag is + ; cleared at this point, as required + ; at loop entry) + lea esi,[esi+ebx*4-(LOOP_UNROLLING*4)] + ; adjust buffer pointer back to + ; compensate for hardwired displacement + ; at loop entry point + ; ***doesn't change carry flag*** + jmp loop_entry[ebx*4] ; enter the loop to do the first, + ; partial iteration, after which we can + ; just do 64-word blocks + ; ***doesn't change carry flag*** + +checksum_dword_loop: + +DEFLAB macro pre,suf +pre&suf: + endm + +TEMP=0 + REPT LOOP_UNROLLING + deflab loop_entry_,%TEMP + adc eax,edx ; add previously loaded dword plus carry + mov edx,[esi + TEMP] ; load next dword (MOV leaves carry flag intact) +TEMP=TEMP+4 + ENDM + +checksum_dword_loop_end: + + lea esi,[esi + LOOP_UNROLLING * 4] ; update source address + ; ***doesn't change carry flag*** + dec ecx ; 
count off unrolled loop iteration + ; ***doesn't change carry flag*** + jnz checksum_dword_loop ; do more blocks + +checksum_dword_loop_done label proc + adc eax,edx ; finish dword checksum + mov edx,0 ; zero EDX; MOV (unlike SUB/XOR) keeps the carry flag + adc eax,edx ; add in the final carry + +; +; Compute checksum on the trailing word, if there is one. +; High word of EDX = 0 at this point +; Word count saved on the stack has bit 0 set iff there's a trailing word to do +; + +checksum_last_word label proc ; "proc" so not scoped to function + pop ecx ; get back word count + test ecx,1 ; is there a trailing word? + jz short checksum_done ; no trailing word + add ax,[esi] ; add in the trailing word + adc eax,0 ; wrap the carry from the 16-bit add back into the sum + +checksum_done label proc ; "proc" so not scoped to function + mov ecx,eax ; fold the checksum to 16 bits + ror ecx,16 ; ECX = EAX with its two words exchanged + add eax,ecx ; high word of EAX = low + high + end-around carry + mov ebx,[esp + buf] ; reload original buffer address + shr eax,16 ; folded 16-bit sum now in AX, high word 0 + test ebx,1 ; check if buffer word aligned + jz short checksum_combine ; if zf set, buffer word aligned + ror ax,8 ; byte aligned--swap bytes back +checksum_combine label proc ; "proc" so not scoped to function + add ax,word ptr [esp + cksum] ; combine checksums + pop esi ; restore nonvolatile register + adc eax,0 ; wrap carry from the combine (POP leaves flags intact) + pop ebx ; restore nonvolatile register + stdRET tcpxsum + + +REFLAB macro pre,suf + dd pre&suf + endm + + align 4 +loop_entry label dword + dd 0 ; slot 0 unused: EBX = 0 branches straight to checksum_dword_loop +TEMP=LOOP_UNROLLING*4 + REPT LOOP_UNROLLING-1 +TEMP=TEMP-4 + reflab loop_entry_,%TEMP + ENDM + +stdENDP tcpxsum + + end +
\ No newline at end of file |