1 files changed, 354 insertions, 0 deletions
diff --git a/private/crt32/string/i386/memcpy.asm b/private/crt32/string/i386/memcpy.asm
new file mode 100644
index 000000000..4cde3cf72
--- /dev/null
+++ b/private/crt32/string/i386/memcpy.asm
@@ -0,0 +1,354 @@
+        page    ,132
+        title   memcpy - Copy source memory bytes to destination
+;***
+;memcpy.asm - contains memcpy and memmove routines
+;
+;       Copyright (c) 1986-1991, Microsoft Corporation. All right reserved.
+;
+;Purpose:
+;       memcpy() copies a source memory buffer to a destination buffer.
+;       Overlapping buffers are not treated specially, so propogation may occur.
+;       memmove() copies a source memory buffer to a destination buffer.
+;       Overlapping buffers are treated specially, to avoid propogation.
+;
+;Revision History:
+;       02-06-87  JCR   Added memmove entry
+;       04-08-87  JCR   Conditionalized memmove/memcpy entries
+;       06-30-87  SKS   Rewritten for speed and size
+;       08-21-87  SKS   Fix return value for overlapping copies
+;       05-17-88  SJM   Add model-independent (large model) ifdef
+;       08-04-88  SJM   convert to cruntime/ add 32-bit support
+;       08-19-88  JCR   Minor 386 corrections/enhancements
+;       10-25-88  JCR   General cleanup for 386-only code
+;       03-23-90  GJF   Changed to _stdcall. Also, fixed the copyright.
+;       05-10-91  GJF   Back to _cdecl, sigh...
+;       11-13-92  SRW   Make it fast with unaligned arguments
+;
+;*******************************************************************************
+
+        .xlist
+        include cruntime.inc
+        .list
+
+M_EXIT  macro
+        mov     eax,[dst]       ; return pointer to destination
+ifdef   _STDCALL_
+        ret     2*DPSIZE + ISIZE ; _stdcall return
+else
+        ret                     ; _cdecl return
+endif
+        endm    ; M_EXIT
+
+        CODESEG
+
+page
+;***
+;memcpy - Copy source buffer to destination buffer
+;
+;Purpose:
+;       memcpy() copies a source memory buffer to a destination memory buffer.
+;       This routine does NOT recognize overlapping buffers, and thus can lead
+;       to propogation.
+;       For cases where propogation must be avoided, memmove() must be used.
+;
+;       Algorithm:
+;
+;       void * memcpy(void * dst, void * src, size_t count)
+;       {
+;               void * ret = dst;
+;
+;               /*
+;                * copy from lower addresses to higher addresses
+;                */
+;               while (count--)
+;                       *dst++ = *src++;
+;
+;               return(ret);
+;       }
+;
+;memmove - Copy source buffer to destination buffer
+;
+;Purpose:
+;       memmove() copies a source memory buffer to a destination memory buffer.
+;       This routine recognize overlapping buffers to avoid propogation.
+;       For cases where propogation is not a problem, memcpy() can be used.
+;
+;   Algorithm:
+;
+;       void * memmove(void * dst, void * src, size_t count)
+;       {
+;               void * ret = dst;
+;
+;               if (dst <= src || dst >= (src + count)) {
+;                       /*
+;                        * Non-Overlapping Buffers
+;                        * copy from lower addresses to higher addresses
+;                        */
+;                       while (count--)
+;                               *dst++ = *src++;
+;                       }
+;               else {
+;                       /*
+;                        * Overlapping Buffers
+;                        * copy from higher addresses to lower addresses
+;                        */
+;                       dst += count - 1;
+;                       src += count - 1;
+;
+;                       while (count--)
+;                               *dst-- = *src--;
+;                       }
+;
+;               return(ret);
+;       }
+;
+;
+;Entry:
+;       void *dst = pointer to destination buffer
+;       const void *src = pointer to source buffer
+;       size_t count = number of bytes to copy
+;
+;Exit:
+;       Returns a pointer to the destination buffer in AX/DX:AX
+;
+;Uses:
+;       CX, DX
+;
+;Exceptions:
+;*******************************************************************************
+
+ifdef	MEM_MOVE
+        _MEM_     equ <memmove>
+else
+ 	_MEM_	  equ <memcpy>
+endif
+
+%       public  _MEM_
+_MEM_   proc \
+        uses edi esi, \
+        dst:ptr byte, \
+        src:ptr byte, \
+        count:IWORD
+
+              ; destination pointer
+              ; source pointer
+              ; number of bytes to copy
+
+        mov     esi,[src]       ; esi = source
+        mov     edi,[dst]       ; edi = dest
+        mov     ecx,[count]     ; ecx = number of bytes to move
+
+;
+; Check for overlapping buffers:
+;       If (dst <= src) Or (dst >= src + Count) Then
+;               Do normal (Upwards) Copy
+;       Else
+;               Do Downwards Copy to avoid propagation
+;
+
+        cmp     edi,esi         ; dst <= src ?
+        jbe     short CopyUp    ; yes, copy toward higher addresses
+
+        mov     eax,esi
+        add     eax,ecx
+        cmp     edi,eax         ; dst >= (src + count) ?
+        jnae    CopyDown        ; no, copy toward lower addresses
+
+;
+; Copy toward higher addresses.
+;
+CopyUp:
+
+;
+; The algorithm for forward moves is to align the destination to a dword
+; boundary and so we can move dwords with an aligned destination.  This
+; occurs in 3 steps.
+;
+;   - move x = ((4 - Dest & 3) & 3) bytes
+;   - move y = ((L-x) >> 2) dwords
+;   - move (L - x - y*4) bytes
+;
+        test    edi,11b          ; destination dword aligned?
+        jnz     short byterampup ; if we are not dword aligned already, align
+
+        mov     edx,ecx         ; byte count
+        and     edx,11b         ; trailing byte count
+        shr     ecx,2           ; shift down to dword count
+        rep     movsd           ; move all of our dwords
+
+        jmp     dword ptr TrailingVecs[edx*4]
+
+        align   @WordSize
+TrailingVecs    dd      Trail0, Trail1, Trail2, Trail3
+
+        align   @WordSize
+Trail3:
+        mov     ax,[esi]
+        mov     [edi],ax
+        mov     al,[esi+2]
+        mov     [edi+2],al
+
+        M_EXIT
+
+        align   @WordSize
+Trail2:
+        mov     ax,[esi]
+        mov     [edi],ax
+
+        M_EXIT
+
+        align   @WordSize
+Trail1:
+        mov     al,[esi]
+        mov     [edi],al
+
+Trail0:
+        M_EXIT
+
+;
+; Code to do optimal memory copies for non-dword-aligned destinations.
+;
+        align   @WordSize
+byterampup:
+
+; The following length check is done for two reasons:
+;
+;    1. to ensure that the actual move length is greater than any possiale
+;       alignment move, and
+;
+;    2. to skip the multiple move logic for small moves where it would
+;       be faster to move the bytes with one instruction.
+;
+; Leading bytes could be handled faster via split-out optimizations and
+; a jump table (as trailing bytes are), at the cost of size.
+;
+; At this point, ECX is the # of bytes to copy, and EDX is the # of leading
+; bytes to copy.
+;
+        cmp     ecx,12                  ; check for reasonable length
+        jbe     short ShortMove         ; do short move if appropriate
+        mov     edx,edi
+        neg     edx
+        and     edx,11b                 ; # of leading bytes
+        sub     ecx,edx                 ; subtract out leading bytes
+        mov     eax,ecx                 ; # of bytes remaining after leading
+        mov     ecx,edx                 ; # of leading bytes
+        rep     movsb                   ; copy leading bytes
+        mov     ecx,eax                 ; compute number of dwords to move
+        and     eax,11b                 ; # of trailing bytes
+        shr     ecx,2                   ; # of whole dwords
+        rep     movsd                   ; move whole dwords
+        jmp     dword ptr TrailingVecs[eax*4] ; copy trailing bytes
+
+;
+; Simple copy, byte at a time. This could be faster with a jump table and
+; split-out optimizations, copying as much as possible a dword/word at a
+; time and using MOV with displacements, but such short cases are unlikely
+; to be called often (it seems silly to call a function to copy less than
+; three dwords).
+;
+        align   @WordSize
+ShortMove:
+        rep movsb
+
+        M_EXIT
+
+;
+; Copy down to avoid propogation in overlapping buffers.
+;
+        align   @WordSize
+CopyDown:
+        std                     ; Set Direction Flag = Down
+        add     esi,ecx         ; point to byte after end of source buffer
+        add     edi,ecx         ; point to byte after end of dest buffer
+;
+; See if the destination start is dword aligned
+;
+
+        test    edi,11b
+        jnz     short byterampup_copydown       ; not dword aligned
+;
+; Destination start is dword aligned
+;
+        mov     edx,ecx         ; set aside count of bytes to copy
+        and     edx,11b         ; # of trailing bytes to copy
+        sub     esi,4           ; point to start of first dword to copy
+        sub     edi,4           ; point to start of first dword to copy to
+        shr     ecx,2           ; dwords to copy
+        rep     movsd           ; copy as many dwords as possible
+        jmp     dword ptr TrailingVecs_copydown[edx*4] ;do any trailing bytes
+
+        align   @WordSize
+TrailingVecs_copydown   label   dword
+        dd      Trail0_copydown
+        dd      Trail1_copydown
+        dd      Trail2_copydown
+        dd      Trail3_copydown
+
+        align   @WordSize
+Trail3_copydown:
+        mov     ax,[esi+2]
+        mov     [edi+2],ax
+        mov     al,[esi+1]
+        mov     [edi+1],al
+        cld                     ; Set Direction Flag = Up
+
+        M_EXIT
+
+        align   @WordSize
+Trail2_copydown:
+        mov     ax,[esi+2]
+        mov     [edi+2],ax
+        cld                     ; Set Direction Flag = Up
+
+        M_EXIT
+
+        align   @WordSize
+Trail1_copydown:
+        mov     al,[esi+3]
+        mov     [edi+3],al
+Trail0_copydown:
+        cld                     ; Set Direction Flag = Up
+
+        M_EXIT
+
+;
+; Destination start is not dword aligned.
+;
+; Leading bytes could be handled faster via split-out optimizations and
+; a jump table (as trailing bytes are), at the cost of size.
+;
+; At this point, ECX is the # of bytes to copy, and EDX is the # of leading
+; bytes to copy.
+;
+        align   @WordSize
+byterampup_copydown:
+        dec     esi             ; point to first leading src byte
+        dec     edi             ; point to first leading dest byte
+        cmp     ecx,12          ; check for reasonable length
+        jbe     short ShortMove_copydown ; do short move if appropriate
+        neg     edx
+        and     edx,11b
+        sub     ecx,edx         ; # of bytes after leading bytes
+        mov     eax,ecx         ; set aside # of bytes remaining
+        mov     ecx,edx         ; # of leading bytes
+        rep     movsb           ; copy leading odd bytes
+        mov     ecx,eax         ; # of remaining bytes
+        and     eax,11b         ; # of trailing bytes
+        sub     esi,3           ; point to start of first whole src dword
+        sub     edi,3           ; point to start of first whole dest dword
+        shr     ecx,2           ; # of whole dwords
+        rep     movsd           ; copy whole dwords
+        jmp     dword ptr TrailingVecs_copydown[eax*4]
+
+        align   @WordSize
+ShortMove_copydown:
+        rep     movsb
+        cld                     ; Set Direction Flag = Up
+
+        M_EXIT
+
+_MEM_   endp
+        end
+
+