path: root/private/crt32/string/i386/memcpy.asm
Diffstat (limited to 'private/crt32/string/i386/memcpy.asm')
-rw-r--r--  private/crt32/string/i386/memcpy.asm  354
1 files changed, 354 insertions, 0 deletions
diff --git a/private/crt32/string/i386/memcpy.asm b/private/crt32/string/i386/memcpy.asm
new file mode 100644
index 000000000..4cde3cf72
--- /dev/null
+++ b/private/crt32/string/i386/memcpy.asm
@@ -0,0 +1,354 @@
+ page ,132
+ title memcpy - Copy source memory bytes to destination
+;***
+;memcpy.asm - contains memcpy and memmove routines
+;
+; Copyright (c) 1986-1991, Microsoft Corporation. All rights reserved.
+;
+;Purpose:
+; memcpy() copies a source memory buffer to a destination buffer.
+; Overlapping buffers are not treated specially, so propagation may occur.
+; memmove() copies a source memory buffer to a destination buffer.
+; Overlapping buffers are treated specially, to avoid propagation.
+;
+;Revision History:
+; 02-06-87 JCR Added memmove entry
+; 04-08-87 JCR Conditionalized memmove/memcpy entries
+; 06-30-87 SKS Rewritten for speed and size
+; 08-21-87 SKS Fix return value for overlapping copies
+; 05-17-88 SJM Add model-independent (large model) ifdef
+; 08-04-88 SJM convert to cruntime/ add 32-bit support
+; 08-19-88 JCR Minor 386 corrections/enhancements
+; 10-25-88 JCR General cleanup for 386-only code
+; 03-23-90 GJF Changed to _stdcall. Also, fixed the copyright.
+; 05-10-91 GJF Back to _cdecl, sigh...
+; 11-13-92 SRW Make it fast with unaligned arguments
+;
+;*******************************************************************************
+
+ .xlist
+ include cruntime.inc
+ .list
+
+M_EXIT macro
+ mov eax,[dst] ; return pointer to destination
+ifdef _STDCALL_
+ ret 2*DPSIZE + ISIZE ; _stdcall return
+else
+ ret ; _cdecl return
+endif
+ endm ; M_EXIT
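+
+; Note: under _STDCALL_ the callee pops its own arguments. Assuming the
+; flat-model sizes defined in cruntime.inc (DPSIZE = 4 for a data pointer,
+; ISIZE = 4 for an int), 2*DPSIZE + ISIZE works out to ret 12: two pointer
+; arguments plus the count. Under _cdecl the caller cleans up, so a plain
+; ret suffices.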
+
+ CODESEG
+
+page
+;***
+;memcpy - Copy source buffer to destination buffer
+;
+;Purpose:
+; memcpy() copies a source memory buffer to a destination memory buffer.
+; This routine does NOT recognize overlapping buffers, and thus can lead
+; to propagation.
+; For cases where propagation must be avoided, memmove() must be used.
+;
+; Algorithm:
+;
+; void * memcpy(void * dst, void * src, size_t count)
+; {
+; void * ret = dst;
+;
+; /*
+; * copy from lower addresses to higher addresses
+; */
+; while (count--)
+; *dst++ = *src++;
+;
+; return(ret);
+; }
+;
+;memmove - Copy source buffer to destination buffer
+;
+;Purpose:
+; memmove() copies a source memory buffer to a destination memory buffer.
+; This routine recognizes overlapping buffers, to avoid propagation.
+; For cases where propagation is not a problem, memcpy() can be used.
+;
+; Algorithm:
+;
+; void * memmove(void * dst, void * src, size_t count)
+; {
+; void * ret = dst;
+;
+; if (dst <= src || dst >= (src + count)) {
+; /*
+; * Non-Overlapping Buffers
+; * copy from lower addresses to higher addresses
+; */
+; while (count--)
+; *dst++ = *src++;
+; }
+; else {
+; /*
+; * Overlapping Buffers
+; * copy from higher addresses to lower addresses
+; */
+; dst += count - 1;
+; src += count - 1;
+;
+; while (count--)
+; *dst-- = *src--;
+; }
+;
+; return(ret);
+; }
+;
+;
+;Entry:
+; void *dst = pointer to destination buffer
+; const void *src = pointer to source buffer
+; size_t count = number of bytes to copy
+;
+;Exit:
+; Returns a pointer to the destination buffer in EAX
+;
+;Uses:
+; ECX, EDX
+;
+;Exceptions:
+;*******************************************************************************
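+
+; A minimal C usage sketch (illustrative only; the buffer below is a
+; hypothetical example, not part of this module):
+;
+;       #include <string.h>
+;
+;       char buf[16] = "hello, world";
+;       memcpy(buf, "HELLO", 5);        /* disjoint regions            */
+;       memmove(buf + 2, buf, 10);      /* overlapping regions are OK  */
+;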
+
+ifdef MEM_MOVE
+ _MEM_ equ <memmove>
+else
+ _MEM_ equ <memcpy>
+endif
+
+% public _MEM_
+_MEM_ proc \
+ uses edi esi, \
+ dst:ptr byte, \
+ src:ptr byte, \
+ count:IWORD
+
+ ; destination pointer
+ ; source pointer
+ ; number of bytes to copy
+
+ mov esi,[src] ; esi = source
+ mov edi,[dst] ; edi = dest
+ mov ecx,[count] ; ecx = number of bytes to move
+
+;
+; Check for overlapping buffers:
+; If (dst <= src) Or (dst >= src + Count) Then
+; Do normal (Upwards) Copy
+; Else
+; Do Downwards Copy to avoid propagation
+;
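+; Expressed as C, the test is roughly the following sketch (the uintptr_t
+; casts are illustrative and mirror the unsigned jbe/jnae comparisons used
+; below):
+;
+;       if ((uintptr_t)dst <= (uintptr_t)src ||
+;           (uintptr_t)dst >= (uintptr_t)src + count)
+;               /* CopyUp: forward copy is safe */ ;
+;       else
+;               /* CopyDown: copy high-to-low to avoid clobbering src */ ;
+;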
+
+ cmp edi,esi ; dst <= src ?
+ jbe short CopyUp ; yes, copy toward higher addresses
+
+ mov eax,esi
+ add eax,ecx
+ cmp edi,eax ; dst >= (src + count) ?
+ jnae CopyDown ; no, copy toward lower addresses
+
+;
+; Copy toward higher addresses.
+;
+CopyUp:
+
+;
+; The algorithm for forward moves is to align the destination to a dword
+; boundary so that dwords can be moved with an aligned destination. This
+; occurs in 3 steps:
+;
+; - move x = ((4 - Dest & 3) & 3) bytes
+; - move y = ((L-x) >> 2) dwords
+; - move (L - x - y*4) bytes
+;
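+; A C sketch of that decomposition (x, y and t are illustrative names;
+; ((4 - Dest & 3) & 3) is the same value as (0 - Dest) & 3):
+;
+;       size_t x = (0 - (uintptr_t)dst) & 3;   /* bytes to the next      */
+;                                              /*   dword boundary       */
+;       size_t y = (count - x) >> 2;           /* whole dwords           */
+;       size_t t = (count - x) & 3;            /* trailing bytes         */
+;
+; (Small counts never reach the unaligned ramp below; a length check there
+; keeps count - x from underflowing.)
+;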
+ test edi,11b ; destination dword aligned?
+ jnz short byterampup ; if we are not dword aligned already, align
+
+ mov edx,ecx ; byte count
+ and edx,11b ; trailing byte count
+ shr ecx,2 ; shift down to dword count
+ rep movsd ; move all of our dwords
+
+ jmp dword ptr TrailingVecs[edx*4]
+
+ align @WordSize
+TrailingVecs dd Trail0, Trail1, Trail2, Trail3
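+
+; The computed jump above plays the role of a C switch on the trailing
+; byte count (sketch only; dst and src here stand for the pointers already
+; advanced past the dwords, and the handlers below use word+byte moves
+; rather than single-byte stores):
+;
+;       switch (count & 3) {
+;       case 3: dst[2] = src[2];    /* fall through */
+;       case 2: dst[1] = src[1];    /* fall through */
+;       case 1: dst[0] = src[0];    /* fall through */
+;       case 0: break;
+;       }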
+
+ align @WordSize
+Trail3:
+ mov ax,[esi]
+ mov [edi],ax
+ mov al,[esi+2]
+ mov [edi+2],al
+
+ M_EXIT
+
+ align @WordSize
+Trail2:
+ mov ax,[esi]
+ mov [edi],ax
+
+ M_EXIT
+
+ align @WordSize
+Trail1:
+ mov al,[esi]
+ mov [edi],al
+
+Trail0:
+ M_EXIT
+
+;
+; Code to do optimal memory copies for non-dword-aligned destinations.
+;
+ align @WordSize
+byterampup:
+
+; The following length check is done for two reasons:
+;
+; 1. to ensure that the actual move length is greater than any possible
+; alignment move, and
+;
+; 2. to skip the multiple move logic for small moves where it would
+; be faster to move the bytes with one instruction.
+;
+; Leading bytes could be handled faster via split-out optimizations and
+; a jump table (as trailing bytes are), at the cost of size.
+;
+; At this point, ECX is the # of bytes to copy, and EDX is the # of leading
+; bytes to copy.
+;
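+; In C terms, the split computed below is roughly (a sketch; the names are
+; illustrative):
+;
+;       size_t lead   = (0 - (uintptr_t)dst) & 3;  /* bytes to alignment  */
+;       size_t rest   = count - lead;
+;       size_t dwords = rest >> 2;                 /* moved with movsd    */
+;       size_t trail  = rest & 3;                  /* via TrailingVecs    */
+;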
+ cmp ecx,12 ; check for reasonable length
+ jbe short ShortMove ; do short move if appropriate
+ mov edx,edi
+ neg edx
+ and edx,11b ; # of leading bytes
+ sub ecx,edx ; subtract out leading bytes
+ mov eax,ecx ; # of bytes remaining after leading
+ mov ecx,edx ; # of leading bytes
+ rep movsb ; copy leading bytes
+ mov ecx,eax ; # of remaining bytes
+ and eax,11b ; # of trailing bytes
+ shr ecx,2 ; # of whole dwords
+ rep movsd ; move whole dwords
+ jmp dword ptr TrailingVecs[eax*4] ; copy trailing bytes
+
+;
+; Simple copy, byte at a time. This could be faster with a jump table and
+; split-out optimizations, copying as much as possible a dword/word at a
+; time and using MOV with displacements, but such short cases are unlikely
+; to be called often (it seems silly to call a function to copy less than
+; three dwords).
+;
+ align @WordSize
+ShortMove:
+ rep movsb
+
+ M_EXIT
+
+;
+; Copy down to avoid propagation in overlapping buffers.
+;
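+; A C sketch of the downward strategy (the unaligned ramp and C aliasing
+; rules are glossed over; this is the shape of the aligned fast path):
+;
+;       const unsigned char *s = (const unsigned char *)src + count;
+;       unsigned char       *d = (unsigned char *)dst + count;
+;       size_t n     = count >> 2;      /* dwords, copied high to low     */
+;       size_t trail = count & 3;       /* low bytes, copied last         */
+;       while (n--)
+;           { s -= 4; d -= 4; *(unsigned int *)d = *(const unsigned int *)s; }
+;       while (trail--)
+;           { *--d = *--s; }
+;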
+ align @WordSize
+CopyDown:
+ std ; Set Direction Flag = Down
+ add esi,ecx ; point to byte after end of source buffer
+ add edi,ecx ; point to byte after end of dest buffer
+;
+; See if the destination start is dword aligned
+;
+
+ test edi,11b
+ jnz short byterampup_copydown ; not dword aligned
+;
+; Destination start is dword aligned
+;
+ mov edx,ecx ; set aside count of bytes to copy
+ and edx,11b ; # of trailing bytes to copy
+ sub esi,4 ; point to start of first dword to copy
+ sub edi,4 ; point to start of first dword to copy to
+ shr ecx,2 ; dwords to copy
+ rep movsd ; copy as many dwords as possible
+ jmp dword ptr TrailingVecs_copydown[edx*4] ;do any trailing bytes
+
+ align @WordSize
+TrailingVecs_copydown label dword
+ dd Trail0_copydown
+ dd Trail1_copydown
+ dd Trail2_copydown
+ dd Trail3_copydown
+
+ align @WordSize
+Trail3_copydown:
+ mov ax,[esi+2]
+ mov [edi+2],ax
+ mov al,[esi+1]
+ mov [edi+1],al
+ cld ; Set Direction Flag = Up
+
+ M_EXIT
+
+ align @WordSize
+Trail2_copydown:
+ mov ax,[esi+2]
+ mov [edi+2],ax
+ cld ; Set Direction Flag = Up
+
+ M_EXIT
+
+ align @WordSize
+Trail1_copydown:
+ mov al,[esi+3]
+ mov [edi+3],al
+Trail0_copydown:
+ cld ; Set Direction Flag = Up
+
+ M_EXIT
+
+;
+; Destination start is not dword aligned.
+;
+; Leading bytes could be handled faster via split-out optimizations and
+; a jump table (as trailing bytes are), at the cost of size.
+;
+; At this point, ECX is the # of bytes to copy, and EDX is the # of leading
+; bytes to copy.
+;
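+; Conceptually this mirrors the forward byterampup above, measured from the
+; high end of the buffers (a sketch of the intent, not of the exact register
+; choreography below):
+;
+;       size_t lead = ((uintptr_t)dst + count) & 3;  /* high bytes, first  */
+;       size_t rest = count - lead;
+;       /* then rest >> 2 dwords downward, then rest & 3 low bytes */
+;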
+ align @WordSize
+byterampup_copydown:
+ dec esi ; point to first leading src byte
+ dec edi ; point to first leading dest byte
+ cmp ecx,12 ; check for reasonable length
+ jbe short ShortMove_copydown ; do short move if appropriate
+ neg edx
+ and edx,11b
+ sub ecx,edx ; # of bytes after leading bytes
+ mov eax,ecx ; set aside # of bytes remaining
+ mov ecx,edx ; # of leading bytes
+ rep movsb ; copy leading odd bytes
+ mov ecx,eax ; # of remaining bytes
+ and eax,11b ; # of trailing bytes
+ sub esi,3 ; point to start of first whole src dword
+ sub edi,3 ; point to start of first whole dest dword
+ shr ecx,2 ; # of whole dwords
+ rep movsd ; copy whole dwords
+ jmp dword ptr TrailingVecs_copydown[eax*4]
+
+ align @WordSize
+ShortMove_copydown:
+ rep movsb
+ cld ; Set Direction Flag = Up
+
+ M_EXIT
+
+_MEM_ endp
+ end
+
+