summaryrefslogtreecommitdiffstats
path: root/private/crt32/string/i386/memcpy.asm
blob: 4cde3cf72f560f9cf78e336893518883108921f9 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
        page    ,132
        title   memcpy - Copy source memory bytes to destination
;***
;memcpy.asm - contains memcpy and memmove routines
;
;       Copyright (c) 1986-1991, Microsoft Corporation. All right reserved.
;
;Purpose:
;       memcpy() copies a source memory buffer to a destination buffer.
;       Overlapping buffers are not treated specially, so propagation may occur.
;       memmove() copies a source memory buffer to a destination buffer.
;       Overlapping buffers are treated specially, to avoid propagation.
;
;Revision History:
;       02-06-87  JCR   Added memmove entry
;       04-08-87  JCR   Conditionalized memmove/memcpy entries
;       06-30-87  SKS   Rewritten for speed and size
;       08-21-87  SKS   Fix return value for overlapping copies
;       05-17-88  SJM   Add model-independent (large model) ifdef
;       08-04-88  SJM   convert to cruntime/ add 32-bit support
;       08-19-88  JCR   Minor 386 corrections/enhancements
;       10-25-88  JCR   General cleanup for 386-only code
;       03-23-90  GJF   Changed to _stdcall. Also, fixed the copyright.
;       05-10-91  GJF   Back to _cdecl, sigh...
;       11-13-92  SRW   Make it fast with unaligned arguments
;
;*******************************************************************************

        .xlist
        include cruntime.inc
        .list

;
; M_EXIT - common function exit sequence.
;
; Loads the original destination pointer (the [dst] argument of the
; enclosing procedure) into EAX as the return value and returns to the
; caller.  When _STDCALL_ is defined the RET also pops the arguments
; (two pointers and one integer); otherwise a plain _cdecl RET is used.
;
M_EXIT  macro
        mov     eax,[dst]               ; return value = destination pointer
ifndef  _STDCALL_
        ret                             ; _cdecl: caller cleans the stack
else
        ret     2*DPSIZE + ISIZE        ; _stdcall: callee pops dst, src, count
endif
        endm    ; M_EXIT

        CODESEG

page
;***
;memcpy - Copy source buffer to destination buffer
;
;Purpose:
;       memcpy() copies a source memory buffer to a destination memory buffer.
;       This routine does NOT recognize overlapping buffers, and thus can lead
;       to propagation.
;       For cases where propagation must be avoided, memmove() must be used.
;
;       Algorithm:
;
;       void * memcpy(void * dst, void * src, size_t count)
;       {
;               void * ret = dst;
;
;               /*
;                * copy from lower addresses to higher addresses
;                */
;               while (count--)
;                       *dst++ = *src++;
;
;               return(ret);
;       }
;
;memmove - Copy source buffer to destination buffer
;
;Purpose:
;       memmove() copies a source memory buffer to a destination memory buffer.
;       This routine recognizes overlapping buffers to avoid propagation.
;       For cases where propagation is not a problem, memcpy() can be used.
;
;   Algorithm:
;
;       void * memmove(void * dst, void * src, size_t count)
;       {
;               void * ret = dst;
;
;               if (dst <= src || dst >= (src + count)) {
;                       /*
;                        * Non-Overlapping Buffers
;                        * copy from lower addresses to higher addresses
;                        */
;                       while (count--)
;                               *dst++ = *src++;
;                       }
;               else {
;                       /*
;                        * Overlapping Buffers
;                        * copy from higher addresses to lower addresses
;                        */
;                       dst += count - 1;
;                       src += count - 1;
;
;                       while (count--)
;                               *dst-- = *src--;
;                       }
;
;               return(ret);
;       }
;
;
;Entry:
;       void *dst = pointer to destination buffer
;       const void *src = pointer to source buffer
;       size_t count = number of bytes to copy
;
;Exit:
;       Returns a pointer to the destination buffer in EAX
;
;Uses:
;       ECX, EDX
;
;Exceptions:
;*******************************************************************************

;
; Select the public name of the routine assembled from this file:
; memmove when MEM_MOVE is defined, memcpy otherwise.
;
ifndef  MEM_MOVE
        _MEM_     equ <memcpy>
else
        _MEM_     equ <memmove>
endif

%       public  _MEM_
_MEM_   proc \
        uses edi esi, \
        dst:ptr byte, \
        src:ptr byte, \
        count:IWORD

;
; Arguments:
;       dst   - destination pointer
;       src   - source pointer
;       count - number of bytes to copy
;
; Register roles:
;       esi = source cursor
;       edi = destination cursor
;       ecx = byte / dword counter for the string instructions
;       eax, edx = scratch (leading / trailing byte counts)
;
; ESI and EDI are preserved for the caller by the "uses" clause above.
; Every exit goes through M_EXIT, which reloads EAX from [dst].
; The direction flag is set only on the CopyDown paths and is cleared
; again (cld) before each of their exits.
;

        mov     esi,[src]       ; esi = source
        mov     edi,[dst]       ; edi = dest
        mov     ecx,[count]     ; ecx = number of bytes to move

;
; Check for overlapping buffers:
;       If (dst <= src) Or (dst >= src + Count) Then
;               Do normal (Upwards) Copy
;       Else
;               Do Downwards Copy to avoid propagation
;
; All comparisons are unsigned (addresses).
;

        cmp     edi,esi         ; dst <= src ?
        jbe     short CopyUp    ; yes, copy toward higher addresses

        mov     eax,esi
        add     eax,ecx         ; eax = src + count
        cmp     edi,eax         ; dst >= (src + count) ?
        jnae    CopyDown        ; no, copy toward lower addresses

;
; Copy toward higher addresses.
;
CopyUp:

;
; The algorithm for forward moves is to align the destination to a dword
; boundary and so we can move dwords with an aligned destination.  This
; occurs in 3 steps.
;
;   - move x = ((4 - Dest & 3) & 3) bytes
;   - move y = ((L-x) >> 2) dwords
;   - move (L - x - y*4) bytes
;
        test    edi,11b          ; destination dword aligned?
        jnz     short byterampup ; if we are not dword aligned already, align

        mov     edx,ecx         ; byte count
        and     edx,11b         ; trailing byte count
        shr     ecx,2           ; shift down to dword count
        rep     movsd           ; move all of our dwords

        jmp     dword ptr TrailingVecs[edx*4]   ; dispatch on 0..3 trailing bytes

;
; Jump table for the 0..3 bytes left over after the upward dword copy.
; On entry to each handler ESI/EDI point at the first uncopied byte.
;
        align   @WordSize
TrailingVecs    dd      Trail0, Trail1, Trail2, Trail3

        align   @WordSize
Trail3:
        mov     ax,[esi]        ; copy bytes 0-1
        mov     [edi],ax
        mov     al,[esi+2]      ; copy byte 2
        mov     [edi+2],al

        M_EXIT

        align   @WordSize
Trail2:
        mov     ax,[esi]        ; copy bytes 0-1
        mov     [edi],ax

        M_EXIT

        align   @WordSize
Trail1:
        mov     al,[esi]        ; copy byte 0
        mov     [edi],al

Trail0:
        M_EXIT

;
; Code to do optimal memory copies for non-dword-aligned destinations.
;
        align   @WordSize
byterampup:

; The following length check is done for two reasons:
;
;    1. to ensure that the actual move length is greater than any possible
;       alignment move, and
;
;    2. to skip the multiple move logic for small moves where it would
;       be faster to move the bytes with one instruction.
;
; Leading bytes could be handled faster via split-out optimizations and
; a jump table (as trailing bytes are), at the cost of size.
;
; At this point, ECX is the # of bytes to copy.  The # of leading bytes
; needed to align the destination is computed into EDX below.
;
        cmp     ecx,12                  ; check for reasonable length
        jbe     short ShortMove         ; do short move if appropriate
        mov     edx,edi
        neg     edx
        and     edx,11b                 ; # of leading bytes = (-dst) & 3
        sub     ecx,edx                 ; subtract out leading bytes
        mov     eax,ecx                 ; # of bytes remaining after leading
        mov     ecx,edx                 ; # of leading bytes
        rep     movsb                   ; copy leading bytes
        mov     ecx,eax                 ; compute number of dwords to move
        and     eax,11b                 ; # of trailing bytes
        shr     ecx,2                   ; # of whole dwords
        rep     movsd                   ; move whole dwords
        jmp     dword ptr TrailingVecs[eax*4] ; copy trailing bytes

;
; Simple copy, byte at a time. This could be faster with a jump table and
; split-out optimizations, copying as much as possible a dword/word at a
; time and using MOV with displacements, but such short cases are unlikely
; to be called often (it seems silly to call a function to copy less than
; three dwords).
;
        align   @WordSize
ShortMove:
        rep movsb

        M_EXIT

;
; Copy down to avoid propagation in overlapping buffers.
;
        align   @WordSize
CopyDown:
        std                     ; Set Direction Flag = Down
        add     esi,ecx         ; point to byte after end of source buffer
        add     edi,ecx         ; point to byte after end of dest buffer
;
; See if the destination end (the address copying starts from) is dword
; aligned
;

        test    edi,11b
        jnz     short byterampup_copydown       ; not dword aligned
;
; Destination end is dword aligned
;
        mov     edx,ecx         ; set aside count of bytes to copy
        and     edx,11b         ; # of trailing bytes to copy
        sub     esi,4           ; point to start of first dword to copy
        sub     edi,4           ; point to start of first dword to copy to
        shr     ecx,2           ; dwords to copy
        rep     movsd           ; copy as many dwords as possible
        jmp     dword ptr TrailingVecs_copydown[edx*4] ;do any trailing bytes

;
; Jump table for the 0..3 bytes left over after the downward dword copy.
; On entry to each handler ESI/EDI point 4 bytes below the last dword
; copied, so the remaining bytes are at offsets +3, +2, +1 (highest first).
;
        align   @WordSize
TrailingVecs_copydown   label   dword
        dd      Trail0_copydown
        dd      Trail1_copydown
        dd      Trail2_copydown
        dd      Trail3_copydown

        align   @WordSize
Trail3_copydown:
        mov     ax,[esi+2]      ; copy bytes at +2,+3
        mov     [edi+2],ax
        mov     al,[esi+1]      ; copy byte at +1
        mov     [edi+1],al
        cld                     ; Set Direction Flag = Up

        M_EXIT

        align   @WordSize
Trail2_copydown:
        mov     ax,[esi+2]      ; copy bytes at +2,+3
        mov     [edi+2],ax
        cld                     ; Set Direction Flag = Up

        M_EXIT

        align   @WordSize
Trail1_copydown:
        mov     al,[esi+3]      ; copy byte at +3
        mov     [edi+3],al
Trail0_copydown:
        cld                     ; Set Direction Flag = Up

        M_EXIT

;
; Destination end is not dword aligned.
;
; Leading bytes could be handled faster via split-out optimizations and
; a jump table (as trailing bytes are), at the cost of size.
;
; At this point, ECX is the # of bytes to copy and ESI/EDI point one byte
; past the end of the source/destination buffers.  The # of leading bytes
; is computed into EDX below; EDX holds nothing meaningful on entry.
;
        align   @WordSize
byterampup_copydown:
        dec     esi             ; point to first leading src byte
        dec     edi             ; point to first leading dest byte
        cmp     ecx,12          ; check for reasonable length
        jbe     short ShortMove_copydown ; do short move if appropriate
;
; Copying downward, the bytes that must be moved singly are those between
; the end of the destination and the dword boundary just below it, i.e.
; (dest_end & 3) bytes.  EDI was decremented above, so dest_end = EDI + 1.
; LEA is used so EDI itself is left untouched.
;
        lea     edx,[edi+1]     ; edx = one past end of dest
        and     edx,11b         ; # of leading bytes (1..3 here)
        sub     ecx,edx         ; # of bytes after leading bytes
        mov     eax,ecx         ; set aside # of bytes remaining
        mov     ecx,edx         ; # of leading bytes
        rep     movsb           ; copy leading odd bytes
        mov     ecx,eax         ; # of remaining bytes
        and     eax,11b         ; # of trailing bytes
        sub     esi,3           ; point to start of first whole src dword
        sub     edi,3           ; point to start of first whole dest dword
        shr     ecx,2           ; # of whole dwords
        rep     movsd           ; copy whole dwords
        jmp     dword ptr TrailingVecs_copydown[eax*4]

        align   @WordSize
ShortMove_copydown:
        rep     movsb           ; copy all bytes, highest address first
        cld                     ; Set Direction Flag = Up

        M_EXIT

_MEM_   endp
        end