1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
|
page ,132
title memcpy - Copy source memory bytes to destination
;***
;memcpy.asm - contains memcpy and memmove routines
;
; Copyright (c) 1986-1991, Microsoft Corporation. All right reserved.
;
;Purpose:
; memcpy() copies a source memory buffer to a destination buffer.
; Overlapping buffers are not treated specially, so propagation may occur.
; memmove() copies a source memory buffer to a destination buffer.
; Overlapping buffers are treated specially, to avoid propagation.
;
;Revision History:
; 02-06-87 JCR Added memmove entry
; 04-08-87 JCR Conditionalized memmove/memcpy entries
; 06-30-87 SKS Rewritten for speed and size
; 08-21-87 SKS Fix return value for overlapping copies
; 05-17-88 SJM Add model-independent (large model) ifdef
; 08-04-88 SJM convert to cruntime/ add 32-bit support
; 08-19-88 JCR Minor 386 corrections/enhancements
; 10-25-88 JCR General cleanup for 386-only code
; 03-23-90 GJF Changed to _stdcall. Also, fixed the copyright.
; 05-10-91 GJF Back to _cdecl, sigh...
; 11-13-92 SRW Make it fast with unaligned arguments
;
;*******************************************************************************
.xlist
include cruntime.inc
.list
;
; M_EXIT - common exit sequence for the copy routine.
;
; Loads the dst argument into eax as the return value and returns.
; When _STDCALL_ is defined, the return also removes 2*DPSIZE + ISIZE bytes
; of arguments from the stack; otherwise a plain ret is used and the caller
; cleans up.
; Must be expanded inside a procedure that has a dst argument.
; NOTE(review): DPSIZE and ISIZE are presumably the data-pointer and int
; sizes supplied by cruntime.inc -- confirm.
;
M_EXIT macro
mov eax,[dst] ; return pointer to destination
ifdef _STDCALL_
ret 2*DPSIZE + ISIZE ; _stdcall return
else
ret ; _cdecl return
endif
endm ; M_EXIT
; NOTE(review): CODESEG is presumably the cruntime.inc macro that opens the
; code segment -- confirm against cruntime.inc.
CODESEG
page ; start a new page in the assembly listing
;***
;memcpy - Copy source buffer to destination buffer
;
;Purpose:
; memcpy() copies a source memory buffer to a destination memory buffer.
; This routine does NOT recognize overlapping buffers, and thus can lead
; to propagation.
; For cases where propagation must be avoided, memmove() must be used.
;
; Algorithm:
;
; void * memcpy(void * dst, void * src, size_t count)
; {
; void * ret = dst;
;
; /*
; * copy from lower addresses to higher addresses
; */
; while (count--)
; *dst++ = *src++;
;
; return(ret);
; }
;
;memmove - Copy source buffer to destination buffer
;
;Purpose:
; memmove() copies a source memory buffer to a destination memory buffer.
; This routine recognizes overlapping buffers to avoid propagation.
; For cases where propagation is not a problem, memcpy() can be used.
;
; Algorithm:
;
; void * memmove(void * dst, void * src, size_t count)
; {
; void * ret = dst;
;
; if (dst <= src || dst >= (src + count)) {
; /*
; * Non-Overlapping Buffers
; * copy from lower addresses to higher addresses
; */
; while (count--)
; *dst++ = *src++;
; }
; else {
; /*
; * Overlapping Buffers
; * copy from higher addresses to lower addresses
; */
; dst += count - 1;
; src += count - 1;
;
; while (count--)
; *dst-- = *src--;
; }
;
; return(ret);
; }
;
;
;Entry:
; void *dst = pointer to destination buffer
; const void *src = pointer to source buffer
; size_t count = number of bytes to copy
;
;Exit:
; Returns a pointer to the destination buffer in EAX
;
;Uses:
; ECX, EDX
;
;Exceptions:
;*******************************************************************************
;
; Choose the public name of the procedure at assembly time:
; memmove when MEM_MOVE is defined, memcpy otherwise.
; The procedure is declared under the _MEM_ text macro, so the same body is
; assembled under whichever name is selected here.
;
ifdef MEM_MOVE
_MEM_ equ <memmove>
else
_MEM_ equ <memcpy>
endif
; The leading % forces _MEM_ to be expanded before the line is processed,
; so the selected name is the one made public.
% public _MEM_
_MEM_ proc \
        uses edi esi, \
        dst:ptr byte, \
        src:ptr byte, \
        count:IWORD

;
; Arguments:
;       dst   - destination pointer
;       src   - source pointer
;       count - number of bytes to copy
;
; Register roles:
;       esi      - running source address
;       edi      - running destination address
;       ecx      - count consumed by the next REP MOVSB / REP MOVSD
;       eax, edx - scratch (byte counts and jump-table index)
;
; The upward paths never change the direction flag, so they rely on it being
; clear on entry.  The downward paths set it (STD) and clear it again (CLD)
; before every exit.
;

        mov     esi,[src]               ; esi = source
        mov     edi,[dst]               ; edi = dest
        mov     ecx,[count]             ; ecx = number of bytes to move

;
; Check for overlapping buffers:
;       If (dst <= src) Or (dst >= src + Count) Then
;               Do normal (Upwards) Copy
;       Else
;               Do Downwards Copy to avoid propagation
;
; The compares are unsigned because the operands are addresses.
;

        cmp     edi,esi                 ; dst <= src ?
        jbe     short CopyUp            ; yes, copy toward higher addresses

        mov     eax,esi
        add     eax,ecx                 ; eax = src + count
        cmp     edi,eax                 ; dst >= (src + count) ?
        jnae    CopyDown                ; no, copy toward lower addresses

;
; Copy toward higher addresses.
;
CopyUp:

;
; The algorithm for forward moves is to align the destination to a dword
; boundary so that dwords can be moved with an aligned destination.  This
; occurs in 3 steps.
;
;   - move x = ((4 - Dest & 3) & 3) bytes
;   - move y = ((L-x) >> 2) dwords
;   - move (L - x - y*4) bytes
;

        test    edi,11b                 ; destination dword aligned?
        jnz     short byterampup        ; if not dword aligned already, align

        mov     edx,ecx                 ; byte count
        and     edx,11b                 ; trailing byte count
        shr     ecx,2                   ; shift down to dword count
        rep     movsd                   ; move all of our dwords

        jmp     dword ptr TrailingVecs[edx*4] ; dispatch on trailing count

;
; Jump table indexed by the number of trailing bytes (0..3).  On arrival at
; a TrailN label, esi/edi point at the first byte not yet copied.
;
        align   @WordSize
TrailingVecs    dd      Trail0, Trail1, Trail2, Trail3

        align   @WordSize
Trail3:
        mov     ax,[esi]                ; first two trailing bytes
        mov     [edi],ax
        mov     al,[esi+2]              ; third trailing byte
        mov     [edi+2],al
        M_EXIT

        align   @WordSize
Trail2:
        mov     ax,[esi]                ; both trailing bytes at once
        mov     [edi],ax
        M_EXIT

        align   @WordSize
Trail1:
        mov     al,[esi]                ; single trailing byte
        mov     [edi],al
Trail0:
        M_EXIT

;
; Code to do optimal memory copies for non-dword-aligned destinations.
;
        align   @WordSize
byterampup:

; The following length check is done for two reasons:
;
;    1. to ensure that the actual move length is greater than any possible
;       alignment move, and
;
;    2. to skip the multiple move logic for small moves where it would
;       be faster to move the bytes with one instruction.
;
; Leading bytes could be handled faster via split-out optimizations and
; a jump table (as trailing bytes are), at the cost of size.
;
; At this point, ECX is the # of bytes to copy.  EDX (the # of leading
; bytes) is computed below, once the length check has passed.
;

        cmp     ecx,12                  ; check for reasonable length
        jbe     short ShortMove         ; do short move if appropriate

        mov     edx,edi
        neg     edx
        and     edx,11b                 ; # of leading bytes = (-dest) & 3
        sub     ecx,edx                 ; subtract out leading bytes
        mov     eax,ecx                 ; # of bytes remaining after leading
        mov     ecx,edx                 ; # of leading bytes
        rep     movsb                   ; copy leading bytes
        mov     ecx,eax                 ; compute number of dwords to move
        and     eax,11b                 ; # of trailing bytes
        shr     ecx,2                   ; # of whole dwords
        rep     movsd                   ; move whole dwords
        jmp     dword ptr TrailingVecs[eax*4] ; copy trailing bytes

;
; Simple copy, byte at a time.  This could be faster with a jump table and
; split-out optimizations, copying as much as possible a dword/word at a
; time and using MOV with displacements, but such short cases are unlikely
; to be called often (it seems silly to call a function to copy less than
; three dwords).
;
        align   @WordSize
ShortMove:
        rep     movsb                   ; ecx = whole byte count (may be 0)
        M_EXIT

;
; Copy down to avoid propagation in overlapping buffers.
;
        align   @WordSize
CopyDown:
        std                             ; Set Direction Flag = Down
        add     esi,ecx                 ; point to byte after end of source buffer
        add     edi,ecx                 ; point to byte after end of dest buffer

;
; See if the end of the destination (where the downward copy starts) is
; dword aligned.
;
        test    edi,11b
        jnz     short byterampup_copydown ; not dword aligned

;
; Destination end is dword aligned.
;
        mov     edx,ecx                 ; set aside count of bytes to copy
        and     edx,11b                 ; # of trailing bytes to copy
        sub     esi,4                   ; point to start of first dword to copy
        sub     edi,4                   ; point to start of first dword to copy to
        shr     ecx,2                   ; dwords to copy
        rep     movsd                   ; copy as many dwords as possible
        jmp     dword ptr TrailingVecs_copydown[edx*4] ; do any trailing bytes

;
; Jump table indexed by the number of trailing bytes (0..3).  On arrival at
; a TrailN_copydown label, esi/edi point at the dword slot just below the
; last dword copied, so the bytes still to be copied are the highest N bytes
; of that slot (offsets 4-N .. 3).
;
        align   @WordSize
TrailingVecs_copydown label dword
        dd      Trail0_copydown
        dd      Trail1_copydown
        dd      Trail2_copydown
        dd      Trail3_copydown

        align   @WordSize
Trail3_copydown:
        mov     ax,[esi+2]              ; bytes at offsets 2 and 3
        mov     [edi+2],ax
        mov     al,[esi+1]              ; byte at offset 1
        mov     [edi+1],al
        cld                             ; Set Direction Flag = Up
        M_EXIT

        align   @WordSize
Trail2_copydown:
        mov     ax,[esi+2]              ; bytes at offsets 2 and 3
        mov     [edi+2],ax
        cld                             ; Set Direction Flag = Up
        M_EXIT

        align   @WordSize
Trail1_copydown:
        mov     al,[esi+3]              ; byte at offset 3
        mov     [edi+3],al
Trail0_copydown:
        cld                             ; Set Direction Flag = Up
        M_EXIT

;
; Destination end is not dword aligned.
;
; Leading bytes could be handled faster via split-out optimizations and
; a jump table (as trailing bytes are), at the cost of size.
;
; At this point, ECX is the # of bytes to copy and ESI/EDI point one byte
; past the end of the source/destination buffers.  EDX has not been written
; on this path yet; the # of leading bytes is computed below.
;
        align   @WordSize
byterampup_copydown:
        dec     esi                     ; point to first leading src byte
        dec     edi                     ; point to first leading dest byte

        cmp     ecx,12                  ; check for reasonable length
        jbe     short ShortMove_copydown ; do short move if appropriate

;
; edi + 1 is the address just past the end of the destination.  Its low two
; bits are the number of bytes that must be copied singly so that the dword
; copy that follows runs on dword-aligned destination addresses.  The value
; is 1..3 here because the aligned case was branched away above.
;
        lea     edx,[edi+1]             ; edx = address past end of dest
        and     edx,11b                 ; # of leading bytes
        sub     ecx,edx                 ; # of bytes after leading bytes
        mov     eax,ecx                 ; set aside # of bytes remaining
        mov     ecx,edx                 ; # of leading bytes
        rep     movsb                   ; copy leading odd bytes
        mov     ecx,eax                 ; # of remaining bytes
        and     eax,11b                 ; # of trailing bytes
        sub     esi,3                   ; point to start of first whole src dword
        sub     edi,3                   ; point to start of first whole dest dword
        shr     ecx,2                   ; # of whole dwords
        rep     movsd                   ; copy whole dwords
        jmp     dword ptr TrailingVecs_copydown[eax*4] ; copy trailing bytes

;
; Short downward copy, byte at a time, starting from the last byte of each
; buffer.
;
        align   @WordSize
ShortMove_copydown:
        rep     movsb
        cld                             ; Set Direction Flag = Up
        M_EXIT

_MEM_ endp
end
|