summaryrefslogtreecommitdiffstats
path: root/private/crt32/misc/alpha/strcpy_.s
blob: de20577157aa482894eb6206b592cc5187095d6a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
 #****************************************************************************
 #*									     *
 #*  Copyright (c) 1991 by						     *
 #*  DIGITAL EQUIPMENT CORPORATION, Maynard, Massachusetts.		     *
 #*  All rights reserved.						     *
 #* 									     *
 #*  This software is furnished under a license and may be used and copied   *
 #*  only in  accordance with  the  terms  of  such  license  and with the   *
 #*  inclusion of the above copyright notice. This software or  any  other   *
 #*  copies thereof may not be provided or otherwise made available to any   *
 #*  other person.  No title to and ownership of  the  software is  hereby   *
 #*  transferred.							     *
 #* 									     *
 #*  The information in this software is  subject to change without notice   *
 #*  and  should  not  be  construed as  a commitment by Digital Equipment   *
 #*  Corporation.							     *
 #* 									     *
 #*  Digital assumes no responsibility for the use  or  reliability of its   *
 #*  software on equipment which is not supplied by Digital.		     *
 #* 									     *
 #*									     *
 #****************************************************************************
 #
 #++
 # Facility: 
 #	DEC C Run Time Library on the Alpha/WNT Platform
 #
 # Abstract:
 #
 #   Implements the C RTL function strcpy() for the compiler intrinsic.
 #
 # Author: 
 #	Bill Noyce		9-Aug-1991
 #
 # Modified by:
 #
 #	001	Kevin Routley	10-Sep-1991
 #		Modified to C RTL Coding standards.
 #
 #	002	Chris Bord	30 September 1991
 #		Add decc$ prefixes.
 #
 #	003	Chris Bord	24 January 1992
 #		Add second parameter to .procedure_descriptor directive
 #
 #	004	John Parks	22 January 1993
 #		Ported to Alpha/NT.
 #--

	.globl 	_Otsstrcpy
	.ent	_Otsstrcpy

 # _Otsstrcpy: copy the null-terminated string at src to dst, including
 # the terminating null.  Data is moved a quadword (QW, 8 bytes) at a
 # time; bytes of the first and last dst QWs that lie outside the string
 # are reloaded from dst and merged back so they are left unchanged.
 #
 # Three cases are handled, chosen from the low 3 bits of each pointer:
 #	dst aligned,   src aligned	-> a_loop
 #	dst aligned,   src unaligned	-> src_unaligned (joins u_loop)
 #	dst unaligned, any src		-> dst_unaligned (u_loop, or a_loop
 #					   via "match" if alignments agree)
 #
 # Null detection uses "cmpbge $31, x, m": bit i of m is set when byte i
 # of x is zero.  "m ^ (m-1)" then yields a byte mask with bits 0..k set,
 # where k is the lowest-numbered (first) null byte.
 #
 # NOTE(review): the "#/" comment marker appears to flag the second
 # instruction of an intended dual-issue pair, and the bare nop's look
 # like issue/alignment padding -- confirm against the target CPU's
 # scheduling rules before moving any instruction.
 #
 # r16 = dst
 # r17 = src
 # returns r0 = dst (copied from r16 before r16 is advanced)
 # destroys r16-r21, r27-r28

_Otsstrcpy:
 # noat: this code uses $28 directly.  noreorder: keep the instruction
 # order exactly as written below.
	.set 	noat
	.set	noreorder

	ldq_u	$27, ($17)		#  Get first src QW
	and	$16, 7, $28		#/ Is dst aligned?
	lda	$18, -1($31)		#  Get a mask of all 1's
	bne	$28, dst_unaligned	#/ Go handle unaligned dst
	and	$17, 7, $19		#  Is src aligned too?
	nop				#  (padding; see scheduling note above)
	mov	$16, $0			#  Set up function result
	bne	$19, src_unaligned	#/ Go handle aligned dst, unaligned src

 # Aligned copy loop, unrolled twice; r27 and r21 alternate as the src QW
 # being tested and stored.  At a_loop:
 #	r27 = src QW loaded from (r17), not yet stored
 #	r16 = dst address that r27 belongs at
 # At match (also entered from dst_unaligned when src and dst share the
 # same misalignment):
 #	r27 = complete QW to store at (r16)
 #	r21 = next src QW
 # stq_u/ldq_u are used on dst so that a misaligned r16 from the "match"
 # entry still addresses the enclosing QW.

a_loop:
	cmpbge	$31, $27, $18		#  Any nulls in src QW?
	bne	$18, a_exit_1		#  Finish up if so
	ldq	$21, 8($17)		#  Load next QW if not
match:					# Enter if src matches unaligned dst
	addq	$17, 16, $17		#/ Update src pointer for unrolled loop
	stq_u	$27, ($16)		#  Store a whole QW
	addq	$16, 16, $16		#/ Update dst pointer for unrolled loop
	cmpbge	$31, $21, $18		#  Any nulls in src QW?
	bne	$18, a_exit_2		#  Finish up if so
	ldq	$27, ($17)		#  Load next QW if not
	stq_u	$21, -8($16)		#  Store a whole QW
	br	$31, a_loop		#  Repeat during load latency

 # Null found in r27 (first half of the unrolled loop).  r18 = null mask.
 # r17 is reused as scratch; the src pointer is no longer needed.

a_exit_1:
	ldq_u	$21, ($16)		#  Get dst QW to update
	subq	$18, 1, $17		#/ Use location of null byte...
	xor	$18, $17, $18		#  ... to compute mask of what to keep
	zapnot	$27, $18, $27		#  Keep src up to & including null
	zap	$21, $18, $21		#  Make room for new data
	nop				#  (padding; see scheduling note above)
	or	$21, $27, $21		#  Combine src & dst...
	stq_u	$21, ($16)		#/ ... and store
	ret	$31, ($26)		#  Return to caller (address in r26)

	nop				#  (padding; see scheduling note above)

 # Null found in r21 (second half of the unrolled loop).  r16 has already
 # been advanced by 16, so the QW being finished is at -8(r16).

a_exit_2:
	ldq_u	$27, -8($16)		#  Get dst QW to update
	subq	$18, 1, $17		#/ Use location of null byte...
	xor	$18, $17, $18		#  ... to compute mask of what to keep
	zapnot	$21, $18, $21		#  Keep src up to & including null
	zap	$27, $18, $27		#  Make room for new data
	nop				#  (padding; see scheduling note above)
	or	$27, $21, $27		#  Combine src & dst...
	stq_u	$27, -8($16)		#/ ... and store
	ret	$31, ($26)		#  Return to caller (address in r26)

 # dst aligned, src unaligned.  Enter with r18 = -1, r27 = first src QW,
 # r28 = 0 (dst alignment).  Bytes of r27 that precede the string are
 # forced nonzero before the null test so stale data cannot look like a
 # terminator.

src_unaligned:			# dst_unaligned code would work; is this faster?
	mskqh	$18, $17, $18		#  Zeros where src to be ignored
	ornot	$27, $18, $19		#  Make ignored bytes nonzero
	cmpbge	$31, $19, $21		#  Any null bytes in src data?
	extql	$27, $17, $27		#  Move src to position of dst
	bne	$21, short_ld		#/ Finish up if nulls seen
	ldq_u	$19, 8($17)		#  Next src QW needed to fill dst
	br	$31, u_entry_2		#  Enter loop for mismatched alignment

 # Here's the hard part.  Enter with
 #	r16 = dst address
 #	r17 = src address
 #	r18 = -1
 #	r27 = first src QW
 #	r28 = dst alignment (>0)
 # Check whether the first src QW has any nulls, and load the next one.
 # Combine these if needed to fill the first dst QW, and enter a loop
 # that fetches src QWs and checks them, while storing dst QWs.

dst_unaligned:
	ldq_u	$20, ($16)		#  Load dst to be updated
	mskqh	$18, $17, $18		#/ Zeros where src to be ignored
	mov	$16, $0			#  Set up function result
	ornot	$27, $18, $19		#  Make ignored bytes of src nonzero
	cmpbge	$31, $19, $21		#  Any null bytes in src data?
	extql	$27, $17, $27		#  Get only interesting src data
	bne	$21, short		#  Finish up if nulls seen
	mskql	$20, $16, $20		#/ Make room in dst
	ldq_u	$21, 8($17)		#  Load next src QW if no nulls
	mskql	$18, $16, $18		#/ Need two src QWs for first dst QW?
	insql	$27, $16, $27		#  Move src data to position of dst
	subq	$17, $28, $17		#  Adjust src ptr for partial move
	and	$17, 7, $28		#  Is src now aligned?
	bne	$18, u_loop		#/ Enter loop if one src QW fills dst
	or	$27, $20, $27		#  Combine first src QW with dst
	extqh	$21, $17, $20		#  Position 2nd src QW in 1st dst QW
	cmpbge	$31, $21, $18		#  Any nulls in next src QW?
	beq	$28, match		#/ If src aligned, use quick loop
	mov	$21, $19		#  Put src QW where loop expects
	bne	$18, short_a		#/ Finish up if nulls seen

 # Mismatched-alignment copy loop, unrolled twice.  Each dst QW is built
 # from the high part of one src QW (extql) and the low part of the next
 # (extqh).  r19 and r28 alternate as "last loaded src QW".
 #
 # r16 = address of next dst to store
 # r17 = address-16 of next src to load
 # r18
 # r19 = last loaded src QW
 # r20 = one piece of dst QW
 # r21
 # r27 = other piece of dst QW
 # r28

u_loop:
	ldq_u	$28, 16($17)		#  Load another src QW
	addq	$17, 16, $17		#/ Update src pointer for unrolled loop
	or	$27, $20, $27		#  Combine pieces
	extql	$19, $17, $20		#  Get second part of prior src QW
	stq_u	$27, ($16)		#  Store a dst QW
	cmpbge	$31, $28, $19		#/ Any nulls in this src QW?
	extqh	$28, $17, $27		#  Get first part of this src QW
	bne	$19, u_exit_2		#/ Finish up if nulls seen
	ldq_u	$19, 8($17)		#  Load another src QW
	addq	$16, 16, $16		#/ Update dst pointer for unrolled loop
	or	$27, $20, $20		#  Combine pieces
	extql	$28, $17, $27		#  Get second piece of prior src QW
	stq_u	$20, -8($16)		#  Store a dst QW
u_entry_2:
	cmpbge	$31, $19, $28		#/ Any nulls in this src QW?
	extqh	$19, $17, $20		#  Get first part of this src QW
	beq	$28, u_loop		#/ Repeat if no nulls seen

 # Null seen in the second half of the loop: rewind r16 and move the src
 # QW into r28 so both halves can share the exit code below, which
 # addresses the dst QW(s) to finish at 8(r16) and 16(r16).

	subq	$16, 8, $16		#  Undo part of pointer update
	mov	$19, $28		#  Move src QW to expected place
u_exit_2:
	or	$27, $20, $27		#  Combine pieces
	ldq_u	$18, 8($16)		#/ Load dst to update
	cmpbge	$31, $27, $21		#  Is null in first dst QW?
	bne	$21, u_exit_3		#  Skip if so
	stq_u	$27, 8($16)		#  Store a whole dst QW
	extql	$28, $17, $27		#/ Get second part of src QW
	ldq_u	$18, 16($16)		#  We'll update next dst QW
	cmpbge	$31, $27, $21		#  Find location of null there
	addq	$16, 8, $16		#  Update dst pointer
u_exit_3:
	subq	$21, 1, $28		#  Using position of null byte...
	xor	$21, $28, $21		#  ... make mask for desired src data
	zapnot	$27, $21, $27		#  Trim src data after null
	zap	$18, $21, $18		#  Make room for it in dst
	nop				#  (padding; see scheduling note above)
	or	$27, $18, $27		#  Combine pieces
	stq_u	$27, 8($16)		#/ Store dst QW
	ret	$31, ($26)		#  Return to caller (address in r26)

 # The whole string (null included) lies in the first src QW.  Enter
 # short with
 #	r16 = dst address
 #	r20 = first dst QW
 #	r27 = src data shifted down so the string starts at byte 0
 #	r28 = dst alignment (0 when entered via short_ld)
 # r17 is reused for the null mask.  The string may still straddle two
 # dst QWs, hence the test on the low 8 bits of the shifted mask.

short_ld:
	ldq_u	$20, ($16)		#  Load dst QW to update
short:
	cmpbge	$31, $27, $17		#/ Get mask showing location of null
	insql	$27, $16, $18		#  Move src data to position of dst
	mskql	$20, $16, $19		#  Get dst bytes preceding string
	sll	$17, $28, $17		#  Move mask in the same way
	or	$18, $19, $18		#  Combine src & dst
	and	$17, 255, $28		#  Null byte in first dst QW?
	subq	$17, 1, $19		#  Using position of null byte...
	xor	$17, $19, $17		#  ... make mask for desired src data
	bne	$28, short_2		#/ Skip if null in first dst QW
	ldq_u	$20, 8($16)		#  Load second dst QW
	srl	$17, 8, $17		#/ Move mask down for use
	stq_u	$18, ($16)		#  Store first dst QW
	insqh	$27, $16, $18		#/ Move src data to position of dst
	addq	$16, 8, $16		#  Advance dst pointer
short_2:
	zap	$20, $17, $20		#  Preserve dst data following null
	zapnot	$18, $17, $18		#  Trim src data after null
	nop				#  (padding; see scheduling note above)
	or	$18, $20, $18		#  Combine pieces
	stq_u	$18, ($16)		#/ Store dst QW
	ret	$31, ($26)		#  Return to caller (address in r26)

 # Null found in the second src QW while still building the first dst QW
 # (dst unaligned, two src QWs needed).  Enter with
 #
 # r16 = dst address
 # r17 = updated src address
 # r18 = null position
 # r19 = next src QW
 # r20 = first part of r19, positioned for dst
 # r21
 # r27 = dst QW so far
 # r28 = low bits of updated src address

short_a:
	sll	$18, 8, $18		#  Shift location of null byte...
	ldq_u	$21, ($16)		#/ Reload first dst QW
	or	$27, $20, $27		#  Combine pieces
	srl	$18, $28, $18		#  ... to position in dst QW's
	nop				#  (padding; see scheduling note above)
	and	$18, 255, $20		#  Is null in first dst QW?
	subq	$18, 1, $28		#  Using position of null byte...
	xor	$18, $28, $18		#  ... make mask for desired src data
	bne	$20, short_a1		#/ Skip if null in first QW
	stq_u	$27, ($16)		#  Store a whole dst QW
	extql	$19, $17, $27		#/ Prepare next piece of src
	ldq_u	$21, 8($16)		#  Load second dst QW for update
	srl	$18, 8, $18		#/ Look at next 8 bits of mask
	addq	$16, 8, $16		#  Update dst pointer
short_a1:
	zapnot	$27, $18, $27		#  Keep src data
	zap	$21, $18, $21		#  Keep end of dst QW
	nop				#  (padding; see scheduling note above)
	or	$27, $21, $27		#  Combine pieces
	stq_u	$27, ($16)		#  Store last dst QW
	ret	$31, ($26)		#  Return to caller (address in r26)

	.set	at
	.set	reorder
	.end	_Otsstrcpy