diff options
author | Adam <you@example.com> | 2020-05-17 05:51:50 +0200 |
---|---|---|
committer | Adam <you@example.com> | 2020-05-17 05:51:50 +0200 |
commit | e611b132f9b8abe35b362e5870b74bce94a1e58e (patch) | |
tree | a5781d2ec0e085eeca33cf350cf878f2efea6fe5 /private/crt32/string/alpha/strcpys.s | |
download | NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.gz NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.bz2 NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.lz NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.xz NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.zst NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.zip |
Diffstat (limited to 'private/crt32/string/alpha/strcpys.s')
-rw-r--r-- | private/crt32/string/alpha/strcpys.s | 257 |
1 files changed, 257 insertions, 0 deletions
diff --git a/private/crt32/string/alpha/strcpys.s b/private/crt32/string/alpha/strcpys.s new file mode 100644 index 000000000..c326688eb --- /dev/null +++ b/private/crt32/string/alpha/strcpys.s @@ -0,0 +1,257 @@ + #**************************************************************************** + #* * + #* Copyright (c) 1991 by * + #* DIGITAL EQUIPMENT CORPORATION, Maynard, Massachusetts. * + #* All rights reserved. * + #* * + #* This software is furnished under a license and may be used and copied * + #* only in accordance with the terms of such license and with the * + #* inclusion of the above copyright notice. This software or any other * + #* copies thereof may not be provided or otherwise made available to any * + #* other person. No title to and ownership of the software is hereby * + #* transferred. * + #* * + #* The information in this software is subject to change without notice * + #* and should not be construed as a commitment by Digital Equipment * + #* Corporation. * + #* * + #* Digital assumes no responsibility for the use or reliability of its * + #* software on equipment which is not supplied by Digital. * + #* * + #* * + #**************************************************************************** + # + #++ + # Facility: + # DEC C Run Time Library on the Alpha/WNT Platform + # + # Abstract: + # + # Implements the C RTL function strcpy(). + # + # Author: + # Bill Noyce 9-Aug-1991 + # + # Modified by: + # + # 001 Kevin Routley 10-Sep-1991 + # Modified to C RTL Coding standards. + # + # 002 Chris Bord 30 September 1991 + # Add decc$ prefixes. + # + # 003 Chris Bord 24 January 1992 + # Add second parameter to .procedure_descriptor directive + # + # 004 John Parks 22 January 1993 + # Ported to Alpha/NT. + #-- + + .globl strcpy + .ent strcpy + + # r16 = dst + # r17 = src + # returns r0 = dst (r16 is copied to r0 before r16 is advanced) + # destroys r16-r21, r27-r28 + +strcpy: + .set noat + .set noreorder + + ldq_u $27, ($17) # Get first src QW + and $16, 7, $28 #/ Is dst aligned? 
+ lda $18, -1($31) # Get a mask of all 1's + bne $28, dst_unaligned #/ Go handle unaligned dst + and $17, 7, $19 # Is src aligned too? + nop + mov $16, $0 # Set up function result + bne $19, src_unaligned #/ Go handle aligned dst, unaligned src + +a_loop: + cmpbge $31, $27, $18 # Any nulls in src QW? (cmpbge against $31, which reads as zero, sets one result bit per null byte) + bne $18, a_exit_1 # Finish up if so + ldq $21, 8($17) # Load next QW if not +match: # Enter if src matches unaligned dst + addq $17, 16, $17 #/ Update src pointer for unrolled loop + stq_u $27, ($16) # Store a whole QW + addq $16, 16, $16 #/ Update dst pointer for unrolled loop + cmpbge $31, $21, $18 # Any nulls in src QW? + bne $18, a_exit_2 # Finish up if so + ldq $27, ($17) # Load next QW if not + stq_u $21, -8($16) # Store a whole QW + br $31, a_loop # Repeat during load latency + +a_exit_1: + ldq_u $21, ($16) # Get dst QW to update + subq $18, 1, $17 #/ Use location of null byte... + xor $18, $17, $18 # ... to compute mask of what to keep (m XOR (m-1) = bits up to and including the lowest set bit) + zapnot $27, $18, $27 # Keep src up to & including null + zap $21, $18, $21 # Make room for new data + nop + or $21, $27, $21 # Combine src & dst... + stq_u $21, ($16) #/ ... and store + ret $31, ($26) + + nop +a_exit_2: + ldq_u $27, -8($16) # Get dst QW to update + subq $18, 1, $17 #/ Use location of null byte... + xor $18, $17, $18 # ... to compute mask of what to keep + zapnot $21, $18, $21 # Keep src up to & including null + zap $27, $18, $27 # Make room for new data + nop + or $27, $21, $27 # Combine src & dst... + stq_u $27, -8($16) #/ ... and store + ret $31, ($26) + +src_unaligned: # dst_unaligned code would work; is this faster? + mskqh $18, $17, $18 # Zeros where src to be ignored + ornot $27, $18, $19 # Make ignored bytes nonzero + cmpbge $31, $19, $21 # Any null bytes in src data? + extql $27, $17, $27 # Move src to position of dst + bne $21, short_ld #/ Finish up if nulls seen + ldq_u $19, 8($17) # Next src QW needed to fill dst + br $31, u_entry_2 # Enter loop for mismatched alignment + + # Here's the hard part. 
Enter with + # r16 = dst address + # r17 = src address + # r18 = -1 + # r27 = first src QW + # r28 = dst alignment (>0) + # Check whether the first src QW has any nulls, and load the next one. + # Combine these if needed to fill the first dst QW, and enter a loop + # that fetches src QWs and checks them, while storing dst QWs. + +dst_unaligned: + ldq_u $20, ($16) # Load dst to be updated + mskqh $18, $17, $18 #/ Zeros where src to be ignored + mov $16, $0 # Set up function result + ornot $27, $18, $19 # Make ignored bytes of src nonzero + cmpbge $31, $19, $21 # Any null bytes in src data? + extql $27, $17, $27 # Get only interesting src data + bne $21, short # Finish up if nulls seen + mskql $20, $16, $20 #/ Make room in dst + ldq_u $21, 8($17) # Load next src QW if no nulls + mskql $18, $16, $18 #/ Need two src QWs for first dst QW? + insql $27, $16, $27 # Move src data to position of dst + subq $17, $28, $17 # Adjust src ptr for partial move + and $17, 7, $28 # Is src now aligned? + bne $18, u_loop #/ Enter loop if one src QW fills dst + or $27, $20, $27 # Combine first src QW with dst + extqh $21, $17, $20 # Position 2nd src QW in 1st dst QW + cmpbge $31, $21, $18 # Any nulls in next src QW? + beq $28, match #/ If src aligned, use quick loop + mov $21, $19 # Put src QW where loop expects + bne $18, short_a #/ Finish up if nulls seen + + # r16 = address of next dst to store + # r17 = address-16 of next src to load + # r18 + # r19 = last loaded src QW + # r20 = one piece of dst QW + # r21 + # r27 = other piece of dst QW + # r28 + +u_loop: + ldq_u $28, 16($17) # Load another src QW + addq $17, 16, $17 #/ Update src pointer for unrolled loop + or $27, $20, $27 # Combine pieces + extql $19, $17, $20 # Get second part of prior src QW + stq_u $27, ($16) # Store a dst QW + cmpbge $31, $28, $19 #/ Any nulls in this src QW? 
+ extqh $28, $17, $27 # Get first part of this src QW + bne $19, u_exit_2 #/ Finish up if nulls seen + ldq_u $19, 8($17) # Load another src QW + addq $16, 16, $16 #/ Update dst pointer for unrolled loop + or $27, $20, $20 # Combine pieces + extql $28, $17, $27 # Get second piece of prior src QW + stq_u $20, -8($16) # Store a dst QW +u_entry_2: + cmpbge $31, $19, $28 #/ Any nulls in this src QW? + extqh $19, $17, $20 # Get first part of this src QW + beq $28, u_loop #/ Repeat if no nulls seen + + subq $16, 8, $16 # Undo part of pointer update + mov $19, $28 # Move src QW to expected place +u_exit_2: + or $27, $20, $27 # Combine pieces + ldq_u $18, 8($16) #/ Load dst to update + cmpbge $31, $27, $21 # Is null in first dst QW? + bne $21, u_exit_3 # Skip if so + stq_u $27, 8($16) # Store a whole dst QW + extql $28, $17, $27 #/ Get second part of src QW + ldq_u $18, 16($16) # We'll update next dst QW + cmpbge $31, $27, $21 # Find location of null there + addq $16, 8, $16 # Update dst pointer +u_exit_3: + subq $21, 1, $28 # Using position of null byte... + xor $21, $28, $21 # ... make mask for desired src data + zapnot $27, $21, $27 # Trim src data after null + zap $18, $21, $18 # Make room for it in dst + nop + or $27, $18, $27 # Combine pieces + stq_u $27, 8($16) #/ Store dst QW + ret $31, ($26) +short_ld: + ldq_u $20, ($16) # Load dst QW to update +short: + cmpbge $31, $27, $17 #/ Get mask showing location of null + insql $27, $16, $18 # Move src data to position of dst + mskql $20, $16, $19 # Get dst bytes preceding string + sll $17, $28, $17 # Move mask in the same way + or $18, $19, $18 # Combine src & dst + and $17, 255, $28 # Null byte in first dst QW? + subq $17, 1, $19 # Using position of null byte... + xor $17, $19, $17 # ... 
make mask for desired src data + bne $28, short_2 #/ Skip if null in first dst QW + ldq_u $20, 8($16) # Load second dst QW + srl $17, 8, $17 #/ Move mask down for use + stq_u $18, ($16) # Store first dst QW + insqh $27, $16, $18 #/ Move src data to position of dst + addq $16, 8, $16 # Advance dst pointer +short_2: + zap $20, $17, $20 # Preserve dst data following null + zapnot $18, $17, $18 # Trim src data after null + nop + or $18, $20, $18 # Combine pieces + stq_u $18, ($16) #/ Store dst QW + ret $31, ($26) + + # r16 = dst address + # r17 = updated src address + # r18 = null position + # r19 = next src QW + # r20 = first part of r19, positioned for dst + # r21 + # r27 = dst QW so far + # r28 = low bits of updated src address + +short_a: + sll $18, 8, $18 # Shift location of null byte... + ldq_u $21, ($16) #/ Reload first dst QW + or $27, $20, $27 # Combine pieces + srl $18, $28, $18 # ... to position in dst QW's + nop + and $18, 255, $20 # Is null in first dst QW? + subq $18, 1, $28 # Using position of null byte... + xor $18, $28, $18 # ... make mask for desired src data + bne $20, short_a1 #/ Skip if null in first QW + stq_u $27, ($16) # Store a whole dst QW + extql $19, $17, $27 #/ Prepare next piece of src + ldq_u $21, 8($16) # Load second dst QW for update + srl $18, 8, $18 #/ Look at next 8 bits of mask + addq $16, 8, $16 # Update dst pointer +short_a1: + zapnot $27, $18, $27 # Keep src data + zap $21, $18, $21 # Keep end of dst QW + nop + or $27, $21, $27 # Combine pieces + stq_u $27, ($16) # Store last dst QW + ret $31, ($26) + + .set at + .set reorder + .end strcpy |