#****************************************************************************
#*                                                                          *
#*  Copyright (c) 1991 by                                                   *
#*  DIGITAL EQUIPMENT CORPORATION, Maynard, Massachusetts.                  *
#*  All rights reserved.                                                    *
#*                                                                          *
#*  This software is furnished under a license and may be used and copied   *
#*  only in accordance with the terms of such license and with the          *
#*  inclusion of the above copyright notice. This software or any other     *
#*  copies thereof may not be provided or otherwise made available to any   *
#*  other person. No title to and ownership of the software is hereby       *
#*  transferred.                                                            *
#*                                                                          *
#*  The information in this software is subject to change without notice    *
#*  and should not be construed as a commitment by Digital Equipment        *
#*  Corporation.                                                            *
#*                                                                          *
#*  Digital assumes no responsibility for the use or reliability of its     *
#*  software on equipment which is not supplied by Digital.                 *
#*                                                                          *
#*                                                                          *
#****************************************************************************
#
#++
#  Facility:
#       DEC C Run Time Library on the Alpha/WNT Platform
#
#  Abstract:
#
#       Implements the C RTL function strcpy().
#
#  Author:
#       Bill Noyce              9-Aug-1991
#
#  Modified by:
#
#       001     Kevin Routley   10-Sep-1991
#               Modified to C RTL Coding standards.
#
#       002     Chris Bord      30 September 1991
#               Add decc$ prefixes.
#
#       003     Chris Bord      24 January 1992
#               Add second parameter to .procedure_descriptor directive
#
#       004     John Parks      22 January 1993
#               Ported to Alpha/NT.
#--

        .globl  strcpy
        .ent    strcpy

#----------------------------------------------------------------------------
# char *strcpy(char *dst, const char *src)
#
#       r16 = dst
#       r17 = src
#       returns r0 = dst (r16 is copied to r0 before r16 is modified)
#       destroys r16-r21, r27-r28
#       returns through r26; no stack is used
#
# Overview
#   The string is moved a quadword (QW, 8 bytes) at a time.  Every src QW
#   is tested with cmpbge against $31 (zero), which yields one mask bit per
#   byte that is zero.  Whole QWs are stored until a null is seen; the QW
#   that receives the null (and, for an unaligned dst, the first dst QW)
#   is merged with the bytes already in dst (ldq_u / zap / or / stq_u), so
#   dst bytes outside the copied string are written back with the values
#   that were just loaded from them.
#
#   Because src is fetched in whole QWs, bytes that share an aligned QW
#   with the string are read even though they are not part of it; they are
#   masked off and never stored.
#
# Null-mask idiom used at every exit
#   m = cmpbge result (bit i set => byte i is null)
#   (m - 1) xor m  =>  all bits up to and including the lowest set bit,
#   i.e. a byte mask selecting "everything up to and including the first
#   null".  zapnot with that mask keeps the string bytes, zap with the
#   same mask clears the matching bytes of the old dst QW.
#
# Paths
#   a_loop         dst and src both QW-aligned
#   match          entry into a_loop when an unaligned dst and the src
#                  have the same alignment
#   src_unaligned  dst aligned, src not
#   dst_unaligned  dst not aligned (src may or may not be)
#   u_loop         main loop when src and dst alignments differ
#   short*, *exit* tails that store the final, partially used QW(s)
#
# NOTE(review): comments introduced with "#/" come from the original
#   author; they presumably mark an instruction meant to issue together
#   with the one before it -- confirm against the C RTL coding standards.
#----------------------------------------------------------------------------

strcpy:
        .set    noat                    # $28 is used as a plain scratch register below
        .set    noreorder               # Keep the instruction order exactly as written

        ldq_u   $27, ($17)              # Get first src QW
        and     $16, 7, $28             #/ Is dst aligned?
        lda     $18, -1($31)            # Get a mask of all 1's
        bne     $28, dst_unaligned      #/ Go handle unaligned dst
        and     $17, 7, $19             # Is src aligned too?
        nop
        mov     $16, $0                 # Set up function result
        bne     $19, src_unaligned      #/ Go handle aligned dst, unaligned src

#
# Both pointers have the same alignment: copy two QWs per iteration.
# r27 holds the QW destined for (r16); r21 holds the one after it.
#
a_loop:
        cmpbge  $31, $27, $18           # Any nulls in src QW?
        bne     $18, a_exit_1           # Finish up if so
        ldq     $21, 8($17)             # Load next QW if not
match:                                  # Enter if src matches unaligned dst
        addq    $17, 16, $17            #/ Update src pointer for unrolled loop
        stq_u   $27, ($16)              # Store a whole QW
        addq    $16, 16, $16            #/ Update dst pointer for unrolled loop
        cmpbge  $31, $21, $18           # Any nulls in src QW?
        bne     $18, a_exit_2           # Finish up if so
        ldq     $27, ($17)              # Load next QW if not
        stq_u   $21, -8($16)            # Store a whole QW
        br      $31, a_loop             # Repeat during load latency

#
# Null found in r27, which belongs at (r16).  r18 = null mask from cmpbge.
#
a_exit_1:
        ldq_u   $21, ($16)              # Get dst QW to update
        subq    $18, 1, $17             #/ Use location of null byte...
        xor     $18, $17, $18           # ... to compute mask of what to keep
        zapnot  $27, $18, $27           # Keep src up to & including null
        zap     $21, $18, $21           # Make room for new data
        nop
        or      $21, $27, $21           # Combine src & dst...
        stq_u   $21, ($16)              #/ ... and store
        ret     $31, ($26)
        nop

#
# Null found in r21, which belongs at -8(r16) because r16 has already
# been advanced by 16.  r18 = null mask from cmpbge.
#
a_exit_2:
        ldq_u   $27, -8($16)            # Get dst QW to update
        subq    $18, 1, $17             #/ Use location of null byte...
        xor     $18, $17, $18           # ... to compute mask of what to keep
        zapnot  $21, $18, $21           # Keep src up to & including null
        zap     $27, $18, $27           # Make room for new data
        nop
        or      $27, $21, $27           # Combine src & dst...
        stq_u   $27, -8($16)            #/ ... and store
        ret     $31, ($26)

#
# dst is aligned, src is not.  Enter with r18 = -1, r27 = first src QW,
# r28 = 0 (dst alignment).  Bytes of the first QW that precede src are
# forced nonzero so they cannot be mistaken for the terminator.
#
src_unaligned:                          # dst_unaligned code would work; is this faster?
        mskqh   $18, $17, $18           # Zeros where src to be ignored
        ornot   $27, $18, $19           # Make ignored bytes nonzero
        cmpbge  $31, $19, $21           # Any null bytes in src data?
        extql   $27, $17, $27           # Move src to position of dst
        bne     $21, short_ld           #/ Finish up if nulls seen
        ldq_u   $19, 8($17)             # Next src QW needed to fill dst
        br      $31, u_entry_2          # Enter loop for mismatched alignment

        # Here's the hard part.  Enter with
        # r16 = dst address
        # r17 = src address
        # r18 = -1
        # r27 = first src QW
        # r28 = dst alignment (>0)
        # Check whether the first src QW has any nulls, and load the next one.
        # Combine these if needed to fill the first dst QW, and enter a loop
        # that fetches src QWs and checks them, while storing dst QWs.

dst_unaligned:
        ldq_u   $20, ($16)              # Load dst to be updated
        mskqh   $18, $17, $18           #/ Zeros where src to be ignored
        mov     $16, $0                 # Set up function result
        ornot   $27, $18, $19           # Make ignored bytes of src nonzero
        cmpbge  $31, $19, $21           # Any null bytes in src data?
        extql   $27, $17, $27           # Get only interesting src data
        bne     $21, short              # Finish up if nulls seen
        mskql   $20, $16, $20           #/ Make room in dst
        ldq_u   $21, 8($17)             # Load next src QW if no nulls
        mskql   $18, $16, $18           #/ Need two src QWs for first dst QW?
        insql   $27, $16, $27           # Move src data to position of dst
        subq    $17, $28, $17           # Adjust src ptr for partial move
        and     $17, 7, $28             # Is src now aligned?
        bne     $18, u_loop             #/ Enter loop if one src QW fills dst
        or      $27, $20, $27           # Combine first src QW with dst
        extqh   $21, $17, $20           # Position 2nd src QW in 1st dst QW
        cmpbge  $31, $21, $18           # Any nulls in next src QW?
        beq     $28, match              #/ If src aligned, use quick loop
        mov     $21, $19                # Put src QW where loop expects
        bne     $18, short_a            #/ Finish up if nulls seen

        # Register use in the mismatched-alignment loop:
        # r16 = address of next dst to store
        # r17 = address-16 of next src to load
        # r18
        # r19 = last loaded src QW
        # r20 = one piece of dst QW
        # r21
        # r27 = other piece of dst QW
        # r28
        #
        # The loop is unrolled by two; r19 and r28 swap roles as "this src
        # QW" and "prior src QW" between the two halves, and each dst QW
        # is assembled from extql(prior) | extqh(this).

u_loop:
        ldq_u   $28, 16($17)            # Load another src QW
        addq    $17, 16, $17            #/ Update src pointer for unrolled loop
        or      $27, $20, $27           # Combine pieces
        extql   $19, $17, $20           # Get second part of prior src QW
        stq_u   $27, ($16)              # Store a dst QW
        cmpbge  $31, $28, $19           #/ Any nulls in this src QW?
        extqh   $28, $17, $27           # Get first part of this src QW
        bne     $19, u_exit_2           #/ Finish up if nulls seen
        ldq_u   $19, 8($17)             # Load another src QW
        addq    $16, 16, $16            #/ Update dst pointer for unrolled loop
        or      $27, $20, $20           # Combine pieces
        extql   $28, $17, $27           # Get second piece of prior src QW
        stq_u   $20, -8($16)            # Store a dst QW
u_entry_2:
        cmpbge  $31, $19, $28           #/ Any nulls in this src QW?
        extqh   $19, $17, $20           # Get first part of this src QW
        beq     $28, u_loop             #/ Repeat if no nulls seen
        subq    $16, 8, $16             # Undo part of pointer update
        mov     $19, $28                # Move src QW to expected place

#
# A null was seen in the src QW now held in r28.  r27/r20 are the two
# pieces of the dst QW that goes to 8(r16).  The null lands either in that
# dst QW or, if the first piece of r28 held no null, in the one after it.
#
u_exit_2:
        or      $27, $20, $27           # Combine pieces
        ldq_u   $18, 8($16)             #/ Load dst to update
        cmpbge  $31, $27, $21           # Is null in first dst QW?
        bne     $21, u_exit_3           # Skip if so
        stq_u   $27, 8($16)             # Store a whole dst QW
        extql   $28, $17, $27           #/ Get second part of src QW
        ldq_u   $18, 16($16)            # We'll update next dst QW
        cmpbge  $31, $27, $21           # Find location of null there
        addq    $16, 8, $16             # Update dst pointer
u_exit_3:
        subq    $21, 1, $28             # Using position of null byte...
        xor     $21, $28, $21           # ... make mask for desired src data
        zapnot  $27, $21, $27           # Trim src data after null
        zap     $18, $21, $18           # Make room for it in dst
        nop
        or      $27, $18, $27           # Combine pieces
        stq_u   $27, 8($16)             #/ Store dst QW
        ret     $31, ($26)

#
# The terminator is in the very first src QW.  Enter "short" with
# r20 = first dst QW, r27 = src data shifted down to byte 0,
# r28 = dst alignment (0 when arriving through short_ld).
# The string may still straddle two dst QWs when dst is unaligned.
#
short_ld:
        ldq_u   $20, ($16)              # Load dst QW to update
short:
        cmpbge  $31, $27, $17           #/ Get mask showing location of null
        insql   $27, $16, $18           # Move src data to position of dst
        mskql   $20, $16, $19           # Get dst bytes preceding string
        sll     $17, $28, $17           # Move mask in the same way
        or      $18, $19, $18           # Combine src & dst
        and     $17, 255, $28           # Null byte in first dst QW?
        subq    $17, 1, $19             # Using position of null byte...
        xor     $17, $19, $17           # ... make mask for desired src data
        bne     $28, short_2            #/ Skip if null in first dst QW
        ldq_u   $20, 8($16)             # Load second dst QW
        srl     $17, 8, $17             #/ Move mask down for use
        stq_u   $18, ($16)              # Store first dst QW
        insqh   $27, $16, $18           #/ Move src data to position of dst
        addq    $16, 8, $16             # Advance dst pointer
short_2:
        zap     $20, $17, $20           # Preserve dst data following null
        zapnot  $18, $17, $18           # Trim src data after null
        nop
        or      $18, $20, $18           # Combine pieces
        stq_u   $18, ($16)              #/ Store dst QW
        ret     $31, ($26)

        # Terminator is in the second src QW and nothing has been stored
        # yet.  Enter with
        # r16 = dst address
        # r17 = updated src address
        # r18 = null position
        # r19 = next src QW
        # r20 = first part of r19, positioned for dst
        # r21
        # r27 = dst QW so far
        # r28 = low bits of updated src address

short_a:
        sll     $18, 8, $18             # Shift location of null byte...
        ldq_u   $21, ($16)              #/ Reload first dst QW
        or      $27, $20, $27           # Combine pieces
        srl     $18, $28, $18           # ... to position in dst QW's
        nop
        and     $18, 255, $20           # Is null in first dst QW?
        subq    $18, 1, $28             # Using position of null byte...
        xor     $18, $28, $18           # ... make mask for desired src data
        bne     $20, short_a1           #/ Skip if null in first QW
        stq_u   $27, ($16)              # Store a whole dst QW
        extql   $19, $17, $27           #/ Prepare next piece of src
        ldq_u   $21, 8($16)             # Load second dst QW for update
        srl     $18, 8, $18             #/ Look at next 8 bits of mask
        addq    $16, 8, $16             # Update dst pointer
short_a1:
        zapnot  $27, $18, $27           # Keep src data
        zap     $21, $18, $21           # Keep end of dst QW
        nop
        or      $27, $21, $27           # Combine pieces
        stq_u   $27, ($16)              # Store last dst QW
        ret     $31, ($26)

        .set    at
        .set    reorder

        .end    strcpy