diff options
Diffstat (limited to 'private/crt32/misc/alpha/sloc.s')
-rw-r--r-- | private/crt32/misc/alpha/sloc.s | 702 |
1 files changed, 702 insertions, 0 deletions
diff --git a/private/crt32/misc/alpha/sloc.s b/private/crt32/misc/alpha/sloc.s new file mode 100644 index 000000000..30fda8c6e --- /dev/null +++ b/private/crt32/misc/alpha/sloc.s @@ -0,0 +1,702 @@ + #++ + # + # Copyright (c) 1993 by + # Digital Equipment Corporation, Maynard, MA + # + # This software is furnished under a license and may be used and copied + # only in accordance with the terms of such license and with the + # inclusion of the above copyright notice. This software or any other + # copies thereof may not be provided or otherwise made available to any + # other person. No title to and ownership of the software is hereby + # transferred. + # + # The information in this software is subject to change without notice + # and should not be construed as a commitment by Digital Equipment + # Corporation. + # + # Digital assumes no responsibility for the use or reliability of its + # software on equipment which is not supplied by Digital. + # + + # Facility: + # + # GEM/OTS - GEM compiler system support library + # + # Abstract: + # + # OTS character string support, Alpha version + # This module provides support for string index, search, and verify. + # + # Authors: + # + # Bill Noyce + # Kent Glossop + # + # long ots_index(const char *str, long strlen, const char *pat, long patlen); + # + # Searches a string for a substring + # returns r0=zero-based position if found, or -1 if not. + # Register usage: r0-r1, r16-r23 and r27-r28 ONLY (r26 is ra) + # + # long ots_search(const char *str, long strlen, const char *cset, long csetlen); + # + # Searches a string for any character in a set of characters + # returns r0=zero-based position if found, or -1 if not. + # Register usage: r0-r1, r16-r23 and r27-r28 ONLY (r26 is ra) + # + # long ots_search_char(const char *str, long strlen, char pat); + # (also known as ots_index_char) + # + # Searches a string for a single pattern character + # returns r0=zero-based position if found, or -1 if not. 
+ # Register usage: r0, r16-r18 and r27-r28 ONLY (r26 is ra) + # (Note: GEM presumes r19 is also killed) + # + # long ots_search_mask(const char *str, long strlen, const char maskvec[], int mask) + # + # Searches a string until a character matching at least one bit + # in a mask is found in a table (similar to a VAX SCANC instruction.) + # returns r0=zero-based position if found, or -1 if not. + # Register usage: r0-1, r16-r21 and r27-r28 ONLY (r26 is ra) + # + # long ots_verify(char *str, long strlen, char *cset, long csetlen); + # + # Verifies a string against a set of characters + # returns r0=zero-based position for mismatch, or -1 if all validate. + # Register usage: r0-r1, r16-r23 and r27-r28 ONLY (r26 is ra) + # + # long ots_verify_char(char *str, long strlen, char pat); + # + # Verifies a string against a single character + # returns r0=zero-based position for mismatch, or -1 if every character matches. + # Register usage: r0, r16-r18 and r27-r28 ONLY (r26 is ra) + # (Note: GEM presumes r19 is also killed) + # + # long ots_verify_mask(const char *str, long strlen, const char maskvec[], int mask) + # + # Verifies a string until a character not matching at least one bit + # in a mask is found in a table (similar to a VAX SPANC instruction.) + # returns r0=zero-based position if found, or -1 if not. + # Register usage: r0-1, r16-r21 and r27-r28 ONLY (r26 is ra) + # + # Special conventions for all: + # No stack space + # No linkage pointer required. + # (Warning: The auto-loader potentially takes some regs across + # the call if this is being used in a shared lib. environment.) 
#
        # Modification history:
        #
        #   006   28 May 1992   WBN  Initial version, replacing BLISS -005
        #
        #   007   22 Sep 1992   KDG  Add case-sensitive names
        #
        #   008   14 Nov 1992   KDG  - Merge modules together (allows index/search/verify
        #                              to use the single-character versions w/o calls)
        #                            - initial multi-character index/search/verify
        #
        #   009    4 Dec 1992   KDG  Fix bgt that should have been bge (GEM_BUGS #2091)
        #
        #   010   26 Jan 1993   KDG  Add underscore
        #
        # All of the routines other than the single character search/verify could
        # be significantly improved at some point in the future
        #--

#include "ots_defs.hs"

        # "Package"
        #
        # One procedure descriptor (_OtsLocation) covers every routine in this
        # module; the individual routines are alternate entry points (.aent).
        #
        .globl  _OtsLocation
        .ent    _OtsLocation
_OtsLocation:
        .set    noat
        .set    noreorder

        # ots_index
        # Plain brute-force substring search: the pattern is compared byte by
        # byte at each candidate start position in turn.  (Original note: only
        # marginally better than the compiled code it replaced; it should be
        # tailored to compare up to 8 bytes at a time, particularly for
        # patterns <= 8 characters.)
        #
        # Register roles
        #   r0  - candidate start positions still untried, minus one
        #   r1  - pattern bytes left to compare at the current position
        #   r16 - current candidate start in the string (stepped after a mismatch)
        #   r17 - string length
        #   r18 - pattern address
        #   r19 - pattern length
        #   r20 - string cursor for the inner compare
        #   r21 - string quadword, then the extracted string byte
        #   r22 - pattern cursor for the inner compare
        #   r23 - pattern quadword, then the extracted pattern byte
        #   r27 - unused here
        #   r28 - unused here
        #
        # Result in r0: zero-based position of the first occurrence, or -1.

        .globl  _OtsStringIndex
        .aent   _OtsStringIndex
_OtsStringIndex:
        .frame  sp,0,r26

        cmpeq   r19, 1, r20             # r20 = 1 when the pattern is one byte long
        beq     r19, ix_empty_pat       # an empty pattern is found at position 0
        subq    r17, r19, r0            # r0 = (count of start positions) - 1
        bne     r20, search_single      # one-byte pattern: use the character search
        blt     r0, ix_not_found        # pattern longer than string: cannot occur

        # try the pattern at the position r16 points to
ix_try_pos:
        lda     r20, -1(r16)            # string cursor, biased by -1 for the loop
        lda     r22, -1(r18)            # pattern cursor, biased by -1 for the loop
        mov     r19, r1                 # bytes to compare = pattern length

        # compare one string byte against one pattern byte
ix_cmp_byte:
        ldq_u   r21, 1(r20)             # quadword holding the next string byte
        lda     r20, 1(r20)             # step the string cursor
        ldq_u   r23, 1(r22)             # quadword holding the next pattern byte
        lda     r22, 1(r22)             # step the pattern cursor
        subq    r1, 1, r1               # one fewer byte to compare
        extbl   r21, r20, r21           # isolate the string byte
        extbl   r23, r22, r23           # isolate the pattern byte
        xor     r21, r23, r21           # zero when the two bytes are equal
        bne     r21, ix_advance         # differ: give up on this position
        bgt     r1, ix_cmp_byte         # equal and more pattern left: keep going

        # every pattern byte matched at the current position
ix_found:
        subq    r17, r19, r1            # (count of start positions) - 1 again
        subq    r1, r0, r0              # minus positions left = position reached
        ret     r31, (r26)

        # the pattern does not occur at the current position
ix_advance:
        subq    r0, 1, r0               # one fewer start position left
        lda     r16, 1(r16)             # next candidate start
        bge     r0, ix_try_pos          # retry while positions remain

ix_not_found:
        lda     r0, -1(r31)             # result = -1
        ret     r31, (r26)

ix_empty_pat:
        clr     r0                      # result = 0
        ret     r31, (r26)

        # ots_search
        #   R16 -> string
        #   R17 = length
        #   R18 -> character set
        #   R19 = character set length
        #   result in R0: -1 if no string character is in the set, otherwise
        #                 the position (0..length-1) of the first one that is
        #   destroys R0-R1, R16-R23, R27-R28
        #
        # This routine could definitely be improved.  (It should only
        # be necessary to go to memory for every 8th character for both
        # the string and the character set, and for character sets
        # <= 8 characters, it should be possible to simply keep the
        # set in a register while the string is being processed.)
#
        # _OtsStringSearch - find the first string byte that belongs to a set.
        #   In:   r16 -> string, r17 = string length,
        #         r18 -> character set, r19 = character set length
        #   Out:  r0 = zero-based position of the first string byte found in
        #         the set, or -1 if there is none (including an empty string
        #         or an empty set)
        #   A one-byte set is handed to the single-character search below.
        #
        .globl  _OtsStringSearch
        .aent   _OtsStringSearch
_OtsStringSearch:
        .frame  sp,0,r26

        cmpeq   r19, 1, r0              # check for single-character search, clear r0 otherwise
        ble     r19, s_retm1            # return -1 if no characters in the match set
        bne     r0, search_single       # single character search
        # An empty string cannot contain a member of the set.  Without this
        # test the loop below would fetch the byte at (r16) regardless and
        # could report a match at position 0.  (Occupies the slot of what
        # used to be a filler nop, so the loop alignment is unchanged.)
        ble     r17, s_retm1            # return -1 if the string is empty

        # outer loop - r0 is the position of the string byte being tested
s_outlp:
        ldq_u   r20, (r16)              # load qw containing source byte
        lda     r22, -1(r18)            # initialize character set pointer
        mov     r19, r1                 # initialize character set length counter
        extbl   r20, r16, r20           # extract the source byte to match

        # core brute-force matching loop
s_matlp:
        ldq_u   r23, 1(r22)             # load qw containing character set byte
        lda     r22, 1(r22)             # bump character set pointer
        subq    r1, 1, r1               # decrement remaining cset length
        extbl   r23, r22, r23           # extract character set byte
        xor     r20, r23, r21           # match?
        beq     r21, s_match            # if match, we're done
        bgt     r1, s_matlp             # more set characters to try against this byte?

        # byte is not in the set - advance to next if more positions
        lda     r16, 1(r16)             # bump source pointer
        addq    r0, 1, r0               # increment position
        subq    r17, 1, r17             # decrement match count
        bgt     r17, s_outlp            # if remaining positions, attempt match
s_retm1:
        lda     r0, -1(r31)             # if not, return -1
s_match:
        ret     r31, (r26)

        # Shared entry for one-byte patterns/sets (also reached from
        # _OtsStringIndex): turn the pointer in r18 into the byte itself.
search_single:
        ldq_u   r19, (r18)              # load the quadword containing the byte
        extbl   r19, r18, r18           # extract the byte of interest
                                        # and fall through to the character search rtn

        # ots_search_char (ots_index_char)
        #   r16 -> string
        #   r17 = length
        #   r18 = character to find
        #   result in r0: -1 if not found, or position in range 0..length-1
        #   destroys r16-r18, r27-r28
        #
        # Method: the character is replicated into all 8 bytes of r18, each
        # string quadword is XORed with it, and CMPBGE against zero yields
        # one mask bit per byte that is set where string byte = character.
        #
        .globl  _OtsStringSearchChar
        .aent   _OtsStringSearchChar
_OtsStringSearchChar:
        .globl  _OtsStringIndexChar
        .aent   _OtsStringIndexChar
_OtsStringIndexChar:
        .frame  sp,0,r26
search_char:
        sll     r18, 8, r28             # Replicate char in the quadword...
        beq     r17, sc_fail            # Quick exit if length=0

        ldq_u   r27, (r16)              # First quadword of string
        addq    r16, r17, r0            # Point to end of string

        subq    r17, 8, r17             # Length > 8?
        or      r18, r28, r18           # ...

        sll     r18, 16, r28            # ...
        bgt     r17, sc_long            # Skip if length > 8

        # Length 1..8: assemble the whole string into one quadword
        ldq_u   r16, -1(r0)             # Last quadword of string
        extql   r27, r0, r27            # Position string at high end of QW

        or      r18, r28, r18           # ...
        sll     r18, 32, r28            # ...

        extqh   r16, r0, r16            # Position string at high end of QW
        or      r18, r28, r18           # Pattern fills a quadword

        or      r27, r16, r27           # String fills a quadword
        xor     r27, r18, r27           # Diff betw. string and pattern

        cmpbge  r31, r27, r27           # Set 1's where string=pattern
        subq    r31, r17, r17           # Compute 8 - length

        srl     r27, r17, r27           # Shift off bits not part of string
        clr     r0                      # Set return value

        and     r27, 0xF, r28           # One of first 4 characters?
        blbs    r27, sc_done            # Return 0 if first char matched

        subq    r27, 1, r0              # Flip the first '1' bit
        beq     r28, sc_geq_4           # Skip if no match in first 4

        andnot  r27, r0, r0             # Make one-bit mask of first match
        srl     r0, 2, r0               # Map 2/4/8 -> 0/1/2

        # stall

        addq    r0, 1, r0               # Bump by 1
        ret     r31, (r26)              # return

sc_geq_4:
        andnot  r27, r0, r28            # Make one-bit mask of first match
        beq     r27, sc_done            # Return -1 if there were none (r0 = 0 - 1 here)

        srl     r28, 5, r27             # Map 10/20/40/80 -> 0/1/2/4
        srl     r28, 7, r28             # Map 10/20/40/80 -> 0/0/0/1

        addq    r27, 4, r0              # Bump by 4
        subq    r0, r28, r0             # and correct 4/5/6/8 -> 4/5/6/7

sc_done:
        ret     r31, (r26)

        # Enter here if string length > 8.
        #   R16 -> start of string
        #   R17 = length - 8
        #   R18 = fill in bytes 0,1
        #   R27 = 1st QW of string
        #   R28 = fill in bytes 2,3

        #.odd
sc_long:
        or      r18, r28, r18           # R18 has pattern in low 4 bytes

        sll     r18, 32, r28            # ...
        and     r16, 7, r0              # Where in QW did we start?

        or      r18, r28, r18           # Pattern fills a QW
        ldq_u   r28, 8(r16)             # Get next QW (string B)

        xor     r27, r18, r27           # Diff betw. string and pattern
        cmpbge  r31, r27, r27           # Set 1's where string=pattern

        addq    r17, r0, r17            # Remaining length after 1st QW
        srl     r27, r0, r27            # Discard bits preceding string

        subq    r17, 16, r17            # More than two QW's to go?
        sll     r27, r0, r27            # Reposition like other bits

        subq    r17, r0, r0             # Remember start point to compute len
        ble     r17, sc_bottom          # Skip the loop if 2 QW's or less

        # Main loop: two quadwords (string A in r27, string B in r28) per trip
sc_loop:
        xor     r28, r18, r28           # Diff betw string B and pattern
        bne     r27, sc_done_a          # Exit if a match in string A

        cmpbge  r31, r28, r28           # 1's where string B = pattern
        ldq_u   r27, 16(r16)            # Load string A

        subq    r17, 16, r17            # Decrement remaining length
        bne     r28, sc_done_b          # Exit if a match in string B

        ldq_u   r28, 24(r16)            # Load string B
        addq    r16, 16, r16            # Increment pointer

        xor     r27, r18, r27           # Diff betw string A and pattern
        cmpbge  r31, r27, r27           # 1's where string A = pattern

        bgt     r17, sc_loop            # Repeat if more than 2 QW's left

        nop                             #.align quad

sc_bottom:
        bne     r27, sc_done_a          # Exit if a match in string A
        addq    r17, 8, r27             # More than 1 QW left?

        xor     r28, r18, r28           # Diff betw string B and pattern
        ble     r27, sc_last            # Skip if this is last QW

        cmpbge  r31, r28, r27           # 1's where string B = pattern
        ldq_u   r28, 16(r16)            # Load string A

        subq    r17, 8, r17             # Adjust len for final return
        bne     r27, sc_done_a          # Exit if a match in string B

        addq    r17, 8, r27             # Ensure -7 <= (r27=len-8) <= 0
        xor     r28, r18, r28           # Diff betw string A and pattern

sc_last:
        mskqh   r27, r27, r27           # Nonzero in bytes beyond string
        subq    r17, 8, r17             # Adjust len for final return

        or      r28, r27, r28           # Zeros only for matches within string
        cmpbge  r31, r28, r27           # Where are the matches?

        bne     r27, sc_done_a          # Compute index if a match found
sc_fail:
        lda     r0, -1(r31)             # Else return -1

        ret     r31, (r26)

        nop                             #.align 8

sc_done_b:
        addq    r17, 8, r17             # Adjust length
        mov     r28, r27                # Put mask where it's expected

        # r27 = match mask for the quadword holding the first match;
        # convert (start, remaining, mask) into the byte position.
sc_done_a:
        subq    r0, r17, r0             # (start - remaining) = base index
        blbs    r27, sc_exit            # Return R0 if first char matched

        and     r27, 0xF, r16           # One of first 4 characters?
        subq    r27, 1, r28             # Flip the first '1' bit

        andnot  r27, r28, r28           # Make one-bit mask of first match
        beq     r16, sc_geq_4x          # Skip if no match in first 4

        srl     r28, 2, r28             # Map 2/4/8 -> 0/1/2
        addq    r0, 1, r0               # Bump by 1

        addq    r0, r28, r0             # Add byte offset
sc_exit:
        ret     r31, (r26)              # return

sc_geq_4x:
        addq    r0, 4, r0               # Bump by 4
        srl     r28, 5, r27             # Map 10/20/40/80 -> 0/1/2/4

        srl     r28, 7, r28             # Map 10/20/40/80 -> 0/0/0/1
        addq    r0, r27, r0             # Add 0/1/2/4

        subq    r0, r28, r0             # and correct
        ret     r31, (r26)

        # ots_search_mask
        # This routine could be tailored by loading a longword or
        # a quadword at a time and doing table lookups on the
        # characters largely in parallel.
#
        # _OtsStringSearchMask
        #   In:   r16 -> string, r17 = string length,
        #         r18 -> mask table indexed by character value, r19 = mask
        #   Out:  r0 = position of the first string byte whose table entry
        #         has a bit in common with the mask, or -1 if there is none
        #
        .globl  _OtsStringSearchMask
        .aent   _OtsStringSearchMask
_OtsStringSearchMask:
        .frame  sp,0,r26

        lda     r16, -1(r16)            # pre-decrement so the loop can address 1(r16)
        nop                             # filler; an lnop (unop) or fnop would dual issue
        lda     r0, -1(r31)             # position starts at -1, which is also "not found"
        ble     r17, sm_return          # length <= 0: return -1 straight away
        # byte-at-a-time version - ~14 cycles/byte
sm_next:
        ldq_u   r21, 1(r16)             # quadword holding the next string byte
        lda     r16, 1(r16)             # step the string pointer
        addq    r0, 1, r0               # r0 = position of the byte under test
        subq    r17, 1, r17             # one fewer byte remaining
        extbl   r21, r16, r21           # isolate the string byte
        addq    r21, r18, r21           # address of that byte's table entry
        ldq_u   r20, (r21)              # quadword holding the table entry
        extbl   r20, r21, r20           # isolate the table entry
        and     r20, r19, r20           # nonzero when entry and mask share a bit
        beq     r17, sm_final           # that was the last byte: decide the result below
        beq     r20, sm_next            # no shared bit: go on to the next byte
sm_return:
        ret     r31, (r26)              # hit: r0 is its position (or -1 for an empty string)
sm_final:
        lda     r21, -1(r31)            # r21 = -1
        cmoveq  r20, r21, r0            # last byte missed as well: result is -1
        ret     r31, (r26)

        # ots_verify
        #   R16 -> string
        #   R17 = length
        #   R18 -> character set
        #   R19 = character set length
        #   result in R0: -1 if every string character is in the set, otherwise
        #                 the position (0..length-1) of the first one that is not
        #   destroys R0-R1, R16-R23, R27-R28
        #
        # This routine could definitely be improved.  (It should only
        # be necessary to go to memory for every 8th character for both
        # the string and the character set, and for character sets
        # <= 8 characters, it should be possible to simply keep the
        # set in a register while the string is being processed.)
#
        # _OtsStringVerify - find the first string byte that is NOT in a set.
        #   In:   r16 -> string, r17 = string length,
        #         r18 -> character set, r19 = character set length
        #   Out:  r0 = zero-based position of the first string byte that is
        #         not in the set, or -1 if every byte is in the set (an empty
        #         string verifies trivially and yields -1)
        #   A one-byte set is handed to the single-character verify below.
        #
        .globl  _OtsStringVerify
        .aent   _OtsStringVerify
_OtsStringVerify:
        .frame  sp,0,r26

        cmpeq   r19, 1, r0              # check for single-character verify, clear r0 otherwise
        # An empty string has no byte that can fail to verify.  Without this
        # test the loop below would fetch the byte at (r16) regardless and
        # could report a mismatch at position 0.  It is tested before the
        # empty-set case so that empty string + empty set also yields -1.
        # (Occupies the slot of what used to be a filler nop, so the loop
        # alignment is unchanged.)
        ble     r17, v_retm1            # return -1 if the string is empty
        ble     r19, v_ret0             # return 0 if no characters in the match set
        bne     r0, verify_single       # single character verify
        # outer loop - r0 is the position of the string byte being tested
v_outlp:
        ldq_u   r20, (r16)              # load qw containing source byte
        lda     r22, -1(r18)            # initialize character set pointer
        mov     r19, r1                 # initialize character set length counter
        extbl   r20, r16, r20           # extract the source byte to match

        # core brute-force matching loop
v_matlp:
        ldq_u   r23, 1(r22)             # load qw containing character set byte
        lda     r22, 1(r22)             # bump character set pointer
        subq    r1, 1, r1               # decrement remaining cset length
        extbl   r23, r22, r23           # extract character set byte
        xor     r20, r23, r21           # match?
        beq     r21, v_match            # if match, move to the next character
        bgt     r1, v_matlp             # more set characters to try against this byte?
        # if we made it through the whole character set, this is a mismatch
v_ret0:
        ret     r31, (r26)              # r0 = position of the offending byte
v_match:
        # match at current position - advance to next if more positions
        lda     r16, 1(r16)             # bump source pointer
        addq    r0, 1, r0               # increment position
        subq    r17, 1, r17             # decrement match count
        bgt     r17, v_outlp            # if remaining positions, attempt match
v_retm1:
        lda     r0, -1(r31)             # if everything verified, return -1
        ret     r31, (r26)

        # One-byte set: turn the pointer in r18 into the byte itself.
verify_single:
        ldq_u   r19, (r18)              # load the quadword containing the byte
        extbl   r19, r18, r18           # extract the byte of interest
                                        # and fall through to the character verify rtn

        # ots_verify_char
        #   R16 -> string
        #   R17 = length
        #   R18 = character to check
        #   result in R0: -1 if all matched, or position in range 0..length-1
        #   destroys R16-R18, R27-R28
        #
        # Method: the character is replicated into all 8 bytes of r18 and
        # XORed with each string quadword; any nonzero byte of the result is
        # a string byte that differs from the character.
        #
        .globl  _OtsStringVerifyChar
        .aent   _OtsStringVerifyChar
_OtsStringVerifyChar:
        .frame  sp,0,r26

        sll     r18, 8, r28             # Replicate char in the quadword...
        beq     r17, vc_fail            # Quick exit if length=0

        ldq_u   r27, (r16)              # First quadword of string
        addq    r16, r17, r0            # Point to end of string

        subq    r17, 8, r17             # Length > 8?
        or      r18, r28, r18           # ...

        sll     r18, 16, r28            # ...
        bgt     r17, vc_long            # Skip if length > 8

        # Length 1..8: assemble the whole string into one quadword
        ldq_u   r16, -1(r0)             # Last quadword of string
        extql   r27, r0, r27            # Position string at high end of QW

        or      r18, r28, r18           # ...
        sll     r18, 32, r28            # ...

        extqh   r16, r0, r16            # Position string at high end of QW
        or      r18, r28, r18           # Pattern fills a quadword

        or      r27, r16, r27           # String fills a quadword
        xor     r27, r18, r18           # Diff betw. string and pattern

        subq    r31, r17, r17           # 8 - length
        extql   r18, r17, r28           # Shift off bytes preceding string

        lda     r0, -1(r31)             # Prepare to return -1 for all matched
        cmpbge  r31, r28, r27           # Set 1's where string=pattern

        addl    r28, 0, r18             # Is first LW all zero?
        beq     r28, vc_done            # Quick exit if all matched

        addq    r27, 1, r28             # Flip the first '0' bit
        beq     r18, vc_geq_4           # No diffs in first longword

        andnot  r28, r27, r28           # Make one-bit mask of first diff
        srl     r28, 2, r0              # Map 1/2/4/8 -> 0/0/1/2

        and     r27, 1, r27             # 1 if first character matched
        addq    r0, r27, r0             # Bump by 1 if so

        ret     r31, (r26)              # return

        nop                             #.align 8

vc_geq_4:
        andnot  r28, r27, r28           # Make one-bit mask of first diff
        srl     r28, 5, r27             # Map 10/20/40/80 -> 0/1/2/4

        srl     r28, 7, r28             # Map 10/20/40/80 -> 0/0/0/1
        addq    r27, 4, r0              # Bump by 4

        subq    r0, r28, r0             # and correct 4/5/6/8 -> 4/5/6/7
vc_done:
        ret     r31, (r26)

        # Enter here if string length > 8.
        #   R16 -> start of string
        #   R17 = length - 8
        #   R18 = fill in bytes 0,1
        #   R27 = 1st QW of string
        #   R28 = fill in bytes 2,3

        #.align 8
vc_long:
        and     r16, 7, r0              # Where in QW did we start?
        or      r18, r28, r18           # R18 has pattern in low 4 bytes

        sll     r18, 32, r28            # ...
        addq    r17, r0, r17            # Remaining length after 1st QW

        or      r18, r28, r18           # Pattern fills a QW
        ldq_u   r28, 8(r16)             # Get next QW (string B)

        xor     r27, r18, r27           # Diff betw. string and pattern
        mskqh   r27, r0, r27            # Discard diffs before string

        subq    r17, 16, r17            # More than two QW's to go?
        subq    r17, r0, r0             # Remember start point to compute len

        ble     r17, vc_bottom          # Skip the loop if 2 QW's or less

        # Main loop: two quadwords (string A in r27, string B in r28) per trip
vc_loop:
        bne     r27, vc_done_a          # Exit if a diff in string A

        ldq_u   r27, 16(r16)            # Load string A
        xor     r28, r18, r28           # Diff betw string B and pattern

        subq    r17, 16, r17            # Decrement remaining length
        bne     r28, vc_done_b          # Exit if a diff in string B

        ldq_u   r28, 24(r16)            # Load string B
        addq    r16, 16, r16            # Increment pointer

        xor     r27, r18, r27           # Diff betw string A and pattern
        bgt     r17, vc_loop            # Repeat if more than 2 QW's left

vc_bottom:
        bne     r27, vc_done_a          # Exit if a diff in string A
        addq    r17, 8, r17             # More than 1 QW left?

        xor     r28, r18, r27           # Diff betw string B and pattern
        ble     r17, vc_last            # Skip if this is last QW

        subq    r17, 16, r17            # Adjust len for final return
        bne     r27, vc_done_a          # Exit if a diff in string B

        ldq_u   r28, 16(r16)            # Load string A
        addq    r17, 8, r17             # Ensure -7 <= (r17=len-8) <= 0

        nop
        xor     r28, r18, r27           # Diff betw string A and pattern

vc_last:
        mskqh   r17, r17, r28           # -1 in bytes beyond string
        subq    r17, 16, r17            # Adjust len for final return

        andnot  r27, r28, r27           # Nonzeros only for diffs within string
        bne     r27, vc_done_a          # Compute index if a diff found

vc_fail:
        lda     r0, -1(r31)             # Else return -1
        ret     r31, (r26)

vc_done_b:
        addq    r17, 8, r17             # Adjust length
        mov     r28, r27                # Put difference where it's expected

        # r27 = XOR difference for the quadword holding the first diff;
        # convert (start, remaining, difference) into the byte position.
vc_done_a:
        cmpbge  r31, r27, r28           # 1's where they match
        subq    r0, r17, r0             # (start - remaining) = base index

        addl    r27, 0, r16             # First longword all zero?
        blbc    r28, vc_exit            # Return R0 if first char different

        addq    r28, 1, r27             # Flip the first '0' bit
        beq     r16, vc_geq_4x          # Skip if no diff in first 4

        andnot  r27, r28, r28           # Make one-bit mask of first diff
        srl     r28, 2, r28             # Map 2/4/8 -> 0/1/2

        addq    r0, 1, r0               # Bump by 1
        addq    r0, r28, r0             # Add byte offset

vc_exit:
        ret     r31, (r26)              # return

vc_geq_4x:
        andnot  r27, r28, r28           # Make one-bit mask of first diff

        srl     r28, 5, r27             # Map 10/20/40/80 -> 0/1/2/4
        addq    r0, 4, r0               # Bump by 4

        srl     r28, 7, r28             # Map 10/20/40/80 -> 0/0/0/1
        addq    r0, r27, r0             # Add 0/1/2/4

        subq    r0, r28, r0             # and correct
        ret     r31, (r26)

        # ots_verify_mask
        #   In:   r16 -> string, r17 = string length,
        #         r18 -> mask table indexed by character value, r19 = mask
        #   Out:  r0 = position of the first string byte whose table entry
        #         has NO bit in common with the mask, or -1 if there is none
        #
        # This routine could be tailored by loading a longword or
        # a quadword at a time and doing table lookups on the
        # characters largely in parallel.
        #
        .globl  _OtsStringVerifyMask
        .aent   _OtsStringVerifyMask
_OtsStringVerifyMask:
        .frame  sp,0,r26

        lda     r16, -1(r16)            # bias initial address for better loop code
        nop                             # should be lnop (unop) or fnop to dual issue
        lda     r0, -1(r31)             # initialize position to -1
        ble     r17, vm_ret             # return -1 if source len is zero
        # slow way - ~14 cycles/byte
vm_loop:
        ldq_u   r21, 1(r16)             # load qw containing the byte
        lda     r16, 1(r16)             # bump pointer
        addq    r0, 1, r0               # bump position
        subq    r17, 1, r17             # decrement the length
        extbl   r21, r16, r21           # extract the byte
        addq    r21, r18, r21           # get the byte in the table
        ldq_u   r20, (r21)              # load qw from table containing lookup
        extbl   r20, r21, r20           # extract table byte
        and     r20, r19, r20           # check if any bits in the mask match
        beq     r17, vm_end             # if last character, handle specially
        bne     r20, vm_loop            # if match, go do the loop again
vm_ret:
        ret     r31, (r26)              # if not a match, we're done
vm_end:
        lda     r21, -1(r31)            # get -1
        cmovne  r20, r21, r0            # -1 if last char matched
        ret     r31, (r26)

        .set    at
        .set    reorder
        .end    _OtsLocation