summaryrefslogtreecommitdiffstats
path: root/private/crt32/misc/alpha/sloc.s
diff options
context:
space:
mode:
Diffstat (limited to 'private/crt32/misc/alpha/sloc.s')
-rw-r--r--private/crt32/misc/alpha/sloc.s702
1 files changed, 702 insertions, 0 deletions
diff --git a/private/crt32/misc/alpha/sloc.s b/private/crt32/misc/alpha/sloc.s
new file mode 100644
index 000000000..30fda8c6e
--- /dev/null
+++ b/private/crt32/misc/alpha/sloc.s
@@ -0,0 +1,702 @@
+ #++
+ #
+ # Copyright (c) 1993 by
+ # Digital Equipment Corporation, Maynard, MA
+ #
+ # This software is furnished under a license and may be used and copied
+ # only in accordance with the terms of such license and with the
+ # inclusion of the above copyright notice. This software or any other
+ # copies thereof may not be provided or otherwise made available to any
+ # other person. No title to and ownership of the software is hereby
+ # transferred.
+ #
+ # The information in this software is subject to change without notice
+ # and should not be construed as a commitment by Digital Equipment
+ # Corporation.
+ #
+ # Digital assumes no responsibility for the use or reliability of its
+ # software on equipment which is not supplied by Digital.
+ #
+
+ # Facility:
+ #
+ # GEM/OTS - GEM compiler system support library
+ #
+ # Abstract:
+ #
+ # OTS character string support, Alpha version
+ # This module provides support for string index, search, and verify.
+ #
+ # Authors:
+ #
+ # Bill Noyce
+ # Kent Glossop
+ #
+ # long ots_index(const char *str, long strlen, const char *pat, long patlen);
+ #
+ # Searches a string for a substring
+ # returns r0=zero-based position if found, or -1 if not.
+ # Register usage: r0-r1, r16-r23 and r27-r28 ONLY (r26 is ra)
+ #
+ # long ots_search(const char *str, long strlen, const char *cset, long csetlen);
+ #
+ # Searches a string for any character in a set of characters
+ # returns r0=zero-based position if found, or -1 if not.
+ # Register usage: r0-r1, r16-r23 and r27-r28 ONLY (r26 is ra)
+ #
+ # long ots_search_char(const char *str, long strlen, char pat);
+ # (also known as ots_index_char)
+ #
+ # Searches a string for a single pattern character
+ # returns r0=zero-based position if found, or -1 if not.
+ # Register usage: r0, r16-r18 and r27-r28 ONLY (r26 is ra)
+ # (Note: GEM presumes r19 is also killed)
+ #
+ # long ots_search_mask(const char *str, long strlen, const char maskvec[], int mask)
+ #
+ # Searches a string until a character matching at least one bit
+ # in a mask is found in a table (similar to a VAX SCANC instruction.)
+ # returns r0=zero-based position if found, or -1 if not.
+ # Register usage: r0-1, r16-r21 and r27-r28 ONLY (r26 is ra)
+ #
+ # long ots_verify(char *str, long strlen, char *cset, long csetlen);
+ #
+ # Verifies a string against a set of characters
+ # returns r0=zero-based position for mismatch, or -1 if all validate.
+ # Register usage: r0-r1, r16-r23 and r27-r28 ONLY (r26 is ra)
+ #
+ # long ots_verify_char(char *str, long strlen, char pat);
+ #
+ # Verifies a string against a single character
+ # returns r0=zero-based position for mismatch, or -1 if every character matches.
+ # Register usage: r0, r16-r18 and r27-r28 ONLY (r26 is ra)
+ # (Note: GEM presumes r19 is also killed)
+ #
+ # long ots_verify_mask(const char *str, long strlen, const char maskvec[], int mask)
+ #
+ # Verifies a string until a character not matching at least one bit
+ # in a mask is found in a table (similar to a VAX SPANC instruction.)
+ # returns r0=zero-based position if found, or -1 if not.
+ # Register usage: r0-1, r16-r21 and r27-r28 ONLY (r26 is ra)
+ #
+ # Special conventions for all:
+ # No stack space
+ # No linkage pointer required.
+ # (Warning: The auto-loader potentially takes some regs across
+ # the call if this is being used in a shared lib. environment.)
+ #
+ # Modification history:
+ #
+ # 006 28 May 1992 WBN Initial version, replacing BLISS -005
+ #
+ # 007 22 Sep 1992 KDG Add case-sensitive names
+ #
+ # 008 14 Nov 1992 KDG - Merge modules together (allows index/search/verify
+ # to use the single-character versions w/o calls)
+ # - initial multi-character index/search/verify
+ #
+ # 009 4 Dec 1992 KDG Fix bgt that should have been bge (GEM_BUGS #2091)
+ #
+ # 010 26 Jan 1993 KDG Add underscore
+ #
+ # All of the routines other than the single character search/verify could
+ # be significantly improved at some point in the future
+ #--
+
+#include "ots_defs.hs"
+
+ # "Package": one procedure (_OtsLocation) encloses every routine in this
+ # module; each routine below is declared as an alternate entry (.aent).
 .globl _OtsLocation
 .ent _OtsLocation
+_OtsLocation:
+ .set noat # the code below names r28 explicitly as a scratch register
+ .set noreorder # presumably keeps the hand-scheduled instruction order as written - TODO confirm against assembler docs
+
+ # ots_index
+ # Naive substring search: the pattern is compared byte by byte at each
+ # candidate start position of the string, in increasing order. (Only
+ # marginally better than the original compiled code; it should be
+ # tailored to compare up to 8 bytes at a time, particularly for
+ # patterns of <= 8 characters.)
+
+ # register use
+ # r0 - candidate start positions still to be tried, minus 1
+ # r1 - pattern bytes left to compare at the current position
+ # r16 - current candidate start in the string (advances on each mismatch)
+ # r17 - string length
+ # r18 - pattern pointer
+ # r19 - pattern length
+ # r20 - string cursor for the inner loop
+ # r21 - string quadword, then the extracted string byte
+ # r22 - pattern cursor for the inner loop
+ # r23 - pattern quadword, then the extracted pattern byte
+ # r27 - not used by this routine
+ # r28 - not used by this routine
+
+ .globl _OtsStringIndex
+ .aent _OtsStringIndex
+_OtsStringIndex:
+ .frame sp,0,r26
+
+ subq r17, r19, r0 # r0 = strlen - patlen = last legal start position
+ cmpeq r19, 1, r20 # r20 = 1 when the pattern is one character long
+ beq r19, ix_empty # an empty pattern matches at position 0
+ bne r20, search_single # one-character pattern: use the character search
+ blt r0, ix_none # pattern longer than the string: no match possible
+
+ # try the pattern at the position r16 points to
+ix_try:
+ bis r31, r19, r1 # r1 = number of pattern bytes to compare
+ lda r22, -1(r18) # pattern cursor, biased by -1 for the loop
+ lda r20, -1(r16) # string cursor, biased by -1 for the loop
+
+ # compare one string byte against one pattern byte
+ix_cmp:
+ ldq_u r21, 1(r20) # quadword holding the next string byte
+ ldq_u r23, 1(r22) # quadword holding the next pattern byte
+ lda r20, 1(r20) # advance string cursor
+ lda r22, 1(r22) # advance pattern cursor
+ extbl r21, r20, r21 # r21 = string byte
+ extbl r23, r22, r23 # r23 = pattern byte
+ subq r1, 1, r1 # one fewer pattern byte to go
+ xor r21, r23, r21 # zero when the two bytes are equal
+ bne r21, ix_next # bytes differ: move on to the next start position
+ bgt r1, ix_cmp # bytes equal and pattern not exhausted: keep going
+
+ # the whole pattern matched at the current position
+ix_found:
+ subq r17, r19, r1 # r1 = last legal start position
+ subq r1, r0, r0 # minus positions still untried = matching position
+ ret r31, (r26)
+
+ # mismatch: step to the next start position, if any is left
+ix_next:
+ addq r16, 1, r16 # next candidate start
+ subq r0, 1, r0 # one fewer position to try
+ bge r0, ix_try # positions remain: try again
+
+ix_none:
+ subq r31, 1, r0 # return -1 (not found)
+ ret r31, (r26)
+
+ix_empty:
+ bis r31, r31, r0 # return 0
+ ret r31, (r26)
+
+ # ots_search
+ # R16 -> string
+ # R17 = length
+ # R18 -> character set
+ # R19 = character set length
+ # result in R0: position (0..length-1) of the first string character that
+ # is in the set, or -1 if there is none (including empty string or empty set)
+ # destroys R0-R1, R16-R23, R27-R28
+ #
+ # This routine could definitely be improved. (It should only
+ # be necessary to go to memory for every 8th character for both
+ # the string and the character set, and for character sets
+ # <= 8 characters, it should be possible to simply keep the
+ # set in a register while the string is being processed.)
+ #
+ .globl _OtsStringSearch
+ .aent _OtsStringSearch
+_OtsStringSearch:
+ .frame sp,0,r26
+
+ cmpeq r19, 1, r0 # check for single-character search, clear r0 otherwise
+ ble r19, s_retm1 # return -1 if no characters in the match set
+ bne r0, search_single # single character search
+ ble r17, s_retm1 # return -1 for an empty string: without this check the loop
+ # below would examine str[0], which is not part of the string
+ # (this branch occupies the slot of the former alignment nop)
+
+ # outer loop - r0 is the zero-based position of the byte being examined
+s_outlp:
+ ldq_u r20, (r16) # load qw containing source byte
+ lda r22, -1(r18) # initialize character set pointer
+ mov r19, r1 # initialize character set length counter
+ extbl r20, r16, r20 # extract the source byte to match
+
+ # core brute-force matching loop
+s_matlp:
+ ldq_u r23, 1(r22) # load qw containing character set byte
+ lda r22, 1(r22) # bump character set pointer
+ subq r1, 1, r1 # decrement remaining cset length
+ extbl r23, r22, r23 # extract character set byte
+ xor r20, r23, r21 # match?
+ beq r21, s_match # if match, we're done (r0 already holds the position)
+ bgt r1, s_matlp # more set characters to compare against this byte?
+
+ # no match at current position - advance to next if more positions
+ lda r16, 1(r16) # bump source pointer
+ addq r0, 1, r0 # increment position
+ subq r17, 1, r17 # decrement match count
+ bgt r17, s_outlp # if remaining positions, attempt match
+s_retm1:lda r0, -1(r31) # if not, return -1
+s_match:ret r31, (r26)
+
+search_single:
+ ldq_u r19, (r18) # load the quadword containing the single set/pattern byte
+ extbl r19, r18, r18 # extract the byte of interest (r18 now holds the character, zero-extended)
+ # and fall through to the character search rtn
+
+ # ots_search_char (ots_index_char)
+ # r16 -> string
+ # r17 = length
+ # r18 = character to find
+ # result in r0: -1 if not found, or position in range 0..length-1
+ # destroys r0, r16-r18, r27-r28 (and r19 when entered through search_single)
+ # NOTE(review): the replication below ORs shifted copies of r18, so r18 is assumed to hold the character zero-extended - confirm for direct callers
+ .globl _OtsStringSearchChar
+ .aent _OtsStringSearchChar
+_OtsStringSearchChar:
+ .globl _OtsStringIndexChar
+ .aent _OtsStringIndexChar
+_OtsStringIndexChar:
+ .frame sp,0,r26
+search_char:
+ sll r18, 8, r28 # Replicate char in the quadword...
+ beq r17, sc_fail # Quick exit if length=0
+
+ ldq_u r27, (r16) # First quadword of string
+ addq r16, r17, r0 # Point to end of string
+
+ subq r17, 8, r17 # Length > 8? (r17 = length - 8 from here on)
+ or r18, r28, r18 # ...char now in bytes 0,1
+
+ sll r18, 16, r28 # ...copy for bytes 2,3
+ bgt r17, sc_long # Skip if length > 8
+
+ ldq_u r16, -1(r0) # Last quadword of string
+ extql r27, r0, r27 # Low part of the quadword that ends at r0 (string at high end of QW)
+
+ or r18, r28, r18 # ...char now in bytes 0-3
+ sll r18, 32, r28 # ...copy for bytes 4-7
+
+ extqh r16, r0, r16 # High part of the quadword that ends at r0 (string at high end of QW)
+ or r18, r28, r18 # Pattern fills a quadword
+
+ or r27, r16, r27 # String fills a quadword
+ xor r27, r18, r27 # Diff betw. string and pattern (zero byte = equal)
+
+ cmpbge r31, r27, r27 # Set 1's where string=pattern (one bit per byte)
+ subq r31, r17, r17 # Compute 8 - length
+
+ srl r27, r17, r27 # Shift off bits not part of string; bit 0 is now the first character
+ clr r0 # Set return value
+
+ and r27, 0xF, r28 # One of first 4 characters?
+ blbs r27, sc_done # Return 0 if first char matched
+
+ subq r27, 1, r0 # Flip the first '1' bit (r0 = -1 when there is no match at all)
+ beq r28, sc_geq_4 # Skip if no match in first 4
+
+ andnot r27, r0, r0 # Make one-bit mask of first match
+ srl r0, 2, r0 # Map 2/4/8 -> 0/1/2
+
+ # stall
+
+ addq r0, 1, r0 # Bump by 1, giving position 1/2/3
+ ret r31, (r26) # return
+
+sc_geq_4:
+ andnot r27, r0, r28 # Make one-bit mask of first match
+ beq r27, sc_done # Return -1 if there were none (r0 = r27 - 1 = -1 here)
+
+ srl r28, 5, r27 # Map 0x10/0x20/0x40/0x80 -> 0/1/2/4
+ srl r28, 7, r28 # Map 0x10/0x20/0x40/0x80 -> 0/0/0/1
+
+ addq r27, 4, r0 # Bump by 4
+ subq r0, r28, r0 # and correct 4/5/6/8 -> 4/5/6/7
+
+sc_done:ret r31, (r26) # r0 = position, or -1
+
+ # Enter here if string length > 8 (long-string path of the character search).
+ # R16 -> start of string
+ # R17 = length - 8
+ # R18 = fill in bytes 0,1
+ # R27 = 1st QW of string
+ # R28 = fill in bytes 2,3
+
+ #.odd
+sc_long:or r18, r28, r18 # R18 has pattern in low 4 bytes
+
+ sll r18, 32, r28 # ...copy for the high 4 bytes
+ and r16, 7, r0 # Where in QW did we start?
+
+ or r18, r28, r18 # Pattern fills a QW
+ ldq_u r28, 8(r16) # Get next QW (string B)
+
+ xor r27, r18, r27 # Diff betw. string and pattern
+ cmpbge r31, r27, r27 # Set 1's where string=pattern
+
+ addq r17, r0, r17 # Remaining length after 1st QW
+ srl r27, r0, r27 # Discard bits preceding string
+
+ subq r17, 16, r17 # More than two QW's to go?
+ sll r27, r0, r27 # Reposition like other bits
+
+ subq r17, r0, r0 # Remember start point to compute len
+ ble r17, sc_bottom # Skip the loop if 2 QW's or less
+
+ # Main loop: two quadwords (A in r27, B in r28) are handled per iteration
+sc_loop:xor r28, r18, r28 # Diff betw string B and pattern
+ bne r27, sc_done_a # Exit if a match in string A
+
+ cmpbge r31, r28, r28 # 1's where string B = pattern
+ ldq_u r27, 16(r16) # Load string A
+
+ subq r17, 16, r17 # Decrement remaining length
+ bne r28, sc_done_b # Exit if a match in string B
+
+ ldq_u r28, 24(r16) # Load string B
+ addq r16, 16, r16 # Increment pointer
+
+ xor r27, r18, r27 # Diff betw string A and pattern
+ cmpbge r31, r27, r27 # 1's where string A = pattern
+
+ bgt r17, sc_loop # Repeat if more than 2 QW's left
+
+ nop #.align quad
+
+ # At most two quadwords remain: r27 = match bits for A, r28 = raw string B
+sc_bottom:
+ bne r27, sc_done_a # Exit if a match in string A
+ addq r17, 8, r27 # More than 1 QW left?
+
+ xor r28, r18, r28 # Diff betw string B and pattern
+ ble r27, sc_last # Skip if this is last QW
+
+ cmpbge r31, r28, r27 # 1's where string B = pattern
+ ldq_u r28, 16(r16) # Load string A
+
+ subq r17, 8, r17 # Adjust len for final return
+ bne r27, sc_done_a # Exit if a match in string B
+
+ addq r17, 8, r27 # Ensure -7 <= (r27=len-8) <= 0
+ xor r28, r18, r28 # Diff betw string A and pattern
+
+sc_last:mskqh r27, r27, r27 # Nonzero in bytes beyond string (low bytes, inside the string, are cleared)
+ subq r17, 8, r17 # Adjust len for final return
+
+ or r28, r27, r28 # Zeros only for matches within string
+ cmpbge r31, r28, r27 # Where are the matches?
+
+ bne r27, sc_done_a # Compute index if a match found
+sc_fail:lda r0, -1(r31) # Else return -1 (also the length=0 exit)
+
+ ret r31, (r26)
+
+ nop #.align 8
+
+sc_done_b: # match found in string B: normalize to the sc_done_a inputs
+ addq r17, 8, r17 # Adjust length
+ mov r28, r27 # Put mask where it's expected
+
+sc_done_a: # r27 = nonzero per-byte match bits, r0/r17 give the base index
+ subq r0, r17, r0 # (start - remaining) = base index
+ blbs r27, sc_exit # Return R0 if first char matched
+
+ and r27, 0xF, r16 # One of first 4 characters?
+ subq r27, 1, r28 # Flip the first '1' bit
+
+ andnot r27, r28, r28 # Make one-bit mask of first match
+ beq r16, sc_geq_4x # Skip if no match in first 4
+
+ srl r28, 2, r28 # Map 2/4/8 -> 0/1/2
+ addq r0, 1, r0 # Bump by 1
+
+ addq r0, r28, r0 # Add byte offset
+sc_exit:ret r31, (r26) # return
+
+sc_geq_4x:
+ addq r0, 4, r0 # Bump by 4
+ srl r28, 5, r27 # Map 0x10/0x20/0x40/0x80 -> 0/1/2/4
+
+ srl r28, 7, r28 # Map 0x10/0x20/0x40/0x80 -> 0/0/0/1
+ addq r0, r27, r0 # Add 0/1/2/4
+
+ subq r0, r28, r0 # and correct (4 -> 3, so the byte offset is 4/5/6/7)
+ ret r31, (r26)
+
+ # ots_search_mask
+ # Byte-at-a-time scan. Each string character is used as an index into
+ # the table at r18 and the table byte is ANDed with the mask in r19;
+ # the scan stops at the first character whose table byte has a bit in
+ # common with the mask. r0 returns that position, or -1 if there is none.
+ # (This could be tailored by loading a longword or a quadword at a time
+ # and doing the table lookups largely in parallel.)
+ #
+ .globl _OtsStringSearchMask
+ .aent _OtsStringSearchMask
+_OtsStringSearchMask:
+ .frame sp,0,r26
+
+ subq r16, 1, r16 # string pointer biased by -1 so the loop can load from 1(r16)
+ nop # should be lnop (unop) or fnop to dual issue
+ subq r31, 1, r0 # position counter starts at -1
+ ble r17, smk_exit # nothing to scan: return -1
+ # slow way - ~14 cycles/byte
+smk_next:
+ ldq_u r21, 1(r16) # quadword holding the next string byte
+ addq r16, 1, r16 # advance string pointer
+ subq r17, 1, r17 # one fewer byte remaining
+ addq r0, 1, r0 # r0 = position of the byte being examined
+ extbl r21, r16, r21 # r21 = string byte
+ addq r18, r21, r21 # r21 = address of its table entry
+ ldq_u r20, (r21) # quadword holding the table entry
+ extbl r20, r21, r20 # r20 = table byte
+ and r19, r20, r20 # nonzero if the entry shares a bit with the mask
+ beq r17, smk_last # that was the final byte: decide the result below
+ beq r20, smk_next # no mask bit set: examine the next byte
+smk_exit:
+ ret r31, (r26) # match found (r0 = position), or empty string (r0 = -1)
+smk_last:
+ subq r31, 1, r21 # r21 = -1
+ cmoveq r20, r21, r0 # final byte did not match either: return -1
+ ret r31, (r26)
+
+ # ots_verify
+ # R16 -> string
+ # R17 = length
+ # R18 -> character set
+ # R19 = character set length
+ # result in R0: -1 if all matched (always so for an empty string), or
+ # position (0..length-1) of the first string character that is not in the set
+ # destroys R0-R1, R16-R23, R27-R28
+ #
+ # This routine could definitely be improved. (It should only
+ # be necessary to go to memory for every 8th character for both
+ # the string and the character set, and for character sets
+ # <= 8 characters, it should be possible to simply keep the
+ # set in a register while the string is being processed.)
+ #
+ .globl _OtsStringVerify
+ .aent _OtsStringVerify
+_OtsStringVerify:
+ .frame sp,0,r26
+
+ cmpeq r19, 1, r0 # check for single-character verify, clear r0 otherwise
+ ble r17, v_retm1 # empty string: nothing can mismatch, so return -1 without
+ # examining str[0] (this branch occupies the former nop slot)
+ ble r19, v_ret0 # non-empty string, empty set: mismatch at position 0
+ bne r0, verify_single # single character verify
+ # outer loop - r0 is the zero-based position of the byte being examined
+v_outlp:
+ ldq_u r20, (r16) # load qw containing source byte
+ lda r22, -1(r18) # initialize character set pointer
+ mov r19, r1 # initialize character set length counter
+ extbl r20, r16, r20 # extract the source byte to match
+
+ # core brute-force matching loop
+v_matlp:
+ ldq_u r23, 1(r22) # load qw containing character set byte
+ lda r22, 1(r22) # bump character set pointer
+ subq r1, 1, r1 # decrement remaining cset length
+ extbl r23, r22, r23 # extract character set byte
+ xor r20, r23, r21 # match?
+ beq r21, v_match # if match, move to the next character
+ bgt r1, v_matlp # more set characters to compare against this byte?
+ # if we made it through the whole character set, this is a mismatch
+v_ret0: ret r31, (r26) # r0 = position of the mismatching character
+v_match: # match at current position - advance to next if more positions
+ lda r16, 1(r16) # bump source pointer
+ addq r0, 1, r0 # increment position
+ subq r17, 1, r17 # decrement match count
+ bgt r17, v_outlp # if remaining positions, attempt match
+v_retm1:lda r0, -1(r31) # if everything verified, return -1
+ ret r31, (r26)
+
+verify_single:
+ ldq_u r19, (r18) # load the quadword containing the single set byte
+ extbl r19, r18, r18 # extract the byte of interest (r18 now holds the character, zero-extended)
+ # and fall through to the character verify rtn
+
+ # ots_verify_char
+ # R16 -> string
+ # R17 = length
+ # R18 = character to check
+ # result in R0: -1 if all matched, or position of first differing character in range 0..length-1
+ # destroys R0, R16-R18, R27-R28 (and R19 when entered through verify_single)
+ # NOTE(review): the replication below ORs shifted copies of r18, so r18 is assumed to hold the character zero-extended - confirm for direct callers
+ .globl _OtsStringVerifyChar
+ .aent _OtsStringVerifyChar
+_OtsStringVerifyChar:
+ .frame sp,0,r26
+
+ sll r18, 8, r28 # Replicate char in the quadword...
+ beq r17, vc_fail # Quick exit if length=0
+
+ ldq_u r27, (r16) # First quadword of string
+ addq r16, r17, r0 # Point to end of string
+
+ subq r17, 8, r17 # Length > 8? (r17 = length - 8 from here on)
+ or r18, r28, r18 # ...char now in bytes 0,1
+
+ sll r18, 16, r28 # ...copy for bytes 2,3
+ bgt r17, vc_long # Skip if length > 8
+
+ ldq_u r16, -1(r0) # Last quadword of string
+ extql r27, r0, r27 # Low part of the quadword that ends at r0 (string at high end of QW)
+
+ or r18, r28, r18 # ...char now in bytes 0-3
+ sll r18, 32, r28 # ...copy for bytes 4-7
+
+ extqh r16, r0, r16 # High part of the quadword that ends at r0 (string at high end of QW)
+ or r18, r28, r18 # Pattern fills a quadword
+
+ or r27, r16, r27 # String fills a quadword
+ xor r27, r18, r18 # Diff betw. string and pattern (zero byte = equal)
+
+ subq r31, r17, r17 # 8 - length
+ extql r18, r17, r28 # Shift off bytes preceding string; byte 0 is now the first character
+
+ lda r0, -1(r31) # Prepare to return -1 for all matched
+ cmpbge r31, r28, r27 # Set 1's where string=pattern (one bit per byte)
+
+ addl r28, 0, r18 # Is first LW all zero?
+ beq r28, vc_done # Quick exit if all matched
+
+ addq r27, 1, r28 # Flip the first '0' bit
+ beq r18, vc_geq_4 # No diffs in first longword
+
+ andnot r28, r27, r28 # Make one-bit mask of first diff
+ srl r28, 2, r0 # Map 1/2/4/8 -> 0/0/1/2
+
+ and r27, 1, r27 # 1 if first character matched
+ addq r0, r27, r0 # Bump by 1 if so, giving position 0/1/2/3
+
+ ret r31, (r26) # return
+
+ nop #.align 8
+
+vc_geq_4:
+ andnot r28, r27, r28 # Make one-bit mask of first diff
+ srl r28, 5, r27 # Map 0x10/0x20/0x40/0x80 -> 0/1/2/4
+
+ srl r28, 7, r28 # Map 0x10/0x20/0x40/0x80 -> 0/0/0/1
+ addq r27, 4, r0 # Bump by 4
+
+ subq r0, r28, r0 # and correct 4/5/6/8 -> 4/5/6/7
+vc_done:ret r31, (r26) # r0 = position of first diff, or -1
+
+ # Enter here if string length > 8 (long-string path of the character verify).
+ # R16 -> start of string
+ # R17 = length - 8
+ # R18 = fill in bytes 0,1
+ # R27 = 1st QW of string
+ # R28 = fill in bytes 2,3
+
+ #.align 8
+vc_long:and r16, 7, r0 # Where in QW did we start?
+ or r18, r28, r18 # R18 has pattern in low 4 bytes
+
+ sll r18, 32, r28 # ...copy for the high 4 bytes
+ addq r17, r0, r17 # Remaining length after 1st QW
+
+ or r18, r28, r18 # Pattern fills a QW
+ ldq_u r28, 8(r16) # Get next QW (string B)
+
+ xor r27, r18, r27 # Diff betw. string and pattern
+ mskqh r27, r0, r27 # Discard diffs before string
+
+ subq r17, 16, r17 # More than two QW's to go?
+ subq r17, r0, r0 # Remember start point to compute len
+
+ ble r17, vc_bottom # Skip the loop if 2 QW's or less
+vc_loop:bne r27, vc_done_a # Exit if a diff in string A
+
+ ldq_u r27, 16(r16) # Load string A
+ xor r28, r18, r28 # Diff betw string B and pattern
+
+ subq r17, 16, r17 # Decrement remaining length
+ bne r28, vc_done_b # Exit if a diff in string B
+
+ ldq_u r28, 24(r16) # Load string B
+ addq r16, 16, r16 # Increment pointer
+
+ xor r27, r18, r27 # Diff betw string A and pattern
+ bgt r17, vc_loop # Repeat if more than 2 QW's left
+
+vc_bottom:
+ bne r27, vc_done_a # Exit if a diff in string A
+ addq r17, 8, r17 # More than 1 QW left?
+
+ xor r28, r18, r27 # Diff betw string B and pattern
+ ble r17, vc_last # Skip if this is last QW
+
+ subq r17, 16, r17 # Adjust len for final return
+ bne r27, vc_done_a # Exit if a diff in string B
+
+ ldq_u r28, 16(r16) # Load string A
+ addq r17, 8, r17 # Ensure -7 <= (r17=len-8) <= 0
+
+ nop
+ xor r28, r18, r27 # Diff betw string A and pattern
+
+vc_last:mskqh r17, r17, r28 # -1 in bytes beyond string (low bytes, inside the string, are cleared)
+ subq r17, 16, r17 # Adjust len for final return
+
+ andnot r27, r28, r27 # Nonzeros only for diffs within string
+ bne r27, vc_done_a # Compute index if a diff found
+
+vc_fail:lda r0, -1(r31) # Else return -1 (also the length=0 exit)
+ ret r31, (r26)
+
+vc_done_b: # diff found in string B: normalize to the vc_done_a inputs
+ addq r17, 8, r17 # Adjust length
+ mov r28, r27 # Put difference where it's expected
+
+vc_done_a: # r27 = nonzero string-vs-pattern difference, r0/r17 give the base index
+ cmpbge r31, r27, r28 # 1's where they match
+ subq r0, r17, r0 # (start - remaining) = base index
+
+ addl r27, 0, r16 # First longword all zero?
+ blbc r28, vc_exit # Return R0 if first char different
+
+ addq r28, 1, r27 # Flip the first '0' bit
+ beq r16, vc_geq_4x # Skip if no diff in first 4
+
+ andnot r27, r28, r28 # Make one-bit mask of first diff
+ srl r28, 2, r28 # Map 2/4/8 -> 0/1/2
+
+ addq r0, 1, r0 # Bump by 1
+ addq r0, r28, r0 # Add byte offset
+
+vc_exit:ret r31, (r26) # return
+
+vc_geq_4x:
+ andnot r27, r28, r28 # Make one-bit mask of first diff
+
+ srl r28, 5, r27 # Map 0x10/0x20/0x40/0x80 -> 0/1/2/4
+ addq r0, 4, r0 # Bump by 4
+
+ srl r28, 7, r28 # Map 0x10/0x20/0x40/0x80 -> 0/0/0/1
+ addq r0, r27, r0 # Add 0/1/2/4
+
+ subq r0, r28, r0 # and correct (4 -> 3, so the byte offset is 4/5/6/7)
+ ret r31, (r26)
+
+ # ots_verify_mask
+ # Byte-at-a-time scan. Each string character is used as an index into
+ # the table at r18 and the table byte is ANDed with the mask in r19;
+ # the scan stops at the first character whose table byte has no bit in
+ # common with the mask. r0 returns that position, or -1 if every
+ # character passes.
+ # (This could be tailored by loading a longword or a quadword at a time
+ # and doing the table lookups largely in parallel.)
+ #
+ .globl _OtsStringVerifyMask
+ .aent _OtsStringVerifyMask
+_OtsStringVerifyMask:
+ .frame sp,0,r26
+
+ subq r16, 1, r16 # string pointer biased by -1 so the loop can load from 1(r16)
+ nop # should be lnop (unop) or fnop to dual issue
+ subq r31, 1, r0 # position counter starts at -1
+ ble r17, vmk_exit # nothing to scan: return -1
+ # slow way - ~14 cycles/byte
+vmk_next:
+ ldq_u r21, 1(r16) # quadword holding the next string byte
+ addq r16, 1, r16 # advance string pointer
+ subq r17, 1, r17 # one fewer byte remaining
+ addq r0, 1, r0 # r0 = position of the byte being examined
+ extbl r21, r16, r21 # r21 = string byte
+ addq r18, r21, r21 # r21 = address of its table entry
+ ldq_u r20, (r21) # quadword holding the table entry
+ extbl r20, r21, r20 # r20 = table byte
+ and r19, r20, r20 # nonzero if the entry shares a bit with the mask
+ beq r17, vmk_last # that was the final byte: decide the result below
+ bne r20, vmk_next # mask bit set: character passes, examine the next byte
+vmk_exit:
+ ret r31, (r26) # failing character found (r0 = position), or empty string (r0 = -1)
+vmk_last:
+ subq r31, 1, r21 # r21 = -1
+ cmovne r20, r21, r0 # final byte passed as well: return -1
+ ret r31, (r26)
+
+ .set at # undo the .set noat issued at the top of _OtsLocation
+ .set reorder # undo the .set noreorder issued at the top of _OtsLocation
+ .end _OtsLocation # closes the procedure opened by .ent _OtsLocation