Diffstat (limited to 'private/crt32/string/mips')
-rw-r--r-- private/crt32/string/mips/memcmpm.s | 125
-rw-r--r-- private/crt32/string/mips/memcmpt.c | 334
-rw-r--r-- private/crt32/string/mips/memcpym.s | 298
-rw-r--r-- private/crt32/string/mips/memorym.s | 1218
-rw-r--r-- private/crt32/string/mips/memsetm.s | 105
-rw-r--r-- private/crt32/string/mips/memsett.c | 20
-rw-r--r-- private/crt32/string/mips/strcatm.s | 98
-rw-r--r-- private/crt32/string/mips/strchrm.s | 26
-rw-r--r-- private/crt32/string/mips/strchrt.c | 20
-rw-r--r-- private/crt32/string/mips/strcmpm.s | 50
-rw-r--r-- private/crt32/string/mips/strcpym.s | 148
-rw-r--r-- private/crt32/string/mips/strcpyt.c | 23
-rw-r--r-- private/crt32/string/mips/strlenm.s | 19
-rw-r--r-- private/crt32/string/mips/strrchrm.s | 24
-rw-r--r-- private/crt32/string/mips/strrchrt.c | 20
-rw-r--r-- private/crt32/string/mips/wcscmpm.s | 67
-rw-r--r-- private/crt32/string/mips/wcscmpt.c | 62
-rw-r--r-- private/crt32/string/mips/wcscpym.s | 139
-rw-r--r-- private/crt32/string/mips/wcslenm.s | 45
19 files changed, 2841 insertions(+), 0 deletions(-)
diff --git a/private/crt32/string/mips/memcmpm.s b/private/crt32/string/mips/memcmpm.s new file mode 100644 index 000000000..961939432 --- /dev/null +++ b/private/crt32/string/mips/memcmpm.s @@ -0,0 +1,125 @@ +/* ------------------------------------------------------------------ */ +/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */ +/* | Reserved. This software contains proprietary and confidential | */ +/* | information of MIPS and its suppliers. Use, disclosure or | */ +/* | reproduction is prohibited without the prior express written | */ +/* | consent of MIPS. | */ +/* ------------------------------------------------------------------ */ +#ident "$Header" +
+/*
+ * Copyright 1985 by MIPS Computer Systems, Inc.
+ */
+
+/* memcmp(s1, s2, n) */
+
+#include "kxmips.h"
+
+/*
+ * memcmp(src, dst, bcount)
+ *
+ * MINCMP is the minimum number of bytes for which it is worthwhile to
+ * try to align the cmp into word transactions
+ *
+ * Calculating MINCMP
+ * Overhead =~ 15 instructions => 90 cycles
+ * Byte cmp =~ 38 cycles/word
+ * Word cmp =~ 17 cycles/word
+ * Breakeven =~ 16 bytes
+ */
+#define MINCMP 16
+#define NBPW 4
+
+LEAF_ENTRY(memcmp)
+ xor v0,a0,a1
+ blt a2,MINCMP,bytecmp # too short, just byte cmp
+ and v0,NBPW-1
+ subu t8,zero,a0 # number of bytes til aligned
+ bne v0,zero,unalgncmp # src and dst not alignable
+/*
+ * src and dst can be simultaneously word aligned
+ */
+ and t8,NBPW-1
+ subu a2,t8
+ beq t8,zero,wordcmp # already aligned
+ move v0,v1 # lw[lr] don't clear target reg
+ lwr v0,0(a0)
+ lwr v1,0(a1)
+ addu a0,t8
+ addu a1,t8
+ bne v0,v1,cmpne
+
+/*
+ * word cmp loop
+ */
+wordcmp:
+ and a3,a2,~(NBPW-1)
+ subu a2,a3
+ beq a3,zero,bytecmp
+ addu a3,a0 # src1 endpoint
+1: lw v0,0(a0)
+ lw v1,0(a1)
+ addu a0,NBPW # 1st BDSLOT
+ addu a1,NBPW # 2nd BDSLOT (asm doesn't move)
+ bne v0,v1,cmpne
+ bne a0,a3,1b # at least one more word
+ b bytecmp
+
+/*
+ * deal with simultaneously unalignable cmp by aligning one src
+ */
+unalgncmp:
+ subu a3,zero,a1 # calc byte cnt to get src2 aligned
+ and a3,NBPW-1
+ subu a2,a3
+ beq a3,zero,partaligncmp # already aligned
+ addu a3,a0 # src1 endpoint
+1: lbu v0,0(a0)
+ lbu v1,0(a1)
+ addu a0,1
+ addu a1,1
+ bne v0,v1,cmpne
+ bne a0,a3,1b
+
+/*
+ * src unaligned, dst aligned loop
+ */
+partaligncmp:
+ and a3,a2,~(NBPW-1)
+ subu a2,a3
+ beq a3,zero,bytecmp
+ addu a3,a0
+1:
+ lwr v0,0(a0)
+ lwl v0,3(a0)
+ lw v1,0(a1)
+ addu a0,NBPW
+ addu a1,NBPW
+ bne v0,v1,cmpne
+ bne a0,a3,1b
+
+/*
+ * brute force byte cmp loop
+ */
+bytecmp:
+ addu a3,a2,a0 # src1 endpoint; BDSLOT
+ ble a2,zero,cmpdone
+1: lbu v0,0(a0)
+ lbu v1,0(a1)
+ addu a0,1
+ addu a1,1
+ bne v0,v1,cmpne
+ bne a0,a3,1b
+cmpdone:
+ move v0,zero
+ j ra
+
+cmpne:
+ sltu a2,v1,v0
+ bne a2,zero,9f
+ li v0,-1
+ j ra
+9:
+ li v0,1
+ j ra
+.end memcmp diff --git a/private/crt32/string/mips/memcmpt.c b/private/crt32/string/mips/memcmpt.c new file mode 100644 index 000000000..3adb427b9 --- /dev/null +++ b/private/crt32/string/mips/memcmpt.c @@ -0,0 +1,334 @@ +/*
+ * Test memcmp() function.
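+ *
+ * The cases below rely on this implementation returning exactly -1,
+ * 0, or +1; ANSI C guarantees only the sign of the result.  As a
+ * reference model of that convention (a sketch for the reader, not
+ * part of the test):
+ *
+ *	int refcmp(const unsigned char *a, const unsigned char *b, int n)
+ *	{
+ *	    for (; n > 0; n--, a++, b++)
+ *	        if (*a != *b)
+ *	            return (*a < *b) ? -1 : 1;
+ *	    return 0;
+ *	}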
+ */ + +char buffer[100]; +#include <stdio.h> +#include <memory.h> + +#define FALSE 0 +#define TRUE 1 + +#define NTUL 7 +#define TEST16 4 +#define TEST32 8 + +#define BUFSIZE 256 + +void printbuf(char *identifier, char *buf, int length) +{ + int i; + printf("%s = '", identifier); + for (i = 0; i < length; i++) + printf("%c", buf[i]); + printf("'\n"); +} + +void main() +{ + int i, j, n, k, l; + int rc; + char *s1, *s2; + + char TavEqFailed = FALSE; + char TvaEqFailed = FALSE; + char TavltFailed = FALSE; + char TvaltFailed = FALSE; + char TavgtFailed = FALSE; + char TvagtFailed = FALSE; + + char TvveqFailed = FALSE; + char TvvltFailed = FALSE; + char TvvgtFailed = FALSE; + + int Tmisc = 0; + + unsigned long source1_16[TEST16] = { + 0x00003000, + 0x30003000, + 0x30003000, + 0x36003000 + }; + + unsigned long source2_16[TEST16] = { + 0x00003000, + 0x30003000, + 0x30003000, + 0x00000000 + }; + + unsigned long tul[NTUL] = { + 0x35004600, + 0x37004600, + 0x36002f00, + 0x37002f00, + 0x30004600, + 0x30003000, + 0x36003000 + }; + int tul_test[NTUL] = { + -1, + -1, + +1, + +1, + -1, + +1, + 0 + }; + + struct { + double dummy; + char source1[BUFSIZE]; + char source2[BUFSIZE]; + } buffer; + + char source32[32] = "0X0042036C 002477CD BREAK 0x91DF"; + char source[BUFSIZE]; + + for (j = 0; j < BUFSIZE; ) { + for (i = 0; i <= j % 32; i++, j++) { + buffer.source1[j] = source32[i]; + buffer.source2[j] = source32[i]; + } + } + + j = BUFSIZE; + s1 = buffer.source1; + s2 = buffer.source2; + while (j--) { + if (*s1++ != *s2++) { + printf("\n\nbuffer.source1 != buffer.source2, exiting test!!!\n"); + exit(-1); + } + } + + if (memcmp(buffer.source1, buffer.source2, BUFSIZE) != 0) { + printf("\n\tbuffer.source1 != buffer.source2, exiting test!!!\n"); + exit(-1); + } + + /* Test for zero length */ + for (i = 0; i < BUFSIZE; i++ ) { + int l; + + s1 = &(buffer.source1[i]); + s2 = &(buffer.source2[i]); + l = 0; + rc = memcmp(s1, s2, l); + if (rc) { + printf("%s, line #%d: Zero length test failed!!!\n", __FILE__, __LINE__); + break; + } + } + + + for (k = BUFSIZE; k > 0; k-- ) { + for (n = 0; n < k; n++) { + char c; + int l; + int m; + + /* Test with aligned start and variable end */ + if (!TavEqFailed) { + s1 = buffer.source1; + s2 = buffer.source2; + l = k; + rc = memcmp(s1, s2, l); + if (rc != 0) { + printbuf("source1", s1, l); + printbuf("source2", s2, l); + printf("%s, line #%d: %d byte aligned block equal test failed!!!\n", __FILE__, __LINE__, k); + TavEqFailed = TRUE; + } + } + + /* Test with variable start and aligned end */ + if (!TvaEqFailed) { + s1 = &(buffer.source1[n]); + s2 = &(buffer.source2[n]); + l = k - n; + rc = memcmp(s1, s2, l); + if (rc != 0) { + printbuf("source1", s1, l); + printbuf("source2", s2, l); + printf("%s, line #%d: %d byte unaligned block equal test failed!!!\n", __FILE__, __LINE__, k); + TvaEqFailed = TRUE; + } + } + + /* Test with aligned start and variable end */ + s1 = buffer.source1; + s2 = buffer.source2; + l = k - n; + for (m = 0; m < l && !TavltFailed; m++) { + c = s1[m]; + s1[m] -= 1; + rc = memcmp(s1, s2, l); + if (rc != -1) { + printbuf("source1", s1, l); + printbuf("source2", s2, l); + printf("%s, line #%d: %d byte aligned block less than test failed!!!\n", __FILE__, __LINE__, k); + TavltFailed = TRUE; + } + s1[m] = c; + } + + /* Test with variable start and aligned end */ + s1 = &(buffer.source1[n]); + s2 = &(buffer.source2[n]); + l = k - n; + for (m = 0; m < l && !TvaltFailed; m++) { + c = s1[m]; + s1[m] -= 1; + rc = memcmp(s1, s2, l); + if (rc != -1) { + printbuf("source1", 
s1, l); + printbuf("source2", s2, l); + printf("%s, line #%d: %d byte unaligned block less than test failed!!!\n", __FILE__, __LINE__, k); + TvaltFailed = TRUE; + } + s1[m] = c; + } + + /* Test with aligned start and variable end */ + s1 = buffer.source1; + s2 = buffer.source2; + l = k - n; + for (m = 0; m < l && !TavgtFailed; m++) { + c = s1[m]; + s1[m] += 1; + rc = memcmp(s1, s2, l); + if (rc != 1) { + printbuf("source1", s1, l); + printbuf("source2", s2, l); + printf("%s, line #%d: %d byte aligned block greater than test failed!!!\n", __FILE__, __LINE__, k); + TavgtFailed = TRUE; + } + s1[m] = c; + } + + /* Test with variable start and aligned end */ + s1 = &(buffer.source1[n]); + s2 = &(buffer.source2[n]); + l = k - n; + for (m = 0; m < l && !TvagtFailed; m++) { + c = s1[m]; + s1[m] += 1; + rc = memcmp(s1, s2, l); + if (rc != 1) { + printbuf("source1", s1, l); + printbuf("source2", s2, l); + printf("%s, line #%d: %d byte unaligned block greater than test failed!!!\n", __FILE__, __LINE__, k); + TvagtFailed = TRUE; + } + s1[m] = c; + } + } + } + + for (k = BUFSIZE; k > 0; k-- ) { + for (n = 0; n < k/2; n++) { + char c; + int m; + + /* Test equal with variable start and end */ + if (!TvveqFailed) { + l = k - 2*n; + s1 = &(buffer.source1[n]); + s2 = &(buffer.source2[n]); + rc = memcmp(s1, s2, l); + if (rc != 0) { + printbuf("source1", s1, l); + printbuf("source2", s2, l); + printf("%s, line #%d: %d byte variable block equal test failed!!!\n", __FILE__, __LINE__, l); + TvveqFailed = TRUE; + } + } + + /* Test less than with variable start and end */ + l = k - 2*n; + s1 = buffer.source1; + s2 = buffer.source2; + for (m = 0; m < l && !TvvltFailed; m++) { + c = s1[m]; + s1[m] -= 1; + rc = memcmp(s1, s2, l); + if (rc != -1) { + printbuf("source1", s1, l); + printbuf("source2", s2, l); + printf("%s, line #%d: %d byte variable block less than test failed!!!\n", __FILE__, __LINE__, l); + TvvltFailed = TRUE; + } + s1[m] = c; + } + + /* Test greater than with variable start and end */ + l = k - 2*n; + s1 = buffer.source1; + s2 = buffer.source2; + for (m = 0; m < l && !TvvgtFailed; m++) { + c = s1[m]; + s1[m] += 1; + rc = memcmp(s1, s2, l); + if (rc != 1) { + printbuf("source1", s1, l); + printbuf("source2", s2, l); + printf("%s, line #%d: %d byte variable block greater than test failed!!!\n", __FILE__, __LINE__, l); + TvvgtFailed = TRUE; + } + s1[m] = c; + } + } + } + + + /* Misc test1 */ + for (k = 0; k < NTUL; k++) { + + source2_16[3] = tul[k]; + + rc = memcmp(source1_16,source2_16,TEST16*sizeof(unsigned long)); + if (rc != tul_test[k]) { + + printf("source1_16 = "); + for (i = 0; i < TEST16*sizeof(unsigned long); i++) + printf("%2.2x ", ((char *)source1_16)[i]); + printf("\n"); + + printf("source2_16 = "); + for (i = 0; i < TEST16*sizeof(unsigned long); i++) + printf("%2.2x ", ((char *)source2_16)[i]); + printf("%s, line #%d: Misc Test #1, case #%d of %d failed!!!\n", __FILE__, __LINE__, k+1, NTUL); + printf("Return Code = %d, Should be = %d\n",rc,tul_test[k]); + Tmisc++; + } + } + + + /* Misc test2 */ + l = 32; + buffer.source2[0] = '"'; + for (i = 0; i < l; i++) { + buffer.source1[i] = source32[i]; + buffer.source2[i+1] = source32[i]; + } + buffer.source2[l+1] = '"'; + s1 = &(buffer.source1[0]); + s2 = &(buffer.source2[1]); + if (0 != memcmp(s1, s2, l)) { + printbuf("source1", s1, l); + printbuf("source2", s2, l); + printf("%s, line #%d: Misc Test #2 failed!!!\n", __FILE__, __LINE__); + Tmisc++; + } + + + rc = TavEqFailed + TvaEqFailed + TavltFailed + TvaltFailed + TavgtFailed + TvagtFailed + 
TvveqFailed + TvvltFailed + TvvgtFailed + Tmisc; + if (rc) { + printf("\n\tMEMCMP failed %d tests!!!\n", rc); + exit(rc); + } else { + printf("\n\tMEMCMP passed all tests!!!\n"); + exit(0); + } +} diff --git a/private/crt32/string/mips/memcpym.s b/private/crt32/string/mips/memcpym.s new file mode 100644 index 000000000..ca0f8fe78 --- /dev/null +++ b/private/crt32/string/mips/memcpym.s @@ -0,0 +1,298 @@ +/* + * Fast bcopy code which supports overlapped copies. + * Not fully optimized yet. + * + * Written by: Kipp Hickman + * + * $Source: /proj/sherwood/isms/irix/lib/libc/src/strings/RCS/bcopy.s,v $ + * $Revision: 1.7 $ + * $Date: 1993/11/20 19:23:11 $ + */ + +#include <kxmips.h> + +/* + * char *bcopy(from, to, count); + * unsigned char *from, *to; + * unsigned long count; + * + * OR + * + * void *memcpy/memmove(to, from, count); + * void *to, *from; + * unsigned long count; + * + * Both functions return "to" + */ + +#define MINCOPY 16 + +/* registers used */ + +#define to a0 +#define from a1 +#define count a2 + +LEAF_ENTRY(memcpy) +ALTERNATE_ENTRY(memmove) + move a3,to # Save to in a3 + beq count,zero,ret # Test for zero count + beq from,to,ret # Test for from == to + + /* use backwards copying code if the from and to regions overlap */ + blt to,from,goforwards # If to < from then use forwards copy + add v0,from,count # v0 := from + count + bge to,v0,goforwards # If to >= from + count; no overlap + b gobackwards # Oh well, go backwards + +/*****************************************************************************/ + +/* + * Forward copy code. Check for pointer alignment and try to get both + * pointers aligned on a long boundary. + */ +goforwards: + /* small byte counts use byte at a time copy */ + blt count,MINCOPY,forwards_bytecopy + and v0,from,3 # v0 := from & 3 + and v1,to,3 # v1 := to & 3 + beq v0,v1,forwalignable # low bits are identical +/* + * Byte at a time copy code. This is used when the pointers are not + * alignable, when the byte count is small, or when cleaning up any + * remaining bytes on a larger transfer. + */ +forwards_bytecopy: + beq count,zero,ret # If count is zero, then we are done + addu v1,from,count # v1 := from + count + +99: lb v0,0(from) # v0 = *from + addu from,1 # advance pointer + sb v0,0(to) # Store byte + addu to,1 # advance pointer + bne from,v1,99b # Loop until done +ret: move v0,a3 # Set v0 to old "to" pointer + j ra # return to caller + +/* + * Pointers are alignable, and may be aligned. Since v0 == v1, we need only + * check what value v0 has to see how to get aligned. Also, since we have + * eliminated tiny copies, we know that the count is large enough to + * encompass the alignment copies. 
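+ *
+ * A rough C picture of the alignment prologue below (a sketch; here
+ * both pointers have the same low two address bits, so one skew
+ * serves for both):
+ *
+ *	unsigned skew = (unsigned)from & 3;
+ *	if (skew) {
+ *	    int head = 4 - skew;	/* 1, 2, or 3 bytes */
+ *	    count -= head;
+ *	    while (head--) *to++ = *from++;
+ *	}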
+ */ +forwalignable: + beq v0,zero,forwards # If v0==v1 && v0==0 then aligned + beq v0,1,forw_copy3 # Need to copy 3 bytes to get aligned + beq v0,2,forw_copy2 # Need to copy 2 bytes to get aligned + +/* need to copy 1 byte */ + lb v0,0(from) # get one byte + addu from,1 # advance pointer + sb v0,0(to) # store one byte + addu to,1 # advance pointer + subu count,1 # and reduce count + b forwards # Now pointers are aligned + +/* need to copy 2 bytes */ +forw_copy2: + lh v0,0(from) # get one short + addu from,2 # advance pointer + sh v0,0(to) # store one short + addu to,2 # advance pointer + subu count,2 # and reduce count + b forwards + +/* need to copy 3 bytes */ +forw_copy3: + lb v0,0(from) # get one byte + lh v1,1(from) # and one short + addu from,3 # advance pointer + sb v0,0(to) # store one byte + sh v1,1(to) # and one short + addu to,3 # advance pointer + subu count,3 # and reduce count + /* FALLTHROUGH */ +/* + * Once we are here, the pointers are aligned on long boundaries. + * Begin copying in large chunks. + */ +forwards: + +/* 32 byte at a time loop */ +forwards_32: + blt count,32,forwards_16 # do 16 bytes at a time + lw v0,0(from) + lw v1,4(from) + lw t0,8(from) + lw t1,12(from) + lw t2,16(from) + lw t3,20(from) + lw t4,24(from) + lw t5,28(from) # Fetch 8*4 bytes + addu from,32 # advance from pointer now + sw v0,0(to) + sw v1,4(to) + sw t0,8(to) + sw t1,12(to) + sw t2,16(to) + sw t3,20(to) + sw t4,24(to) + sw t5,28(to) # Store 8*4 bytes + addu to,32 # advance to pointer now + subu count,32 # Reduce count + b forwards_32 # Try some more + +/* 16 byte at a time loop */ +forwards_16: + blt count,16,forwards_4 # Do rest in words + lw v0,0(from) + lw v1,4(from) + lw t0,8(from) + lw t1,12(from) + addu from,16 # advance from pointer now + sw v0,0(to) + sw v1,4(to) + sw t0,8(to) + sw t1,12(to) + addu to,16 # advance to pointer now + subu count,16 # Reduce count + b forwards_16 # Try some more + +/* 4 bytes at a time loop */ +forwards_4: + blt count,4,forwards_bytecopy # Do rest + lw v0,0(from) + addu from,4 # advance pointer + sw v0,0(to) + addu to,4 # advance pointer + subu count,4 + b forwards_4 + +/*****************************************************************************/ + +/* + * Backward copy code. Check for pointer alignment and try to get both + * pointers aligned on a long boundary. + */ +gobackwards: + add from,count # Advance to end + 1 + add to,count # Advance to end + 1 + + /* small byte counts use byte at a time copy */ + blt count,MINCOPY,backwards_bytecopy + and v0,from,3 # v0 := from & 3 + and v1,to,3 # v1 := to & 3 + beq v0,v1,backalignable # low bits are identical +/* + * Byte at a time copy code. This is used when the pointers are not + * alignable, when the byte count is small, or when cleaning up any + * remaining bytes on a larger transfer. + */ +backwards_bytecopy: + beq count,zero,ret # If count is zero quit + subu from,1 # Reduce by one (point at byte) + subu to,1 # Reduce by one (point at byte) + subu v1,from,count # v1 := original from - 1 + +99: lb v0,0(from) # v0 = *from + subu from,1 # backup pointer + sb v0,0(to) # Store byte + subu to,1 # backup pointer + bne from,v1,99b # Loop until done + move v0,a3 # Set v0 to old "to" pointer + j ra # return to caller + +/* + * Pointers are alignable, and may be aligned. Since v0 == v1, we need only + * check what value v0 has to see how to get aligned. Also, since we have + * eliminated tiny copies, we know that the count is large enough to + * encompass the alignment copies. 
+ */ +backalignable: + beq v0,zero,backwards # If v0==v1 && v0==0 then aligned + beq v0,3,back_copy3 # Need to copy 3 bytes to get aligned + beq v0,2,back_copy2 # Need to copy 2 bytes to get aligned + +/* need to copy 1 byte */ + lb v0,-1(from) # get one byte + subu from,1 # backup pointer + sb v0,-1(to) # store one byte + subu to,1 # backup pointer + subu count,1 # and reduce count + b backwards # Now pointers are aligned + +/* need to copy 2 bytes */ +back_copy2: + lh v0,-2(from) # get one short + subu from,2 # backup pointer + sh v0,-2(to) # store one short + subu to,2 # backup pointer + subu count,2 # and reduce count + b backwards + +/* need to copy 3 bytes */ +back_copy3: + lb v0,-1(from) # get one byte + lh v1,-3(from) # and one short + subu from,3 # backup pointer + sb v0,-1(to) # store one byte + sh v1,-3(to) # and one short + subu to,3 # backup pointer + subu count,3 # and reduce count + /* FALLTHROUGH */ +/* + * Once we are here, the pointers are aligned on long boundaries. + * Begin copying in large chunks. + */ +backwards: + +/* 32 byte at a time loop */ +backwards_32: + blt count,32,backwards_16 # do 16 bytes at a time + lw v0,-4(from) + lw v1,-8(from) + lw t0,-12(from) + lw t1,-16(from) + lw t2,-20(from) + lw t3,-24(from) + lw t4,-28(from) + lw t5,-32(from) # Fetch 8*4 bytes + subu from,32 # backup from pointer now + sw v0,-4(to) + sw v1,-8(to) + sw t0,-12(to) + sw t1,-16(to) + sw t2,-20(to) + sw t3,-24(to) + sw t4,-28(to) + sw t5,-32(to) # Store 8*4 bytes + subu to,32 # backup to pointer now + subu count,32 # Reduce count + b backwards_32 # Try some more + +/* 16 byte at a time loop */ +backwards_16: + blt count,16,backwards_4 # Do rest in words + lw v0,-4(from) + lw v1,-8(from) + lw t0,-12(from) + lw t1,-16(from) + subu from,16 # backup from pointer now + sw v0,-4(to) + sw v1,-8(to) + sw t0,-12(to) + sw t1,-16(to) + subu to,16 # backup to pointer now + subu count,16 # Reduce count + b backwards_16 # Try some more + +/* 4 byte at a time loop */ +backwards_4: + blt count,4,backwards_bytecopy # Do rest + lw v0,-4(from) + subu from,4 # backup from pointer + sw v0,-4(to) + subu to,4 # backup to pointer + subu count,4 # Reduce count + b backwards_4 +.end memcpy diff --git a/private/crt32/string/mips/memorym.s b/private/crt32/string/mips/memorym.s new file mode 100644 index 000000000..6f98423dc --- /dev/null +++ b/private/crt32/string/mips/memorym.s @@ -0,0 +1,1218 @@ +// TITLE("Compare, Move, and Fill Memory Support") +//++ +// +// Copyright (c) 1990 Microsoft Corporation +// +// Module Name: +// +// memory.s +// +// Abstract: +// +// This module implements functions to compare, move, zero, and fill +// blocks of memory. If the memory is aligned, then these functions +// are very efficient. +// +// N.B. These routines MUST preserve all floating state since they are +// frequently called from interrupt service routines that normally +// do not save or restore floating state. +// +// Author: +// +// David N. Cutler (davec) 11-Apr-1990 +// +// Environment: +// +// User or Kernel mode. +// +// Revision History: +// 02/02/94 RDL This is a cloned version of ntos\rtl\mips\xxmvmem.s +// Used RtlMoveMemory and RtlFillMemory. +// 02/15/94 RDL Used RtlCompareMemory, changed return code for memcmp. +// 02/22/94 RDL Fixed memcmp, zero length and equal aligned 32-byte +// buffers return wrong code. 
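+//
+// A C model of the decomposition shared by these routines (a sketch,
+// not the shipped code): peel bytes until aligned, stream unrolled
+// 32-byte blocks, then finish 4-byte and 1-byte residuals:
+//
+//     residual = length & 31;        /* bytes outside 32-byte blocks */
+//     blocks   = length - residual;  /* handled 8 words per iteration */
+//     words    = residual & ~3;      /* then a single-word loop */
+//     bytes    = residual & 3;       /* then a byte loop */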
+//
+//--
+
+#include "ksmips.h"
+ SBTTL("Compare Memory")
+
+//++
+//
+// ULONG
+// RtlCompareMemory (
+// IN PVOID Source1,
+// IN PVOID Source2,
+// IN ULONG Length
+// )
+//
+// Routine Description:
+//
+// This function compares two blocks of memory and returns a value
+// that indicates how they collate.
+//
+// Arguments:
+//
+// Source1 (a0) - Supplies a pointer to the first block of memory to
+// compare.
+//
+// Source2 (a1) - Supplies a pointer to the second block of memory to
+// compare.
+//
+// Length (a2) - Supplies the length, in bytes, of the memory to be
+// compared.
+//
+// Return Value:
+//
+// zero if source1 == source2
+// -1 if source1 < source2
+// 1 if source1 > source2
+//
+//--
+
+ LEAF_ENTRY(memcmp)
+
+ addu a3,a0,a2 // compute ending address of source1
+ move v0,a2 // save length of comparison
+ and t0,a2,32 - 1 // isolate residual bytes
+ subu t1,a2,t0 // subtract out residual bytes
+ addu t4,a0,t1 // compute ending block address
+ beq zero,t1,100f // if eq, no 32-byte block to compare
+ or t0,a0,a1 // merge and isolate alignment bits
+ and t0,t0,0x3 //
+ bne zero,t0,CompareUnaligned // if ne, unaligned comparison
+
+//
+// Compare memory aligned.
+//
+
+CompareAligned: //
+
+ .set noreorder
+10: lw t0,0(a0) // compare 32-byte block
+ lw t1,0(a1) //
+ lw t2,4(a0) //
+ bne t0,t1,90f // if ne, first word not equal
+ lw t3,4(a1) //
+ lw t0,8(a0) //
+ bne t2,t3,20f // if ne, second word not equal
+ lw t1,8(a1) //
+ lw t2,12(a0) //
+ bne t0,t1,30f // if ne, third word not equal
+ lw t3,12(a1) //
+ lw t0,16(a0) //
+ bne t2,t3,40f // if ne, fourth word not equal
+ lw t1,16(a1) //
+ lw t2,20(a0) //
+ bne t0,t1,50f // if ne, fifth word not equal
+ lw t3,20(a1) //
+ lw t0,24(a0) //
+ bne t2,t3,60f // if ne, sixth word not equal
+ lw t1,24(a1) //
+ lw t2,28(a0) //
+ bne t0,t1,70f // if ne, seventh word not equal
+ lw t3,28(a1) //
+ addu a0,a0,32 // advance source1 to next block
+ bne t2,t3,80f // if ne, eighth word not equal
+ nop //
+ bne a0,t4,10b // if ne, more 32-byte blocks to compare
+ addu a1,a1,32 // update source2 address
+ .set reorder
+
+ subu a2,a3,a0 // compute remaining bytes
+ b 100f //
+
+//
+// Compare memory unaligned.
+//
+
+CompareUnaligned: //
+ and t0,a0,0x3 // isolate source1 alignment
+ bne zero,t0,CompareUnalignedS1 // if ne, source1 unaligned
+
+//
+// Source1 is aligned and Source2 is unaligned.
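+// (The lwr/lwl pairs below assemble each unaligned source2 word in a
+// register: on little-endian MIPS, lwr t1,0(a1) supplies the low-order
+// bytes and lwl t1,3(a1) the high-order bytes of one unaligned word,
+// reading only the two aligned words that overlap it.)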
+// + +CompareUnalignedS2: // + + .set noreorder +10: lw t0,0(a0) // compare 32-byte block + lwr t1,0(a1) // + lwl t1,3(a1) // + lw t2,4(a0) // + bne t0,t1,90f // if ne, first word not equal + lwr t3,4(a1) // + lwl t3,7(a1) // + lw t0,8(a0) // + bne t2,t3,20f // if ne, second word not equal + lwr t1,8(a1) // + lwl t1,11(a1) // + lw t2,12(a0) // + bne t0,t1,30f // if ne, third word not equal + lwr t3,12(a1) // + lwl t3,15(a1) // + lw t0,16(a0) // + bne t2,t3,40f // if ne, fourth word not equal + lwr t1,16(a1) // + lwl t1,19(a1) // + lw t2,20(a0) // + bne t0,t1,50f // if ne, fifth word not equal + lwr t3,20(a1) // + lwl t3,23(a1) // + lw t0,24(a0) // + bne t2,t3,60f // if ne, sixth word not equal + lwr t1,24(a1) // + lwl t1,27(a1) // + lw t2,28(a0) // + bne t0,t1,70f // if ne, seventh word not equal + lwr t3,28(a1) // + lwl t3,31(a1) // + addu a0,a0,32 // advance source1 to next block + bne t2,t3,80f // if ne, eighth word not equal + nop // + bne a0,t4,10b // if ne, more 32-byte blocks to compare + addu a1,a1,32 // update source2 address + .set reorder + + subu a2,a3,a0 // compute remaining bytes + b 100f // + +// +// Source1 is unaligned, check Source2 alignment. +// + +CompareUnalignedS1: // + and t0,a1,0x3 // isolate Source2 alignment + bne zero,t0,CompareUnalignedS1AndS2 // if ne, Source2 unaligned + +// +// Source1 is unaligned and Source2 is aligned. +// + + .set noreorder +10: lwr t0,0(a0) // compare 32-byte block + lwl t0,3(a0) // + lw t1,0(a1) // + lwr t2,4(a0) // + lwl t2,7(a0) // + bne t0,t1,90f // if ne, first word not equal + lw t3,4(a1) // + lwr t0,8(a0) // + lwl t0,11(a0) // + bne t2,t3,20f // if ne, second word not equal + lw t1,8(a1) // + lwr t2,12(a0) // + lwl t2,15(a0) // + bne t0,t1,30f // if ne, third word not equal + lw t3,12(a1) // + lwr t0,16(a0) // + lwl t0,19(a0) // + bne t2,t3,40f // if ne, fourth word not equal + lw t1,16(a1) // + lwr t2,20(a0) // + lwl t2,23(a0) // + bne t0,t1,50f // if ne, fifth word not equal + lw t3,20(a1) // + lwr t0,24(a0) // + lwl t0,27(a0) // + bne t2,t3,60f // if ne, sixth word not equal + lw t1,24(a1) // + lwr t2,28(a0) // + lwl t2,31(a0) // + bne t0,t1,70f // if ne, seventh word not equal + lw t3,28(a1) // + addu a0,a0,32 // advance source1 to next block + bne t2,t3,80f // if ne, eighth word not equal + nop // + bne a0,t4,10b // if ne, more 32-byte blocks to compare + addu a1,a1,32 // update source2 address + .set reorder + + subu a2,a3,a0 // compute remaining bytes + b 100f // + +// +// Source1 and Source2 are unaligned. 
+//
+
+CompareUnalignedS1AndS2: //
+
+ .set noreorder
+10: lwr t0,0(a0) // compare 32-byte block
+ lwl t0,3(a0) //
+ lwr t1,0(a1) //
+ lwl t1,3(a1) //
+ lwr t2,4(a0) //
+ lwl t2,7(a0) //
+ bne t0,t1,90f // if ne, first word not equal
+ lwr t3,4(a1) //
+ lwl t3,7(a1) //
+ lwr t0,8(a0) //
+ lwl t0,11(a0) //
+ bne t2,t3,20f // if ne, second word not equal
+ lwr t1,8(a1) //
+ lwl t1,11(a1) //
+ lwr t2,12(a0) //
+ lwl t2,15(a0) //
+ bne t0,t1,30f // if ne, third word not equal
+ lwr t3,12(a1) //
+ lwl t3,15(a1) //
+ lwr t0,16(a0) //
+ lwl t0,19(a0) //
+ bne t2,t3,40f // if ne, fourth word not equal
+ lwr t1,16(a1) //
+ lwl t1,19(a1) //
+ lwr t2,20(a0) //
+ lwl t2,23(a0) //
+ bne t0,t1,50f // if ne, fifth word not equal
+ lwr t3,20(a1) //
+ lwl t3,23(a1) //
+ lwr t0,24(a0) //
+ lwl t0,27(a0) //
+ bne t2,t3,60f // if ne, sixth word not equal
+ lwr t1,24(a1) //
+ lwl t1,27(a1) //
+ lwr t2,28(a0) //
+ lwl t2,31(a0) //
+ bne t0,t1,70f // if ne, seventh word not equal
+ lwr t3,28(a1) //
+ lwl t3,31(a1) //
+ addu a0,a0,32 // advance source1 to next block
+ bne t2,t3,80f // if ne, eighth word not equal
+ nop //
+ bne a0,t4,10b // if ne, more 32-byte blocks to compare
+ addu a1,a1,32 // update source2 address
+ .set reorder
+
+ subu a2,a3,a0 // compute remaining bytes
+ b 100f //
+
+//
+// Adjust source1 and source2 pointers dependent on position of miscompare in
+// block.
+//
+
+20: addu a0,a0,4 // mismatch on second word
+ addu a1,a1,4 //
+ b 90f //
+
+30: addu a0,a0,8 // mismatch on third word
+ addu a1,a1,8 //
+ b 90f //
+
+40: addu a0,a0,12 // mismatch on fourth word
+ addu a1,a1,12 //
+ b 90f //
+
+50: addu a0,a0,16 // mismatch on fifth word
+ addu a1,a1,16 //
+ b 90f //
+
+60: addu a0,a0,20 // mismatch on sixth word
+ addu a1,a1,20 //
+ b 90f //
+
+70: addu a0,a0,24 // mismatch on seventh word
+ addu a1,a1,24 //
+ b 90f //
+
+80: subu a0,a0,4 // mismatch on eighth word
+ addu a1,a1,28 //
+90: subu a2,a3,a0 // compute remaining bytes
+
+//
+// Compare 1-byte blocks.
+//
+
+100: addu t2,a0,a2 // compute ending block address
+ beq zero,a2,120f // if eq, buffers equal
+110: lb t0,0(a0) // compare 1-byte block
+ lb t1,0(a1) //
+ addu a1,a1,1 // advance pointers to next block
+ bne t0,t1,130f // if ne, byte not equal
+ addu a0,a0,1 //
+ bne a0,t2,110b // if ne, more 1-byte blocks to compare
+
+120: move v0,zero // source1 == source2
+ j ra // return
+
+130: sltu v0,t1,t0 // compare source1 to source2
+ beq v0,zero,140f
+ j ra // return, source1 > source2
+140:
+ li v0,-1
+ j ra // return, source1 < source2
+
+ .end memcmp
+
+ SBTTL("Move Memory")
+//++
+//
+// VOID
+// RtlMoveMemory (
+// IN PVOID Destination,
+// IN PVOID Source,
+// IN ULONG Length
+// )
+//
+// Routine Description:
+//
+// This function moves memory either forward or backward, aligned or
+// unaligned, in 32-byte blocks, followed by 4-byte blocks, followed
+// by any remaining bytes.
+//
+// Arguments:
+//
+// Destination (a0) - Supplies a pointer to the destination address of
+// the move operation.
+//
+// Source (a1) - Supplies a pointer to the source address of the move
+// operation.
+//
+// Length (a2) - Supplies the length, in bytes, of the memory to be moved.
+//
+// Return Value:
+//
+// None.
+//
+// N.B. The C runtime entry points memmove and memcpy are equivalent to
+// RtlMoveMemory; thus alternate entry points are provided for these
+// routines.
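+//
+// A minimal usage sketch of the overlap guarantee (standard memmove
+// semantics, not code from this module):
+//
+//     char b[8] = "abcdef";
+//     memmove(b + 1, b, 6);       // b becomes "aabcdef"; the overlap
+//                                 // forces the backward-copy path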
+//-- + + LEAF_ENTRY(memmove) + j memcpy + .end memmove + + LEAF_ENTRY(memcpy) + + move v0,a0 // return destination + +// +// If the source address is less than the destination address and source +// address plus the length of the move is greater than the destination +// address, then the source and destination overlap such that the move +// must be performed backwards. +// + +10: bgeu a1,a0,MoveForward // if geu, no overlap possible + addu t0,a1,a2 // compute source ending address + bgtu t0,a0,MoveBackward // if gtu, source and destination overlap + +// +// Move memory forward aligned and unaligned. +// + +MoveForward: // + sltu t0,a2,4 // check if less than four bytes + bne zero,t0,50f // if ne, less than four bytes to move + xor t0,a0,a1 // compare alignment bits + and t0,t0,0x3 // isolate alignment comparison + bne zero,t0,MoveForwardUnaligned // if ne, incompatible alignment + +// +// Move memory forward aligned. +// + +MoveForwardAligned: // + subu t0,zero,a0 // compute bytes until aligned + and t0,t0,0x3 // isolate residual byte count + subu a2,a2,t0 // reduce number of bytes to move + beq zero,t0,10f // if eq, already aligned + lwr t1,0(a1) // move unaligned bytes + swr t1,0(a0) // + addu a0,a0,t0 // align destination address + addu a1,a1,t0 // align source address + +// +// Check for 32-byte blocks to move. +// + +10: and t0,a2,32 - 1 // isolate residual bytes + subu t1,a2,t0 // subtract out residual bytes + addu t8,a0,t1 // compute ending block address + beq zero,t1,30f // if eq, no 32-byte block to zero + move a2,t0 // set residual number of bytes + +// +// Move 32-byte blocks. +// + +#if defined(R4000) + + and t0,a0,1 << 2 // check if destination quadword aligned + beq zero,t0,15f // if eq, destination quadword aligned + lw t0,0(a1) // get source longword + addu a1,a1,4 // align source address + sw t0,0(a0) // store destination longword + addu a0,a0,4 // align destination address + addu a2,a2,t1 // recompute bytes to move + subu a2,a2,4 // reduce count by 4 + b 10b // + +// +// The destination is quadword aligned, check the source operand. +// + +15: and t0,a1,1 << 2 // check if source quadword aligned + beq zero,t0,22f // if eq, source quadword aligned + +// +// The source is longword aligned and the destination is quadword aligned. +// + + .set noreorder +20: lwc1 f0,0(a1) // move 32-byte block + lwc1 f1,4(a1) // + lwc1 f2,8(a1) // + lwc1 f3,12(a1) // + lwc1 f4,16(a1) // + lwc1 f5,20(a1) // + lwc1 f6,24(a1) // + lwc1 f7,28(a1) // + sdc1 f0,0(a0) // + sdc1 f2,8(a0) // + sdc1 f4,16(a0) // + sdc1 f6,24(a0) // + addu a0,a0,32 // advance pointers to next block + bne a0,t8,20b // if ne, more 32-byte blocks to zero + addu a1,a1,32 // + .set reorder + + b 30f // + +// +// Both the source and the destination are quadword aligned. +// + +22: and t0,t1,1 << 5 // test if even number of 32-byte blocks + beq zero,t0,26f // if eq, even number of 32-byte blocks + +// +// Move one 32-byte block quadword aligned. +// + + .set noreorder + ldc1 f0,0(a1) // move 32-byte block + ldc1 f2,8(a1) // + ldc1 f4,16(a1) // + ldc1 f6,24(a1) // + sdc1 f0,0(a0) // + sdc1 f2,8(a0) // + sdc1 f4,16(a0) // + sdc1 f6,24(a0) // + addu a0,a0,32 // advance pointers to next block + beq a0,t8,30f // if eq, end of block + addu a1,a1,32 // + .set reorder + +// +// Move 64-byte blocks quadword aligned. 
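+// (Each ldc1/sdc1 moves 8 bytes through an even/odd floating register
+// pair, so this loop covers 64 bytes in 16 memory operations where the
+// longword path would need 32.)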
+// + + .set noreorder +26: ldc1 f0,0(a1) // move 64-byte block + ldc1 f2,8(a1) // + ldc1 f4,16(a1) // + ldc1 f6,24(a1) // + ldc1 f8,32(a1) // + ldc1 f10,40(a1) // + ldc1 f12,48(a1) // + ldc1 f14,56(a1) // + sdc1 f0,0(a0) // + sdc1 f2,8(a0) // + sdc1 f4,16(a0) // + sdc1 f6,24(a0) // + sdc1 f8,32(a0) // + sdc1 f10,40(a0) // + sdc1 f12,48(a0) // + sdc1 f14,56(a0) // + addu a0,a0,64 // advance pointers to next block + bne a0,t8,26b // if ne, more 64-byte blocks to zero + addu a1,a1,64 // + .set reorder + +#endif + +// +// The source is longword aligned and the destination is longword aligned. +// + +#if defined(R3000) + + .set noreorder +20: lw t0,0(a1) // move 32-byte block + lw t1,4(a1) // + lw t2,8(a1) // + lw t3,12(a1) // + lw t4,16(a1) // + lw t5,20(a1) // + lw t6,24(a1) // + lw t7,28(a1) // + sw t0,0(a0) // + sw t1,4(a0) // + sw t2,8(a0) // + sw t3,12(a0) // + sw t4,16(a0) // + sw t5,20(a0) // + sw t6,24(a0) // + sw t7,28(a0) // + addu a0,a0,32 // advance pointers to next block + bne a0,t8,20b // if ne, more 32-byte blocks to zero + addu a1,a1,32 // + .set reorder + +#endif + +// +// Check for 4-byte blocks to move. +// + +30: and t0,a2,4 - 1 // isolate residual bytes + subu t1,a2,t0 // subtract out residual bytes + addu t2,a0,t1 // compute ending block address + beq zero,t1,50f // if eq, no 4-byte block to zero + move a2,t0 // set residual number of bytes + +// +// Move 4-byte block. +// + + .set noreorder +40: lw t0,0(a1) // move 4-byte block + addu a0,a0,4 // advance pointers to next block + sw t0,-4(a0) // + bne a0,t2,40b // if ne, more 4-byte blocks to zero + addu a1,a1,4 // + .set reorder + +// +// Move 1-byte blocks. +// + +50: addu t2,a0,a2 // compute ending block address + beq zero,a2,70f // if eq, no bytes to zero + + .set noreorder +60: lb t0,0(a1) // move 1-byte block + addu a0,a0,1 // advance pointers to next block + sb t0,-1(a0) // + bne a0,t2,60b // if ne, more 1-byte block to zero + addu a1,a1,1 // + .set reorder + +70: j ra // return + +// +// Move memory forward unaligned. +// + +MoveForwardUnaligned: // + subu t0,zero,a0 // compute bytes until aligned + and t0,t0,0x3 // isolate residual byte count + subu a2,a2,t0 // reduce number of bytes to move + beq zero,t0,10f // if eq, already aligned + lwr t1,0(a1) // move unaligned bytes + lwl t1,3(a1) // + swr t1,0(a0) // + addu a0,a0,t0 // align destination address + addu a1,a1,t0 // update source address + +// +// Check for 32-byte blocks to move. +// + +10: and t0,a2,32 - 1 // isolate residual bytes + subu t1,a2,t0 // subtract out residual bytes + addu t8,a0,t1 // compute ending block address + beq zero,t1,30f // if eq, no 32-byte block to zero + move a2,t0 // set residual number of bytes + +// +// Move 32-byte block. +// + + .set noreorder +20: lwr t0,0(a1) // move 32-byte block + lwl t0,3(a1) // + lwr t1,4(a1) // + lwl t1,7(a1) // + lwr t2,8(a1) // + lwl t2,11(a1) // + lwr t3,12(a1) // + lwl t3,15(a1) // + lwr t4,16(a1) // + lwl t4,19(a1) // + lwr t5,20(a1) // + lwl t5,23(a1) // + lwr t6,24(a1) // + lwl t6,27(a1) // + lwr t7,28(a1) // + lwl t7,31(a1) // + sw t0,0(a0) // + sw t1,4(a0) // + sw t2,8(a0) // + sw t3,12(a0) // + sw t4,16(a0) // + sw t5,20(a0) // + sw t6,24(a0) // + sw t7,28(a0) // + addu a0,a0,32 // advance pointers to next block + bne a0,t8,20b // if ne, more 32-byte blocks to zero + addu a1,a1,32 // + .set reorder + +// +// Check for 4-byte blocks to move. 
+// + +30: and t0,a2,4 - 1 // isolate residual bytes + subu t1,a2,t0 // subtract out residual bytes + addu t2,a0,t1 // compute ending block address + beq zero,t1,50f // if eq, no 4-byte block to zero + move a2,t0 // set residual number of bytes + +// +// Move 4-byte block. +// + + .set noreorder +40: lwr t0,0(a1) // move 4-byte block + lwl t0,3(a1) // + addu a0,a0,4 // advance pointers to next block + sw t0,-4(a0) // + bne a0,t2,40b // if ne, more 4-byte blocks to zero + addu a1,a1,4 // + .set reorder + +// +// Move 1-byte blocks. +// + +50: addu t2,a0,a2 // compute ending block address + beq zero,a2,70f // if eq, no bytes to zero + + .set noreorder +60: lb t0,0(a1) // move 1-byte block + addu a0,a0,1 // advance pointers to next block + sb t0,-1(a0) // + bne a0,t2,60b // if ne, more 1-byte block to zero + addu a1,a1,1 // + .set reorder + +70: j ra // return + +// +// Move memory backward. +// + +MoveBackward: // + addu a0,a0,a2 // compute ending destination address + addu a1,a1,a2 // compute ending source address + sltu t0,a2,4 // check if less than four bytes + bne zero,t0,50f // if ne, less than four bytes to move + xor t0,a0,a1 // compare alignment bits + and t0,t0,0x3 // isolate alignment comparison + bne zero,t0,MoveBackwardUnaligned // if ne, incompatible alignment + +// +// Move memory backward aligned. +// + +MoveBackwardAligned: // + and t0,a0,0x3 // isolate residual byte count + subu a2,a2,t0 // reduce number of bytes to move + beq zero,t0,10f // if eq, already aligned + lwl t1,-1(a1) // move unaligned bytes + swl t1,-1(a0) // + subu a0,a0,t0 // align destination address + subu a1,a1,t0 // align source address + +// +// Check for 32-byte blocks to move. +// + +10: and t0,a2,32 - 1 // isolate residual bytes + subu t1,a2,t0 // subtract out residual bytes + subu t8,a0,t1 // compute ending block address + beq zero,t1,30f // if eq, no 32-byte block to zero + move a2,t0 // set residual number of bytes + +// +// Move 32-byte block. +// + +#if defined(R4000) + + and t0,a0,1 << 2 // check if destination quadword aligned + beq zero,t0,15f // if eq, destination quadword aligned + lw t0,-4(a1) // get source longword + subu a1,a1,4 // align source address + sw t0,-4(a0) // store destination longword + subu a0,a0,4 // align destination address + addu a2,a2,t1 // recompute byte to move + subu a2,a2,4 // reduce count by 4 + b 10b // + +// +// The destination is quadword aligned, check the source operand. +// + +15: and t0,a1,1 << 2 // check if source quadword aligned + beq zero,t0,22f // if eq, source quadword aligned + +// +// The source is longword aligned and the destination is quadword aligned. +// + + .set noreorder +20: lwc1 f1,-4(a1) // move 32-byte block + lwc1 f0,-8(a1) // + lwc1 f3,-12(a1) // + lwc1 f2,-16(a1) // + lwc1 f5,-20(a1) // + lwc1 f4,-24(a1) // + lwc1 f7,-28(a1) // + lwc1 f6,-32(a1) // + sdc1 f0,-8(a0) // + sdc1 f2,-16(a0) // + sdc1 f4,-24(a0) // + sdc1 f6,-32(a0) // + subu a0,a0,32 // advance pointers to next block + bne a0,t8,20b // if ne, more 32-byte blocks to zero + subu a1,a1,32 // + .set reorder + + b 30f // + +// +// Both the source and the destination are quadword aligned. +// + +22: and t0,t1,1 << 5 // test if even number of 32-byte blocks + beq zero,t0,26f // if eq, even number of 32-byte blocks + +// +// Move one 32-byte block quadword aligned. 
+// + + .set noreorder + ldc1 f0,-8(a1) // move 32-byte block + ldc1 f2,-16(a1) // + ldc1 f4,-24(a1) // + ldc1 f6,-32(a1) // + sdc1 f0,-8(a0) // + sdc1 f2,-16(a0) // + sdc1 f4,-24(a0) // + sdc1 f6,-32(a0) // + subu a0,a0,32 // advance pointers to next block + beq a0,t8,30f // if eq, end of block + subu a1,a1,32 // + .set reorder + +// +// Move 64-byte blocks quadword aligned. +// + + .set noreorder +26: ldc1 f0,-8(a1) // move 64-byte block + ldc1 f2,-16(a1) // + ldc1 f4,-24(a1) // + ldc1 f6,-32(a1) // + ldc1 f8,-40(a1) // + ldc1 f10,-48(a1) // + ldc1 f12,-56(a1) // + ldc1 f14,-64(a1) // + sdc1 f0,-8(a0) // + sdc1 f2,-16(a0) // + sdc1 f4,-24(a0) // + sdc1 f6,-32(a0) // + sdc1 f8,-40(a0) // + sdc1 f10,-48(a0) // + sdc1 f12,-56(a0) // + sdc1 f14,-64(a0) // + subu a0,a0,64 // advance pointers to next block + bne a0,t8,26b // if ne, more 64-byte blocks to zero + subu a1,a1,64 // + .set reorder + +#endif + +// +// The source is longword aligned and the destination is longword aligned. +// + +#if defined(R3000) + + .set noreorder +20: lw t0,-4(a1) // move 32-byte block + lw t1,-8(a1) // + lw t2,-12(a1) // + lw t3,-16(a1) // + lw t4,-20(a1) // + lw t5,-24(a1) // + lw t6,-28(a1) // + lw t7,-32(a1) // + sw t0,-4(a0) // + sw t1,-8(a0) // + sw t2,-12(a0) // + sw t3,-16(a0) // + sw t4,-20(a0) // + sw t5,-24(a0) // + sw t6,-28(a0) // + sw t7,-32(a0) // + subu a0,a0,32 // advance pointers to next block + bne a0,t8,20b // if ne, more 32-byte blocks to zero + subu a1,a1,32 // + .set reorder + +#endif + +// +// Check for 4-byte blocks to move. +// + +30: and t0,a2,4 - 1 // isolate residual bytes + subu t1,a2,t0 // subtract out residual bytes + subu t2,a0,t1 // compute ending block address + beq zero,t1,50f // if eq, no 4-byte block to zero + move a2,t0 // set residual number of bytes + +// +// Move 4-byte block. +// + + .set noreorder +40: lw t0,-4(a1) // move 4-byte block + subu a0,a0,4 // advance pointers to next block + sw t0,0(a0) // + bne a0,t2,40b // if ne, more 4-byte blocks to zero + subu a1,a1,4 // + .set reorder + +// +// Move 1-byte blocks. +// + +50: subu t2,a0,a2 // compute ending block address + beq zero,a2,70f // if eq, no bytes to zero + + .set noreorder +60: lb t0,-1(a1) // move 1-byte block + subu a0,a0,1 // advance pointers to next block + sb t0,0(a0) // + bne a0,t2,60b // if ne, more 1-byte block to zero + subu a1,a1,1 // + .set reorder + +70: j ra // return + +// +// Move memory backward unaligned. +// + +MoveBackwardUnaligned: // + and t0,a0,0x3 // isolate residual byte count + subu a2,a2,t0 // reduce number of bytes to move + beq zero,t0,10f // if eq, already aligned + lwl t1,-1(a1) // move unaligned bytes + lwr t1,-4(a1) // + swl t1,-1(a0) // + subu a0,a0,t0 // align destination address + subu a1,a1,t0 // update source address + +// +// Check for 32-byte blocks to move. +// + +10: and t0,a2,32 - 1 // isolate residual bytes + subu t1,a2,t0 // subtract out residual bytes + subu t8,a0,t1 // compute ending block address + beq zero,t1,30f // if eq, no 32-byte block to zero + move a2,t0 // set residual number of bytes + +// +// Move 32-byte block. 
+// + + .set noreorder +20: lwr t0,-4(a1) // move 32-byte block + lwl t0,-1(a1) // + lwr t1,-8(a1) // + lwl t1,-5(a1) // + lwr t2,-12(a1) // + lwl t2,-9(a1) // + lwr t3,-16(a1) // + lwl t3,-13(a1) // + lwr t4,-20(a1) // + lwl t4,-17(a1) // + lwr t5,-24(a1) // + lwl t5,-21(a1) // + lwr t6,-28(a1) // + lwl t6,-25(a1) // + lwr t7,-32(a1) // + lwl t7,-29(a1) // + sw t0,-4(a0) // + sw t1,-8(a0) // + sw t2,-12(a0) // + sw t3,-16(a0) // + sw t4,-20(a0) // + sw t5,-24(a0) // + sw t6,-28(a0) // + sw t7,-32(a0) // + subu a0,a0,32 // advance pointers to next block + bne a0,t8,20b // if ne, more 32-byte blocks to zero + subu a1,a1,32 // + .set reorder + +// +// Check for 4-byte blocks to move. +// + +30: and t0,a2,4 - 1 // isolate residual bytes + subu t1,a2,t0 // subtract out residual bytes + subu t2,a0,t1 // compute ending block address + beq zero,t1,50f // if eq, no 4-byte block to zero + move a2,t0 // set residual number of bytes + +// +// Move 4-byte block. +// + + .set noreorder +40: lwr t0,-4(a1) // move 4-byte block + lwl t0,-1(a1) // + subu a0,a0,4 // advance pointers to next block + sw t0,0(a0) // + bne a0,t2,40b // if ne, more 4-byte blocks to zero + subu a1,a1,4 // + .set reorder + +// +// Move 1-byte blocks. +// + +50: subu t2,a0,a2 // compute ending block address + beq zero,a2,70f // if eq, no bytes to zero + + .set noreorder +60: lb t0,-1(a1) // move 1-byte block + subu a0,a0,1 // advance pointers to next block + sb t0,0(a0) // + bne a0,t2,60b // if ne, more 1-byte block to zero + subu a1,a1,1 // + .set reorder + +70: j ra // return + + .end memcpy + + SBTTL("Fill Memory") +//++ +// +// VOID +// RtlFillMemory ( +// IN PVOID Destination, +// IN ULONG Length, +// IN UCHAR Fill +// ) +// +// Routine Description: +// +// This function fills memory by first aligning the destination address to +// a longword boundary, and then filling 32-byte blocks, followed by 4-byte +// blocks, followed by any remaining bytes. +// +// Arguments: +// +// Destination (a0) - Supplies a pointer to the memory to fill. +// +// Length (a1) - Supplies the length, in bytes, of the memory to be filled. +// +// Fill (a2) - Supplies the fill byte. +// +// N.B. The alternate entry memset expects the length and fill arguments +// to be reversed. +// +// Return Value: +// +// None. +// +//-- + + LEAF_ENTRY(memset) + + move a3,a1 // swap length and fill arguments + move a1,a2 // + move a2,a3 // + move v0,a0 // return destination + + and a2,a2,0xff // clear excess bits + sll t0,a2,8 // duplicate fill byte + or a2,a2,t0 // generate fill word + sll t0,a2,16 // duplicate fill word + or a2,a2,t0 // generate fill longword + +// +// Fill memory with the pattern specified in register a2. +// + +#if DBG + + mtc1 a2,f0 // set pattern to store + mtc1 a2,f1 // + +#endif + + subu t0,zero,a0 // compute bytes until aligned + and t0,t0,0x3 // isolate residual byte count + subu t1,a1,t0 // reduce number of bytes to fill + blez t1,60f // if lez, less than 4 bytes to fill + move a1,t1 // set number of bytes to fill + beq zero,t0,10f // if eq, already aligned + swr a2,0(a0) // fill unaligned bytes + addu a0,a0,t0 // align destination address + +// +// Check for 32-byte blocks to fill. +// + +10: and t0,a1,32 - 1 // isolate residual bytes + subu t1,a1,t0 // subtract out residual bytes + addu t2,a0,t1 // compute ending block address + beq zero,t1,40f // if eq, no 32-byte blocks to fill + move a1,t0 // set residual number of bytes + +// +// Fill 32-byte blocks. 
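+// (On the R4000 path the replicated pattern is parked in the f0/f1
+// register pair so that each sdc1 stores 8 pattern bytes at a time.)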
+//
+
+#if defined(R4000)
+
+ and t0,a0,1 << 2 // check if destination quadword aligned
+ beq zero,t0,20f // if eq, yes
+ sw a2,0(a0) // store destination longword
+ addu a0,a0,4 // align destination address
+ addu a1,a1,t1 // recompute bytes to fill
+ subu a1,a1,4 // reduce count by 4
+ b 10b //
+
+//
+// The destination is quadword aligned.
+//
+
+20: mtc1 a2,f0 // set pattern value
+ mtc1 a2,f1 //
+ and t0,t1,1 << 5 // test if even number of 32-byte blocks
+ beq zero,t0,30f // if eq, even number of 32-byte blocks
+
+//
+// Fill one 32-byte block.
+//
+
+ .set noreorder
+ sdc1 f0,0(a0) // fill 32-byte block
+ sdc1 f0,8(a0) //
+ sdc1 f0,16(a0) //
+ addu a0,a0,32 // advance pointer to next block
+ beq a0,t2,40f // if eq, no 64-byte blocks to fill
+ sdc1 f0,-8(a0) //
+ .set reorder
+
+//
+// Fill 64-byte block.
+//
+
+ .set noreorder
+30: sdc1 f0,0(a0) // fill 64-byte block
+ sdc1 f0,8(a0) //
+ sdc1 f0,16(a0) //
+ sdc1 f0,24(a0) //
+ sdc1 f0,32(a0) //
+ sdc1 f0,40(a0) //
+ sdc1 f0,48(a0) //
+ addu a0,a0,64 // advance pointer to next block
+ bne a0,t2,30b // if ne, more 64-byte blocks to fill
+ sdc1 f0,-8(a0) //
+ .set reorder
+
+#endif
+
+//
+// Fill 32-byte blocks.
+//
+
+#if defined(R3000)
+
+ .set noreorder
+20: sw a2,0(a0) // fill 32-byte block
+ sw a2,4(a0) //
+ sw a2,8(a0) //
+ sw a2,12(a0) //
+ addu a0,a0,32 // advance pointer to next block
+ sw a2,-4(a0) //
+ sw a2,-8(a0) //
+ sw a2,-12(a0) //
+ bne a0,t2,20b // if ne, more 32-byte blocks to fill
+ sw a2,-16(a0) //
+ .set reorder
+
+#endif
+
+//
+// Check for 4-byte blocks to fill.
+//
+
+40: and t0,a1,4 - 1 // isolate residual bytes
+ subu t1,a1,t0 // subtract out residual bytes
+ addu t2,a0,t1 // compute ending block address
+ beq zero,t1,60f // if eq, no 4-byte block to fill
+ move a1,t0 // set residual number of bytes
+
+//
+// Fill 4-byte blocks.
+//
+
+ .set noreorder
+50: addu a0,a0,4 // advance pointer to next block
+ bne a0,t2,50b // if ne, more 4-byte blocks to fill
+ sw a2,-4(a0) // fill 4-byte block
+ .set reorder
+
+//
+// Check for 1-byte blocks to fill.
+//
+
+60: addu t2,a0,a1 // compute ending block address
+ beq zero,a1,80f // if eq, no bytes to fill
+
+//
+// Fill 1-byte blocks.
+//
+
+ .set noreorder
+70: addu a0,a0,1 // advance pointer to next block
+ bne a0,t2,70b // if ne, more 1-byte blocks to fill
+ sb a2,-1(a0) // fill 1-byte block
+ .set reorder
+
+#if DBG
+
+80: mfc1 t0,f0 // get fill pattern
+ mfc1 t1,f1 //
+ bne t0,a2,90f // if ne, pattern altered
+ bne t1,a2,90f // if ne, pattern altered
+ j ra // return
+
+90: break KERNEL_BREAKPOINT //
+
+#else
+
+80: j ra // return
+
+#endif
+
+ .end memset diff --git a/private/crt32/string/mips/memsetm.s b/private/crt32/string/mips/memsetm.s new file mode 100644 index 000000000..a53f8f0a1 --- /dev/null +++ b/private/crt32/string/mips/memsetm.s @@ -0,0 +1,105 @@ +/* --------------------------------------------------- */ +/* | Copyright (c) 1986 MIPS Computer Systems, Inc. | */ +/* | All Rights Reserved. | */ +/* --------------------------------------------------- */ +/* $Revision: 1.3 $ */ +
+/*
+ * char *
+ * memset(s, c, n)
+ * register char * s;
+ * register c, n;
+ * {
+ * register char * p = s;
+ *
+ * while (--n >= 0)
+ * *s++ = c;
+ *
+ * return (p);
+ * }
+ */
+
+/*
+ * Copyright 1986 by MIPS Computer Systems, Inc.
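+ *
+ * A note on the fill word built below (a sketch of the idiom): the
+ * fill byte is replicated with two shift/or steps,
+ *
+ *	c |= c << 8;  c |= c << 16;	(0x2a becomes 0x2a2a2a2a)
+ *
+ * so each aligned sw stores four copies of the fill byte at once.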
+ */
+
+#include <kxmips.h>
+
+#define NBPW 4
+
+/*
+ * memset(dst, c, bcount)
+ * set a block of memory to the fill byte c
+ *
+ * Calculating MINSET, assuming 10% cache-miss on non-loop code:
+ * Overhead =~ 18 instructions => 28 (30) cycles
+ * Byte set =~ 12 (24) cycles/word for 08M44 (08V11)
+ * Word set =~ 3 (5) cycles/word for 08M44 (08V11)
+ * If I-cache-miss nears 0, MINSET ==> 4 bytes; otherwise, times are:
+ * breakeven (MEM) = 28 / (12 - 3) =~ 3 words
+ * breakeven (VME) = 30 / (24 - 5) =~ 1.5 words
+ * Since the overhead is pessimistic (worst-case alignment), and many calls
+ * will be for well-aligned data, and since Word-set at least leaves
+ * the set in the cache, we shade these values (6-12) down to 8 bytes
+ */
+#define MINSET 8
+
+/* It turns out better to think of lwl/lwr and swl/swr as
+ smaller-vs-bigger address rather than left-vs-right.
+ Such a representation makes the code endian-independent. */
+
+#define LWS lwr
+#define LWB lwl
+#define SWS swr
+#define SWB swl
+
+LEAF_ENTRY(memset)
+ move v0,a0 # return first argument; BDSLOT
+ blt a2,MINSET,byteset
+ subu v1,zero,a0 # number of bytes til aligned; BDSLOT
+ beq a1,$0,1f # make memset(s, 0, n) faster
+ sll t0,a1,8
+ or a1,t0
+ sll t0,a1,16
+ or a1,t0
+1: and v1,NBPW-1
+ subu a2,v1 # adjust count; BDSLOT
+ beq v1,zero,blkset # already aligned
+ SWS a1,0(a0)
+ addu a0,v1
+
+/*
+ * set 8 byte, aligned block (no point in unrolling further,
+ * since maximum write rate in M/500 is two cycles/word write)
+ */
+blkset:
+ and t0,a2,NBPW+NBPW-1 # count after by-8-byte loop done
+ subu a3,a2,t0 # total in 8 byte chunks; BDSLOT
+ beq a2,t0,wordset # less than 8 bytes to set
+ addu a3,a0 # dst endpoint
+1: addu a0,NBPW+NBPW
+ sw a1,-NBPW-NBPW(a0)
+ sw a1,-NBPW(a0)
+ bne a0,a3,1b
+ move a2,t0 # set end-of loop count
+
+/*
+ * do a word (if required) this is not a loop since loop above
+ * guarantees that at most one word must be written here.
+ */
+wordset:
+ and t0,a2,NBPW # count after by-word non-loop done
+ subu a2,t0 # adjust count; BDSLOT
+ beq t0,zero,byteset # less than word to set
+ sw a1,0(a0)
+ addu a0,NBPW
+
+byteset:
+ addu a3,a2,a0 # dst endpoint; BDSLOT
+ ble a2,zero,setdone
+1: addu a0,1
+ sb a1,-1(a0)
+ bne a0,a3,1b
+setdone:
+ j ra
+.end memset diff --git a/private/crt32/string/mips/memsett.c b/private/crt32/string/mips/memsett.c new file mode 100644 index 000000000..c653803ab --- /dev/null +++ b/private/crt32/string/mips/memsett.c @@ -0,0 +1,20 @@ +char buffer[100];
+#include <stdio.h>
+#include <string.h>
+
+void main()
+{
+ char *f = buffer;
+ char *g = buffer;
+
+ printf("%8.8x\n", f);
+ f=(char*)memset(f,0x0a,12);
+ printf("%8.8x\n", f);
+
+ if (f == g) {
+ int k = 12;
+ while (k--)
+ printf("%2.2x", *f++);
+ }
+}
+ diff --git a/private/crt32/string/mips/strcatm.s b/private/crt32/string/mips/strcatm.s new file mode 100644 index 000000000..0c22c47de --- /dev/null +++ b/private/crt32/string/mips/strcatm.s @@ -0,0 +1,98 @@ +/* ------------------------------------------------------------------ */ +/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */ +/* | Reserved. This software contains proprietary and confidential | */ +/* | information of MIPS and its suppliers. Use, disclosure or | */ +/* | reproduction is prohibited without the prior express written | */ +/* | consent of MIPS. | */ +/* ------------------------------------------------------------------ */ +/* strcat.s 1.1 */ +
+/* This function is an assembly-code replacement for the libc function
+ * strcat.
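+
+ * For reference, the C routine being replaced is essentially the
+ * following (find the terminating zero, then copy as strcpy does):
+
+ *	char *strcat(s1, s2)
+ *	register char *s1, *s2;
+ *	{
+ *	    register char *os1 = s1;
+ *	    while (*s1)
+ *	        s1++;
+ *	    while (*s1++ = *s2++)
+ *	        ;
+ *	    return (os1);
+ *	}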
+
+ * strcat and strcpy are very similar, but we waste about 40 words of
+ * code when both are used, so that they can be independently replaced.
+
+ * There is one caveat to consider: this function is written in
+ * assembler code, and as such, cannot be merged using the U-code
+ * loader. */
+
+/* Craig Hansen - 3-September-86 */
+
+#include <kxmips.h>
+
+/* It turns out better to think of lwl/lwr and swl/swr as
+ smaller-vs-bigger address rather than left-vs-right.
+ Such a representation makes the code endian-independent. */
+
+#define LWS lwr
+#define LWB lwl
+#define SWS swr
+#define SWB swl
+
+.text
+
+LEAF_ENTRY(strcat)
+.set noreorder
+ // a0/ destination
+ // a1/ source
+ move v0, a0 # a copy of destination address is returned
+$findz: lb t0,0(a0)
+ nop
+ bne t0,0,$findz
+ add a0,1
+ // go back over null byte
+ add a0,-1
+ // start up first word
+ // adjust pointers so that a0 points to next word
+ // t7 = a1 adjusted by same amount minus one
+ // t0,t1,t2,t3 are filled with 4 consecutive bytes
+ // t4 is filled with the same 4 bytes in a single word
+ lb t0, 0(a1)
+ ori t5, a0, 3 # get an early start
+ beq t0, 0, $doch0
+ sub t6, t5, a0 # number of char in 1st word of dest - 1
+ lb t1, 1(a1)
+ add t7, a1, t6 # offset starting point for source string
+ beq t1, 0, $doch1
+ nop
+ lb t2, 2(a1)
+ nop
+ beq t2, 0, $doch2
+ LWS t4, 0(a1) # safe: always in same word as 0(a1)
+ lb t3, 3(a1)
+ LWB t4, 3(a1) # fill out word
+ beq t3, 0, $doch3
+ SWS t4, 0(a0) # store entire or part word
+ addi a0, t5, 1-4 # adjust destination ptr
+
+ // inner loop
+1: lb t0, 1(t7)
+ addi t7, 4
+ beq t0, 0, $doch0
+ addi a0, 4
+ lb t1, 1+1-4(t7)
+ nop
+ beq t1, 0, $doch1
+ nop
+ lb t2, 2+1-4(t7)
+ nop
+ beq t2, 0, $doch2
+ LWS t4, 0+1-4(t7)
+ lb t3, 3+1-4(t7)
+ LWB t4, 3+1-4(t7)
+ bne t3, 0, 1b
+ sw t4, 0(a0)
+ j ra
+ nop
+
+ // store four bytes using swl/swr
+$doch3: j ra
+ SWB t4, 3(a0)
+ // store up to three bytes, a byte at a time.
+$doch2: sb t2, 2(a0)
+$doch1: sb t1, 1(a0)
+$doch0: j ra
+ sb t0, 0(a0)
+
+.end strcat diff --git a/private/crt32/string/mips/strchrm.s b/private/crt32/string/mips/strchrm.s new file mode 100644 index 000000000..b92f79fd6 --- /dev/null +++ b/private/crt32/string/mips/strchrm.s @@ -0,0 +1,26 @@ +/* ------------------------------------------------------------------ */ +/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */ +/* | Reserved. This software contains proprietary and confidential | */ +/* | information of MIPS and its suppliers. Use, disclosure or | */ +/* | reproduction is prohibited without the prior express written | */ +/* | consent of MIPS. | */ +/* ------------------------------------------------------------------ */ +#ident "$Header: /disks/bits/5.1isms/irix/lib/libc/src/strings/RCS/index.s,v 1.3 1992/03/07 15:37:04 jleong Exp $" +
+/*
+ * Copyright 1985 by MIPS Computer Systems, Inc.
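+ *
+ * C equivalent (a sketch): like ANSI strchr, a search for '\0' finds
+ * the string's terminating zero byte itself:
+ *
+ *	char *strchr(s, c)
+ *	register char *s;
+ *	register int c;
+ *	{
+ *	    do {
+ *	        if (*s == (char)c)
+ *	            return (s);
+ *	    } while (*s++);
+ *	    return (0);
+ *	}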
+ */
+
+#include "kxmips.h"
+
+LEAF_ENTRY(strchr)
+1: lbu a2,0(a0)
+ addu a0,1
+ beq a2,a1,2f
+ bne a2,zero,1b
+ move v0,zero
+ j ra
+
+2: subu v0,a0,1
+ j ra
+.end strchr diff --git a/private/crt32/string/mips/strchrt.c b/private/crt32/string/mips/strchrt.c new file mode 100644 index 000000000..1da4e1d0d --- /dev/null +++ b/private/crt32/string/mips/strchrt.c @@ -0,0 +1,20 @@ +#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+
+void main( int argc, char **argv )
+ {
+ int c;
+ unsigned char *pstr;
+ unsigned char string[100];
+
+ strcpy(string, "ABCDEFGHIJKLMNOPQRST");
+ for (c = 'a'; c <= UCHAR_MAX; c++)
+ {
+ string[9] = c;
+ pstr = strchr( string, c);
+ if (!pstr)
+ printf("Fail - Could not find %d in %s\n", c, string);
+ }
+ return;
+ } diff --git a/private/crt32/string/mips/strcmpm.s b/private/crt32/string/mips/strcmpm.s new file mode 100644 index 000000000..dfaaa9c39 --- /dev/null +++ b/private/crt32/string/mips/strcmpm.s @@ -0,0 +1,50 @@ +/* ------------------------------------------------------------------ */ +/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */ +/* | Reserved. This software contains proprietary and confidential | */ +/* | information of MIPS and its suppliers. Use, disclosure or | */ +/* | reproduction is prohibited without the prior express written | */ +/* | consent of MIPS. | */ +/* ------------------------------------------------------------------ */ +/* strcmp.s 1.1 */ +
+/* This function is an assembly-code replacement for
+ the libc function "strcmp." */
+/* Libc currently has a mips-specific C version that uses 7 instructions/byte.
+ (It claims to use 6 cycles/byte, but is wrong!)
+ This function uses an unrolled loop, which uses 5 instructions per byte.
+
+ Under some circumstances more characters are read than are
+ required for determining the collating order, but it
+ never reads beyond the end of either string.
+
+ There is one caveat to consider: this function is written
+ in assembler code, and as such, cannot be merged
+ using the U-code loader. */
+
+/* Craig Hansen - 6-June-86 */
+
+#include <kxmips.h>
+
+ .text
+
+LEAF_ENTRY(strcmp)
+
+ .set noreorder
+ lbu t0,0(a0)
+1: lbu t1,0(a1)
+ beq t0,0,2f
+ addi a0,2
+ bne t0,t1,3f
+ lbu t2,-1(a0) # ok to load since -2(a0)!=0
+ lbu t1,1(a1)
+ beq t2,0,2f
+ addi a1,2
+ beq t2,t1,1b
+ lbu t0,0(a0) # ok to load since -1(a0) != 0
+ j ra
+ subu v0,t2,t1
+2: j ra
+ subu v0,zero,t1
+3: j ra
+ subu v0,t0,t1
+ .end strcmp diff --git a/private/crt32/string/mips/strcpym.s b/private/crt32/string/mips/strcpym.s new file mode 100644 index 000000000..4f5bc416a --- /dev/null +++ b/private/crt32/string/mips/strcpym.s @@ -0,0 +1,148 @@ +/* ------------------------------------------------------------------ */ +/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */ +/* | Reserved. This software contains proprietary and confidential | */ +/* | information of MIPS and its suppliers. Use, disclosure or | */ +/* | reproduction is prohibited without the prior express written | */ +/* | consent of MIPS. | */ +/* ------------------------------------------------------------------ */ +/* strcpy.s 1.2 */ +
+/* This function is an assembly-code replacement for the libc function
+ * strcpy. It uses the MIPS special instructions "lwl", "lwr", "swl",
+ * and "swr", which handle unaligned words.
+
+ * The standard C version of this function is a 5-instruction loop,
+ * working one byte at a time:
+
+ * Copy string s2 to s1. s1 must be large enough.
+ * return s1
+ * char *strcpy(s1, s2)
+ * register char *s1, *s2;
+ * {
+ * register char *os1;
+ * os1 = s1;
+ * while (*s1++ = *s2++);
+ * return(os1);
+ * }
+
+ * A better C version takes 4 cycles/byte; the loop is unrolled once.
+ * char *
+ * strcpy(s1, s2)
+ * register char *s1, *s2;
+ * {
+ * register char *os1 = s1;
+ * while (1) {
+ * register unsigned c;
+ * c = s2[0];
+ * s2 += 2;
+ * s1[0] = c;
+ * if (c == 0) break;
+ * c = s2[1-2];
+ * s1 += 2;
+ * s1[1-2] = c;
+ * if (c == 0) break;
+ * }
+ * return(os1);
+ * }
+
+ * This function starts with an unrolled loop, which uses 5
+ * instructions per byte (including the store bytes at the end) for
+ * the first few bytes.
+
+ * After filling a word, the first word or portion of a word is saved
+ * using a "swl" instruction. If the start of the destination string is
+ * at a word boundary, this leaves the result valid in the cache.
+ * Because this replaces up to 4 store byte instructions, we are still
+ * near 3 instructions per byte, but there is only one write.
+
+ * The inner loop moves 4 bytes in 16 cycles, an average of 4 cycles
+ * per byte. This is 1 cycle faster than the standard C code, the
+ * same speed as the unrolled version, and it also leaves the result
+ * valid in the cache.
+
+ * Finally, when a zero byte is found, the end of the string is stored
+ * using store byte instructions. This adds one instruction per byte
+ * for as many as three bytes, but eliminates the up to four cycles of
+ * overhead we counted before.
+
+ * The end result is that this function is never slower than the C
+ * function, is faster by up to 30% in instruction count, uses up to
+ * 75% fewer writes, and leaves most of the result valid in the cache.
+
+ * There is one caveat to consider: this function is written in
+ * assembler code, and as such, cannot be merged using the U-code
+ * loader. */
+
+/* Craig Hansen - 3-September-86 */
+
+#include <kxmips.h>
+
+/* It turns out better to think of lwl/lwr and swl/swr as
+   smaller-vs-bigger address rather than left-vs-right.
+   Such a representation makes the code endian-independent. */
+
+#define LWS lwr
+#define LWB lwl
+#define SWS swr
+#define SWB swl
+
+.text
+
+LEAF_ENTRY(strcpy)
+.set noreorder
+ // a0/ destination
+ // a1/ source
+ move v0, a0 # a copy of destination address is returned
+ // start up first word
+ // adjust pointers so that a0 points to next word
+ // t7 = a1 adjusted by same amount minus one
+ // t0,t1,t2,t3 are filled with 4 consecutive bytes
+ // t4 is filled with the same 4 bytes in a single word
+ lb t0, 0(a1)
+ ori t5, a0, 3 # get an early start
+ beq t0, 0, $doch0
+ sub t6, t5, a0 # number of char in 1st word of dest - 1
+ lb t1, 1(a1)
+ add t7, a1, t6 # offset starting point for source string
+ beq t1, 0, $doch1
+ nop
+ lb t2, 2(a1)
+ nop
+ beq t2, 0, $doch2
+ LWS t4, 0(a1) # safe: always in same word as 0(a1)
+ lb t3, 3(a1)
+ LWB t4, 3(a1) # fill out word
+ beq t3, 0, $doch3
+ SWS t4, 0(a0) # store entire or part word
+ addi a0, t5, 1-4 # adjust destination ptr
+
+ // inner loop
+1: lb t0, 1(t7)
+ addi t7, 4
+ beq t0, 0, $doch0
+ addi a0, 4
+ lb t1, 1+1-4(t7)
+ nop
+ beq t1, 0, $doch1
+ nop
+ lb t2, 2+1-4(t7)
+ nop
+ beq t2, 0, $doch2
+ LWS t4, 0+1-4(t7)
+ lb t3, 3+1-4(t7)
+ LWB t4, 3+1-4(t7)
+ bne t3, 0, 1b
+ sw t4, 0(a0)
+ j ra
+ nop
+
+ // store four bytes using swl/swr
+$doch3: j ra
+ SWB t4, 3(a0)
+ // store up to three bytes, a byte at a time.
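+ // (editor's note: $dochN, for N = 0..2, is entered when byte N is
+ // the null; it stores bytes N..0, so the terminator always reaches
+ // the destination)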
+$doch2: sb t2, 2(a0)
+$doch1: sb t1, 1(a0)
+$doch0: j ra
+ sb t0, 0(a0)
+
+.end strcpy
diff --git a/private/crt32/string/mips/strcpyt.c b/private/crt32/string/mips/strcpyt.c
new file mode 100644
index 000000000..4d0d99279
--- /dev/null
+++ b/private/crt32/string/mips/strcpyt.c
@@ -0,0 +1,23 @@
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+
+#define SRCLEN 21 /* to avoid complicating errors */
+
+void main( int argc, char **argv )
+{
+ int c;
+ unsigned char *psrc, *pdst;
+ unsigned char src[SRCLEN] = "ABCDEFGHIJKLMNOPQRST";
+ unsigned char dst[100];
+
+ for (c = 'a'; c <= UCHAR_MAX; c++) {
+ src[9] = c;
+ strcpy((char *)dst, (char *)src);
+ for (psrc = src, pdst = dst; *psrc; psrc++, pdst++) {
+ if (*psrc != *pdst) {
+ printf("Fail - Could not find '%c' 0x%x in %s\n", c, c, src);
+ break;
+ }
+ }
+ }
+}
diff --git a/private/crt32/string/mips/strlenm.s b/private/crt32/string/mips/strlenm.s
new file mode 100644
index 000000000..24027e0fc
--- /dev/null
+++ b/private/crt32/string/mips/strlenm.s
@@ -0,0 +1,19 @@
+/* ------------------------------------------------------------------ */
+/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */
+/* | Reserved. This software contains proprietary and confidential | */
+/* | information of MIPS and its suppliers. Use, disclosure or | */
+/* | reproduction is prohibited without the prior express written | */
+/* | consent of MIPS. | */
+/* ------------------------------------------------------------------ */
+/* strlen.s 1.1 */
+
+#include <kxmips.h>
+
+LEAF_ENTRY(strlen)
+ subu v0,a0,1
+1: lbu v1,1(v0)
+ add v0,1
+ bne v1,zero,1b
+ subu v0,v0,a0
+ j ra
+ .end strlen
diff --git a/private/crt32/string/mips/strrchrm.s b/private/crt32/string/mips/strrchrm.s
new file mode 100644
index 000000000..feb1f7945
--- /dev/null
+++ b/private/crt32/string/mips/strrchrm.s
@@ -0,0 +1,24 @@
+/* ------------------------------------------------------------------ */
+/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */
+/* | Reserved. This software contains proprietary and confidential | */
+/* | information of MIPS and its suppliers. Use, disclosure or | */
+/* | reproduction is prohibited without the prior express written | */
+/* | consent of MIPS. | */
+/* ------------------------------------------------------------------ */
+#ident "$Header: /disks/bits/5.1isms/irix/lib/libc/src/strings/RCS/rindex.s,v 1.3 1992/03/07 15:37:36 jleong Exp $"
+
+/*
+ * Copyright 1985 by MIPS Computer Systems, Inc.
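+ *
+ * Editor's note: a C sketch of the loop below (added for reference,
+ * not part of the original source). It remembers the most recent
+ * match, and like strchr it treats '\0' as a character that can be
+ * searched for:
+ *
+ *	char *strrchr(const char *s, int c)
+ *	{
+ *		const unsigned char *p = (const unsigned char *)s;
+ *		char *last = 0;
+ *		do {
+ *			if (*p == c)
+ *				last = (char *)p;
+ *		} while (*p++ != 0);
+ *		return last;
+ *	}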
+ */
+
+#include "kxmips.h"
+
+LEAF_ENTRY(strrchr)
+ move v0,zero
+1: lbu a3,0(a0)
+ addu a0,1
+ bne a3,a1,2f
+ subu v0,a0,1
+2: bne a3,zero,1b
+ j ra
+.end strrchr
diff --git a/private/crt32/string/mips/strrchrt.c b/private/crt32/string/mips/strrchrt.c
new file mode 100644
index 000000000..f608c2fc6
--- /dev/null
+++ b/private/crt32/string/mips/strrchrt.c
@@ -0,0 +1,20 @@
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+
+void main( int argc, char **argv )
+ {
+ int c;
+ unsigned char *pstr;
+ unsigned char string[100];
+
+ strcpy((char *)string, "ABCDEFGHIJKLMNOPQRST");
+ for (c = 'a'; c <= UCHAR_MAX; c++)
+ {
+ string[9] = c;
+ pstr = (unsigned char *)strrchr((char *)string, c);
+ if (!pstr)
+ printf("Fail - Could not find %d in %s\n", c, string);
+ }
+ return;
+ }
diff --git a/private/crt32/string/mips/wcscmpm.s b/private/crt32/string/mips/wcscmpm.s
new file mode 100644
index 000000000..d3997945d
--- /dev/null
+++ b/private/crt32/string/mips/wcscmpm.s
@@ -0,0 +1,67 @@
+/*******************************************************************************
+ * wcscmpm.s - contains wcscmp()
+ *
+ * ------------------------------------------------------------------
+ * | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights |
+ * | Reserved. This software contains proprietary and confidential |
+ * | information of MIPS and its suppliers. Use, disclosure or |
+ * | reproduction is prohibited without the prior express written |
+ * | consent of MIPS. |
+ * ------------------------------------------------------------------
+ * strcmp.s 1.1
+ *
+ * Purpose:
+ * wcscmp() compares two wide-character strings and returns an integer
+ * to indicate whether the first is less than the second, the two are
+ * equal, or whether the first is greater than the second.
+ *
+ * Comparison is done wchar_t by wchar_t on an UNSIGNED basis, which is
+ * to say that a null wchar_t (0) is less than any other wchar_t.
+ *
+ * This function is a MIPS assembly-code replacement for the C version.
+ *
+ * Entry:
+ *
+ * const wchar_t * src - string for left-hand side of comparison
+ * const wchar_t * dst - string for right-hand side of comparison
+ *
+ *Exit:
+ * returns -1 if src < dst
+ * returns 0 if src == dst
+ * returns +1 if src > dst
+ *
+ *Exceptions:
+ *
+ *Revision History:
+ * Craig Hansen (MIPS) 06-June-86 Created.
+ * Roger Lanser (MS) 02-April-94 Cloned for Wide Characters (16-bits).
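+ *
+ * Editor's note: a C reference for the contract above (added for
+ * comparison, not part of the original source). Returns are clamped
+ * to -1/0/+1, and the comparison is unsigned because wchar_t is an
+ * unsigned 16-bit type here:
+ *
+ *	int wcscmp(const wchar_t *src, const wchar_t *dst)
+ *	{
+ *		while (*src != 0 && *src == *dst) {
+ *			src++;
+ *			dst++;
+ *		}
+ *		if (*src == *dst)
+ *			return 0;
+ *		return (*src < *dst) ? -1 : +1;
+ *	}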
+ *
+ ******************************************************************************/
+
+#include <kxmips.h>
+
+ .text
+
+LEAF_ENTRY(wcscmp)
+
+ lhu t0,0(a0)
+1: lhu t1,0(a1)
+ addi a0,4
+ beq t0,0,2f
+ lhu t2,-2(a0) # ok to load since -4(a0)!=0
+ bne t0,t1,2f
+ lhu t1,2(a1)
+ addi a1,4
+ beq t2,0,3f
+ lhu t0,0(a0) # ok to load since -2(a0) != 0
+ beq t2,t1,1b
+3: move t0,t2 // second pair decides: compare t2 against t1
+2: bne t0,t1,4f // unequal wchar_ts, or only one string ended
+ move v0,zero
+ j ra // source1 == source2, return 0
+4:
+ sltu v0,t1,t0 // compare source1 to source2
+ beq v0,zero,5f
+ j ra // source1 > source2, return 1
+5:
+ li v0,-1
+ j ra // source1 < source2, return -1
+ .end wcscmp
diff --git a/private/crt32/string/mips/wcscmpt.c b/private/crt32/string/mips/wcscmpt.c
new file mode 100644
index 000000000..629f61ca5
--- /dev/null
+++ b/private/crt32/string/mips/wcscmpt.c
@@ -0,0 +1,62 @@
+char buffer[100];
+#include <stdio.h>
+#include <string.h>
+#include <memory.h>
+
+#define NTUL 7
+
+void main()
+{
+ int i, k;
+ int rc;
+
+ unsigned long source1[4] = {
+ 0x30003000,
+ 0x30003000,
+ 0x30003000,
+ 0x36003000
+ };
+
+ unsigned long source2[4] = {
+ 0x30003000,
+ 0x30003000,
+ 0x30003000,
+ 0x00000000
+ };
+
+ unsigned long tul[NTUL] = {
+ 0x35004600,
+ 0x37004600,
+ 0x36002f00,
+ 0x37002f00,
+ 0x30004600,
+ 0x30003000,
+ 0x36003000
+ };
+
+
+ for (k = 0; k < NTUL; k++) {
+ unsigned short *s1 = (unsigned short *)source1;
+ unsigned short *s2 = (unsigned short *)source2;
+
+ source2[3] = tul[k];
+
+ printf("source1 = ");
+ for (i = 0; i < 4*sizeof(unsigned long); i++)
+ printf("%2.2x ", ((char *)source1)[i]);
+ printf("\n");
+
+ printf("source2 = ");
+ for (i = 0; i < 4*sizeof(unsigned long); i++)
+ printf("%2.2x ", ((char *)source2)[i]);
+
+ rc = wcscmp(s1, s2);
+ if (rc < 0) {
+ printf(" source1 < source2\n");
+ } else if (rc > 0) {
+ printf(" source1 > source2\n");
+ } else {
+ printf(" source1 == source2\n");
+ }
+ printf("Return Code = %d\n",rc);
+ }
+}
diff --git a/private/crt32/string/mips/wcscpym.s b/private/crt32/string/mips/wcscpym.s
new file mode 100644
index 000000000..41b3544ec
--- /dev/null
+++ b/private/crt32/string/mips/wcscpym.s
@@ -0,0 +1,139 @@
+/*******************************************************************************
+ * wcscpym.s - contains wcscpy() and wcscat()
+ *
+ * Copyright (c) 1994, Microsoft Corporation. All rights reserved.
+ *
+ * Purpose:
+ * wcscpy() copies one wchar_t string into another.
+ *
+ * wcscpy() copies the source string to the destination string
+ * assuming no overlap and enough room in the destination. The
+ * destination string is returned. Strings are wide-character
+ * strings.
+ *
+ * This function is a MIPS assembly-code replacement for the C version.
+ * All this code tries to do is produce a loop that uses a lw/sw
+ * pair rather than running a lhu/sh loop twice. A small penalty
+ * is paid for very short wide-character strings because of the
+ * setup tests.
+ *
+ * Entry:
+ *
+ * wchar_t *wcscpy(dst, src)
+ * wchar_t * dst - wchar_t string over which "src" is to be copied
+ * const wchar_t * src - wchar_t string to be copied over "dst"
+ *
+ *Exit:
+ * The address of "dst".
+ *
+ *Exceptions:
+ *
+ *Revision History:
+ * 02-08-97 RDL Created initial version.
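+ *
+ * Editor's note: the straightforward halfword-at-a-time C version that
+ * the word-at-a-time loop below replaces (a sketch for reference, not
+ * part of the original source):
+ *
+ *	wchar_t *wcscpy(wchar_t *dst, const wchar_t *src)
+ *	{
+ *		wchar_t *start = dst;
+ *		while ((*dst++ = *src++) != 0)
+ *			;
+ *		return start;
+ *	}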
+ *
+ ******************************************************************************/
+
+#include <kxmips.h>
+
+.text
+
+LEAF_ENTRY(wcscat)
+
+ .set noreorder
+
+ // a0 destination
+ // a1 source
+
+ move v0, a0 // a copy of destination address is returned
+1: lhu t2,0(a0)
+ bnel zero,t2,1b
+ addiu a0,a0,2
+ b 2f
+ nop
+
+ALTERNATE_ENTRY(wcscpy)
+
+ // a0 destination
+ // a1 source
+
+ move v0, a0 // a copy of destination address is returned
+
+2: andi t1,a1,2 // assume at least halfword alignment
+3: andi t0,a0,2 // assume at least halfword alignment
+5: bne t0,t1,30f
+ nop
+
+10: // buffers start on same alignment
+ beq zero,t0,20f
+ nop
+ // halfword alignment
+ lhu t1,0(a1)
+ addiu a0,2
+ addiu a1,2
+ beq zero,t1,99f
+ sh t1,-2(a0)
+
+20: // word alignment
+ lw t0,0(a1)
+ addiu a0,4
+ addiu a1,4
+ andi t1,t0,0xffff
+ beq zero,t1,92f
+ srl t2,t0,16
+ bne zero,t2,20b
+ sw t0,-4(a0)
+ j ra
+ nop
+
+30: // buffers start on different alignment
+ beq zero,t1,40f
+ nop
+ // destination on word boundary, source on halfword boundary
+ lhu t0,0(a1)
+ addiu a1,2
+35: beq zero,t0,92f
+ addiu a0,4
+ lw t1,0(a1)
+ addiu a1,4
+ srl t2,t1,16
+ andi t1,0xffff
+ sll t3,t1,16
+ or t0,t0,t3
+ sw t0,-4(a0)
+ bne zero,t1,35b
+ or t0,zero,t2
+ j ra
+ nop
+
+40: // destination on halfword boundary, source on word boundary
+ lw t3,0(a1)
+ addiu a0,2
+ addiu a1,4
+ srl t2,t3,16
+ andi t0,t3,0xffff
+ beq zero,t0,99f
+ sh t0,-2(a0)
+45: lw t3,0(a1)
+ addiu a0,4
+ addiu a1,4
+ srl t1,t3,16
+ sll t3,t3,16
+ beq zero,t3,94f
+ or t0,t2,t3
+ sw t0,-4(a0)
+ bne zero,t1,45b
+ or t2,t1,zero
+ j ra
+ sh t1,0(a0)
+
+92: j ra
+ sh t0,-4(a0)
+
+94: j ra
+ sw t0,-4(a0)
+
+99: j ra
+ nop
+ .set reorder
+
+ .end wcscat
diff --git a/private/crt32/string/mips/wcslenm.s b/private/crt32/string/mips/wcslenm.s
new file mode 100644
index 000000000..ef45c114e
--- /dev/null
+++ b/private/crt32/string/mips/wcslenm.s
@@ -0,0 +1,45 @@
+/*******************************************************************************
+ * wcslenm.s - contains wcslen()
+ *
+ * ------------------------------------------------------------------
+ * | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights |
+ * | Reserved. This software contains proprietary and confidential |
+ * | information of MIPS and its suppliers. Use, disclosure or |
+ * | reproduction is prohibited without the prior express written |
+ * | consent of MIPS. |
+ * ------------------------------------------------------------------
+ * strlen.s 1.1
+ *
+ * Purpose:
+ * Finds the length in wchar_t's of the given string, not including
+ * the final null wchar_t (wide-characters).
+ *
+ * This function is a MIPS assembly-code replacement for the C version.
+ *
+ * Entry:
+ *
+ * size_t wcslen(wcs)
+ * wchar_t * wcs - wchar_t string
+ *
+ *Exit:
+ * The "length" of wcs in wchar_t's.
+ *
+ *Exceptions:
+ *
+ *Revision History:
+ * Craig Hansen (MIPS) 06-June-86 Created.
+ * Roger Lanser (MS) 02-April-94 Cloned for Wide Characters (16-bits).
+ *
+ ******************************************************************************/
+
+#include <kxmips.h>
+
+LEAF_ENTRY(wcslen)
+ subu v0,a0,2
+1: lhu v1,2(v0)
+ addiu v0,v0,2
+ bne v1,zero,1b
+ subu v0,v0,a0
+ srl v0,v0,1
+ j ra
+ .end wcslen
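+
+/* Editor's note: a C reference for the loop above (a sketch added for
+   comparison, not part of the original source). The assembly counts
+   bytes and shifts right by one to convert to wchar_t units:
+
+	size_t wcslen(const wchar_t *wcs)
+	{
+		const wchar_t *p = wcs;
+		while (*p != 0)
+			p++;
+		return (size_t)(p - wcs);
+	}
+*/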