summaryrefslogtreecommitdiffstats
path: root/private/crt32/string/mips
diff options
context:
space:
mode:
authorAdam <you@example.com>2020-05-17 05:51:50 +0200
committerAdam <you@example.com>2020-05-17 05:51:50 +0200
commite611b132f9b8abe35b362e5870b74bce94a1e58e (patch)
treea5781d2ec0e085eeca33cf350cf878f2efea6fe5 /private/crt32/string/mips
downloadNT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar
NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.gz
NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.bz2
NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.lz
NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.xz
NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.zst
NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.zip
Diffstat (limited to 'private/crt32/string/mips')
-rw-r--r--private/crt32/string/mips/memcmpm.s125
-rw-r--r--private/crt32/string/mips/memcmpt.c334
-rw-r--r--private/crt32/string/mips/memcpym.s298
-rw-r--r--private/crt32/string/mips/memorym.s1218
-rw-r--r--private/crt32/string/mips/memsetm.s105
-rw-r--r--private/crt32/string/mips/memsett.c20
-rw-r--r--private/crt32/string/mips/strcatm.s98
-rw-r--r--private/crt32/string/mips/strchrm.s26
-rw-r--r--private/crt32/string/mips/strchrt.c20
-rw-r--r--private/crt32/string/mips/strcmpm.s50
-rw-r--r--private/crt32/string/mips/strcpym.s148
-rw-r--r--private/crt32/string/mips/strcpyt.c23
-rw-r--r--private/crt32/string/mips/strlenm.s19
-rw-r--r--private/crt32/string/mips/strrchrm.s24
-rw-r--r--private/crt32/string/mips/strrchrt.c20
-rw-r--r--private/crt32/string/mips/wcscmpm.s67
-rw-r--r--private/crt32/string/mips/wcscmpt.c62
-rw-r--r--private/crt32/string/mips/wcscpym.s139
-rw-r--r--private/crt32/string/mips/wcslenm.s45
19 files changed, 2841 insertions, 0 deletions
diff --git a/private/crt32/string/mips/memcmpm.s b/private/crt32/string/mips/memcmpm.s
new file mode 100644
index 000000000..961939432
--- /dev/null
+++ b/private/crt32/string/mips/memcmpm.s
@@ -0,0 +1,125 @@
+/* ------------------------------------------------------------------ */
+/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */
+/* | Reserved. This software contains proprietary and confidential | */
+/* | information of MIPS and its suppliers. Use, disclosure or | */
+/* | reproduction is prohibited without the prior express written | */
+/* | consent of MIPS. | */
+/* ------------------------------------------------------------------ */
+#ident "$Header"
+
+/*
+ * Copyright 1985 by MIPS Computer Systems, Inc.
+ */
+
+/* bcmp(s1, s2, n) */
+
+#include "kxmips.h"
+
+/*
+ * memcmp(src, dst, bcount) -- exported as memcmp; comments here call it bcmp
+ *
+ * MINCMP is the minimum number of bytes for which it is worthwhile to
+ * try to align the compare into word transactions
+ *
+ * Calculating MINCMP
+ * Overhead =~ 15 instructions => 90 cycles
+ * Byte cmp =~ 38 cycles/word
+ * Word cmp =~ 17 cycles/word
+ * Breakeven =~ 16 bytes
+ */
+#define MINCMP 16
+#define NBPW 4
+
+/*
+ * memcmp(a0 = src1, a1 = src2, a2 = byte count)
+ *
+ * Returns in v0: 0 when no difference is found, 1 when the differing
+ * unit loaded from src1 is greater (unsigned) than the one loaded from
+ * src2, and -1 otherwise.
+ *
+ * Counts below MINCMP are compared a byte at a time.  Otherwise both
+ * pointers are word aligned when their low bits agree, or only src2 is
+ * aligned and src1 is read with lwr/lwl; leftovers finish in bytecmp.
+ *
+ * NOTE(review): cmpne orders whole words when it is reached from the
+ * word loops.  The lwr 0()/lwl 3() pairing suggests a little-endian
+ * target, where an unsigned word compare presumably weights the
+ * highest-addressed byte most, so the sign may differ from a bytewise
+ * memcmp -- confirm before relying on the sign of the result.
+ */
+LEAF_ENTRY(memcmp)
+ xor v0,a0,a1
+ blt a2,MINCMP,bytecmp # too short, just byte cmp
+ and v0,NBPW-1
+ subu t8,zero,a0 # number of bytes til aligned
+ bne v0,zero,unalgncmp # src and dst not alignable
+/*
+ * src and dst can be simultaneously word aligned
+ */
+ and t8,NBPW-1
+ subu a2,t8
+ beq t8,zero,wordcmp # already aligned
+ move v0,v1 # lw[lr] don't clear target reg
+ lwr v0,0(a0)
+ lwr v1,0(a1)
+ addu a0,t8
+ addu a1,t8
+ bne v0,v1,cmpne
+
+/*
+ * word cmp loop
+ */
+wordcmp:
+ and a3,a2,~(NBPW-1)
+ subu a2,a3
+ beq a3,zero,bytecmp
+ addu a3,a0 # src1 endpoint
+1: lw v0,0(a0)
+ lw v1,0(a1)
+ addu a0,NBPW # 1st BDSLOT
+ addu a1,NBPW # 2nd BDSLOT (asm doesn't move)
+ bne v0,v1,cmpne
+ bne a0,a3,1b # at least one more word
+ b bytecmp
+
+/*
+ * deal with simultaneously unalignable cmp by aligning one src
+ */
+unalgncmp:
+ subu a3,zero,a1 # calc byte cnt to get src2 aligned
+ and a3,NBPW-1
+ subu a2,a3
+ beq a3,zero,partaligncmp # already aligned
+ addu a3,a0 # src1 endpoint
+1: lbu v0,0(a0)
+ lbu v1,0(a1)
+ addu a0,1
+ addu a1,1
+ bne v0,v1,cmpne
+ bne a0,a3,1b
+
+/*
+ * src unaligned, dst aligned loop
+ */
+partaligncmp:
+ and a3,a2,~(NBPW-1)
+ subu a2,a3
+ beq a3,zero,bytecmp
+ addu a3,a0
+1:
+ lwr v0,0(a0)
+ lwl v0,3(a0)
+ lw v1,0(a1)
+ addu a0,NBPW
+ addu a1,NBPW
+ bne v0,v1,cmpne
+ bne a0,a3,1b
+
+/*
+ * brute force byte cmp loop
+ */
+bytecmp:
+ addu a3,a2,a0 # src1 endpoint; BDSLOT
+ ble a2,zero,cmpdone
+1: lbu v0,0(a0)
+ lbu v1,0(a1)
+ addu a0,1
+ addu a1,1
+ bne v0,v1,cmpne
+ bne a0,a3,1b
+cmpdone:
+ move v0,zero
+ j ra
+
+/*
+ * v0 (from src1) and v1 (from src2) differ: return 1 if v1 < v0
+ * unsigned, else -1
+ */
+cmpne:
+ sltu a2,v1,v0
+ bne a2,zero,9f
+ li v0,-1
+ j ra
+9:
+ li v0,1
+ j ra
+.end memcmp
diff --git a/private/crt32/string/mips/memcmpt.c b/private/crt32/string/mips/memcmpt.c
new file mode 100644
index 000000000..3adb427b9
--- /dev/null
+++ b/private/crt32/string/mips/memcmpt.c
@@ -0,0 +1,334 @@
+/*
+ * Test memcmp() function.
+ */
+
+char buffer[100];
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+
+#define FALSE 0
+#define TRUE 1
+
+#define NTUL 7
+#define TEST16 4
+#define TEST32 8
+
+#define BUFSIZE 256
+
+/*
+ * Dump a buffer to stdout as:  identifier = '<bytes>'
+ * Exactly `length` bytes of buf are written as raw characters; the
+ * buffer does not have to be NUL terminated.
+ */
+void printbuf(char *identifier, char *buf, int length)
+{
+    char *cursor = buf;
+    int remaining = length;
+
+    printf("%s = '", identifier);
+    while (remaining-- > 0)
+        putchar(*cursor++);
+    printf("'\n");
+}
+
+/*
+ * Test driver for memcmp().
+ *
+ * Two BUFSIZE-byte buffers are filled with the same repeating 32-byte
+ * pattern.  memcmp() is then checked for zero-length, equal, less-than
+ * and greater-than results over a range of start offsets and lengths,
+ * followed by two fixed "misc" cases.  The less-than/greater-than cases
+ * temporarily bump one byte of source1 down/up and restore it afterwards.
+ *
+ * Each T...Failed flag stops its own test category after the first
+ * failure.  The process exits with the sum of those flags plus the misc
+ * failure count (which includes the zero-length test), so 0 means that
+ * every test passed.
+ */
+void main()
+{
+    int i, j, n, k, l;
+    int rc;
+    char *s1, *s2;
+
+    char TavEqFailed = FALSE;
+    char TvaEqFailed = FALSE;
+    char TavltFailed = FALSE;
+    char TvaltFailed = FALSE;
+    char TavgtFailed = FALSE;
+    char TvagtFailed = FALSE;
+
+    char TvveqFailed = FALSE;
+    char TvvltFailed = FALSE;
+    char TvvgtFailed = FALSE;
+
+    int Tmisc = 0;
+
+    unsigned long source1_16[TEST16] = {
+        0x00003000,
+        0x30003000,
+        0x30003000,
+        0x36003000
+    };
+
+    unsigned long source2_16[TEST16] = {
+        0x00003000,
+        0x30003000,
+        0x30003000,
+        0x00000000
+    };
+
+    /* Candidate last words for source2_16, paired with tul_test[] below. */
+    unsigned long tul[NTUL] = {
+        0x35004600,
+        0x37004600,
+        0x36002f00,
+        0x37002f00,
+        0x30004600,
+        0x30003000,
+        0x36003000
+    };
+    /*
+     * Expected memcmp() result for each tul[] entry.
+     * NOTE(review): these values look like they assume the words are
+     * stored little-endian (lowest-addressed byte is the low-order
+     * byte) -- confirm for the target.
+     */
+    int tul_test[NTUL] = {
+        -1,
+        -1,
+        +1,
+        +1,
+        -1,
+        +1,
+        0
+    };
+
+    struct {
+        double dummy;
+        char source1[BUFSIZE];
+        char source2[BUFSIZE];
+    } buffer;
+
+    /* Exactly 32 characters: the array holds no terminating NUL. */
+    char source32[32] = "0X0042036C 002477CD BREAK 0x91DF";
+
+    /*
+     * Fill both buffers with the 32-byte pattern.  The inner loop stops
+     * each time j reaches a multiple of 32, which restarts i at 0.
+     */
+    for (j = 0; j < BUFSIZE; ) {
+        for (i = 0; i <= j % 32; i++, j++) {
+            buffer.source1[j] = source32[i];
+            buffer.source2[j] = source32[i];
+        }
+    }
+
+    /* Sanity check the fill without relying on memcmp(). */
+    j = BUFSIZE;
+    s1 = buffer.source1;
+    s2 = buffer.source2;
+    while (j--) {
+        if (*s1++ != *s2++) {
+            printf("\n\nbuffer.source1 != buffer.source2, exiting test!!!\n");
+            exit(-1);
+        }
+    }
+
+    if (memcmp(buffer.source1, buffer.source2, BUFSIZE) != 0) {
+        printf("\n\tbuffer.source1 != buffer.source2, exiting test!!!\n");
+        exit(-1);
+    }
+
+    /* Test for zero length */
+    for (i = 0; i < BUFSIZE; i++ ) {
+        s1 = &(buffer.source1[i]);
+        s2 = &(buffer.source2[i]);
+        l = 0;
+        rc = memcmp(s1, s2, l);
+        if (rc) {
+            printf("%s, line #%d: Zero length test failed!!!\n", __FILE__, __LINE__);
+            /* Count the failure so it shows up in the exit status. */
+            Tmisc++;
+            break;
+        }
+    }
+
+
+    for (k = BUFSIZE; k > 0; k-- ) {
+        for (n = 0; n < k; n++) {
+            char c;
+            int m;
+
+            /* Test with aligned start and variable end */
+            if (!TavEqFailed) {
+                s1 = buffer.source1;
+                s2 = buffer.source2;
+                l = k;
+                rc = memcmp(s1, s2, l);
+                if (rc != 0) {
+                    printbuf("source1", s1, l);
+                    printbuf("source2", s2, l);
+                    printf("%s, line #%d: %d byte aligned block equal test failed!!!\n", __FILE__, __LINE__, k);
+                    TavEqFailed = TRUE;
+                }
+            }
+
+            /* Test with variable start and aligned end */
+            if (!TvaEqFailed) {
+                s1 = &(buffer.source1[n]);
+                s2 = &(buffer.source2[n]);
+                l = k - n;
+                rc = memcmp(s1, s2, l);
+                if (rc != 0) {
+                    printbuf("source1", s1, l);
+                    printbuf("source2", s2, l);
+                    printf("%s, line #%d: %d byte unaligned block equal test failed!!!\n", __FILE__, __LINE__, k);
+                    TvaEqFailed = TRUE;
+                }
+            }
+
+            /* Test with aligned start and variable end */
+            s1 = buffer.source1;
+            s2 = buffer.source2;
+            l = k - n;
+            for (m = 0; m < l && !TavltFailed; m++) {
+                c = s1[m];
+                s1[m] -= 1;
+                rc = memcmp(s1, s2, l);
+                if (rc != -1) {
+                    printbuf("source1", s1, l);
+                    printbuf("source2", s2, l);
+                    printf("%s, line #%d: %d byte aligned block less than test failed!!!\n", __FILE__, __LINE__, k);
+                    TavltFailed = TRUE;
+                }
+                s1[m] = c;
+            }
+
+            /* Test with variable start and aligned end */
+            s1 = &(buffer.source1[n]);
+            s2 = &(buffer.source2[n]);
+            l = k - n;
+            for (m = 0; m < l && !TvaltFailed; m++) {
+                c = s1[m];
+                s1[m] -= 1;
+                rc = memcmp(s1, s2, l);
+                if (rc != -1) {
+                    printbuf("source1", s1, l);
+                    printbuf("source2", s2, l);
+                    printf("%s, line #%d: %d byte unaligned block less than test failed!!!\n", __FILE__, __LINE__, k);
+                    TvaltFailed = TRUE;
+                }
+                s1[m] = c;
+            }
+
+            /* Test with aligned start and variable end */
+            s1 = buffer.source1;
+            s2 = buffer.source2;
+            l = k - n;
+            for (m = 0; m < l && !TavgtFailed; m++) {
+                c = s1[m];
+                s1[m] += 1;
+                rc = memcmp(s1, s2, l);
+                if (rc != 1) {
+                    printbuf("source1", s1, l);
+                    printbuf("source2", s2, l);
+                    printf("%s, line #%d: %d byte aligned block greater than test failed!!!\n", __FILE__, __LINE__, k);
+                    TavgtFailed = TRUE;
+                }
+                s1[m] = c;
+            }
+
+            /* Test with variable start and aligned end */
+            s1 = &(buffer.source1[n]);
+            s2 = &(buffer.source2[n]);
+            l = k - n;
+            for (m = 0; m < l && !TvagtFailed; m++) {
+                c = s1[m];
+                s1[m] += 1;
+                rc = memcmp(s1, s2, l);
+                if (rc != 1) {
+                    printbuf("source1", s1, l);
+                    printbuf("source2", s2, l);
+                    printf("%s, line #%d: %d byte unaligned block greater than test failed!!!\n", __FILE__, __LINE__, k);
+                    TvagtFailed = TRUE;
+                }
+                s1[m] = c;
+            }
+        }
+    }
+
+    /*
+     * Variable start and end: the block begins at offset n and is
+     * k - 2*n bytes long, so it ends at offset k - n.
+     */
+    for (k = BUFSIZE; k > 0; k-- ) {
+        for (n = 0; n < k/2; n++) {
+            char c;
+            int m;
+
+            /* Test equal with variable start and end */
+            if (!TvveqFailed) {
+                l = k - 2*n;
+                s1 = &(buffer.source1[n]);
+                s2 = &(buffer.source2[n]);
+                rc = memcmp(s1, s2, l);
+                if (rc != 0) {
+                    printbuf("source1", s1, l);
+                    printbuf("source2", s2, l);
+                    printf("%s, line #%d: %d byte variable block equal test failed!!!\n", __FILE__, __LINE__, l);
+                    TvveqFailed = TRUE;
+                }
+            }
+
+            /* Test less than with variable start and end */
+            l = k - 2*n;
+            s1 = &(buffer.source1[n]);
+            s2 = &(buffer.source2[n]);
+            for (m = 0; m < l && !TvvltFailed; m++) {
+                c = s1[m];
+                s1[m] -= 1;
+                rc = memcmp(s1, s2, l);
+                if (rc != -1) {
+                    printbuf("source1", s1, l);
+                    printbuf("source2", s2, l);
+                    printf("%s, line #%d: %d byte variable block less than test failed!!!\n", __FILE__, __LINE__, l);
+                    TvvltFailed = TRUE;
+                }
+                s1[m] = c;
+            }
+
+            /* Test greater than with variable start and end */
+            l = k - 2*n;
+            s1 = &(buffer.source1[n]);
+            s2 = &(buffer.source2[n]);
+            for (m = 0; m < l && !TvvgtFailed; m++) {
+                c = s1[m];
+                s1[m] += 1;
+                rc = memcmp(s1, s2, l);
+                if (rc != 1) {
+                    printbuf("source1", s1, l);
+                    printbuf("source2", s2, l);
+                    printf("%s, line #%d: %d byte variable block greater than test failed!!!\n", __FILE__, __LINE__, l);
+                    TvvgtFailed = TRUE;
+                }
+                s1[m] = c;
+            }
+        }
+    }
+
+
+    /* Misc test1 */
+    for (k = 0; k < NTUL; k++) {
+
+        source2_16[3] = tul[k];
+
+        rc = memcmp(source1_16,source2_16,TEST16*sizeof(unsigned long));
+        if (rc != tul_test[k]) {
+
+            /* Dump through unsigned char so no byte is sign-extended by %x. */
+            printf("source1_16 = ");
+            for (i = 0; i < TEST16*sizeof(unsigned long); i++)
+                printf("%2.2x ", ((unsigned char *)source1_16)[i]);
+            printf("\n");
+
+            printf("source2_16 = ");
+            for (i = 0; i < TEST16*sizeof(unsigned long); i++)
+                printf("%2.2x ", ((unsigned char *)source2_16)[i]);
+            printf("\n");
+
+            printf("%s, line #%d: Misc Test #1, case #%d of %d failed!!!\n", __FILE__, __LINE__, k+1, NTUL);
+            printf("Return Code = %d, Should be = %d\n",rc,tul_test[k]);
+            Tmisc++;
+        }
+    }
+
+
+    /* Misc test2: same 32 bytes, with source2 shifted up by one byte */
+    l = 32;
+    buffer.source2[0] = '"';
+    for (i = 0; i < l; i++) {
+        buffer.source1[i] = source32[i];
+        buffer.source2[i+1] = source32[i];
+    }
+    buffer.source2[l+1] = '"';
+    s1 = &(buffer.source1[0]);
+    s2 = &(buffer.source2[1]);
+    if (0 != memcmp(s1, s2, l)) {
+        printbuf("source1", s1, l);
+        printbuf("source2", s2, l);
+        printf("%s, line #%d: Misc Test #2 failed!!!\n", __FILE__, __LINE__);
+        Tmisc++;
+    }
+
+
+    rc = TavEqFailed + TvaEqFailed + TavltFailed + TvaltFailed + TavgtFailed + TvagtFailed + TvveqFailed + TvvltFailed + TvvgtFailed + Tmisc;
+    if (rc) {
+        printf("\n\tMEMCMP failed %d tests!!!\n", rc);
+        exit(rc);
+    } else {
+        printf("\n\tMEMCMP passed all tests!!!\n");
+        exit(0);
+    }
+}
diff --git a/private/crt32/string/mips/memcpym.s b/private/crt32/string/mips/memcpym.s
new file mode 100644
index 000000000..ca0f8fe78
--- /dev/null
+++ b/private/crt32/string/mips/memcpym.s
@@ -0,0 +1,298 @@
+/*
+ * Fast bcopy code which supports overlapped copies.
+ * Not fully optimized yet.
+ *
+ * Written by: Kipp Hickman
+ *
+ * $Source: /proj/sherwood/isms/irix/lib/libc/src/strings/RCS/bcopy.s,v $
+ * $Revision: 1.7 $
+ * $Date: 1993/11/20 19:23:11 $
+ */
+
+#include <kxmips.h>
+
+/*
+ * char *bcopy(from, to, count);
+ * unsigned char *from, *to;
+ * unsigned long count;
+ *
+ * OR
+ *
+ * void *memcpy/memmove(to, from, count);
+ * void *to, *from;
+ * unsigned long count;
+ *
+ * Both functions return "to"
+ */
+
+#define MINCOPY 16
+
+/* registers used */
+
+#define to a0
+#define from a1
+#define count a2
+
+/*
+ * memcpy/memmove entry.  a3 keeps the original "to" so that it can be
+ * handed back in v0 at every exit.  A zero count, or from == to, returns
+ * at once.
+ */
+LEAF_ENTRY(memcpy)
+ALTERNATE_ENTRY(memmove)
+ move a3,to # Save to in a3
+ beq count,zero,ret # Test for zero count
+ beq from,to,ret # Test for from == to
+
+ /*
+  * use backwards copying code if the from and to regions overlap
+  * NOTE(review): blt/bge and add treat the addresses as signed values;
+  * presumably harmless for the address ranges in use -- confirm.
+  */
+ blt to,from,goforwards # If to < from then use forwards copy
+ add v0,from,count # v0 := from + count
+ bge to,v0,goforwards # If to >= from + count; no overlap
+ b gobackwards # Oh well, go backwards
+
+/*****************************************************************************/
+
+/*
+ * Forward copy code. Check for pointer alignment and try to get both
+ * pointers aligned on a long boundary.
+ */
+goforwards:
+ /* small byte counts use byte at a time copy */
+ blt count,MINCOPY,forwards_bytecopy
+ and v0,from,3 # v0 := from & 3
+ and v1,to,3 # v1 := to & 3
+ beq v0,v1,forwalignable # low bits are identical
+/*
+ * Byte at a time copy code. This is used when the pointers are not
+ * alignable, when the byte count is small, or when cleaning up any
+ * remaining bytes on a larger transfer.
+ */
+forwards_bytecopy:
+ beq count,zero,ret # If count is zero, then we are done
+ addu v1,from,count # v1 := from + count
+
+99: lb v0,0(from) # v0 = *from
+ addu from,1 # advance pointer
+ sb v0,0(to) # Store byte
+ addu to,1 # advance pointer
+ bne from,v1,99b # Loop until done
+ret: move v0,a3 # Set v0 to old "to" pointer
+ j ra # return to caller
+
+/*
+ * Pointers are alignable, and may be aligned. Since v0 == v1, we need only
+ * check what value v0 has to see how to get aligned. Also, since we have
+ * eliminated tiny copies, we know that the count is large enough to
+ * encompass the alignment copies.
+ */
+forwalignable:
+ beq v0,zero,forwards # If v0==v1 && v0==0 then aligned
+ beq v0,1,forw_copy3 # Need to copy 3 bytes to get aligned
+ beq v0,2,forw_copy2 # Need to copy 2 bytes to get aligned
+
+/* need to copy 1 byte */
+ lb v0,0(from) # get one byte
+ addu from,1 # advance pointer
+ sb v0,0(to) # store one byte
+ addu to,1 # advance pointer
+ subu count,1 # and reduce count
+ b forwards # Now pointers are aligned
+
+/* need to copy 2 bytes */
+forw_copy2:
+ lh v0,0(from) # get one short
+ addu from,2 # advance pointer
+ sh v0,0(to) # store one short
+ addu to,2 # advance pointer
+ subu count,2 # and reduce count
+ b forwards
+
+/* need to copy 3 bytes */
+forw_copy3:
+ lb v0,0(from) # get one byte
+ lh v1,1(from) # and one short
+ addu from,3 # advance pointer
+ sb v0,0(to) # store one byte
+ sh v1,1(to) # and one short
+ addu to,3 # advance pointer
+ subu count,3 # and reduce count
+ /* FALLTHROUGH */
+/*
+ * Once we are here, the pointers are aligned on long boundaries.
+ * Begin copying in large chunks.
+ */
+forwards:
+
+/* 32 byte at a time loop */
+forwards_32:
+ blt count,32,forwards_16 # do 16 bytes at a time
+ lw v0,0(from)
+ lw v1,4(from)
+ lw t0,8(from)
+ lw t1,12(from)
+ lw t2,16(from)
+ lw t3,20(from)
+ lw t4,24(from)
+ lw t5,28(from) # Fetch 8*4 bytes
+ addu from,32 # advance from pointer now
+ sw v0,0(to)
+ sw v1,4(to)
+ sw t0,8(to)
+ sw t1,12(to)
+ sw t2,16(to)
+ sw t3,20(to)
+ sw t4,24(to)
+ sw t5,28(to) # Store 8*4 bytes
+ addu to,32 # advance to pointer now
+ subu count,32 # Reduce count
+ b forwards_32 # Try some more
+
+/* 16 byte at a time loop */
+forwards_16:
+ blt count,16,forwards_4 # Do rest in words
+ lw v0,0(from)
+ lw v1,4(from)
+ lw t0,8(from)
+ lw t1,12(from)
+ addu from,16 # advance from pointer now
+ sw v0,0(to)
+ sw v1,4(to)
+ sw t0,8(to)
+ sw t1,12(to)
+ addu to,16 # advance to pointer now
+ subu count,16 # Reduce count
+ b forwards_16 # Try some more
+
+/* 4 bytes at a time loop */
+forwards_4:
+ blt count,4,forwards_bytecopy # Do rest
+ lw v0,0(from)
+ addu from,4 # advance pointer
+ sw v0,0(to)
+ addu to,4 # advance pointer
+ subu count,4
+ b forwards_4
+
+/*****************************************************************************/
+
+/*
+ * Backward copy code. Check for pointer alignment and try to get both
+ * pointers aligned on a long boundary.
+ * Both pointers are first moved to one past the last byte, so all of the
+ * loads and stores below use negative offsets.
+ */
+gobackwards:
+ add from,count # Advance to end + 1
+ add to,count # Advance to end + 1
+
+ /* small byte counts use byte at a time copy */
+ blt count,MINCOPY,backwards_bytecopy
+ and v0,from,3 # v0 := from & 3
+ and v1,to,3 # v1 := to & 3
+ beq v0,v1,backalignable # low bits are identical
+/*
+ * Byte at a time copy code. This is used when the pointers are not
+ * alignable, when the byte count is small, or when cleaning up any
+ * remaining bytes on a larger transfer.
+ */
+backwards_bytecopy:
+ beq count,zero,ret # If count is zero quit
+ subu from,1 # Reduce by one (point at byte)
+ subu to,1 # Reduce by one (point at byte)
+ subu v1,from,count # v1 := original from - 1
+
+99: lb v0,0(from) # v0 = *from
+ subu from,1 # backup pointer
+ sb v0,0(to) # Store byte
+ subu to,1 # backup pointer
+ bne from,v1,99b # Loop until done
+ move v0,a3 # Set v0 to old "to" pointer
+ j ra # return to caller
+
+/*
+ * Pointers are alignable, and may be aligned. Since v0 == v1, we need only
+ * check what value v0 has to see how to get aligned. Also, since we have
+ * eliminated tiny copies, we know that the count is large enough to
+ * encompass the alignment copies.
+ */
+backalignable:
+ beq v0,zero,backwards # If v0==v1 && v0==0 then aligned
+ beq v0,3,back_copy3 # Need to copy 3 bytes to get aligned
+ beq v0,2,back_copy2 # Need to copy 2 bytes to get aligned
+
+/* need to copy 1 byte */
+ lb v0,-1(from) # get one byte
+ subu from,1 # backup pointer
+ sb v0,-1(to) # store one byte
+ subu to,1 # backup pointer
+ subu count,1 # and reduce count
+ b backwards # Now pointers are aligned
+
+/* need to copy 2 bytes */
+back_copy2:
+ lh v0,-2(from) # get one short
+ subu from,2 # backup pointer
+ sh v0,-2(to) # store one short
+ subu to,2 # backup pointer
+ subu count,2 # and reduce count
+ b backwards
+
+/* need to copy 3 bytes */
+back_copy3:
+ lb v0,-1(from) # get one byte
+ lh v1,-3(from) # and one short
+ subu from,3 # backup pointer
+ sb v0,-1(to) # store one byte
+ sh v1,-3(to) # and one short
+ subu to,3 # backup pointer
+ subu count,3 # and reduce count
+ /* FALLTHROUGH */
+/*
+ * Once we are here, the pointers are aligned on long boundaries.
+ * Begin copying in large chunks.
+ */
+backwards:
+
+/* 32 byte at a time loop */
+backwards_32:
+ blt count,32,backwards_16 # do 16 bytes at a time
+ lw v0,-4(from)
+ lw v1,-8(from)
+ lw t0,-12(from)
+ lw t1,-16(from)
+ lw t2,-20(from)
+ lw t3,-24(from)
+ lw t4,-28(from)
+ lw t5,-32(from) # Fetch 8*4 bytes
+ subu from,32 # backup from pointer now
+ sw v0,-4(to)
+ sw v1,-8(to)
+ sw t0,-12(to)
+ sw t1,-16(to)
+ sw t2,-20(to)
+ sw t3,-24(to)
+ sw t4,-28(to)
+ sw t5,-32(to) # Store 8*4 bytes
+ subu to,32 # backup to pointer now
+ subu count,32 # Reduce count
+ b backwards_32 # Try some more
+
+/* 16 byte at a time loop */
+backwards_16:
+ blt count,16,backwards_4 # Do rest in words
+ lw v0,-4(from)
+ lw v1,-8(from)
+ lw t0,-12(from)
+ lw t1,-16(from)
+ subu from,16 # backup from pointer now
+ sw v0,-4(to)
+ sw v1,-8(to)
+ sw t0,-12(to)
+ sw t1,-16(to)
+ subu to,16 # backup to pointer now
+ subu count,16 # Reduce count
+ b backwards_16 # Try some more
+
+/* 4 byte at a time loop */
+backwards_4:
+ blt count,4,backwards_bytecopy # Do rest
+ lw v0,-4(from)
+ subu from,4 # backup from pointer
+ sw v0,-4(to)
+ subu to,4 # backup to pointer
+ subu count,4 # Reduce count
+ b backwards_4
+.end memcpy
diff --git a/private/crt32/string/mips/memorym.s b/private/crt32/string/mips/memorym.s
new file mode 100644
index 000000000..6f98423dc
--- /dev/null
+++ b/private/crt32/string/mips/memorym.s
@@ -0,0 +1,1218 @@
+// TITLE("Compare, Move, and Fill Memory Support")
+//++
+//
+// Copyright (c) 1990 Microsoft Corporation
+//
+// Module Name:
+//
+// memory.s
+//
+// Abstract:
+//
+// This module implements functions to compare, move, zero, and fill
+// blocks of memory. If the memory is aligned, then these functions
+// are very efficient.
+//
+// N.B. These routines MUST preserve all floating state since they are
+// frequently called from interrupt service routines that normally
+// do not save or restore floating state.
+//
+// Author:
+//
+// David N. Cutler (davec) 11-Apr-1990
+//
+// Environment:
+//
+// User or Kernel mode.
+//
+// Revision History:
+// 02/02/94 RDL This is a cloned version of ntos\rtl\mips\xxmvmem.s
+// Used RtlMoveMemory and RtlFillMemory.
+// 02/15/94 RDL Used RtlCompareMemory, changed return code for memcmp.
+// 02/22/94 RDL Fixed memcmp, zero length and equal aligned 32-byte
+// buffers return wrong code.
+//
+//--
+
+#include "ksmips.h"
+ SBTTL("Compare Memory")
+
+//++
+//
+// ULONG
+// RtlCompareMemory (
+// IN PVOID Source1,
+// IN PVOID Source2,
+// IN ULONG Length
+// )
+//
+// Routine Description:
+//
+// This function compares two blocks of memory and returns the sign of
+// the first difference (see Return Value).
+//
+// Arguments:
+//
+// Source1 (a0) - Supplies a pointer to the first block of memory to
+// compare.
+//
+// Source2 (a1) - Supplies a pointer to the second block of memory to
+// compare.
+//
+// Length (a2) - Supplies the length, in bytes, of the memory to be
+// compared.
+//
+// Return Value:
+//
+// zero if source1 == source2
+// -1 if source1 < source2
+// 1 if source1 > source2
+// N.B. Unlike RtlCompareMemory, from which this routine was cloned, the
+// number of bytes that compared equal is not returned.
+//
+//--
+
+//
+// The 32-byte block loops below only locate the first mismatching word;
+// the sign of the result is always decided by the 1-byte compare at 100:.
+//
+
+ LEAF_ENTRY(memcmp)
+
+ addu a3,a0,a2 // compute ending address of source1
+ move v0,a2 // save length of comparison (N.B. v0 is rewritten before every return)
+ and t0,a2,32 - 1 // isolate residual bytes
+ subu t1,a2,t0 // subtract out residual bytes
+ addu t4,a0,t1 // compute ending block address
+ beq zero,t1,100f // if eq, no 32-byte block to compare
+ or t0,a0,a1 // merge and isolate alignment bits
+ and t0,t0,0x3 //
+ bne zero,t0,CompareUnaligned // if ne, unalignment comparison
+
+//
+// Compare memory aligned.
+//
+
+CompareAligned: //
+
+ .set noreorder
+10: lw t0,0(a0) // compare 32-byte block
+ lw t1,0(a1) //
+ lw t2,4(a0) //
+ bne t0,t1,90f // if ne, first word not equal
+ lw t3,4(a1) //
+ lw t0,8(a0) //
+ bne t2,t3,20f // if ne, second word not equal
+ lw t1,8(a1) //
+ lw t2,12(a0) //
+ bne t0,t1,30f // if ne, third word not equal
+ lw t3,12(a1) //
+ lw t0,16(a0) //
+ bne t2,t3,40f // if ne, fourth word not equal
+ lw t1,16(a1) //
+ lw t2,20(a0) //
+ bne t0,t1,50f // if ne, fifth word not equal
+ lw t3,20(a1) //
+ lw t0,24(a0) //
+ bne t2,t3,60f // if ne, sixth word not equal
+ lw t1,24(a1) //
+ lw t2,28(a0) //
+ bne t0,t1,70f // if ne, seventh word not equal
+ lw t3,28(a1) //
+ addu a0,a0,32 // advance source1 to next block
+ bne t2,t3,80f // if ne, eighth word not equal
+ nop //
+ bne a0,t4,10b // if ne, more 32-byte blocks to compare
+ addu a1,a1,32 // update source2 address
+ .set reorder
+
+ subu a2,a3,a0 // compute remaining bytes
+ b 100f //
+
+//
+// Compare memory unaligned.
+//
+
+CompareUnaligned: //
+ and t0,a0,0x3 // isolate source1 alignment
+ bne zero,t0,CompareUnalignedS1 // if ne, source1 unaligned
+
+//
+// Source1 is aligned and Source2 is unaligned.
+//
+
+CompareUnalignedS2: //
+
+ .set noreorder
+10: lw t0,0(a0) // compare 32-byte block
+ lwr t1,0(a1) //
+ lwl t1,3(a1) //
+ lw t2,4(a0) //
+ bne t0,t1,90f // if ne, first word not equal
+ lwr t3,4(a1) //
+ lwl t3,7(a1) //
+ lw t0,8(a0) //
+ bne t2,t3,20f // if ne, second word not equal
+ lwr t1,8(a1) //
+ lwl t1,11(a1) //
+ lw t2,12(a0) //
+ bne t0,t1,30f // if ne, third word not equal
+ lwr t3,12(a1) //
+ lwl t3,15(a1) //
+ lw t0,16(a0) //
+ bne t2,t3,40f // if ne, fourth word not equal
+ lwr t1,16(a1) //
+ lwl t1,19(a1) //
+ lw t2,20(a0) //
+ bne t0,t1,50f // if ne, fifth word not equal
+ lwr t3,20(a1) //
+ lwl t3,23(a1) //
+ lw t0,24(a0) //
+ bne t2,t3,60f // if ne, sixth word not equal
+ lwr t1,24(a1) //
+ lwl t1,27(a1) //
+ lw t2,28(a0) //
+ bne t0,t1,70f // if ne, seventh word not equal
+ lwr t3,28(a1) //
+ lwl t3,31(a1) //
+ addu a0,a0,32 // advance source1 to next block
+ bne t2,t3,80f // if ne, eighth word not equal
+ nop //
+ bne a0,t4,10b // if ne, more 32-byte blocks to compare
+ addu a1,a1,32 // update source2 address
+ .set reorder
+
+ subu a2,a3,a0 // compute remaining bytes
+ b 100f //
+
+//
+// Source1 is unaligned, check Source2 alignment.
+//
+
+CompareUnalignedS1: //
+ and t0,a1,0x3 // isolate Source2 alignment
+ bne zero,t0,CompareUnalignedS1AndS2 // if ne, Source2 unaligned
+
+//
+// Source1 is unaligned and Source2 is aligned.
+//
+
+ .set noreorder
+10: lwr t0,0(a0) // compare 32-byte block
+ lwl t0,3(a0) //
+ lw t1,0(a1) //
+ lwr t2,4(a0) //
+ lwl t2,7(a0) //
+ bne t0,t1,90f // if ne, first word not equal
+ lw t3,4(a1) //
+ lwr t0,8(a0) //
+ lwl t0,11(a0) //
+ bne t2,t3,20f // if ne, second word not equal
+ lw t1,8(a1) //
+ lwr t2,12(a0) //
+ lwl t2,15(a0) //
+ bne t0,t1,30f // if ne, third word not equal
+ lw t3,12(a1) //
+ lwr t0,16(a0) //
+ lwl t0,19(a0) //
+ bne t2,t3,40f // if ne, fourth word not equal
+ lw t1,16(a1) //
+ lwr t2,20(a0) //
+ lwl t2,23(a0) //
+ bne t0,t1,50f // if ne, fifth word not equal
+ lw t3,20(a1) //
+ lwr t0,24(a0) //
+ lwl t0,27(a0) //
+ bne t2,t3,60f // if ne, sixth word not equal
+ lw t1,24(a1) //
+ lwr t2,28(a0) //
+ lwl t2,31(a0) //
+ bne t0,t1,70f // if ne, seventh word not equal
+ lw t3,28(a1) //
+ addu a0,a0,32 // advance source1 to next block
+ bne t2,t3,80f // if ne, eighth word not equal
+ nop //
+ bne a0,t4,10b // if ne, more 32-byte blocks to compare
+ addu a1,a1,32 // update source2 address
+ .set reorder
+
+ subu a2,a3,a0 // compute remaining bytes
+ b 100f //
+
+//
+// Source1 and Source2 are unaligned.
+//
+
+CompareUnalignedS1AndS2: //
+
+ .set noreorder
+10: lwr t0,0(a0) // compare 32-byte block
+ lwl t0,3(a0) //
+ lwr t1,0(a1) //
+ lwl t1,3(a1) //
+ lwr t2,4(a0) //
+ lwl t2,7(a0) //
+ bne t0,t1,90f // if ne, first word not equal
+ lwr t3,4(a1) //
+ lwl t3,7(a1) //
+ lwr t0,8(a0) //
+ lwl t0,11(a0) //
+ bne t2,t3,20f // if ne, second word not equal
+ lwr t1,8(a1) //
+ lwl t1,11(a1) //
+ lwr t2,12(a0) //
+ lwl t2,15(a0) //
+ bne t0,t1,30f // if ne, third word not equal
+ lwr t3,12(a1) //
+ lwl t3,15(a1) //
+ lwr t0,16(a0) //
+ lwl t0,19(a0) //
+ bne t2,t3,40f // if ne, fourth word not equal
+ lwr t1,16(a1) //
+ lwl t1,19(a1) //
+ lwr t2,20(a0) //
+ lwl t2,23(a0) //
+ bne t0,t1,50f // if ne, fifth word not equal
+ lwr t3,20(a1) //
+ lwl t3,23(a1) //
+ lwr t0,24(a0) //
+ lwl t0,27(a0) //
+ bne t2,t3,60f // if ne, sixth word not equal
+ lwr t1,24(a1) //
+ lwl t1,27(a1) //
+ lwr t2,28(a0) //
+ lwl t2,31(a0) //
+ bne t0,t1,70f // if ne, seventh word not equal
+ lwr t3,28(a1) //
+ lwl t3,31(a1) //
+ addu a0,a0,32 // advance source1 to next block
+ bne t2,t3,80f // if ne, eighth word not equal
+ nop //
+ bne a0,t4,10b // if ne, more 32-byte blocks to compare
+ addu a1,a1,32 // update source2 address
+ .set reorder
+
+ subu a2,a3,a0 // compute remaining bytes
+ b 100f //
+
+//
+// Adjust source1 and source2 pointers dependent on position of miscompare in
+// block.
+//
+
+20: addu a0,a0,4 // mismatch on second word
+ addu a1,a1,4 //
+ b 90f //
+
+30: addu a0,a0,8 // mismatch on third word
+ addu a1,a1,8 //
+ b 90f //
+
+40: addu a0,a0,12 // mismatch on fourth word
+ addu a1,a1,12 //
+ b 90f //
+
+50: addu a0,a0,16 // mismatch on fifth word
+ addu a1,a1,16 //
+ b 90f //
+
+60: addu a0,a0,20 // mismatch on sixth word
+ addu a1,a1,20 //
+ b 90f //
+
+70: addu a0,a0,24 // mismatch on seventh word
+ addu a1,a1,24 //
+ b 90f //
+
+80: subu a0,a0,4 // mismatch on eighth word (a0 was already advanced by 32)
+ addu a1,a1,28 //
+90: subu a2,a3,a0 // compute remaining bytes
+
+//
+// Compare 1-byte blocks.
+//
+
+100: addu t2,a0,a2 // compute ending block address
+ beq zero,a2,120f // if eq, buffers equal
+110: lb t0,0(a0) // compare 1-byte block
+ lb t1,0(a1) //
+ addu a1,a1,1 // advance pointers to next block
+ bne t0,t1,130f // if ne, byte not equal
+ addu a0,a0,1 //
+ bne a0,t2,110b // if ne, more 1-byte blocks to compare
+
+120: move v0,zero // source1 == source2
+ j ra // return
+
+130: sltu v0,t1,t0 // compare source1 to source2
+ beq v0,zero,140f
+ j ra // return, source1 > source2
+140:
+ li v0,-1
+ j ra // return, source1 < source2
+
+ .end memcmp
+
+ SBTTL("Move Memory")
+//++
+//
+// VOID
+// RtlMoveMemory (
+// IN PVOID Destination,
+// IN PVOID Source,
+// IN ULONG Length
+// )
+//
+// Routine Description:
+//
+// This function moves memory either forward or backward, aligned or
+// unaligned, in 32-byte blocks, followed by 4-byte blocks, followed
+// by any remaining bytes.
+//
+// Arguments:
+//
+// Destination (a0) - Supplies a pointer to the destination address of
+// the move operation.
+//
+// Source (a1) - Supplies a pointer to the source address of the move
+// operation.
+//
+// Length (a2) - Supplies the length, in bytes, of the memory to be moved.
+//
+// Return Value:
+//
+// The destination address; memcpy loads v0 from a0 on entry (RtlMoveMemory itself is VOID).
+//
+// N.B. The C runtime entry points memmove and memcpy are equivalent to
+// RtlMoveMemory thus alternate entry points are provided for these
+// routines.
+//--
+
+//
+// memmove has no body of its own: it jumps straight to memcpy, which
+// checks for source/destination overlap before choosing a direction.
+//
+
+ LEAF_ENTRY(memmove)
+ j memcpy
+ .end memmove
+
+ LEAF_ENTRY(memcpy)
+
+ move v0,a0 // return destination
+
+//
+// If the source address is less than the destination address and source
+// address plus the length of the move is greater than the destination
+// address, then the source and destination overlap such that the move
+// must be performed backwards.
+//
+
+10: bgeu a1,a0,MoveForward // if geu, no overlap possible
+ addu t0,a1,a2 // compute source ending address
+ bgtu t0,a0,MoveBackward // if gtu, source and destination overlap
+
+//
+// Move memory forward aligned and unaligned.
+//
+
+MoveForward: //
+ sltu t0,a2,4 // check if less than four bytes
+ bne zero,t0,50f // if ne, less than four bytes to move
+ xor t0,a0,a1 // compare alignment bits
+ and t0,t0,0x3 // isolate alignment comparison
+ bne zero,t0,MoveForwardUnaligned // if ne, incompatible alignment
+
+//
+// Move memory forward aligned.
+//
+
+MoveForwardAligned: //
+ subu t0,zero,a0 // compute bytes until aligned
+ and t0,t0,0x3 // isolate residual byte count
+ subu a2,a2,t0 // reduce number of bytes to move
+ beq zero,t0,10f // if eq, already aligned
+ lwr t1,0(a1) // move unaligned bytes
+ swr t1,0(a0) //
+ addu a0,a0,t0 // align destination address
+ addu a1,a1,t0 // align source address
+
+//
+// Check for 32-byte blocks to move.
+//
+
+10: and t0,a2,32 - 1 // isolate residual bytes
+ subu t1,a2,t0 // subtract out residual bytes
+ addu t8,a0,t1 // compute ending block address
+ beq zero,t1,30f // if eq, no 32-byte block to zero
+ move a2,t0 // set residual number of bytes
+
+//
+// Move 32-byte blocks.
+//
+
+#if defined(R4000)
+
+ and t0,a0,1 << 2 // check if destination quadword aligned
+ beq zero,t0,15f // if eq, destination quadword aligned
+ lw t0,0(a1) // get source longword
+ addu a1,a1,4 // align source address
+ sw t0,0(a0) // store destination longword
+ addu a0,a0,4 // align destination address
+ addu a2,a2,t1 // recompute bytes to move
+ subu a2,a2,4 // reduce count by 4
+ b 10b //
+
+//
+// The destination is quadword aligned, check the source operand.
+//
+
+15: and t0,a1,1 << 2 // check if source quadword aligned
+ beq zero,t0,22f // if eq, source quadword aligned
+
+//
+// The source is longword aligned and the destination is quadword aligned.
+//
+
+ .set noreorder
+20: lwc1 f0,0(a1) // move 32-byte block
+ lwc1 f1,4(a1) //
+ lwc1 f2,8(a1) //
+ lwc1 f3,12(a1) //
+ lwc1 f4,16(a1) //
+ lwc1 f5,20(a1) //
+ lwc1 f6,24(a1) //
+ lwc1 f7,28(a1) //
+ sdc1 f0,0(a0) //
+ sdc1 f2,8(a0) //
+ sdc1 f4,16(a0) //
+ sdc1 f6,24(a0) //
+ addu a0,a0,32 // advance pointers to next block
+ bne a0,t8,20b // if ne, more 32-byte blocks to zero
+ addu a1,a1,32 //
+ .set reorder
+
+ b 30f //
+
+//
+// Both the source and the destination are quadword aligned.
+//
+
+22: and t0,t1,1 << 5 // test if even number of 32-byte blocks
+ beq zero,t0,26f // if eq, even number of 32-byte blocks
+
+//
+// Move one 32-byte block quadword aligned.
+//
+
+ .set noreorder
+ ldc1 f0,0(a1) // move 32-byte block
+ ldc1 f2,8(a1) //
+ ldc1 f4,16(a1) //
+ ldc1 f6,24(a1) //
+ sdc1 f0,0(a0) //
+ sdc1 f2,8(a0) //
+ sdc1 f4,16(a0) //
+ sdc1 f6,24(a0) //
+ addu a0,a0,32 // advance pointers to next block
+ beq a0,t8,30f // if eq, end of block
+ addu a1,a1,32 //
+ .set reorder
+
+//
+// Move 64-byte blocks quadword aligned.
+//
+
+ .set noreorder
+26: ldc1 f0,0(a1) // move 64-byte block
+ ldc1 f2,8(a1) //
+ ldc1 f4,16(a1) //
+ ldc1 f6,24(a1) //
+ ldc1 f8,32(a1) //
+ ldc1 f10,40(a1) //
+ ldc1 f12,48(a1) //
+ ldc1 f14,56(a1) //
+ sdc1 f0,0(a0) //
+ sdc1 f2,8(a0) //
+ sdc1 f4,16(a0) //
+ sdc1 f6,24(a0) //
+ sdc1 f8,32(a0) //
+ sdc1 f10,40(a0) //
+ sdc1 f12,48(a0) //
+ sdc1 f14,56(a0) //
+ addu a0,a0,64 // advance pointers to next block
+ bne a0,t8,26b // if ne, more 64-byte blocks to zero
+ addu a1,a1,64 //
+ .set reorder
+
+#endif
+
+//
+// The source is longword aligned and the destination is longword aligned.
+//
+
+#if defined(R3000)
+
+ .set noreorder
+20: lw t0,0(a1) // move 32-byte block
+ lw t1,4(a1) //
+ lw t2,8(a1) //
+ lw t3,12(a1) //
+ lw t4,16(a1) //
+ lw t5,20(a1) //
+ lw t6,24(a1) //
+ lw t7,28(a1) //
+ sw t0,0(a0) //
+ sw t1,4(a0) //
+ sw t2,8(a0) //
+ sw t3,12(a0) //
+ sw t4,16(a0) //
+ sw t5,20(a0) //
+ sw t6,24(a0) //
+ sw t7,28(a0) //
+ addu a0,a0,32 // advance pointers to next block
+ bne a0,t8,20b // if ne, more 32-byte blocks to zero
+ addu a1,a1,32 //
+ .set reorder
+
+#endif
+
+//
+// Check for 4-byte blocks to move.
+//
+
+30: and t0,a2,4 - 1 // isolate residual bytes
+ subu t1,a2,t0 // subtract out residual bytes
+ addu t2,a0,t1 // compute ending block address
+ beq zero,t1,50f // if eq, no 4-byte block to zero
+ move a2,t0 // set residual number of bytes
+
+//
+// Move 4-byte block.
+//
+
+ .set noreorder
+40: lw t0,0(a1) // move 4-byte block
+ addu a0,a0,4 // advance pointers to next block
+ sw t0,-4(a0) //
+ bne a0,t2,40b // if ne, more 4-byte blocks to zero
+ addu a1,a1,4 //
+ .set reorder
+
+//
+// Move 1-byte blocks.
+//
+
+50: addu t2,a0,a2 // compute ending block address
+ beq zero,a2,70f // if eq, no bytes to zero
+
+ .set noreorder
+60: lb t0,0(a1) // move 1-byte block
+ addu a0,a0,1 // advance pointers to next block
+ sb t0,-1(a0) //
+ bne a0,t2,60b // if ne, more 1-byte block to zero
+ addu a1,a1,1 //
+ .set reorder
+
+70: j ra // return
+
+//
+// Move memory forward unaligned.
+//
+
+MoveForwardUnaligned: //
+ subu t0,zero,a0 // compute bytes until aligned
+ and t0,t0,0x3 // isolate residual byte count
+ subu a2,a2,t0 // reduce number of bytes to move
+ beq zero,t0,10f // if eq, already aligned
+ lwr t1,0(a1) // move unaligned bytes
+ lwl t1,3(a1) //
+ swr t1,0(a0) //
+ addu a0,a0,t0 // align destination address
+ addu a1,a1,t0 // update source address
+
+//
+// Check for 32-byte blocks to move.
+//
+
+10: and t0,a2,32 - 1 // isolate residual bytes
+ subu t1,a2,t0 // subtract out residual bytes
+ addu t8,a0,t1 // compute ending block address
+ beq zero,t1,30f // if eq, no 32-byte block to zero
+ move a2,t0 // set residual number of bytes
+
+//
+// Move 32-byte block.
+//
+
+ .set noreorder
+20: lwr t0,0(a1) // move 32-byte block
+ lwl t0,3(a1) //
+ lwr t1,4(a1) //
+ lwl t1,7(a1) //
+ lwr t2,8(a1) //
+ lwl t2,11(a1) //
+ lwr t3,12(a1) //
+ lwl t3,15(a1) //
+ lwr t4,16(a1) //
+ lwl t4,19(a1) //
+ lwr t5,20(a1) //
+ lwl t5,23(a1) //
+ lwr t6,24(a1) //
+ lwl t6,27(a1) //
+ lwr t7,28(a1) //
+ lwl t7,31(a1) //
+ sw t0,0(a0) //
+ sw t1,4(a0) //
+ sw t2,8(a0) //
+ sw t3,12(a0) //
+ sw t4,16(a0) //
+ sw t5,20(a0) //
+ sw t6,24(a0) //
+ sw t7,28(a0) //
+ addu a0,a0,32 // advance pointers to next block
+ bne a0,t8,20b // if ne, more 32-byte blocks to zero
+ addu a1,a1,32 //
+ .set reorder
+
+//
+// Check for 4-byte blocks to move.
+//
+
+30: and t0,a2,4 - 1 // isolate residual bytes
+ subu t1,a2,t0 // subtract out residual bytes
+ addu t2,a0,t1 // compute ending block address
+ beq zero,t1,50f // if eq, no 4-byte block to zero
+ move a2,t0 // set residual number of bytes
+
+//
+// Move 4-byte block.
+//
+
+ .set noreorder
+40: lwr t0,0(a1) // move 4-byte block
+ lwl t0,3(a1) //
+ addu a0,a0,4 // advance pointers to next block
+ sw t0,-4(a0) //
+ bne a0,t2,40b // if ne, more 4-byte blocks to zero
+ addu a1,a1,4 //
+ .set reorder
+
+//
+// Move 1-byte blocks.
+//
+
+50: addu t2,a0,a2 // compute ending block address
+ beq zero,a2,70f // if eq, no bytes to zero
+
+ .set noreorder
+60: lb t0,0(a1) // move 1-byte block
+ addu a0,a0,1 // advance pointers to next block
+ sb t0,-1(a0) //
+ bne a0,t2,60b // if ne, more 1-byte block to zero
+ addu a1,a1,1 //
+ .set reorder
+
+70: j ra // return
+
+//
+// Move memory backward.
+//
+
+MoveBackward: //
+ addu a0,a0,a2 // compute ending destination address
+ addu a1,a1,a2 // compute ending source address
+ sltu t0,a2,4 // check if less than four bytes
+ bne zero,t0,50f // if ne, less than four bytes to move
+ xor t0,a0,a1 // compare alignment bits
+ and t0,t0,0x3 // isolate alignment comparison
+ bne zero,t0,MoveBackwardUnaligned // if ne, incompatible alignment
+
+//
+// Move memory backward aligned.
+//
+
+MoveBackwardAligned: //
+ and t0,a0,0x3 // isolate residual byte count
+ subu a2,a2,t0 // reduce number of bytes to move
+ beq zero,t0,10f // if eq, already aligned
+ lwl t1,-1(a1) // move unaligned bytes
+ swl t1,-1(a0) //
+ subu a0,a0,t0 // align destination address
+ subu a1,a1,t0 // align source address
+
+//
+// Check for 32-byte blocks to move.
+//
+
+10: and t0,a2,32 - 1 // isolate residual bytes
+ subu t1,a2,t0 // subtract out residual bytes
+ subu t8,a0,t1 // compute ending block address
+ beq zero,t1,30f // if eq, no 32-byte block to zero
+ move a2,t0 // set residual number of bytes
+
+//
+// Move 32-byte block.
+//
+
+#if defined(R4000)
+
+ and t0,a0,1 << 2 // check if destination quadword aligned
+ beq zero,t0,15f // if eq, destination quadword aligned
+ lw t0,-4(a1) // get source longword
+ subu a1,a1,4 // align source address
+ sw t0,-4(a0) // store destination longword
+ subu a0,a0,4 // align destination address
+ addu a2,a2,t1 // recompute byte to move
+ subu a2,a2,4 // reduce count by 4
+ b 10b //
+
+//
+// The destination is quadword aligned, check the source operand.
+//
+
+15: and t0,a1,1 << 2 // check if source quadword aligned
+ beq zero,t0,22f // if eq, source quadword aligned
+
+//
+// The source is longword aligned and the destination is quadword aligned.
+//
+
+ .set noreorder
+20: lwc1 f1,-4(a1) // move 32-byte block
+ lwc1 f0,-8(a1) //
+ lwc1 f3,-12(a1) //
+ lwc1 f2,-16(a1) //
+ lwc1 f5,-20(a1) //
+ lwc1 f4,-24(a1) //
+ lwc1 f7,-28(a1) //
+ lwc1 f6,-32(a1) //
+ sdc1 f0,-8(a0) //
+ sdc1 f2,-16(a0) //
+ sdc1 f4,-24(a0) //
+ sdc1 f6,-32(a0) //
+ subu a0,a0,32 // advance pointers to next block
+ bne a0,t8,20b // if ne, more 32-byte blocks to zero
+ subu a1,a1,32 //
+ .set reorder
+
+ b 30f //
+
+//
+// Both the source and the destination are quadword aligned.
+//
+
+22: and t0,t1,1 << 5 // test if even number of 32-byte blocks
+ beq zero,t0,26f // if eq, even number of 32-byte blocks
+
+//
+// Move one 32-byte block quadword aligned.
+//
+
+ .set noreorder
+ ldc1 f0,-8(a1) // move 32-byte block
+ ldc1 f2,-16(a1) //
+ ldc1 f4,-24(a1) //
+ ldc1 f6,-32(a1) //
+ sdc1 f0,-8(a0) //
+ sdc1 f2,-16(a0) //
+ sdc1 f4,-24(a0) //
+ sdc1 f6,-32(a0) //
+ subu a0,a0,32 // advance pointers to next block
+ beq a0,t8,30f // if eq, end of block
+ subu a1,a1,32 //
+ .set reorder
+
+//
+// Move 64-byte blocks quadword aligned.
+//
+
+ .set noreorder
+26: ldc1 f0,-8(a1) // move 64-byte block
+ ldc1 f2,-16(a1) //
+ ldc1 f4,-24(a1) //
+ ldc1 f6,-32(a1) //
+ ldc1 f8,-40(a1) //
+ ldc1 f10,-48(a1) //
+ ldc1 f12,-56(a1) //
+ ldc1 f14,-64(a1) //
+ sdc1 f0,-8(a0) //
+ sdc1 f2,-16(a0) //
+ sdc1 f4,-24(a0) //
+ sdc1 f6,-32(a0) //
+ sdc1 f8,-40(a0) //
+ sdc1 f10,-48(a0) //
+ sdc1 f12,-56(a0) //
+ sdc1 f14,-64(a0) //
+ subu a0,a0,64 // advance pointers to next block
+ bne a0,t8,26b // if ne, more 64-byte blocks to zero
+ subu a1,a1,64 //
+ .set reorder
+
+#endif
+
+//
+// The source is longword aligned and the destination is longword aligned.
+//
+
+#if defined(R3000)
+
+ .set noreorder
+20: lw t0,-4(a1) // move 32-byte block
+ lw t1,-8(a1) //
+ lw t2,-12(a1) //
+ lw t3,-16(a1) //
+ lw t4,-20(a1) //
+ lw t5,-24(a1) //
+ lw t6,-28(a1) //
+ lw t7,-32(a1) //
+ sw t0,-4(a0) //
+ sw t1,-8(a0) //
+ sw t2,-12(a0) //
+ sw t3,-16(a0) //
+ sw t4,-20(a0) //
+ sw t5,-24(a0) //
+ sw t6,-28(a0) //
+ sw t7,-32(a0) //
+ subu a0,a0,32 // advance pointers to next block
+ bne a0,t8,20b // if ne, more 32-byte blocks to zero
+ subu a1,a1,32 //
+ .set reorder
+
+#endif
+
+//
+// Check for 4-byte blocks to move.
+//
+
+30: and t0,a2,4 - 1 // isolate residual bytes
+ subu t1,a2,t0 // subtract out residual bytes
+ subu t2,a0,t1 // compute ending block address
+ beq zero,t1,50f // if eq, no 4-byte block to zero
+ move a2,t0 // set residual number of bytes
+
+//
+// Move 4-byte block.
+//
+
+ .set noreorder
+40: lw t0,-4(a1) // move 4-byte block
+ subu a0,a0,4 // advance pointers to next block
+ sw t0,0(a0) //
+ bne a0,t2,40b // if ne, more 4-byte blocks to zero
+ subu a1,a1,4 //
+ .set reorder
+
+//
+// Move 1-byte blocks.
+//
+
+50: subu t2,a0,a2 // compute ending block address
+ beq zero,a2,70f // if eq, no bytes to zero
+
+ .set noreorder
+60: lb t0,-1(a1) // move 1-byte block
+ subu a0,a0,1 // advance pointers to next block
+ sb t0,0(a0) //
+ bne a0,t2,60b // if ne, more 1-byte block to zero
+ subu a1,a1,1 //
+ .set reorder
+
+70: j ra // return
+
+//
+// Move memory backward unaligned.
+//
+
+MoveBackwardUnaligned: //
+ and t0,a0,0x3 // isolate residual byte count
+ subu a2,a2,t0 // reduce number of bytes to move
+ beq zero,t0,10f // if eq, already aligned
+ lwl t1,-1(a1) // move unaligned bytes
+ lwr t1,-4(a1) //
+ swl t1,-1(a0) //
+ subu a0,a0,t0 // align destination address
+ subu a1,a1,t0 // update source address
+
+//
+// Check for 32-byte blocks to move.
+//
+
+10: and t0,a2,32 - 1 // isolate residual bytes
+ subu t1,a2,t0 // subtract out residual bytes
+ subu t8,a0,t1 // compute ending block address
+ beq zero,t1,30f // if eq, no 32-byte block to zero
+ move a2,t0 // set residual number of bytes
+
+//
+// Move 32-byte block.
+//
+
+ .set noreorder
+20: lwr t0,-4(a1) // move 32-byte block
+ lwl t0,-1(a1) //
+ lwr t1,-8(a1) //
+ lwl t1,-5(a1) //
+ lwr t2,-12(a1) //
+ lwl t2,-9(a1) //
+ lwr t3,-16(a1) //
+ lwl t3,-13(a1) //
+ lwr t4,-20(a1) //
+ lwl t4,-17(a1) //
+ lwr t5,-24(a1) //
+ lwl t5,-21(a1) //
+ lwr t6,-28(a1) //
+ lwl t6,-25(a1) //
+ lwr t7,-32(a1) //
+ lwl t7,-29(a1) //
+ sw t0,-4(a0) //
+ sw t1,-8(a0) //
+ sw t2,-12(a0) //
+ sw t3,-16(a0) //
+ sw t4,-20(a0) //
+ sw t5,-24(a0) //
+ sw t6,-28(a0) //
+ sw t7,-32(a0) //
+ subu a0,a0,32 // advance pointers to next block
+ bne a0,t8,20b // if ne, more 32-byte blocks to zero
+ subu a1,a1,32 //
+ .set reorder
+
+//
+// Check for 4-byte blocks to move.
+//
+
+30: and t0,a2,4 - 1 // isolate residual bytes
+ subu t1,a2,t0 // subtract out residual bytes
+ subu t2,a0,t1 // compute ending block address
+ beq zero,t1,50f // if eq, no 4-byte block to zero
+ move a2,t0 // set residual number of bytes
+
+//
+// Move 4-byte block.
+//
+
+ .set noreorder
+40: lwr t0,-4(a1) // move 4-byte block
+ lwl t0,-1(a1) //
+ subu a0,a0,4 // advance pointers to next block
+ sw t0,0(a0) //
+ bne a0,t2,40b // if ne, more 4-byte blocks to zero
+ subu a1,a1,4 //
+ .set reorder
+
+//
+// Move 1-byte blocks.
+//
+
+50: subu t2,a0,a2 // compute ending block address
+ beq zero,a2,70f // if eq, no bytes to zero
+
+ .set noreorder
+60: lb t0,-1(a1) // move 1-byte block
+ subu a0,a0,1 // advance pointers to next block
+ sb t0,0(a0) //
+ bne a0,t2,60b // if ne, more 1-byte block to zero
+ subu a1,a1,1 //
+ .set reorder
+
+70: j ra // return
+
+ .end memcpy
+
+ SBTTL("Fill Memory")
+//++
+//
+// VOID
+// RtlFillMemory (
+// IN PVOID Destination,
+// IN ULONG Length,
+// IN UCHAR Fill
+// )
+//
+// Routine Description:
+//
+// This function fills memory by first aligning the destination address to
+// a longword boundary, and then filling 32-byte blocks, followed by 4-byte
+// blocks, followed by any remaining bytes.
+//
+// Arguments:
+//
+// Destination (a0) - Supplies a pointer to the memory to fill.
+//
+// Length (a1) - Supplies the length, in bytes, of the memory to be filled.
+//
+// Fill (a2) - Supplies the fill byte.
+//
+// N.B. The alternate entry memset expects the length and fill arguments
+// to be reversed.
+//
+// Return Value:
+//
+// None for RtlFillMemory (VOID). The memset entry returns the original
+// destination address in v0.
+//
+//--
+
+ LEAF_ENTRY(memset)
+
+//
+// On entry a0 = destination, a1 = fill value, a2 = length (memset argument
+// order). The three moves below exchange a1 and a2 through a3 so that the
+// remainder of the routine sees a1 = length and a2 = fill. v0 is loaded with
+// the unmodified destination address before a0 is advanced.
+//
+
+ move a3,a1 // swap length and fill arguments
+ move a1,a2 //
+ move a2,a3 //
+ move v0,a0 // return destination
+
+//
+// Reduce the fill value to one byte and replicate it into all four bytes
+// of a2.
+//
+
+ and a2,a2,0xff // clear excess bits
+ sll t0,a2,8 // duplicate fill byte
+ or a2,a2,t0 // generate fill word
+ sll t0,a2,16 // duplicate fill word
+ or a2,a2,t0 // generate fill longword
+
+//
+// Fill memory with the pattern specified in register a2.
+//
+// In DBG builds the pattern is also copied to f0/f1 here; at exit (label 80)
+// f0/f1 are read back and compared with a2, and a mismatch executes a
+// breakpoint.
+//
+
+#if DBG
+
+ mtc1 a2,f0 // set pattern to store
+ mtc1 a2,f1 //
+
+#endif
+
+//
+// Align the destination to a longword boundary. t0 = (-a0) & 3 is the number
+// of bytes up to the next boundary. When the length does not exceed t0 the
+// branch to 60 is taken with a1 still holding the full length (the move that
+// follows the branch is not in a noreorder region), and the whole fill is
+// done by the byte loop.
+//
+
+ subu t0,zero,a0 // compute bytes until aligned
+ and t0,t0,0x3 // isolate residual byte count
+ subu t1,a1,t0 // reduce number of bytes to fill
+ blez t1,60f // if lez, less than 4 bytes to fill
+ move a1,t1 // set number of bytes to fill
+ beq zero,t0,10f // if eq, already aligned
+ swr a2,0(a0) // fill unaligned bytes
+ addu a0,a0,t0 // align destination address
+
+//
+// Check for 32-byte blocks to fill.
+//
+// t1 = byte count in whole 32-byte blocks, t2 = address at which block
+// filling stops, a1 = residual byte count (less than 32).
+//
+
+10: and t0,a1,32 - 1 // isolate residual bytes
+ subu t1,a1,t0 // subtract out residual bytes
+ addu t2,a0,t1 // compute ending block address
+ beq zero,t1,40f // if eq, no 32-byte blocks to fill
+ move a1,t0 // set residual number of bytes
+
+//
+// Fill 32-byte blocks.
+//
+// The R4000 path stores doublewords, so the destination is first advanced
+// to a quadword (8-byte) boundary by storing one longword, after which the
+// block computation at label 10 is repeated with the reduced count.
+//
+
+#if defined(R4000)
+
+ and t0,a0,1 << 2 // check if destination quadword aligned
+ beq zero,t0,20f // if eq, yes
+ sw a2,0(a0) // store destination longword
+ addu a0,a0,4 // align destination address
+ addu a1,a1,t1 // recompute bytes to fill
+ subu a1,a1,4 // reduce count by 4
+ b 10b //
+
+//
+// The destination is quadword aligned.
+//
+// f0/f1 are loaded with the fill longword so that each sdc1 of f0 stores
+// eight fill bytes. An odd number of 32-byte blocks is handled by filling
+// one 32-byte block first; the rest is filled 64 bytes at a time.
+//
+
+20: mtc1 a2,f0 // set pattern value
+ mtc1 a2,f1 //
+ and t0,t1,1 << 5 // test if even number of 32-byte blocks
+ beq zero,t0,30f // if eq, even number of 32-byte blocks
+
+//
+// Fill one 32-byte block.
+//
+
+ .set noreorder
+ sdc1 f0,0(a0) // fill 32-byte block
+ sdc1 f0,8(a0) //
+ sdc1 f0,16(a0) //
+ addu a0,a0,32 // advance pointer to next block
+ beq a0,t2,40f // if eq, no 64-byte blocks to fill
+ sdc1 f0,-8(a0) // delay slot: last doubleword of the block
+ .set reorder
+
+//
+// Fill 64-byte block.
+//
+
+ .set noreorder
+30: sdc1 f0,0(a0) // fill 64-byte block
+ sdc1 f0,8(a0) //
+ sdc1 f0,16(a0) //
+ sdc1 f0,24(a0) //
+ sdc1 f0,32(a0) //
+ sdc1 f0,40(a0) //
+ sdc1 f0,48(a0) //
+ addu a0,a0,64 // advance pointer to next block
+ bne a0,t2,30b // if ne, more 64-byte blocks to fill
+ sdc1 f0,-8(a0) // delay slot: last doubleword of the block
+ .set reorder
+
+#endif
+
+//
+// Fill 32-byte blocks.
+//
+// The R3000 path stores eight longwords per block. The pointer is advanced
+// in the middle of the block, so the last four stores use negative offsets;
+// the store at -16 sits in the branch delay slot.
+//
+
+#if defined(R3000)
+
+ .set noreorder
+20: sw a2,0(a0) // fill 32-byte block
+ sw a2,4(a0) //
+ sw a2,8(a0) //
+ sw a2,12(a0) //
+ addu a0,a0,32 // advance pointer to next block
+ sw a2,-4(a0) //
+ sw a2,-8(a0) //
+ sw a2,-12(a0) //
+ bne a0,t2,20b // if ne, more 32-byte blocks to fill
+ sw a2,-16(a0) // delay slot: remaining longword of the block
+ .set reorder
+
+#endif
+
+//
+// Check for 4-byte blocks to fill.
+//
+
+40: and t0,a1,4 - 1 // isolate residual bytes
+ subu t1,a1,t0 // subtract out residual bytes
+ addu t2,a0,t1 // compute ending block address
+ beq zero,t1,60f // if eq, no 4-byte block to fill
+ move a1,t0 // set residual number of bytes
+
+//
+// Fill 4-byte blocks.
+//
+
+ .set noreorder
+50: addu a0,a0,4 // advance pointer to next block
+ bne a0,t2,50b // if ne, more 4-byte blocks to fill
+ sw a2,-4(a0) // fill 4-byte block
+ .set reorder
+
+//
+// Check for 1-byte blocks to fill.
+//
+
+60: addu t2,a0,a1 // compute ending block address
+ beq zero,a1,80f // if eq, no bytes to fill
+
+//
+// Fill 1-byte blocks.
+//
+
+ .set noreorder
+70: addu a0,a0,1 // advance pointer to next block
+ bne a0,t2,70b // if ne, more 1-byte block to fill
+ sb a2,-1(a0) // fill 1-byte block
+ .set reorder
+
+//
+// Exit. DBG builds first verify that f0/f1 still hold the fill pattern.
+//
+
+#if DBG
+
+80: mfc1 t0,f0 // get fill pattern
+ mfc1 t1,f1 //
+ bne t0,a2,90f // if ne, pattern altered
+ bne t1,a2,90f // if ne, pattern altered
+ j ra // return
+
+90: break KERNEL_BREAKPOINT //
+
+#else
+
+80: j ra // return
+
+#endif
+
+ .end memset
diff --git a/private/crt32/string/mips/memsetm.s b/private/crt32/string/mips/memsetm.s
new file mode 100644
index 000000000..a53f8f0a1
--- /dev/null
+++ b/private/crt32/string/mips/memsetm.s
@@ -0,0 +1,105 @@
+/* --------------------------------------------------- */
+/* | Copyright (c) 1986 MIPS Computer Systems, Inc. | */
+/* | All Rights Reserved. | */
+/* --------------------------------------------------- */
+/* $Revision: 1.3 $ */
+
+/*
+ * char *
+ * memset(s, c, n)
+ * register char * s;
+ * register c, n;
+ * {
+ * register char * p = s;
+ *
+ * while (--n >= 0)
+ * *s++ = c;
+ *
+ * return (p);
+ * }
+ */
+
+/*
+ * Copyright 1986 by MIPS Computer Systems, Inc.
+ */
+
+#include <kxmips.h>
+
+#define NBPW 4
+
+/*
+ * memset(dst, c, bcount)
+ * set bcount bytes of memory starting at dst to the byte value c
+ *
+ * Calculating MINSET, assuming 10% cache-miss on non-loop code:
+ * Overhead =~ 18 instructions => 28 (30) cycles
+ * Byte set =~ 12 (24) cycles/word for 08M44 (08V11)
+ * Word set =~ 3 (5) cycles/word for 08M44 (08V11)
+ * If I-cache-miss nears 0, MINSET ==> 4 bytes; otherwise, times are:
+ * breakeven (MEM) = 28 / (12 - 3) =~ 3 words
+ * breakeven (VME) = 30 / (24 - 5) =~ 1.5 words
+ * Since the overhead is pessimistic (worst-case alignment), and many calls
+ * will be for well-aligned data, and since Word-set at least leaves
+ * the set in the cache, we shade these values (6-12) down to 8 bytes
+ */
+#define MINSET 8
+
+/* It turns out better to think of lwl/lwr and swl/swr as
+ smaller-vs-bigger address rather than left-vs-right.
+ Such a representation makes the code endian-independent. */
+
+#define LWS lwr
+#define LWB lwl
+#define SWS swr
+#define SWB swl
+
+LEAF_ENTRY(memset)
+/*
+ * a0 = dst, a1 = c, a2 = bcount; v0 returns the original dst.
+ *
+ * Counts below MINSET go straight to the byte loop. Otherwise the fill
+ * byte is replicated into all four bytes of a1, dst is brought to a word
+ * boundary with a partial-word store, and the remainder is written as
+ * 8-byte chunks, then at most one word, then trailing bytes.
+ *
+ * c is reduced to its low byte before replication. Without the mask a
+ * value with bits set above bit 7 (for example 0x100) would leak those
+ * bits into the replicated word and the word stores would write bytes
+ * that differ from (unsigned char)c.
+ */
+ move v0,a0 # return first argument; BDSLOT
+ blt a2,MINSET,byteset
+ subu v1,zero,a0 # number of bytes til aligned; BDSLOT
+ and a1,0xff # fill value is c converted to unsigned char
+ beq a1,$0,1f # make memset(s, 0, n) faster
+ sll t0,a1,8
+ or a1,t0
+ sll t0,a1,16
+ or a1,t0
+1: and v1,NBPW-1
+ subu a2,v1 # adjust count; BDSLOT
+ beq v1,zero,blkset # already aligned
+ SWS a1,0(a0)
+ addu a0,v1
+
+/*
+ * set 8 byte, aligned block (no point in unrolling further,
+ * since maximum write rate in M/500 is two cycles/word write)
+ */
+blkset:
+ and t0,a2,NBPW+NBPW-1 # count after by-8-byte loop done
+ subu a3,a2,t0 # total in 8 byte chunks; BDSLOT
+ beq a2,t0,wordset # less than 8 bytes to set
+ addu a3,a0 # dst endpoint
+1: addu a0,NBPW+NBPW
+ sw a1,-NBPW-NBPW(a0)
+ sw a1,-NBPW(a0)
+ bne a0,a3,1b
+ move a2,t0 # set end-of loop count
+
+/*
+ * do a word (if required) this is not a loop since loop above
+ * guarantees that at most one word must be written here.
+ */
+wordset:
+ and t0,a2,NBPW # count after by-word non-loop done
+ subu a2,t0 # adjust count; BDSLOT
+ beq t0,zero,byteset # less than word to set
+ sw a1,0(a0)
+ addu a0,NBPW
+
+/*
+ * set the remaining bytes one at a time; sb stores only the low byte
+ * of a1, so this path is correct for masked and unmasked values alike.
+ */
+byteset:
+ addu a3,a2,a0 # dst endpoint; BDSLOT
+ ble a2,zero,setdone
+1: addu a0,1
+ sb a1,-1(a0)
+ bne a0,a3,1b
+setdone:
+ j ra
+.end memset
diff --git a/private/crt32/string/mips/memsett.c b/private/crt32/string/mips/memsett.c
new file mode 100644
index 000000000..c653803ab
--- /dev/null
+++ b/private/crt32/string/mips/memsett.c
@@ -0,0 +1,20 @@
+char buffer[100];
+#include <stdio.h>
+#include <string.h>
+
+/*
+ * Smoke test for memset: fills the first 12 bytes of buffer with 0x0a,
+ * prints the buffer address before the call and the returned pointer after
+ * it, and dumps the filled bytes in hex.
+ *
+ * Returns 0 when memset returned its first argument, 1 otherwise.
+ */
+int main(void)
+{
+    char *f = buffer;
+    char *g = buffer;
+    int k = 12;
+
+    /* %p is the conversion for pointers; passing a pointer to %x is a
+       mismatched argument type. */
+    printf("%p\n", (void *)f);
+    f = (char *)memset(f, 0x0a, 12);
+    printf("%p\n", (void *)f);
+
+    if (f != g) {
+        /* Report the failure instead of exiting silently. */
+        printf("Fail - memset returned %p, expected %p\n",
+               (void *)f, (void *)g);
+        return 1;
+    }
+
+    while (k--)
+        printf("%2.2x", (unsigned)(unsigned char)*f++);
+
+    return 0;
+}
+
diff --git a/private/crt32/string/mips/strcatm.s b/private/crt32/string/mips/strcatm.s
new file mode 100644
index 000000000..0c22c47de
--- /dev/null
+++ b/private/crt32/string/mips/strcatm.s
@@ -0,0 +1,98 @@
+/* ------------------------------------------------------------------ */
+/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */
+/* | Reserved. This software contains proprietary and confidential | */
+/* | information of MIPS and its suppliers. Use, disclosure or | */
+/* | reproduction is prohibited without the prior express written | */
+/* | consent of MIPS. | */
+/* ------------------------------------------------------------------ */
+/* strcat.s 1.1 */
+
+/* This function is an assembly-code replacement for the libc function
+ * strcat.
+
+ * strcat and strcpy are very similar, but we waste about 40 words of
+ * code when both are used, so that they can be independently replaced.
+
+ * There is one caveat to consider: this function is written in
+ * assembler code, and as such, cannot be merged using the U-code
+ * loader. */
+
+/* Craig Hansen - 3-September-86 */
+
+#include <kxmips.h>
+
+/* It turns out better to think of lwl/lwr and swl/swr as
+ smaller-vs-bigger address rather than left-vs-right.
+ Such a representation makes the code endian-independent. */
+
+#define LWS lwr
+#define LWB lwl
+#define SWS swr
+#define SWB swl
+
+.text
+
+LEAF_ENTRY(strcat)
+ // Appends the zero-terminated string at a1 to the end of the
+ // zero-terminated string at a0 and returns the original a0 in v0.
+ // The whole routine is assembled under .set noreorder: the instruction
+ // written after each branch or jump is its delay slot and executes
+ // whether or not the branch is taken.
+.set noreorder
+ // a0/ destination
+ // a1/ source
+ move v0, a0 # a copy of destination address is returned
+ // scan the destination for its terminating zero byte; the add in the
+ // delay slot runs on every pass, so a0 ends one past the zero byte.
+ // The nop separates the load from the branch that tests its result
+ // (presumably the load delay slot).
+$findz: lb t0,0(a0)
+ nop
+ bne t0,0,$findz
+ add a0,1
+ // go back over null byte
+ add a0,-1
+ // start up first word
+ // adjust pointers so that a0 points to next word
+ // t7 = a1 adjusted by same amount minus one
+ // t0,t1,t2,t3 are filled with 4 consecutive bytes
+ // t4 is filled with the same 4 bytes in a single word
+ //
+ // t5 = a0 | 3 is the address of the last byte of the word containing
+ // a0. Each source byte is tested as soon as it is loaded; a zero byte
+ // leaves through $doch0..$doch3 with a0 still at the append point.
+ lb t0, 0(a1)
+ ori t5, a0, 3 # get an early start
+ beq t0, 0, $doch0
+ sub t6, t5, a0 # number of char in 1st word of dest - 1
+ lb t1, 1(a1)
+ add t7, a1, t6 # offset starting point for source string
+ beq t1, 0, $doch1
+ nop
+ lb t2, 2(a1)
+ nop
+ beq t2, 0, $doch2
+ LWS t4, 0(a1) # safe: always in same word as 0(a1)
+ lb t3, 3(a1)
+ LWB t4, 3(a1) # fill out word
+ // the SWS in the delay slot executes on both paths: it writes the
+ // leading source bytes from a0 up to the end of the destination word.
+ beq t3, 0, $doch3
+ SWS t4, 0(a0) # store entire or part word
+ // a0 = (a0 | 3) + 1 - 4, the word-aligned address of the word just
+ // written; the loop below adds 4 before each store.
+ addi a0, t5, 1-4 # adjust destination ptr
+
+ // inner loop
+ // Each pass handles the four source bytes starting at 1(t7) on entry.
+ // t7 is advanced by 4 right after the first load, which is why the
+ // later offsets are written as n+1-4. a0 is advanced by 4 in the
+ // delay slot of the first branch, so it already addresses the
+ // destination word when any $doch exit is taken. A pass with no zero
+ // byte among the first three bytes ends with an aligned sw of the
+ // assembled word (in the delay slot of the loop branch); when only
+ // the fourth byte is zero that sw has stored the terminator and the
+ // routine returns.
+1: lb t0, 1(t7)
+ addi t7, 4
+ beq t0, 0, $doch0
+ addi a0, 4
+ lb t1, 1+1-4(t7)
+ nop
+ beq t1, 0, $doch1
+ nop
+ lb t2, 2+1-4(t7)
+ nop
+ beq t2, 0, $doch2
+ LWS t4, 0+1-4(t7)
+ lb t3, 3+1-4(t7)
+ LWB t4, 3+1-4(t7)
+ bne t3, 0, 1b
+ sw t4, 0(a0)
+ j ra
+ nop
+
+ // store four bytes using swl/swr
+ // (reached only from the first-word code, where the SWS half has
+ // already been executed; the SWB in the delay slot completes it)
+$doch3: j ra
+ SWB t4, 3(a0)
+ // store up to three bytes, a byte at a time.
+ // The labels fall through: entry at $doch2 stores t2, t1, t0; entry at
+ // $doch1 stores t1, t0; entry at $doch0 stores t0. The register named
+ // by the entry label holds the zero terminator.
+$doch2: sb t2, 2(a0)
+$doch1: sb t1, 1(a0)
+$doch0: j ra
+ sb t0, 0(a0)
+
+.end strcat
diff --git a/private/crt32/string/mips/strchrm.s b/private/crt32/string/mips/strchrm.s
new file mode 100644
index 000000000..b92f79fd6
--- /dev/null
+++ b/private/crt32/string/mips/strchrm.s
@@ -0,0 +1,26 @@
+/* ------------------------------------------------------------------ */
+/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */
+/* | Reserved. This software contains proprietary and confidential | */
+/* | information of MIPS and its suppliers. Use, disclosure or | */
+/* | reproduction is prohibited without the prior express written | */
+/* | consent of MIPS. | */
+/* ------------------------------------------------------------------ */
+#ident "$Header: /disks/bits/5.1isms/irix/lib/libc/src/strings/RCS/index.s,v 1.3 1992/03/07 15:37:04 jleong Exp $"
+
+/*
+ * Copyright 1985 by MIPS Computer Systems, Inc.
+ */
+
+#include "kxmips.h"
+
+LEAF_ENTRY(strchr)
+/*
+ * a0 = string, a1 = character to find.
+ * Returns in v0 the address of the first byte of the string equal to the
+ * character (the terminating zero byte takes part in the search), or zero
+ * when the terminator is reached without a match.
+ *
+ * The character is reduced to its low byte first. String bytes are loaded
+ * with lbu (zero-extended), so a sign-extended negative char argument
+ * such as 0xffffffff would otherwise never compare equal to 0xff.
+ */
+ and a1,0xff
+1: lbu a2,0(a0)
+ addu a0,1
+ beq a2,a1,2f
+ bne a2,zero,1b
+ move v0,zero
+ j ra
+
+/* match: a0 was already advanced past the matching byte, so back up one */
+2: subu v0,a0,1
+ j ra
+.end strchr
diff --git a/private/crt32/string/mips/strchrt.c b/private/crt32/string/mips/strchrt.c
new file mode 100644
index 000000000..1da4e1d0d
--- /dev/null
+++ b/private/crt32/string/mips/strchrt.c
@@ -0,0 +1,20 @@
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+
+/*
+ * Test driver for strchr: for every character value from 'a' through
+ * UCHAR_MAX, plants the value at offset 9 of an upper-case string and checks
+ * that strchr returns a pointer to exactly that offset. The string holds no
+ * other byte >= 'a', so offset 9 is the only valid answer.
+ *
+ * Returns 0 when every lookup succeeded, 1 otherwise.
+ */
+int main( int argc, char **argv )
+    {
+    int c;
+    int failed = 0;
+    unsigned char *pstr;
+    unsigned char string[100];
+
+    (void)argc;
+    (void)argv;
+
+    strcpy((char *)string, "ABCDEFGHIJKLMNOPQRST");
+    for (c = 'a'; c <= UCHAR_MAX; c++)
+        {
+        string[9] = (unsigned char)c;
+        pstr = (unsigned char *)strchr((char *)string, c);
+        if (!pstr)
+            {
+            printf("Fail - Could not find %d in %s\n", c, (char *)string);
+            failed = 1;
+            }
+        else if (pstr != &string[9])
+            {
+            /* A non-NULL result at the wrong position is also a failure. */
+            printf("Fail - Found %d at offset %d, expected 9 in %s\n",
+                   c, (int)(pstr - string), (char *)string);
+            failed = 1;
+            }
+        }
+    return failed;
+    }
diff --git a/private/crt32/string/mips/strcmpm.s b/private/crt32/string/mips/strcmpm.s
new file mode 100644
index 000000000..dfaaa9c39
--- /dev/null
+++ b/private/crt32/string/mips/strcmpm.s
@@ -0,0 +1,50 @@
+/* ------------------------------------------------------------------ */
+/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */
+/* | Reserved. This software contains proprietary and confidential | */
+/* | information of MIPS and its suppliers. Use, disclosure or | */
+/* | reproduction is prohibited without the prior express written | */
+/* | consent of MIPS. | */
+/* ------------------------------------------------------------------ */
+/* strcmp.s 1.1 */
+
+/* This function is an assembly-code replacement for
+ the libc function "strcmp." */
+/* Libc currently has a mips-specific C version that uses 7 instructions/byte.
+ (It claims to use 6 cycles/byte, but is wrong!)
+ This function uses an unrolled loop, which uses 5 instructions per byte.
+
+ Under some circumstances more characters are read than are
+ required for determining the collating order, but it
+ never reads beyond the end of either string.
+
+ There is one caveat to consider: this function is written
+ in assembler code, and as such, cannot be merged
+ using the U-code loader. */
+
+/* Craig Hansen - 6-June-86 */
+
+#include <kxmips.h>
+
+ .text
+
+LEAF_ENTRY(strcmp)
+
+/*
+ * a0 = first string, a1 = second string; the result is returned in v0.
+ *
+ * The loop is unrolled by two: each pass compares one byte pair at even
+ * offset (t0 against t1) and one at odd offset (t2 against t1), and both
+ * pointers advance by 2. All bytes are loaded with lbu, so the result is
+ * computed from zero-extended byte values.
+ *
+ * The code is assembled under .set noreorder: the instruction written
+ * after each branch or jump is its delay slot and executes whether or not
+ * the branch is taken.
+ *
+ * Exits:
+ *   fall-through  odd-offset bytes differ          v0 = t2 - t1
+ *   2:            first string ended (t0 or t2 = 0) v0 = 0 - t1
+ *   3:            even-offset bytes differ         v0 = t0 - t1
+ */
+ .set noreorder
+ lbu t0,0(a0)
+1: lbu t1,0(a1)
+ beq t0,0,2f
+ addi a0,2
+ bne t0,t1,3f
+ lbu t2,-1(a0) # ok to load since -2(a0)!=0
+ lbu t1,1(a1)
+ beq t2,0,2f
+ addi a1,2
+ beq t2,t1,1b
+ lbu t0,0(a0) # ok to load since -1(a0) != 0
+ j ra
+ subu v0,t2,t1
+2: j ra
+ subu v0,zero,t1
+3: j ra
+ subu v0,t0,t1
+ .end strcmp
diff --git a/private/crt32/string/mips/strcpym.s b/private/crt32/string/mips/strcpym.s
new file mode 100644
index 000000000..4f5bc416a
--- /dev/null
+++ b/private/crt32/string/mips/strcpym.s
@@ -0,0 +1,148 @@
+/* ------------------------------------------------------------------ */
+/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */
+/* | Reserved. This software contains proprietary and confidential | */
+/* | information of MIPS and its suppliers. Use, disclosure or | */
+/* | reproduction is prohibited without the prior express written | */
+/* | consent of MIPS. | */
+/* ------------------------------------------------------------------ */
+/* strcpy.s 1.2 */
+
+/* This function is an assembly-code replacement for the libc function
+ * strcpy. It uses the MIPS special instructions "lwl", "lwr", "swl",
+ * and "swr", which handle unaligned words.
+
+ * The standard C version of this function is a 5-instruction loop,
+ * working one byte at a time:
+
+ * Copy string s2 to s1. s1 must be large enough.
+ * return s1
+ * char *strcpy(s1, s2)
+ * register char *s1, *s2;
+ * {
+ * register char *os1;
+ * os1 = s1;
+ * while (*s1++ = *s2++);
+ * return(os1);
+ * }
+
+ * A better C version is 4 cycles/byte. Loop is unrolled once.
+ * char *
+ * strcpy(s1, s2)
+ * register char *s1, *s2;
+ * {
+ * register char *os1 = s1;
+ * while (1) {
+ * register unsigned c;
+ * c = s2[0];
+ * s2 += 2;
+ * s1[0] = c;
+ * if (c == 0) break;
+ * c = s2[1-2];
+ * s1 += 2;
+ * s1[1-2] = c;
+ * if (c == 0) break;
+ * }
+ * return(os1);
+ * }
+
+ * This function starts with an unrolled loop, which uses 5
+ * instructions per byte (including the store bytes at the end) for
+ * the first few bytes.
+
+ * After filling a word, the first word or portion of a word is saved
+ * using a "swl" instruction. If the start of destination string is at
+ * a word boundary, this leaves the result valid in the cache. Because
+ * this replaces up to 4 store byte instructions, we are still near 3
+ * instructions per byte, but there is only one write.
+
+ * The inner loop moves 4 bytes in 16 cycles, an average of 4 cycles
+ * per byte. This is 1 cycle faster than the standard C code, the
+ * same speed as the unrolled version, and it also leaves the result
+ * valid in the cache.
+
+ * Finally, when a zero byte is found, the end of the string is stored
+ * using store byte instructions. This adds one instruction per byte
+ * for as much as three bytes, but eliminates the up to four cycles of
+ * overhead we counted before.
+
+ * The end result is that this function is never slower than the C
+ * function, is faster by up to 30% in instruction count, uses up to
+ * 75% fewer writes, and leaves most of the result valid in the cache.
+
+ * There is one caveat to consider: this function is written in
+ * assembler code, and as such, cannot be merged using the U-code
+ * loader. */
+
+/* Craig Hansen - 3-September-86 */
+
+#include <kxmips.h>
+
+/* It turns out better to think of lwl/lwr and swl/swr as
+ smaller-vs-bigger address rather than left-vs-right.
+ Such a representation makes the code endian-independent. */
+
+#define LWS lwr
+#define LWB lwl
+#define SWS swr
+#define SWB swl
+
+.text
+
+LEAF_ENTRY(strcpy)
+ // Copies the zero-terminated string at a1, terminator included, to a0
+ // and returns the original a0 in v0.
+ // The whole routine is assembled under .set noreorder: the instruction
+ // written after each branch or jump is its delay slot and executes
+ // whether or not the branch is taken.
+.set noreorder
+ // a0/ destination
+ // a1/ source
+ move v0, a0 # a copy of destination address is returned
+ // start up first word
+ // adjust pointers so that a0 points to next word
+ // t7 = a1 adjusted by same amount minus one
+ // t0,t1,t2,t3 are filled with 4 consecutive bytes
+ // t4 is filled with the same 4 bytes in a single word
+ //
+ // t5 = a0 | 3 is the address of the last byte of the word containing
+ // a0. Each source byte is tested as soon as it is loaded; a zero byte
+ // leaves through $doch0..$doch3 with a0 still at the start of the
+ // destination.
+ lb t0, 0(a1)
+ ori t5, a0, 3 # get an early start
+ beq t0, 0, $doch0
+ sub t6, t5, a0 # number of char in 1st word of dest - 1
+ lb t1, 1(a1)
+ add t7, a1, t6 # offset starting point for source string
+ beq t1, 0, $doch1
+ nop
+ lb t2, 2(a1)
+ nop
+ beq t2, 0, $doch2
+ LWS t4, 0(a1) # safe: always in same word as 0(a1)
+ lb t3, 3(a1)
+ LWB t4, 3(a1) # fill out word
+ // the SWS in the delay slot executes on both paths: it writes the
+ // leading source bytes from a0 up to the end of the destination word.
+ beq t3, 0, $doch3
+ SWS t4, 0(a0) # store entire or part word
+ // a0 = (a0 | 3) + 1 - 4, the word-aligned address of the word just
+ // written; the loop below adds 4 before each store.
+ addi a0, t5, 1-4 # adjust destination ptr
+
+ // inner loop
+ // Each pass handles the four source bytes starting at 1(t7) on entry.
+ // t7 is advanced by 4 right after the first load, which is why the
+ // later offsets are written as n+1-4. a0 is advanced by 4 in the
+ // delay slot of the first branch, so it already addresses the
+ // destination word when any $doch exit is taken. A pass with no zero
+ // byte among the first three bytes ends with an aligned sw of the
+ // assembled word (in the delay slot of the loop branch); when only
+ // the fourth byte is zero that sw has stored the terminator and the
+ // routine returns.
+1: lb t0, 1(t7)
+ addi t7, 4
+ beq t0, 0, $doch0
+ addi a0, 4
+ lb t1, 1+1-4(t7)
+ nop
+ beq t1, 0, $doch1
+ nop
+ lb t2, 2+1-4(t7)
+ nop
+ beq t2, 0, $doch2
+ LWS t4, 0+1-4(t7)
+ lb t3, 3+1-4(t7)
+ LWB t4, 3+1-4(t7)
+ bne t3, 0, 1b
+ sw t4, 0(a0)
+ j ra
+ nop
+
+ // store four bytes using swl/swr
+ // (reached only from the first-word code, where the SWS half has
+ // already been executed; the SWB in the delay slot completes it)
+$doch3: j ra
+ SWB t4, 3(a0)
+ // store up to three bytes, a byte at a time.
+ // The labels fall through: entry at $doch2 stores t2, t1, t0; entry at
+ // $doch1 stores t1, t0; entry at $doch0 stores t0. The register named
+ // by the entry label holds the zero terminator.
+$doch2: sb t2, 2(a0)
+$doch1: sb t1, 1(a0)
+$doch0: j ra
+ sb t0, 0(a0)
+
+.end strcpy
diff --git a/private/crt32/string/mips/strcpyt.c b/private/crt32/string/mips/strcpyt.c
new file mode 100644
index 000000000..4d0d99279
--- /dev/null
+++ b/private/crt32/string/mips/strcpyt.c
@@ -0,0 +1,23 @@
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+
+#define SRCLEN 21 /* to avoid complicating errors */
+
+/*
+ * Test driver for strcpy: for every character value from 'a' through
+ * UCHAR_MAX, plants the value at offset 9 of the source string, copies the
+ * string, and compares all SRCLEN bytes of the result, terminator included.
+ * The destination is refilled with '?' (a byte that never occurs in the
+ * source) before each copy so a byte strcpy failed to write is detected.
+ *
+ * Returns 0 when every copy matched, 1 otherwise.
+ */
+int main( int argc, char **argv )
+{
+    int c;
+    int i;
+    int failed = 0;
+    unsigned char src[SRCLEN] = "ABCDEFGHIJKLMNOPQRST";
+    unsigned char dst[100];
+
+    (void)argc;
+    (void)argv;
+
+    for (c = 'a'; c <= UCHAR_MAX; c++) {
+        src[9] = (unsigned char)c;
+
+        for (i = 0; i < (int)sizeof dst; i++)
+            dst[i] = '?';
+
+        strcpy((char *)dst, (char *)src);
+
+        /* Compare through index SRCLEN - 1 so the copied terminator is
+           checked as well. */
+        for (i = 0; i < SRCLEN; i++) {
+            if (src[i] != dst[i]) {
+                printf("Fail - strcpy mismatch at offset %d for 0x%x in %s\n",
+                       i, c, (char *)src);
+                failed = 1;
+                break;
+            }
+        }
+    }
+    return failed;
+}
diff --git a/private/crt32/string/mips/strlenm.s b/private/crt32/string/mips/strlenm.s
new file mode 100644
index 000000000..24027e0fc
--- /dev/null
+++ b/private/crt32/string/mips/strlenm.s
@@ -0,0 +1,19 @@
+/* ------------------------------------------------------------------ */
+/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */
+/* | Reserved. This software contains proprietary and confidential | */
+/* | information of MIPS and its suppliers. Use, disclosure or | */
+/* | reproduction is prohibited without the prior express written | */
+/* | consent of MIPS. | */
+/* ------------------------------------------------------------------ */
+/* strlen.s 1.1 */
+
+#include <kxmips.h>
+
+/*
+ * strlen - count the bytes that precede the first zero byte.
+ *
+ *   a0  address of the string
+ *   v0  on return, the number of non-zero bytes before the terminator
+ *
+ * This file does not issue .set noreorder, so the instructions are written
+ * in plain program order.
+ */
+LEAF_ENTRY(strlen)
+        subu    v0,a0,1         /* bias the cursor so the loop can pre-increment */
+1:      lbu     v1,1(v0)        /* fetch the next byte */
+        addu    v0,1            /* addu, not add: address arithmetic must never take an overflow trap */
+        bne     v1,zero,1b      /* keep scanning until the zero byte */
+        subu    v0,v0,a0        /* cursor addresses the terminator; difference is the length */
+        j       ra
+        .end    strlen
diff --git a/private/crt32/string/mips/strrchrm.s b/private/crt32/string/mips/strrchrm.s
new file mode 100644
index 000000000..feb1f7945
--- /dev/null
+++ b/private/crt32/string/mips/strrchrm.s
@@ -0,0 +1,24 @@
+/* ------------------------------------------------------------------ */
+/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */
+/* | Reserved. This software contains proprietary and confidential | */
+/* | information of MIPS and its suppliers. Use, disclosure or | */
+/* | reproduction is prohibited without the prior express written | */
+/* | consent of MIPS. | */
+/* ------------------------------------------------------------------ */
+#ident "$Header: /disks/bits/5.1isms/irix/lib/libc/src/strings/RCS/rindex.s,v 1.3 1992/03/07 15:37:36 jleong Exp $"
+
+/*
+ * Copyright 1985 by MIPS Computer Systems, Inc.
+ */
+
+#include "kxmips.h"
+
+/*
+ * strrchr - find the last occurrence of a byte value in a string.
+ *
+ *   a0  address of the string
+ *   a1  value to look for; only its low 8 bits take part in the search
+ *   v0  on return, address of the last matching byte, or zero if none.
+ *       The terminator itself is compared too, so searching for 0
+ *       returns the address of the terminator.
+ *
+ * This file does not issue .set noreorder, so the instructions are written
+ * in plain program order.
+ */
+LEAF_ENTRY(strrchr)
+        and     a1,a1,0xff      /* bytes are loaded zero-extended below, so reduce the
+                                   (possibly sign-extended) argument to the same range */
+        move    v0,zero         /* no match seen yet */
+1:      lbu     a3,0(a0)
+        addu    a0,1
+        bne     a3,a1,2f
+        subu    v0,a0,1         /* remember the most recent match */
+2:      bne     a3,zero,1b      /* stop once the terminator has been examined */
+        j       ra
+.end strrchr
diff --git a/private/crt32/string/mips/strrchrt.c b/private/crt32/string/mips/strrchrt.c
new file mode 100644
index 000000000..f608c2fc6
--- /dev/null
+++ b/private/crt32/string/mips/strrchrt.c
@@ -0,0 +1,20 @@
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+
+/*
+ * Test driver for strrchr().  For every byte value from 'a' through
+ * UCHAR_MAX the value is planted at offsets 4 and 9 of a 20-character
+ * string; strrchr() must return the address of the later one (offset 9).
+ */
+void main( int argc, char **argv )
+    {
+    int c;
+    unsigned char *pstr;
+    unsigned char string[100];
+
+    strcpy((char *)string, "ABCDEFGHIJKLMNOPQRST");
+    for (c = 'a'; c <= UCHAR_MAX; c++)
+        {
+        /* Two occurrences: a search that stops at the first one, or that
+           returns any other address, is reported below. */
+        string[4] = c;
+        string[9] = c;
+        pstr = (unsigned char *)strrchr( (char *)string, c);
+        if (!pstr)
+            printf("Fail - Could not find %d in %s\n", c, string);
+        else if (pstr != &string[9])
+            printf("Fail - Found %d at offset %d, expected 9, in %s\n",
+                   c, (int)(pstr - string), string);
+        }
+    return;
+    }
diff --git a/private/crt32/string/mips/wcscmpm.s b/private/crt32/string/mips/wcscmpm.s
new file mode 100644
index 000000000..d3997945d
--- /dev/null
+++ b/private/crt32/string/mips/wcscmpm.s
@@ -0,0 +1,67 @@
+/*******************************************************************************
+ * wcscmpm.s - contains wcscmp()
+ *
+ * ------------------------------------------------------------------
+ * | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights |
+ * | Reserved. This software contains proprietary and confidential |
+ * | information of MIPS and its suppliers. Use, disclosure or |
+ * | reproduction is prohibited without the prior express written |
+ * | consent of MIPS. |
+ * ------------------------------------------------------------------
+ * strcmp.s 1.1
+ *
+ * Purpose:
+ * wcscmp() compares two wide-character strings and returns an integer
+ * to indicate whether the first is less than the second, the two are
+ * equal, or whether the first is greater than the second.
+ *
+ * Comparison is done wchar_t by wchar_t on an UNSIGNED basis, which is to
+ * say that Null wchar_t(0) is less than any other character.
+ *
+ * This function is a MIPS assembly-code replacement for the C version.
+ *
+ * Entry:
+ *
+ * const wchar_t * src - string for left-hand side of comparison
+ * const wchar_t * dst - string for right-hand side of comparison
+ *
+ *Exit:
+ * returns -1 if src < dst
+ * returns 0 if src == dst
+ * returns +1 if src > dst
+ *
+ *Exceptions:
+ *
+ *Revision History:
+ * Craig Hansen (MIPS) 06-June-86 Created.
+ * Roger Lanser (MS) 02-April-94 Cloned for Wide Characters (16-bits).
+ *
+ ******************************************************************************/
+
+#include <kxmips.h>
+
+ .text
+
+LEAF_ENTRY(wcscmp)
+
+        // a0  source1 (left-hand string)
+        // a1  source2 (right-hand string)
+        // v0  -1, 0 or +1 for source1 <, ==, > source2 (unsigned compare)
+        //
+        // The loop is unrolled to handle two wchar_t's per pass.  This file
+        // does not issue .set noreorder, so the instructions are written in
+        // plain program order.
+
+        lhu     t0,0(a0)
+1:      lhu     t1,0(a1)
+        addiu   a0,4                // addiu: pointer arithmetic must not trap
+        bne     t0,t1,2f            // differ (this also covers one string ending)
+        beq     t0,zero,4f          // both ended together: equal
+        lhu     t0,-2(a0)           // ok to load since -4(a0) != 0
+        lhu     t1,2(a1)            // ok to load since 0(a1) == -4(a0) != 0
+        addiu   a1,4
+        bne     t0,t1,2f            // differ at the odd position
+        beq     t0,zero,4f          // both ended together: equal
+        lhu     t0,0(a0)            // ok to load since -2(a0) != 0
+        b       1b
+4:
+        move    v0,zero
+        j       ra                  // source1 == source2, return 0
+2:
+        sltu    v0,t1,t0            // t0 != t1 here: v0 = 1 when source1 > source2
+        beq     v0,zero,3f
+        j       ra                  // source1 > source2, return 1
+3:
+        li      v0,-1
+        j       ra                  // source1 < source2, return -1
+        .end    wcscmp
diff --git a/private/crt32/string/mips/wcscmpt.c b/private/crt32/string/mips/wcscmpt.c
new file mode 100644
index 000000000..629f61ca5
--- /dev/null
+++ b/private/crt32/string/mips/wcscmpt.c
@@ -0,0 +1,62 @@
+char buffer[100];
+#include <stdio.h>
+#include <memory.h>
+
+#define NTUL 7
+
+/*
+ * Test driver for wcscmp().  source1 is fixed; the last data word of
+ * source2 is replaced in turn by each entry of tul[], and the result of
+ * comparing the two buffers as 16-bit wide-character strings is printed.
+ */
+void main()
+{
+    int i, k;
+    int rc;
+
+    /* Four data words plus a zero word.  The zero word terminates the
+       string so that wcscmp() stops inside the array even when every data
+       word matches (the last tul[] entry is identical to source1[3]). */
+    unsigned long source1[5] = {
+        0x30003000,
+        0x30003000,
+        0x30003000,
+        0x36003000,
+        0x00000000
+    };
+
+    unsigned long source2[5] = {
+        0x30003000,
+        0x30003000,
+        0x30003000,
+        0x00000000,
+        0x00000000
+    };
+
+    unsigned long tul[NTUL] = {
+        0x35004600,
+        0x37004600,
+        0x36002f00,
+        0x37002f00,
+        0x30004600,
+        0x30003000,
+        0x36003000
+    };
+
+
+    for (k = 0; k < NTUL; k++) {
+        unsigned short *s1 = (unsigned short *)source1;
+        unsigned short *s2 = (unsigned short *)source2;
+
+        source2[3] = tul[k];
+
+        /* Dump only the four data words; unsigned char keeps bytes of
+           0x80 and above from being sign-extended by %x. */
+        printf("source1 = ");
+        for (i = 0; i < 4*sizeof(unsigned long); i++)
+            printf("%2.2x ", ((unsigned char *)source1)[i]);
+        printf("\n");
+
+        printf("source2 = ");
+        for (i = 0; i < 4*sizeof(unsigned long); i++)
+            printf("%2.2x ", ((unsigned char *)source2)[i]);
+
+        rc = wcscmp(s1,s2);
+        if (rc < 0) {
+            printf(" source1 < source2\n");
+        } else if (rc > 0) {
+            printf(" source1 > source2\n");
+        } else {
+            printf(" source1 == source2\n");
+        }
+        printf("Return Code = %d\n",rc);
+    }
+}
diff --git a/private/crt32/string/mips/wcscpym.s b/private/crt32/string/mips/wcscpym.s
new file mode 100644
index 000000000..41b3544ec
--- /dev/null
+++ b/private/crt32/string/mips/wcscpym.s
@@ -0,0 +1,139 @@
+/*******************************************************************************
+ * wcscpym.s - contains wcscat() and wcscpy()
+ *
+ * Copyright (c) 1994, Microsoft Corporation. All rights reserved.
+ *
+ * Purpose:
+ * wcscpy() copies one wchar_t string into another.
+ *
+ * wcscpy() copies the source string to the destination string
+ * assuming no overlap and enough room in the destination. The
+ * destination string is returned. Strings are wide-character
+ * strings.
+ *
+ * This function is a MIPS assembly-code replacement for the C version.
+ * The only thing that this code tries to do is to produce a loop that
+ * uses a lw/sw pair versus running a lhu/sh loop twice. A small
+ * penalty will be paid for very short wide-character strings due
+ * to the setup tests.
+ *
+ * Entry:
+ *
+ * wchar_t *wcscpy(dst, src)
+ * wchar_t * dst - wchar_t string over which "src" is to be copied
+ * const wchar_t * src - wchar_t string to be copied over "dst"
+ *
+ *Exit:
+ * The address of "dst".
+ *
+ *Exceptions:
+ *
+ *Revision History:
+ * 02-08-97 RDL Created initial version.
+ *
+ ******************************************************************************/
+
+#include <kxmips.h>
+
+.text
+
+LEAF_ENTRY(wcscat)
+
+        .set    noreorder               // delay slots below are filled by hand
+
+        // wcscat: advance a0 to the terminator of the destination string,
+        // then share the copy code with wcscpy.
+        //
+        // a0 destination
+        // a1 source
+
+        move    v0, a0                  // a copy of destination address is returned
+1:      lhu     t2,0(a0)
+        bnel    zero,t2,1b              // branch-likely: the addiu runs only when looping
+        addiu   a0,a0,2
+        b       2f                      // a0 now addresses the destination terminator
+        nop
+
+ALTERNATE_ENTRY(wcscpy)
+
+        // a0 destination
+        // a1 source
+        //
+        // Throughout, the low 16 bits of a loaded word are treated as the
+        // wchar_t at the lower address and the high 16 bits as the next one.
+
+        move    v0, a0                  // a copy of destination address is returned
+
+2:      andi    t1,a1,2                 // assume at least halfword alignment
+3:      andi    t0,a0,2                 // assume at least halfword alignment
+5:      bne     t0,t1,30f
+        nop
+
+10:     // buffers start on same alignment
+        beq     zero,t0,20f
+        nop
+        // halfword alignment: copy one wchar_t to reach a word boundary
+        lhu     t1,0(a1)
+        addiu   a0,2
+        addiu   a1,2
+        beq     zero,t1,99f
+        sh      t1,-2(a0)               // delay slot: stored even when it is the terminator
+
+20:     // word alignment
+        lw      t0,0(a1)
+        addiu   a0,4
+        addiu   a1,4
+        andi    t1,t0,0xffff
+        beq     zero,t1,92f             // first wchar_t is the terminator
+        srl     t2,t0,16
+        bne     zero,t2,20b
+        sw      t0,-4(a0)               // delay slot: store both wchar_t's
+        j       ra                      // second wchar_t was the terminator
+        nop
+
+30:     // buffers start on different alignment
+        beq     zero,t1,40f
+        nop
+        // destination on word boundary, source on halfword boundary
+        lhu     t0,0(a1)
+        addiu   a1,2
+35:     beq     zero,t0,92f             // pending wchar_t is the terminator
+        addiu   a0,4
+        lw      t1,0(a1)
+        addiu   a1,4
+        srl     t2,t1,16
+        andi    t1,0xffff
+        sll     t3,t1,16
+        or      t0,t0,t3                // pending wchar_t low, next wchar_t high
+        sw      t0,-4(a0)
+        bne     zero,t1,35b
+        or      t0,zero,t2              // delay slot: high half becomes the pending wchar_t
+        j       ra
+        nop
+
+40:     // destination on halfword boundary, source on word boundary
+        lw      t3,0(a1)
+        addiu   a0,2
+        addiu   a1,4
+        srl     t2,t3,16                // t2 = second wchar_t, carried into the loop
+        andi    t0,t3,0xffff
+        beq     zero,t0,99f
+        sh      t0,-2(a0)               // delay slot: store the first wchar_t
+        // The carried wchar_t must be checked before the loop loads another
+        // source word; otherwise a one-character source is read and copied
+        // past its terminator.
+        beq     zero,t2,96f
+        nop
+45:     lw      t3,0(a1)
+        addiu   a0,4
+        addiu   a1,4
+        srl     t1,t3,16
+        sll     t3,t3,16
+        beq     zero,t3,94f             // low half of the new word is the terminator
+        or      t0,t2,t3                // delay slot: carried wchar_t low, new wchar_t high
+        sw      t0,-4(a0)
+        bne     zero,t1,45b
+        or      t2,t1,zero              // delay slot: high half becomes the carried wchar_t
+        j       ra
+        sh      t1,0(a0)                // delay slot: t1 is zero here, store the terminator
+
+92:     j       ra
+        sh      t0,-4(a0)               // low half of t0 is zero: store the terminator
+
+94:     j       ra
+        sw      t0,-4(a0)               // carried wchar_t followed by the terminator
+
+96:     j       ra
+        sh      zero,0(a0)              // terminator directly after the first wchar_t
+
+99:     j       ra
+        nop
+        .set    reorder
+
+        .end    wcscat
diff --git a/private/crt32/string/mips/wcslenm.s b/private/crt32/string/mips/wcslenm.s
new file mode 100644
index 000000000..ef45c114e
--- /dev/null
+++ b/private/crt32/string/mips/wcslenm.s
@@ -0,0 +1,45 @@
+/*******************************************************************************
+ * wcslenm.s - contains wcslen()
+ *
+ * ------------------------------------------------------------------
+ * | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights |
+ * | Reserved. This software contains proprietary and confidential |
+ * | information of MIPS and its suppliers. Use, disclosure or |
+ * | reproduction is prohibited without the prior express written |
+ * | consent of MIPS. |
+ * ------------------------------------------------------------------
+ * strlen.s 1.1
+ *
+ * Purpose:
+ * Finds the length in wchar_t's of the given string, not including
+ * the final null wchar_t (wide-characters).
+ *
+ * This function is a MIPS assembly-code replacement for the C version.
+ *
+ * Entry:
+ *
+ * size_t wcslen(wcs)
+ * wchar_t * wcs - wchar_t string
+ *
+ *Exit:
+ * The "length" of wcs in wchar_t's.
+ *
+ *Exceptions:
+ *
+ *Revision History:
+ * Craig Hansen (MIPS) 06-June-86 Created.
+ * Roger Lanser (MS) 02-April-94 Cloned for Wide Characters (16-bits).
+ *
+ ******************************************************************************/
+
+#include <kxmips.h>
+
+LEAF_ENTRY(wcslen)
+        // a0  address of the wide-character string
+        // v0  on return, the number of wchar_t's before the terminator
+        move    v0,a0           // cursor starts at the first wchar_t
+1:      lhu     v1,0(v0)        // fetch the wchar_t under the cursor
+        addiu   v0,v0,2         // step past it
+        bne     v1,zero,1b      // loop until the terminator has been fetched
+        subu    v0,v0,a0        // bytes consumed, terminator included
+        srl     v0,v0,1         // convert bytes to wchar_t's
+        subu    v0,v0,1         // leave the terminator out of the count
+        j       ra
+        .end    wcslen