From e611b132f9b8abe35b362e5870b74bce94a1e58e Mon Sep 17 00:00:00 2001 From: Adam Date: Sat, 16 May 2020 20:51:50 -0700 Subject: initial commit --- private/crt32/string/mips/memcmpm.s | 125 ++++ private/crt32/string/mips/memcmpt.c | 334 ++++++++++ private/crt32/string/mips/memcpym.s | 298 +++++++++ private/crt32/string/mips/memorym.s | 1218 ++++++++++++++++++++++++++++++++++ private/crt32/string/mips/memsetm.s | 105 +++ private/crt32/string/mips/memsett.c | 20 + private/crt32/string/mips/strcatm.s | 98 +++ private/crt32/string/mips/strchrm.s | 26 + private/crt32/string/mips/strchrt.c | 20 + private/crt32/string/mips/strcmpm.s | 50 ++ private/crt32/string/mips/strcpym.s | 148 +++++ private/crt32/string/mips/strcpyt.c | 23 + private/crt32/string/mips/strlenm.s | 19 + private/crt32/string/mips/strrchrm.s | 24 + private/crt32/string/mips/strrchrt.c | 20 + private/crt32/string/mips/wcscmpm.s | 67 ++ private/crt32/string/mips/wcscmpt.c | 62 ++ private/crt32/string/mips/wcscpym.s | 139 ++++ private/crt32/string/mips/wcslenm.s | 45 ++ 19 files changed, 2841 insertions(+) create mode 100644 private/crt32/string/mips/memcmpm.s create mode 100644 private/crt32/string/mips/memcmpt.c create mode 100644 private/crt32/string/mips/memcpym.s create mode 100644 private/crt32/string/mips/memorym.s create mode 100644 private/crt32/string/mips/memsetm.s create mode 100644 private/crt32/string/mips/memsett.c create mode 100644 private/crt32/string/mips/strcatm.s create mode 100644 private/crt32/string/mips/strchrm.s create mode 100644 private/crt32/string/mips/strchrt.c create mode 100644 private/crt32/string/mips/strcmpm.s create mode 100644 private/crt32/string/mips/strcpym.s create mode 100644 private/crt32/string/mips/strcpyt.c create mode 100644 private/crt32/string/mips/strlenm.s create mode 100644 private/crt32/string/mips/strrchrm.s create mode 100644 private/crt32/string/mips/strrchrt.c create mode 100644 private/crt32/string/mips/wcscmpm.s create mode 100644 private/crt32/string/mips/wcscmpt.c create mode 100644 private/crt32/string/mips/wcscpym.s create mode 100644 private/crt32/string/mips/wcslenm.s (limited to 'private/crt32/string/mips') diff --git a/private/crt32/string/mips/memcmpm.s b/private/crt32/string/mips/memcmpm.s new file mode 100644 index 000000000..961939432 --- /dev/null +++ b/private/crt32/string/mips/memcmpm.s @@ -0,0 +1,125 @@ +/* ------------------------------------------------------------------ */ +/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */ +/* | Reserved. This software contains proprietary and confidential | */ +/* | information of MIPS and its suppliers. Use, disclosure or | */ +/* | reproduction is prohibited without the prior express written | */ +/* | consent of MIPS. | */ +/* ------------------------------------------------------------------ */ +#ident "$Header" + +/* + * Copyright 1985 by MIPS Computer Systems, Inc. 
+ */
+
+/* memcmp(s1, s2, n) */
+
+#include "kxmips.h"
+
+/*
+ * memcmp(src, dst, bcount)
+ *
+ * MINCMP is the minimum number of bytes for which it's worthwhile to
+ * try to align the cmp into word transactions
+ *
+ * Calculating MINCMP
+ *      Overhead  =~ 15 instructions => 90 cycles
+ *      Byte cmp  =~ 38 cycles/word
+ *      Word cmp  =~ 17 cycles/word
+ *      Breakeven =~ 16 bytes
+ */
+#define MINCMP 16
+#define NBPW 4
+
+LEAF_ENTRY(memcmp)
+        xor     v0,a0,a1                # check if src and dst co-alignable
+        blt     a2,MINCMP,bytecmp       # too short, just byte cmp
+        and     v0,NBPW-1
+        subu    t8,zero,a0              # number of bytes til aligned
+        bne     v0,zero,unalgncmp       # src and dst not alignable
+/*
+ * src and dst can be simultaneously word aligned
+ */
+        and     t8,NBPW-1
+        subu    a2,t8
+        beq     t8,zero,wordcmp         # already aligned
+        move    v0,v1                   # lw[lr] don't clear target reg
+        lwr     v0,0(a0)
+        lwr     v1,0(a1)
+        addu    a0,t8
+        addu    a1,t8
+        bne     v0,v1,cmpne
+
+/*
+ * word cmp loop
+ */
+wordcmp:
+        and     a3,a2,~(NBPW-1)
+        subu    a2,a3
+        beq     a3,zero,bytecmp
+        addu    a3,a0                   # src1 endpoint
+1:      lw      v0,0(a0)
+        lw      v1,0(a1)
+        addu    a0,NBPW                 # 1st BDSLOT
+        addu    a1,NBPW                 # 2nd BDSLOT (asm doesn't move)
+        bne     v0,v1,cmpne
+        bne     a0,a3,1b                # at least one more word
+        b       bytecmp
+
+/*
+ * deal with simultaneously unalignable cmp by aligning one src
+ */
+unalgncmp:
+        subu    a3,zero,a1              # calc byte cnt to get src2 aligned
+        and     a3,NBPW-1
+        subu    a2,a3
+        beq     a3,zero,partaligncmp    # already aligned
+        addu    a3,a0                   # src1 endpoint
+1:      lbu     v0,0(a0)
+        lbu     v1,0(a1)
+        addu    a0,1
+        addu    a1,1
+        bne     v0,v1,cmpne
+        bne     a0,a3,1b
+
+/*
+ * src unaligned, dst aligned loop
+ */
+partaligncmp:
+        and     a3,a2,~(NBPW-1)
+        subu    a2,a3
+        beq     a3,zero,bytecmp
+        addu    a3,a0
+1:
+        lwr     v0,0(a0)
+        lwl     v0,3(a0)
+        lw      v1,0(a1)
+        addu    a0,NBPW
+        addu    a1,NBPW
+        bne     v0,v1,cmpne
+        bne     a0,a3,1b
+
+/*
+ * brute force byte cmp loop
+ */
+bytecmp:
+        addu    a3,a2,a0                # src1 endpoint; BDSLOT
+        ble     a2,zero,cmpdone
+1:      lbu     v0,0(a0)
+        lbu     v1,0(a1)
+        addu    a0,1
+        addu    a1,1
+        bne     v0,v1,cmpne
+        bne     a0,a3,1b
+cmpdone:
+        move    v0,zero
+        j       ra
+
+cmpne:
+        sltu    a2,v1,v0
+        bne     a2,zero,9f
+        li      v0,-1
+        j       ra
+9:
+        li      v0,1
+        j       ra
+.end memcmp
diff --git a/private/crt32/string/mips/memcmpt.c b/private/crt32/string/mips/memcmpt.c
new file mode 100644
index 000000000..3adb427b9
--- /dev/null
+++ b/private/crt32/string/mips/memcmpt.c
@@ -0,0 +1,334 @@
+/*
+ * Test memcmp() function.
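+ *
+ * Note that the memcmp above returns exactly -1, 0, or +1, and the
+ * tests below assert those exact values; ANSI memcmp only promises
+ * the sign.  A rough C model of the contract under test (an
+ * illustrative sketch only; the name memcmp_model is invented):
+ *
+ *      int memcmp_model(const void *s1, const void *s2, unsigned int n)
+ *      {
+ *          const unsigned char *p1 = s1, *p2 = s2;
+ *
+ *          for (; n != 0; n--, p1++, p2++)
+ *              if (*p1 != *p2)
+ *                  return *p1 < *p2 ? -1 : 1;
+ *          return 0;
+ *      }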
+ */ + +char buffer[100]; +#include +#include + +#define FALSE 0 +#define TRUE 1 + +#define NTUL 7 +#define TEST16 4 +#define TEST32 8 + +#define BUFSIZE 256 + +void printbuf(char *identifier, char *buf, int length) +{ + int i; + printf("%s = '", identifier); + for (i = 0; i < length; i++) + printf("%c", buf[i]); + printf("'\n"); +} + +void main() +{ + int i, j, n, k, l; + int rc; + char *s1, *s2; + + char TavEqFailed = FALSE; + char TvaEqFailed = FALSE; + char TavltFailed = FALSE; + char TvaltFailed = FALSE; + char TavgtFailed = FALSE; + char TvagtFailed = FALSE; + + char TvveqFailed = FALSE; + char TvvltFailed = FALSE; + char TvvgtFailed = FALSE; + + int Tmisc = 0; + + unsigned long source1_16[TEST16] = { + 0x00003000, + 0x30003000, + 0x30003000, + 0x36003000 + }; + + unsigned long source2_16[TEST16] = { + 0x00003000, + 0x30003000, + 0x30003000, + 0x00000000 + }; + + unsigned long tul[NTUL] = { + 0x35004600, + 0x37004600, + 0x36002f00, + 0x37002f00, + 0x30004600, + 0x30003000, + 0x36003000 + }; + int tul_test[NTUL] = { + -1, + -1, + +1, + +1, + -1, + +1, + 0 + }; + + struct { + double dummy; + char source1[BUFSIZE]; + char source2[BUFSIZE]; + } buffer; + + char source32[32] = "0X0042036C 002477CD BREAK 0x91DF"; + char source[BUFSIZE]; + + for (j = 0; j < BUFSIZE; ) { + for (i = 0; i <= j % 32; i++, j++) { + buffer.source1[j] = source32[i]; + buffer.source2[j] = source32[i]; + } + } + + j = BUFSIZE; + s1 = buffer.source1; + s2 = buffer.source2; + while (j--) { + if (*s1++ != *s2++) { + printf("\n\nbuffer.source1 != buffer.source2, exiting test!!!\n"); + exit(-1); + } + } + + if (memcmp(buffer.source1, buffer.source2, BUFSIZE) != 0) { + printf("\n\tbuffer.source1 != buffer.source2, exiting test!!!\n"); + exit(-1); + } + + /* Test for zero length */ + for (i = 0; i < BUFSIZE; i++ ) { + int l; + + s1 = &(buffer.source1[i]); + s2 = &(buffer.source2[i]); + l = 0; + rc = memcmp(s1, s2, l); + if (rc) { + printf("%s, line #%d: Zero length test failed!!!\n", __FILE__, __LINE__); + break; + } + } + + + for (k = BUFSIZE; k > 0; k-- ) { + for (n = 0; n < k; n++) { + char c; + int l; + int m; + + /* Test with aligned start and variable end */ + if (!TavEqFailed) { + s1 = buffer.source1; + s2 = buffer.source2; + l = k; + rc = memcmp(s1, s2, l); + if (rc != 0) { + printbuf("source1", s1, l); + printbuf("source2", s2, l); + printf("%s, line #%d: %d byte aligned block equal test failed!!!\n", __FILE__, __LINE__, k); + TavEqFailed = TRUE; + } + } + + /* Test with variable start and aligned end */ + if (!TvaEqFailed) { + s1 = &(buffer.source1[n]); + s2 = &(buffer.source2[n]); + l = k - n; + rc = memcmp(s1, s2, l); + if (rc != 0) { + printbuf("source1", s1, l); + printbuf("source2", s2, l); + printf("%s, line #%d: %d byte unaligned block equal test failed!!!\n", __FILE__, __LINE__, k); + TvaEqFailed = TRUE; + } + } + + /* Test with aligned start and variable end */ + s1 = buffer.source1; + s2 = buffer.source2; + l = k - n; + for (m = 0; m < l && !TavltFailed; m++) { + c = s1[m]; + s1[m] -= 1; + rc = memcmp(s1, s2, l); + if (rc != -1) { + printbuf("source1", s1, l); + printbuf("source2", s2, l); + printf("%s, line #%d: %d byte aligned block less than test failed!!!\n", __FILE__, __LINE__, k); + TavltFailed = TRUE; + } + s1[m] = c; + } + + /* Test with variable start and aligned end */ + s1 = &(buffer.source1[n]); + s2 = &(buffer.source2[n]); + l = k - n; + for (m = 0; m < l && !TvaltFailed; m++) { + c = s1[m]; + s1[m] -= 1; + rc = memcmp(s1, s2, l); + if (rc != -1) { + printbuf("source1", s1, l); + 
printbuf("source2", s2, l); + printf("%s, line #%d: %d byte unaligned block less than test failed!!!\n", __FILE__, __LINE__, k); + TvaltFailed = TRUE; + } + s1[m] = c; + } + + /* Test with aligned start and variable end */ + s1 = buffer.source1; + s2 = buffer.source2; + l = k - n; + for (m = 0; m < l && !TavgtFailed; m++) { + c = s1[m]; + s1[m] += 1; + rc = memcmp(s1, s2, l); + if (rc != 1) { + printbuf("source1", s1, l); + printbuf("source2", s2, l); + printf("%s, line #%d: %d byte aligned block greater than test failed!!!\n", __FILE__, __LINE__, k); + TavgtFailed = TRUE; + } + s1[m] = c; + } + + /* Test with variable start and aligned end */ + s1 = &(buffer.source1[n]); + s2 = &(buffer.source2[n]); + l = k - n; + for (m = 0; m < l && !TvagtFailed; m++) { + c = s1[m]; + s1[m] += 1; + rc = memcmp(s1, s2, l); + if (rc != 1) { + printbuf("source1", s1, l); + printbuf("source2", s2, l); + printf("%s, line #%d: %d byte unaligned block greater than test failed!!!\n", __FILE__, __LINE__, k); + TvagtFailed = TRUE; + } + s1[m] = c; + } + } + } + + for (k = BUFSIZE; k > 0; k-- ) { + for (n = 0; n < k/2; n++) { + char c; + int m; + + /* Test equal with variable start and end */ + if (!TvveqFailed) { + l = k - 2*n; + s1 = &(buffer.source1[n]); + s2 = &(buffer.source2[n]); + rc = memcmp(s1, s2, l); + if (rc != 0) { + printbuf("source1", s1, l); + printbuf("source2", s2, l); + printf("%s, line #%d: %d byte variable block equal test failed!!!\n", __FILE__, __LINE__, l); + TvveqFailed = TRUE; + } + } + + /* Test less than with variable start and end */ + l = k - 2*n; + s1 = buffer.source1; + s2 = buffer.source2; + for (m = 0; m < l && !TvvltFailed; m++) { + c = s1[m]; + s1[m] -= 1; + rc = memcmp(s1, s2, l); + if (rc != -1) { + printbuf("source1", s1, l); + printbuf("source2", s2, l); + printf("%s, line #%d: %d byte variable block less than test failed!!!\n", __FILE__, __LINE__, l); + TvvltFailed = TRUE; + } + s1[m] = c; + } + + /* Test greater than with variable start and end */ + l = k - 2*n; + s1 = buffer.source1; + s2 = buffer.source2; + for (m = 0; m < l && !TvvgtFailed; m++) { + c = s1[m]; + s1[m] += 1; + rc = memcmp(s1, s2, l); + if (rc != 1) { + printbuf("source1", s1, l); + printbuf("source2", s2, l); + printf("%s, line #%d: %d byte variable block greater than test failed!!!\n", __FILE__, __LINE__, l); + TvvgtFailed = TRUE; + } + s1[m] = c; + } + } + } + + + /* Misc test1 */ + for (k = 0; k < NTUL; k++) { + + source2_16[3] = tul[k]; + + rc = memcmp(source1_16,source2_16,TEST16*sizeof(unsigned long)); + if (rc != tul_test[k]) { + + printf("source1_16 = "); + for (i = 0; i < TEST16*sizeof(unsigned long); i++) + printf("%2.2x ", ((char *)source1_16)[i]); + printf("\n"); + + printf("source2_16 = "); + for (i = 0; i < TEST16*sizeof(unsigned long); i++) + printf("%2.2x ", ((char *)source2_16)[i]); + printf("%s, line #%d: Misc Test #1, case #%d of %d failed!!!\n", __FILE__, __LINE__, k+1, NTUL); + printf("Return Code = %d, Should be = %d\n",rc,tul_test[k]); + Tmisc++; + } + } + + + /* Misc test2 */ + l = 32; + buffer.source2[0] = '"'; + for (i = 0; i < l; i++) { + buffer.source1[i] = source32[i]; + buffer.source2[i+1] = source32[i]; + } + buffer.source2[l+1] = '"'; + s1 = &(buffer.source1[0]); + s2 = &(buffer.source2[1]); + if (0 != memcmp(s1, s2, l)) { + printbuf("source1", s1, l); + printbuf("source2", s2, l); + printf("%s, line #%d: Misc Test #2 failed!!!\n", __FILE__, __LINE__); + Tmisc++; + } + + + rc = TavEqFailed + TvaEqFailed + TavltFailed + TvaltFailed + TavgtFailed + TvagtFailed + TvveqFailed 
+ TvvltFailed + TvvgtFailed + Tmisc; + if (rc) { + printf("\n\tMEMCMP failed %d tests!!!\n", rc); + exit(rc); + } else { + printf("\n\tMEMCMP passed all tests!!!\n"); + exit(0); + } +} diff --git a/private/crt32/string/mips/memcpym.s b/private/crt32/string/mips/memcpym.s new file mode 100644 index 000000000..ca0f8fe78 --- /dev/null +++ b/private/crt32/string/mips/memcpym.s @@ -0,0 +1,298 @@ +/* + * Fast bcopy code which supports overlapped copies. + * Not fully optimized yet. + * + * Written by: Kipp Hickman + * + * $Source: /proj/sherwood/isms/irix/lib/libc/src/strings/RCS/bcopy.s,v $ + * $Revision: 1.7 $ + * $Date: 1993/11/20 19:23:11 $ + */ + +#include + +/* + * char *bcopy(from, to, count); + * unsigned char *from, *to; + * unsigned long count; + * + * OR + * + * void *memcpy/memmove(to, from, count); + * void *to, *from; + * unsigned long count; + * + * Both functions return "to" + */ + +#define MINCOPY 16 + +/* registers used */ + +#define to a0 +#define from a1 +#define count a2 + +LEAF_ENTRY(memcpy) +ALTERNATE_ENTRY(memmove) + move a3,to # Save to in a3 + beq count,zero,ret # Test for zero count + beq from,to,ret # Test for from == to + + /* use backwards copying code if the from and to regions overlap */ + blt to,from,goforwards # If to < from then use forwards copy + add v0,from,count # v0 := from + count + bge to,v0,goforwards # If to >= from + count; no overlap + b gobackwards # Oh well, go backwards + +/*****************************************************************************/ + +/* + * Forward copy code. Check for pointer alignment and try to get both + * pointers aligned on a long boundary. + */ +goforwards: + /* small byte counts use byte at a time copy */ + blt count,MINCOPY,forwards_bytecopy + and v0,from,3 # v0 := from & 3 + and v1,to,3 # v1 := to & 3 + beq v0,v1,forwalignable # low bits are identical +/* + * Byte at a time copy code. This is used when the pointers are not + * alignable, when the byte count is small, or when cleaning up any + * remaining bytes on a larger transfer. + */ +forwards_bytecopy: + beq count,zero,ret # If count is zero, then we are done + addu v1,from,count # v1 := from + count + +99: lb v0,0(from) # v0 = *from + addu from,1 # advance pointer + sb v0,0(to) # Store byte + addu to,1 # advance pointer + bne from,v1,99b # Loop until done +ret: move v0,a3 # Set v0 to old "to" pointer + j ra # return to caller + +/* + * Pointers are alignable, and may be aligned. Since v0 == v1, we need only + * check what value v0 has to see how to get aligned. Also, since we have + * eliminated tiny copies, we know that the count is large enough to + * encompass the alignment copies. 
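+ *
+ * In C, the alignment dispatch and head copy amount to the following
+ * (an illustrative sketch only, assuming 32-bit pointers as on MIPS):
+ *
+ *      if (((unsigned long)from & 3) == ((unsigned long)to & 3)) {
+ *          while ((unsigned long)to & 3) {  // peel 1-3 head bytes;
+ *              *to++ = *from++;             // safe: count >= MINCOPY
+ *              count--;
+ *          }
+ *          // both pointers are now word aligned
+ *      }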
+ */ +forwalignable: + beq v0,zero,forwards # If v0==v1 && v0==0 then aligned + beq v0,1,forw_copy3 # Need to copy 3 bytes to get aligned + beq v0,2,forw_copy2 # Need to copy 2 bytes to get aligned + +/* need to copy 1 byte */ + lb v0,0(from) # get one byte + addu from,1 # advance pointer + sb v0,0(to) # store one byte + addu to,1 # advance pointer + subu count,1 # and reduce count + b forwards # Now pointers are aligned + +/* need to copy 2 bytes */ +forw_copy2: + lh v0,0(from) # get one short + addu from,2 # advance pointer + sh v0,0(to) # store one short + addu to,2 # advance pointer + subu count,2 # and reduce count + b forwards + +/* need to copy 3 bytes */ +forw_copy3: + lb v0,0(from) # get one byte + lh v1,1(from) # and one short + addu from,3 # advance pointer + sb v0,0(to) # store one byte + sh v1,1(to) # and one short + addu to,3 # advance pointer + subu count,3 # and reduce count + /* FALLTHROUGH */ +/* + * Once we are here, the pointers are aligned on long boundaries. + * Begin copying in large chunks. + */ +forwards: + +/* 32 byte at a time loop */ +forwards_32: + blt count,32,forwards_16 # do 16 bytes at a time + lw v0,0(from) + lw v1,4(from) + lw t0,8(from) + lw t1,12(from) + lw t2,16(from) + lw t3,20(from) + lw t4,24(from) + lw t5,28(from) # Fetch 8*4 bytes + addu from,32 # advance from pointer now + sw v0,0(to) + sw v1,4(to) + sw t0,8(to) + sw t1,12(to) + sw t2,16(to) + sw t3,20(to) + sw t4,24(to) + sw t5,28(to) # Store 8*4 bytes + addu to,32 # advance to pointer now + subu count,32 # Reduce count + b forwards_32 # Try some more + +/* 16 byte at a time loop */ +forwards_16: + blt count,16,forwards_4 # Do rest in words + lw v0,0(from) + lw v1,4(from) + lw t0,8(from) + lw t1,12(from) + addu from,16 # advance from pointer now + sw v0,0(to) + sw v1,4(to) + sw t0,8(to) + sw t1,12(to) + addu to,16 # advance to pointer now + subu count,16 # Reduce count + b forwards_16 # Try some more + +/* 4 bytes at a time loop */ +forwards_4: + blt count,4,forwards_bytecopy # Do rest + lw v0,0(from) + addu from,4 # advance pointer + sw v0,0(to) + addu to,4 # advance pointer + subu count,4 + b forwards_4 + +/*****************************************************************************/ + +/* + * Backward copy code. Check for pointer alignment and try to get both + * pointers aligned on a long boundary. + */ +gobackwards: + add from,count # Advance to end + 1 + add to,count # Advance to end + 1 + + /* small byte counts use byte at a time copy */ + blt count,MINCOPY,backwards_bytecopy + and v0,from,3 # v0 := from & 3 + and v1,to,3 # v1 := to & 3 + beq v0,v1,backalignable # low bits are identical +/* + * Byte at a time copy code. This is used when the pointers are not + * alignable, when the byte count is small, or when cleaning up any + * remaining bytes on a larger transfer. + */ +backwards_bytecopy: + beq count,zero,ret # If count is zero quit + subu from,1 # Reduce by one (point at byte) + subu to,1 # Reduce by one (point at byte) + subu v1,from,count # v1 := original from - 1 + +99: lb v0,0(from) # v0 = *from + subu from,1 # backup pointer + sb v0,0(to) # Store byte + subu to,1 # backup pointer + bne from,v1,99b # Loop until done + move v0,a3 # Set v0 to old "to" pointer + j ra # return to caller + +/* + * Pointers are alignable, and may be aligned. Since v0 == v1, we need only + * check what value v0 has to see how to get aligned. Also, since we have + * eliminated tiny copies, we know that the count is large enough to + * encompass the alignment copies. 
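+ *
+ * One iteration of the 32-byte loops, written in C (an illustrative
+ * sketch only, assuming a 32-bit unsigned int): all eight loads are
+ * issued before the eight stores, mirroring the asm, so no store has
+ * to wait on a load still in flight.
+ *
+ *      unsigned int *f = (unsigned int *)from, *t = (unsigned int *)to;
+ *      unsigned int w0 = f[0], w1 = f[1], w2 = f[2], w3 = f[3];
+ *      unsigned int w4 = f[4], w5 = f[5], w6 = f[6], w7 = f[7];
+ *      t[0] = w0; t[1] = w1; t[2] = w2; t[3] = w3;
+ *      t[4] = w4; t[5] = w5; t[6] = w6; t[7] = w7;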
+ */ +backalignable: + beq v0,zero,backwards # If v0==v1 && v0==0 then aligned + beq v0,3,back_copy3 # Need to copy 3 bytes to get aligned + beq v0,2,back_copy2 # Need to copy 2 bytes to get aligned + +/* need to copy 1 byte */ + lb v0,-1(from) # get one byte + subu from,1 # backup pointer + sb v0,-1(to) # store one byte + subu to,1 # backup pointer + subu count,1 # and reduce count + b backwards # Now pointers are aligned + +/* need to copy 2 bytes */ +back_copy2: + lh v0,-2(from) # get one short + subu from,2 # backup pointer + sh v0,-2(to) # store one short + subu to,2 # backup pointer + subu count,2 # and reduce count + b backwards + +/* need to copy 3 bytes */ +back_copy3: + lb v0,-1(from) # get one byte + lh v1,-3(from) # and one short + subu from,3 # backup pointer + sb v0,-1(to) # store one byte + sh v1,-3(to) # and one short + subu to,3 # backup pointer + subu count,3 # and reduce count + /* FALLTHROUGH */ +/* + * Once we are here, the pointers are aligned on long boundaries. + * Begin copying in large chunks. + */ +backwards: + +/* 32 byte at a time loop */ +backwards_32: + blt count,32,backwards_16 # do 16 bytes at a time + lw v0,-4(from) + lw v1,-8(from) + lw t0,-12(from) + lw t1,-16(from) + lw t2,-20(from) + lw t3,-24(from) + lw t4,-28(from) + lw t5,-32(from) # Fetch 8*4 bytes + subu from,32 # backup from pointer now + sw v0,-4(to) + sw v1,-8(to) + sw t0,-12(to) + sw t1,-16(to) + sw t2,-20(to) + sw t3,-24(to) + sw t4,-28(to) + sw t5,-32(to) # Store 8*4 bytes + subu to,32 # backup to pointer now + subu count,32 # Reduce count + b backwards_32 # Try some more + +/* 16 byte at a time loop */ +backwards_16: + blt count,16,backwards_4 # Do rest in words + lw v0,-4(from) + lw v1,-8(from) + lw t0,-12(from) + lw t1,-16(from) + subu from,16 # backup from pointer now + sw v0,-4(to) + sw v1,-8(to) + sw t0,-12(to) + sw t1,-16(to) + subu to,16 # backup to pointer now + subu count,16 # Reduce count + b backwards_16 # Try some more + +/* 4 byte at a time loop */ +backwards_4: + blt count,4,backwards_bytecopy # Do rest + lw v0,-4(from) + subu from,4 # backup from pointer + sw v0,-4(to) + subu to,4 # backup to pointer + subu count,4 # Reduce count + b backwards_4 +.end memcpy diff --git a/private/crt32/string/mips/memorym.s b/private/crt32/string/mips/memorym.s new file mode 100644 index 000000000..6f98423dc --- /dev/null +++ b/private/crt32/string/mips/memorym.s @@ -0,0 +1,1218 @@ +// TITLE("Compare, Move, and Fill Memory Support") +//++ +// +// Copyright (c) 1990 Microsoft Corporation +// +// Module Name: +// +// memory.s +// +// Abstract: +// +// This module implements functions to compare, move, zero, and fill +// blocks of memory. If the memory is aligned, then these functions +// are very efficient. +// +// N.B. These routines MUST preserve all floating state since they are +// frequently called from interrupt service routines that normally +// do not save or restore floating state. +// +// Author: +// +// David N. Cutler (davec) 11-Apr-1990 +// +// Environment: +// +// User or Kernel mode. +// +// Revision History: +// 02/02/94 RDL This is a cloned version of ntos\rtl\mips\xxmvmem.s +// Used RtlMoveMemory and RtlFillMemory. +// 02/15/94 RDL Used RtlCompareMemory, changed return code for memcmp. +// 02/22/94 RDL Fixed memcmp, zero length and equal aligned 32-byte +// buffers return wrong code. 
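+//
+// All of the routines below share one length decomposition: the byte
+// count is split into 32-byte blocks, then 4-byte blocks, then a byte
+// tail.  As a C sketch (illustrative only, not a quote of the code):
+//
+//      ULONG Blocks32 = Length & ~31;  // moved 32 bytes at a time
+//      ULONG Rest     = Length & 31;
+//      ULONG Blocks4  = Rest & ~3;     // then 4 bytes at a time
+//      ULONG Tail     = Rest & 3;      // then 1 byte at a time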
+//
+//--
+
+#include "ksmips.h"
+        SBTTL("Compare Memory")
+
+//++
+//
+// ULONG
+// RtlCompareMemory (
+//    IN PVOID Source1,
+//    IN PVOID Source2,
+//    IN ULONG Length
+//    )
+//
+// Routine Description:
+//
+//    This function compares two blocks of memory and returns a value
+//    that indicates their collating order (memcmp semantics; cloned
+//    from RtlCompareMemory, which instead returns the number of bytes
+//    that compared equal).
+//
+// Arguments:
+//
+//    Source1 (a0) - Supplies a pointer to the first block of memory to
+//       compare.
+//
+//    Source2 (a1) - Supplies a pointer to the second block of memory to
+//       compare.
+//
+//    Length (a2) - Supplies the length, in bytes, of the memory to be
+//       compared.
+//
+// Return Value:
+//
+//    zero if source1 == source2
+//    -1 if source1 < source2
+//    1 if source1 > source2
+//
+//--
+
+        LEAF_ENTRY(memcmp)
+
+        addu    a3,a0,a2                // compute ending address of source1
+        move    v0,a2                   // save length of comparison
+        and     t0,a2,32 - 1            // isolate residual bytes
+        subu    t1,a2,t0                // subtract out residual bytes
+        addu    t4,a0,t1                // compute ending block address
+        beq     zero,t1,100f            // if eq, no 32-byte block to compare
+        or      t0,a0,a1                // merge and isolate alignment bits
+        and     t0,t0,0x3               //
+        bne     zero,t0,CompareUnaligned // if ne, unaligned comparison
+
+//
+// Compare memory aligned.
+//
+
+CompareAligned:                         //
+
+        .set    noreorder
+10:     lw      t0,0(a0)                // compare 32-byte block
+        lw      t1,0(a1)                //
+        lw      t2,4(a0)                //
+        bne     t0,t1,90f               // if ne, first word not equal
+        lw      t3,4(a1)                //
+        lw      t0,8(a0)                //
+        bne     t2,t3,20f               // if ne, second word not equal
+        lw      t1,8(a1)                //
+        lw      t2,12(a0)               //
+        bne     t0,t1,30f               // if ne, third word not equal
+        lw      t3,12(a1)               //
+        lw      t0,16(a0)               //
+        bne     t2,t3,40f               // if ne, fourth word not equal
+        lw      t1,16(a1)               //
+        lw      t2,20(a0)               //
+        bne     t0,t1,50f               // if ne, fifth word not equal
+        lw      t3,20(a1)               //
+        lw      t0,24(a0)               //
+        bne     t2,t3,60f               // if ne, sixth word not equal
+        lw      t1,24(a1)               //
+        lw      t2,28(a0)               //
+        bne     t0,t1,70f               // if ne, seventh word not equal
+        lw      t3,28(a1)               //
+        addu    a0,a0,32                // advance source1 to next block
+        bne     t2,t3,80f               // if ne, eighth word not equal
+        nop                             //
+        bne     a0,t4,10b               // if ne, more 32-byte blocks to compare
+        addu    a1,a1,32                // update source2 address
+        .set    reorder
+
+        subu    a2,a3,a0                // compute remaining bytes
+        b       100f                    //
+
+//
+// Compare memory unaligned.
+//
+
+CompareUnaligned:                       //
+        and     t0,a0,0x3               // isolate source1 alignment
+        bne     zero,t0,CompareUnalignedS1 // if ne, source1 unaligned
+
+//
+// Source1 is aligned and Source2 is unaligned.
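+//
+// The lwr/lwl pairs used below assemble an aligned word from an
+// unaligned source address.  A portable C equivalent of one such load
+// (an illustrative sketch; the helper name is invented):
+//
+//      ULONG LoadUnaligned(const UCHAR *p)
+//      {
+//          ULONG w;
+//
+//          memcpy(&w, p, sizeof(w));   // byte-wise copy; what one
+//          return w;                   // lwr/lwl pair does in two
+//      }                               // instructions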
+// + +CompareUnalignedS2: // + + .set noreorder +10: lw t0,0(a0) // compare 32-byte block + lwr t1,0(a1) // + lwl t1,3(a1) // + lw t2,4(a0) // + bne t0,t1,90f // if ne, first word not equal + lwr t3,4(a1) // + lwl t3,7(a1) // + lw t0,8(a0) // + bne t2,t3,20f // if ne, second word not equal + lwr t1,8(a1) // + lwl t1,11(a1) // + lw t2,12(a0) // + bne t0,t1,30f // if ne, third word not equal + lwr t3,12(a1) // + lwl t3,15(a1) // + lw t0,16(a0) // + bne t2,t3,40f // if ne, fourth word not equal + lwr t1,16(a1) // + lwl t1,19(a1) // + lw t2,20(a0) // + bne t0,t1,50f // if ne, fifth word not equal + lwr t3,20(a1) // + lwl t3,23(a1) // + lw t0,24(a0) // + bne t2,t3,60f // if ne, sixth word not equal + lwr t1,24(a1) // + lwl t1,27(a1) // + lw t2,28(a0) // + bne t0,t1,70f // if ne, seventh word not equal + lwr t3,28(a1) // + lwl t3,31(a1) // + addu a0,a0,32 // advance source1 to next block + bne t2,t3,80f // if ne, eighth word not equal + nop // + bne a0,t4,10b // if ne, more 32-byte blocks to compare + addu a1,a1,32 // update source2 address + .set reorder + + subu a2,a3,a0 // compute remaining bytes + b 100f // + +// +// Source1 is unaligned, check Source2 alignment. +// + +CompareUnalignedS1: // + and t0,a1,0x3 // isolate Source2 alignment + bne zero,t0,CompareUnalignedS1AndS2 // if ne, Source2 unaligned + +// +// Source1 is unaligned and Source2 is aligned. +// + + .set noreorder +10: lwr t0,0(a0) // compare 32-byte block + lwl t0,3(a0) // + lw t1,0(a1) // + lwr t2,4(a0) // + lwl t2,7(a0) // + bne t0,t1,90f // if ne, first word not equal + lw t3,4(a1) // + lwr t0,8(a0) // + lwl t0,11(a0) // + bne t2,t3,20f // if ne, second word not equal + lw t1,8(a1) // + lwr t2,12(a0) // + lwl t2,15(a0) // + bne t0,t1,30f // if ne, third word not equal + lw t3,12(a1) // + lwr t0,16(a0) // + lwl t0,19(a0) // + bne t2,t3,40f // if ne, fourth word not equal + lw t1,16(a1) // + lwr t2,20(a0) // + lwl t2,23(a0) // + bne t0,t1,50f // if ne, fifth word not equal + lw t3,20(a1) // + lwr t0,24(a0) // + lwl t0,27(a0) // + bne t2,t3,60f // if ne, sixth word not equal + lw t1,24(a1) // + lwr t2,28(a0) // + lwl t2,31(a0) // + bne t0,t1,70f // if ne, seventh word not equal + lw t3,28(a1) // + addu a0,a0,32 // advance source1 to next block + bne t2,t3,80f // if ne, eighth word not equal + nop // + bne a0,t4,10b // if ne, more 32-byte blocks to compare + addu a1,a1,32 // update source2 address + .set reorder + + subu a2,a3,a0 // compute remaining bytes + b 100f // + +// +// Source1 and Source2 are unaligned. 
+// + +CompareUnalignedS1AndS2: // + + .set noreorder +10: lwr t0,0(a0) // compare 32-byte block + lwl t0,3(a0) // + lwr t1,0(a1) // + lwl t1,3(a1) // + lwr t2,4(a0) // + lwl t2,7(a0) // + bne t0,t1,90f // if ne, first word not equal + lwr t3,4(a1) // + lwl t3,7(a1) // + lwr t0,8(a0) // + lwl t0,11(a0) // + bne t2,t3,20f // if ne, second word not equal + lwr t1,8(a1) // + lwl t1,11(a1) // + lwr t2,12(a0) // + lwl t2,15(a0) // + bne t0,t1,30f // if ne, third word not equal + lwr t3,12(a1) // + lwl t3,15(a1) // + lwr t0,16(a0) // + lwl t0,19(a0) // + bne t2,t3,40f // if ne, fourth word not equal + lwr t1,16(a1) // + lwl t1,19(a1) // + lwr t2,20(a0) // + lwl t2,23(a0) // + bne t0,t1,50f // if ne, fifth word not equal + lwr t3,20(a1) // + lwl t3,23(a1) // + lwr t0,24(a0) // + lwl t0,27(a0) // + bne t2,t3,60f // if ne, sixth word not equal + lwr t1,24(a1) // + lwl t1,27(a1) // + lwr t2,28(a0) // + lwl t2,31(a0) // + bne t0,t1,70f // if ne, seventh word not equal + lwr t3,28(a1) // + lwl t3,31(a1) // + addu a0,a0,32 // advance source1 to next block + bne t2,t3,80f // if ne, eighth word not equal + nop // + bne a0,t4,10b // if ne, more 32-byte blocks to compare + addu a1,a1,32 // update source2 address + .set reorder + + subu a2,a3,a0 // compute remaining bytes + b 100f // + +// +// Adjust source1 and source2 pointers dependent on position of miscompare in +// block. +// + +20: addu a0,a0,4 // mismatch on second word + addu a1,a1,4 // + b 90f // + +30: addu a0,a0,8 // mismatch on third word + addu a1,a1,8 // + b 90f // + +40: addu a0,a0,12 // mistmatch on fourth word + addu a1,a1,12 // + b 90f // + +50: addu a0,a0,16 // mismatch on fifth word + addu a1,a1,16 // + b 90f // + +60: addu a0,a0,20 // mismatch on sixth word + addu a1,a1,20 // + b 90f // + +70: addu a0,a0,24 // mismatch on seventh word + addu a1,a1,24 // + b 90f // + +80: subu a0,a0,4 // mismatch on eighth word + addu a1,a1,28 // +90: subu a2,a3,a0 // compute remaining bytes + +// +// Compare 1-byte blocks. +// + +100: addu t2,a0,a2 // compute ending block address + beq zero,a2,120f // if eq, buffers equal +110: lb t0,0(a0) // compare 1-byte block + lb t1,0(a1) // + addu a1,a1,1 // advance pointers to next block + bne t0,t1,130f // if ne, byte not equal + addu a0,a0,1 // + bne a0,t2,110b // if ne, more 1-byte block to zero + +120: move v0,zero // source1 == source2 + j ra // return + +130: sltu v0,t1,t0 // compare source1 to source2 + beq v0,zero,140f + j ra // return, source1 > source2 +140: + li v0,-1 + j ra // return, source1 < source2 + + .end memcmp + + SBTTL("Move Memory") +//++ +// +// VOID +// RtlMoveMemory ( +// IN PVOID Destination, +// IN PVOID Source, +// IN ULONG Length +// ) +// +// Routine Description: +// +// This function moves memory either forward or backward, aligned or +// unaligned, in 32-byte blocks, followed by 4-byte blocks, followed +// by any remaining bytes. +// +// Arguments: +// +// Destination (a0) - Supplies a pointer to the destination address of +// the move operation. +// +// Source (a1) - Supplies a pointer to the source address of the move +// operation. +// +// Length (a2) - Supplies the length, in bytes, of the memory to be moved. +// +// Return Value: +// +// None. +// +// N.B. The C runtime entry points memmove and memcpy are equivalent to +// RtlMoveMemory thus alternate entry points are provided for these +// routines. 
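+//
+// In C, the forward/backward decision reduces to this (illustrative
+// sketch only, treating the pointers as byte pointers):
+//
+//      if (Source >= Destination || Source + Length <= Destination) {
+//          // regions do not overlap harmfully: copy forward
+//      } else {
+//          // Source < Destination < Source + Length: copy backward,
+//          // from the last byte down, so each byte is read before
+//          // it is overwritten
+//      }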
+//-- + + LEAF_ENTRY(memmove) + j memcpy + .end memmove + + LEAF_ENTRY(memcpy) + + move v0,a0 // return destination + +// +// If the source address is less than the destination address and source +// address plus the length of the move is greater than the destination +// address, then the source and destination overlap such that the move +// must be performed backwards. +// + +10: bgeu a1,a0,MoveForward // if geu, no overlap possible + addu t0,a1,a2 // compute source ending address + bgtu t0,a0,MoveBackward // if gtu, source and destination overlap + +// +// Move memory forward aligned and unaligned. +// + +MoveForward: // + sltu t0,a2,4 // check if less than four bytes + bne zero,t0,50f // if ne, less than four bytes to move + xor t0,a0,a1 // compare alignment bits + and t0,t0,0x3 // isolate alignment comparison + bne zero,t0,MoveForwardUnaligned // if ne, incompatible alignment + +// +// Move memory forward aligned. +// + +MoveForwardAligned: // + subu t0,zero,a0 // compute bytes until aligned + and t0,t0,0x3 // isolate residual byte count + subu a2,a2,t0 // reduce number of bytes to move + beq zero,t0,10f // if eq, already aligned + lwr t1,0(a1) // move unaligned bytes + swr t1,0(a0) // + addu a0,a0,t0 // align destination address + addu a1,a1,t0 // align source address + +// +// Check for 32-byte blocks to move. +// + +10: and t0,a2,32 - 1 // isolate residual bytes + subu t1,a2,t0 // subtract out residual bytes + addu t8,a0,t1 // compute ending block address + beq zero,t1,30f // if eq, no 32-byte block to zero + move a2,t0 // set residual number of bytes + +// +// Move 32-byte blocks. +// + +#if defined(R4000) + + and t0,a0,1 << 2 // check if destination quadword aligned + beq zero,t0,15f // if eq, destination quadword aligned + lw t0,0(a1) // get source longword + addu a1,a1,4 // align source address + sw t0,0(a0) // store destination longword + addu a0,a0,4 // align destination address + addu a2,a2,t1 // recompute bytes to move + subu a2,a2,4 // reduce count by 4 + b 10b // + +// +// The destination is quadword aligned, check the source operand. +// + +15: and t0,a1,1 << 2 // check if source quadword aligned + beq zero,t0,22f // if eq, source quadword aligned + +// +// The source is longword aligned and the destination is quadword aligned. +// + + .set noreorder +20: lwc1 f0,0(a1) // move 32-byte block + lwc1 f1,4(a1) // + lwc1 f2,8(a1) // + lwc1 f3,12(a1) // + lwc1 f4,16(a1) // + lwc1 f5,20(a1) // + lwc1 f6,24(a1) // + lwc1 f7,28(a1) // + sdc1 f0,0(a0) // + sdc1 f2,8(a0) // + sdc1 f4,16(a0) // + sdc1 f6,24(a0) // + addu a0,a0,32 // advance pointers to next block + bne a0,t8,20b // if ne, more 32-byte blocks to zero + addu a1,a1,32 // + .set reorder + + b 30f // + +// +// Both the source and the destination are quadword aligned. +// + +22: and t0,t1,1 << 5 // test if even number of 32-byte blocks + beq zero,t0,26f // if eq, even number of 32-byte blocks + +// +// Move one 32-byte block quadword aligned. +// + + .set noreorder + ldc1 f0,0(a1) // move 32-byte block + ldc1 f2,8(a1) // + ldc1 f4,16(a1) // + ldc1 f6,24(a1) // + sdc1 f0,0(a0) // + sdc1 f2,8(a0) // + sdc1 f4,16(a0) // + sdc1 f6,24(a0) // + addu a0,a0,32 // advance pointers to next block + beq a0,t8,30f // if eq, end of block + addu a1,a1,32 // + .set reorder + +// +// Move 64-byte blocks quadword aligned. 
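+//
+// ldc1/sdc1 move eight bytes per instruction through the floating
+// registers, twice the bandwidth of lw/sw.  The C shape of one such
+// 32-byte iteration (an illustrative sketch; ULONGLONG stands for any
+// 64-bit unsigned type, and both pointers are assumed to have been
+// brought to 8-byte alignment as above):
+//
+//      ULONGLONG *f = (ULONGLONG *)Source;
+//      ULONGLONG *t = (ULONGLONG *)Destination;
+//
+//      t[0] = f[0]; t[1] = f[1]; t[2] = f[2]; t[3] = f[3];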
+// + + .set noreorder +26: ldc1 f0,0(a1) // move 64-byte block + ldc1 f2,8(a1) // + ldc1 f4,16(a1) // + ldc1 f6,24(a1) // + ldc1 f8,32(a1) // + ldc1 f10,40(a1) // + ldc1 f12,48(a1) // + ldc1 f14,56(a1) // + sdc1 f0,0(a0) // + sdc1 f2,8(a0) // + sdc1 f4,16(a0) // + sdc1 f6,24(a0) // + sdc1 f8,32(a0) // + sdc1 f10,40(a0) // + sdc1 f12,48(a0) // + sdc1 f14,56(a0) // + addu a0,a0,64 // advance pointers to next block + bne a0,t8,26b // if ne, more 64-byte blocks to zero + addu a1,a1,64 // + .set reorder + +#endif + +// +// The source is longword aligned and the destination is longword aligned. +// + +#if defined(R3000) + + .set noreorder +20: lw t0,0(a1) // move 32-byte block + lw t1,4(a1) // + lw t2,8(a1) // + lw t3,12(a1) // + lw t4,16(a1) // + lw t5,20(a1) // + lw t6,24(a1) // + lw t7,28(a1) // + sw t0,0(a0) // + sw t1,4(a0) // + sw t2,8(a0) // + sw t3,12(a0) // + sw t4,16(a0) // + sw t5,20(a0) // + sw t6,24(a0) // + sw t7,28(a0) // + addu a0,a0,32 // advance pointers to next block + bne a0,t8,20b // if ne, more 32-byte blocks to zero + addu a1,a1,32 // + .set reorder + +#endif + +// +// Check for 4-byte blocks to move. +// + +30: and t0,a2,4 - 1 // isolate residual bytes + subu t1,a2,t0 // subtract out residual bytes + addu t2,a0,t1 // compute ending block address + beq zero,t1,50f // if eq, no 4-byte block to zero + move a2,t0 // set residual number of bytes + +// +// Move 4-byte block. +// + + .set noreorder +40: lw t0,0(a1) // move 4-byte block + addu a0,a0,4 // advance pointers to next block + sw t0,-4(a0) // + bne a0,t2,40b // if ne, more 4-byte blocks to zero + addu a1,a1,4 // + .set reorder + +// +// Move 1-byte blocks. +// + +50: addu t2,a0,a2 // compute ending block address + beq zero,a2,70f // if eq, no bytes to zero + + .set noreorder +60: lb t0,0(a1) // move 1-byte block + addu a0,a0,1 // advance pointers to next block + sb t0,-1(a0) // + bne a0,t2,60b // if ne, more 1-byte block to zero + addu a1,a1,1 // + .set reorder + +70: j ra // return + +// +// Move memory forward unaligned. +// + +MoveForwardUnaligned: // + subu t0,zero,a0 // compute bytes until aligned + and t0,t0,0x3 // isolate residual byte count + subu a2,a2,t0 // reduce number of bytes to move + beq zero,t0,10f // if eq, already aligned + lwr t1,0(a1) // move unaligned bytes + lwl t1,3(a1) // + swr t1,0(a0) // + addu a0,a0,t0 // align destination address + addu a1,a1,t0 // update source address + +// +// Check for 32-byte blocks to move. +// + +10: and t0,a2,32 - 1 // isolate residual bytes + subu t1,a2,t0 // subtract out residual bytes + addu t8,a0,t1 // compute ending block address + beq zero,t1,30f // if eq, no 32-byte block to zero + move a2,t0 // set residual number of bytes + +// +// Move 32-byte block. +// + + .set noreorder +20: lwr t0,0(a1) // move 32-byte block + lwl t0,3(a1) // + lwr t1,4(a1) // + lwl t1,7(a1) // + lwr t2,8(a1) // + lwl t2,11(a1) // + lwr t3,12(a1) // + lwl t3,15(a1) // + lwr t4,16(a1) // + lwl t4,19(a1) // + lwr t5,20(a1) // + lwl t5,23(a1) // + lwr t6,24(a1) // + lwl t6,27(a1) // + lwr t7,28(a1) // + lwl t7,31(a1) // + sw t0,0(a0) // + sw t1,4(a0) // + sw t2,8(a0) // + sw t3,12(a0) // + sw t4,16(a0) // + sw t5,20(a0) // + sw t6,24(a0) // + sw t7,28(a0) // + addu a0,a0,32 // advance pointers to next block + bne a0,t8,20b // if ne, more 32-byte blocks to zero + addu a1,a1,32 // + .set reorder + +// +// Check for 4-byte blocks to move. 
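+//
+// Note the asymmetry in the unaligned loop above: the destination was
+// aligned first, so plain sw stores suffice and only the source side
+// needs the lwr/lwl pairs.  One word step in C (an illustrative
+// sketch, reusing the LoadUnaligned model from the compare code):
+//
+//      *(ULONG *)Destination = LoadUnaligned(Source);
+//      Destination += 4;
+//      Source += 4;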
+// + +30: and t0,a2,4 - 1 // isolate residual bytes + subu t1,a2,t0 // subtract out residual bytes + addu t2,a0,t1 // compute ending block address + beq zero,t1,50f // if eq, no 4-byte block to zero + move a2,t0 // set residual number of bytes + +// +// Move 4-byte block. +// + + .set noreorder +40: lwr t0,0(a1) // move 4-byte block + lwl t0,3(a1) // + addu a0,a0,4 // advance pointers to next block + sw t0,-4(a0) // + bne a0,t2,40b // if ne, more 4-byte blocks to zero + addu a1,a1,4 // + .set reorder + +// +// Move 1-byte blocks. +// + +50: addu t2,a0,a2 // compute ending block address + beq zero,a2,70f // if eq, no bytes to zero + + .set noreorder +60: lb t0,0(a1) // move 1-byte block + addu a0,a0,1 // advance pointers to next block + sb t0,-1(a0) // + bne a0,t2,60b // if ne, more 1-byte block to zero + addu a1,a1,1 // + .set reorder + +70: j ra // return + +// +// Move memory backward. +// + +MoveBackward: // + addu a0,a0,a2 // compute ending destination address + addu a1,a1,a2 // compute ending source address + sltu t0,a2,4 // check if less than four bytes + bne zero,t0,50f // if ne, less than four bytes to move + xor t0,a0,a1 // compare alignment bits + and t0,t0,0x3 // isolate alignment comparison + bne zero,t0,MoveBackwardUnaligned // if ne, incompatible alignment + +// +// Move memory backward aligned. +// + +MoveBackwardAligned: // + and t0,a0,0x3 // isolate residual byte count + subu a2,a2,t0 // reduce number of bytes to move + beq zero,t0,10f // if eq, already aligned + lwl t1,-1(a1) // move unaligned bytes + swl t1,-1(a0) // + subu a0,a0,t0 // align destination address + subu a1,a1,t0 // align source address + +// +// Check for 32-byte blocks to move. +// + +10: and t0,a2,32 - 1 // isolate residual bytes + subu t1,a2,t0 // subtract out residual bytes + subu t8,a0,t1 // compute ending block address + beq zero,t1,30f // if eq, no 32-byte block to zero + move a2,t0 // set residual number of bytes + +// +// Move 32-byte block. +// + +#if defined(R4000) + + and t0,a0,1 << 2 // check if destination quadword aligned + beq zero,t0,15f // if eq, destination quadword aligned + lw t0,-4(a1) // get source longword + subu a1,a1,4 // align source address + sw t0,-4(a0) // store destination longword + subu a0,a0,4 // align destination address + addu a2,a2,t1 // recompute byte to move + subu a2,a2,4 // reduce count by 4 + b 10b // + +// +// The destination is quadword aligned, check the source operand. +// + +15: and t0,a1,1 << 2 // check if source quadword aligned + beq zero,t0,22f // if eq, source quadword aligned + +// +// The source is longword aligned and the destination is quadword aligned. +// + + .set noreorder +20: lwc1 f1,-4(a1) // move 32-byte block + lwc1 f0,-8(a1) // + lwc1 f3,-12(a1) // + lwc1 f2,-16(a1) // + lwc1 f5,-20(a1) // + lwc1 f4,-24(a1) // + lwc1 f7,-28(a1) // + lwc1 f6,-32(a1) // + sdc1 f0,-8(a0) // + sdc1 f2,-16(a0) // + sdc1 f4,-24(a0) // + sdc1 f6,-32(a0) // + subu a0,a0,32 // advance pointers to next block + bne a0,t8,20b // if ne, more 32-byte blocks to zero + subu a1,a1,32 // + .set reorder + + b 30f // + +// +// Both the source and the destination are quadword aligned. +// + +22: and t0,t1,1 << 5 // test if even number of 32-byte blocks + beq zero,t0,26f // if eq, even number of 32-byte blocks + +// +// Move one 32-byte block quadword aligned. 
+// + + .set noreorder + ldc1 f0,-8(a1) // move 32-byte block + ldc1 f2,-16(a1) // + ldc1 f4,-24(a1) // + ldc1 f6,-32(a1) // + sdc1 f0,-8(a0) // + sdc1 f2,-16(a0) // + sdc1 f4,-24(a0) // + sdc1 f6,-32(a0) // + subu a0,a0,32 // advance pointers to next block + beq a0,t8,30f // if eq, end of block + subu a1,a1,32 // + .set reorder + +// +// Move 64-byte blocks quadword aligned. +// + + .set noreorder +26: ldc1 f0,-8(a1) // move 64-byte block + ldc1 f2,-16(a1) // + ldc1 f4,-24(a1) // + ldc1 f6,-32(a1) // + ldc1 f8,-40(a1) // + ldc1 f10,-48(a1) // + ldc1 f12,-56(a1) // + ldc1 f14,-64(a1) // + sdc1 f0,-8(a0) // + sdc1 f2,-16(a0) // + sdc1 f4,-24(a0) // + sdc1 f6,-32(a0) // + sdc1 f8,-40(a0) // + sdc1 f10,-48(a0) // + sdc1 f12,-56(a0) // + sdc1 f14,-64(a0) // + subu a0,a0,64 // advance pointers to next block + bne a0,t8,26b // if ne, more 64-byte blocks to zero + subu a1,a1,64 // + .set reorder + +#endif + +// +// The source is longword aligned and the destination is longword aligned. +// + +#if defined(R3000) + + .set noreorder +20: lw t0,-4(a1) // move 32-byte block + lw t1,-8(a1) // + lw t2,-12(a1) // + lw t3,-16(a1) // + lw t4,-20(a1) // + lw t5,-24(a1) // + lw t6,-28(a1) // + lw t7,-32(a1) // + sw t0,-4(a0) // + sw t1,-8(a0) // + sw t2,-12(a0) // + sw t3,-16(a0) // + sw t4,-20(a0) // + sw t5,-24(a0) // + sw t6,-28(a0) // + sw t7,-32(a0) // + subu a0,a0,32 // advance pointers to next block + bne a0,t8,20b // if ne, more 32-byte blocks to zero + subu a1,a1,32 // + .set reorder + +#endif + +// +// Check for 4-byte blocks to move. +// + +30: and t0,a2,4 - 1 // isolate residual bytes + subu t1,a2,t0 // subtract out residual bytes + subu t2,a0,t1 // compute ending block address + beq zero,t1,50f // if eq, no 4-byte block to zero + move a2,t0 // set residual number of bytes + +// +// Move 4-byte block. +// + + .set noreorder +40: lw t0,-4(a1) // move 4-byte block + subu a0,a0,4 // advance pointers to next block + sw t0,0(a0) // + bne a0,t2,40b // if ne, more 4-byte blocks to zero + subu a1,a1,4 // + .set reorder + +// +// Move 1-byte blocks. +// + +50: subu t2,a0,a2 // compute ending block address + beq zero,a2,70f // if eq, no bytes to zero + + .set noreorder +60: lb t0,-1(a1) // move 1-byte block + subu a0,a0,1 // advance pointers to next block + sb t0,0(a0) // + bne a0,t2,60b // if ne, more 1-byte block to zero + subu a1,a1,1 // + .set reorder + +70: j ra // return + +// +// Move memory backward unaligned. +// + +MoveBackwardUnaligned: // + and t0,a0,0x3 // isolate residual byte count + subu a2,a2,t0 // reduce number of bytes to move + beq zero,t0,10f // if eq, already aligned + lwl t1,-1(a1) // move unaligned bytes + lwr t1,-4(a1) // + swl t1,-1(a0) // + subu a0,a0,t0 // align destination address + subu a1,a1,t0 // update source address + +// +// Check for 32-byte blocks to move. +// + +10: and t0,a2,32 - 1 // isolate residual bytes + subu t1,a2,t0 // subtract out residual bytes + subu t8,a0,t1 // compute ending block address + beq zero,t1,30f // if eq, no 32-byte block to zero + move a2,t0 // set residual number of bytes + +// +// Move 32-byte block. 
+// + + .set noreorder +20: lwr t0,-4(a1) // move 32-byte block + lwl t0,-1(a1) // + lwr t1,-8(a1) // + lwl t1,-5(a1) // + lwr t2,-12(a1) // + lwl t2,-9(a1) // + lwr t3,-16(a1) // + lwl t3,-13(a1) // + lwr t4,-20(a1) // + lwl t4,-17(a1) // + lwr t5,-24(a1) // + lwl t5,-21(a1) // + lwr t6,-28(a1) // + lwl t6,-25(a1) // + lwr t7,-32(a1) // + lwl t7,-29(a1) // + sw t0,-4(a0) // + sw t1,-8(a0) // + sw t2,-12(a0) // + sw t3,-16(a0) // + sw t4,-20(a0) // + sw t5,-24(a0) // + sw t6,-28(a0) // + sw t7,-32(a0) // + subu a0,a0,32 // advance pointers to next block + bne a0,t8,20b // if ne, more 32-byte blocks to zero + subu a1,a1,32 // + .set reorder + +// +// Check for 4-byte blocks to move. +// + +30: and t0,a2,4 - 1 // isolate residual bytes + subu t1,a2,t0 // subtract out residual bytes + subu t2,a0,t1 // compute ending block address + beq zero,t1,50f // if eq, no 4-byte block to zero + move a2,t0 // set residual number of bytes + +// +// Move 4-byte block. +// + + .set noreorder +40: lwr t0,-4(a1) // move 4-byte block + lwl t0,-1(a1) // + subu a0,a0,4 // advance pointers to next block + sw t0,0(a0) // + bne a0,t2,40b // if ne, more 4-byte blocks to zero + subu a1,a1,4 // + .set reorder + +// +// Move 1-byte blocks. +// + +50: subu t2,a0,a2 // compute ending block address + beq zero,a2,70f // if eq, no bytes to zero + + .set noreorder +60: lb t0,-1(a1) // move 1-byte block + subu a0,a0,1 // advance pointers to next block + sb t0,0(a0) // + bne a0,t2,60b // if ne, more 1-byte block to zero + subu a1,a1,1 // + .set reorder + +70: j ra // return + + .end memcpy + + SBTTL("Fill Memory") +//++ +// +// VOID +// RtlFillMemory ( +// IN PVOID Destination, +// IN ULONG Length, +// IN UCHAR Fill +// ) +// +// Routine Description: +// +// This function fills memory by first aligning the destination address to +// a longword boundary, and then filling 32-byte blocks, followed by 4-byte +// blocks, followed by any remaining bytes. +// +// Arguments: +// +// Destination (a0) - Supplies a pointer to the memory to fill. +// +// Length (a1) - Supplies the length, in bytes, of the memory to be filled. +// +// Fill (a2) - Supplies the fill byte. +// +// N.B. The alternate entry memset expects the length and fill arguments +// to be reversed. +// +// Return Value: +// +// None. +// +//-- + + LEAF_ENTRY(memset) + + move a3,a1 // swap length and fill arguments + move a1,a2 // + move a2,a3 // + move v0,a0 // return destination + + and a2,a2,0xff // clear excess bits + sll t0,a2,8 // duplicate fill byte + or a2,a2,t0 // generate fill word + sll t0,a2,16 // duplicate fill word + or a2,a2,t0 // generate fill longword + +// +// Fill memory with the pattern specified in register a2. +// + +#if DBG + + mtc1 a2,f0 // set pattern to store + mtc1 a2,f1 // + +#endif + + subu t0,zero,a0 // compute bytes until aligned + and t0,t0,0x3 // isolate residual byte count + subu t1,a1,t0 // reduce number of bytes to fill + blez t1,60f // if lez, less than 4 bytes to fill + move a1,t1 // set number of bytes to fill + beq zero,t0,10f // if eq, already aligned + swr a2,0(a0) // fill unaligned bytes + addu a0,a0,t0 // align destination address + +// +// Check for 32-byte blocks to fill. +// + +10: and t0,a1,32 - 1 // isolate residual bytes + subu t1,a1,t0 // subtract out residual bytes + addu t2,a0,t1 // compute ending block address + beq zero,t1,40f // if eq, no 32-byte blocks to fill + move a1,t0 // set residual number of bytes + +// +// Fill 32-byte blocks. 
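+//
+// At this point the fill byte has been replicated across a full word
+// by the shift/or pairs above; the C equivalent (illustrative sketch):
+//
+//      ULONG Pattern = Fill & 0xff;
+//
+//      Pattern |= Pattern << 8;        // byte     -> halfword
+//      Pattern |= Pattern << 16;       // halfword -> word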
+// + +#if defined(R4000) + + and t0,a0,1 << 2 // check if destintion quadword aligned + beq zero,t0,20f // if eq, yes + sw a2,0(a0) // store destination longword + addu a0,a0,4 // align destination address + addu a1,a1,t1 // recompute bytes to fill + subu a1,a1,4 // reduce count by 4 + b 10b // + +// +// The destination is quadword aligned. +// + +20: mtc1 a2,f0 // set pattern value + mtc1 a2,f1 // + and t0,t1,1 << 5 // test if even number of 32-byte blocks + beq zero,t0,30f // if eq, even number of 32-byte blocks + +// +// Fill one 32-byte block. +// + + .set noreorder + sdc1 f0,0(a0) // fill 32-byte block + sdc1 f0,8(a0) // + sdc1 f0,16(a0) // + addu a0,a0,32 // advance pointer to next block + beq a0,t2,40f // if ne, no 64-byte blocks to fill + sdc1 f0,-8(a0) // + .set reorder + +// +// Fill 64-byte block. +// + + .set noreorder +30: sdc1 f0,0(a0) // fill 32-byte block + sdc1 f0,8(a0) // + sdc1 f0,16(a0) // + sdc1 f0,24(a0) // + sdc1 f0,32(a0) // + sdc1 f0,40(a0) // + sdc1 f0,48(a0) // + addu a0,a0,64 // advance pointer to next block + bne a0,t2,30b // if ne, more 32-byte blocks to fill + sdc1 f0,-8(a0) // + .set reorder + +#endif + +// +// Fill 32-byte blocks. +// + +#if defined(R3000) + + .set noreorder +20: sw a2,0(a0) // fill 32-byte block + sw a2,4(a0) // + sw a2,8(a0) // + sw a2,12(a0) // + addu a0,a0,32 // advance pointer to next block + sw a2,-4(a0) // + sw a2,-8(a0) // + sw a2,-12(a0) // + bne a0,t2,20b // if ne, more 32-byte blocks to fill + sw a2,-16(a0) // + .set reorder + +#endif + +// +// Check for 4-byte blocks to fill. +// + +40: and t0,a1,4 - 1 // isolate residual bytes + subu t1,a1,t0 // subtract out residual bytes + addu t2,a0,t1 // compute ending block address + beq zero,t1,60f // if eq, no 4-byte block to fill + move a1,t0 // set residual number of bytes + +// +// Fill 4-byte blocks. +// + + .set noreorder +50: addu a0,a0,4 // advance pointer to next block + bne a0,t2,50b // if ne, more 4-byte blocks to fill + sw a2,-4(a0) // fill 4-byte block + .set reorder + +// +// Check for 1-byte blocks to fill. +// + +60: addu t2,a0,a1 // compute ending block address + beq zero,a1,80f // if eq, no bytes to fill + +// +// Fill 1-byte blocks. +// + + .set noreorder +70: addu a0,a0,1 // advance pointer to next block + bne a0,t2,70b // if ne, more 1-byte block to fill + sb a2,-1(a0) // fill 1-byte block + .set reorder + +#if DBG + +80: mfc1 t0,f0 // get fill pattern + mfc1 t1,f1 // + bne t0,a2,90f // if ne, pattern altered + bne t1,a2,90f // if ne, pattern altered + j ra // return + +90: break KERNEL_BREAKPOINT // + +#else + +80: j ra // return + +#endif + + .end memset diff --git a/private/crt32/string/mips/memsetm.s b/private/crt32/string/mips/memsetm.s new file mode 100644 index 000000000..a53f8f0a1 --- /dev/null +++ b/private/crt32/string/mips/memsetm.s @@ -0,0 +1,105 @@ +/* --------------------------------------------------- */ +/* | Copyright (c) 1986 MIPS Computer Systems, Inc. | */ +/* | All Rights Reserved. | */ +/* --------------------------------------------------- */ +/* $Revision: 1.3 $ */ + +/* + * char * + * memset(s, c, n) + * register char * s; + * register c, n; + * { + * register char * p = s; + * + * while (--n >= 0) + * *s++ = c; + * + * return (p); + * } + */ + +/* + * Copyright 1986 by MIPS Computer Systems, Inc. 
+ */ + +#include + +#define NBPW 4 + +/* + * memset(dst, c, bcount) + * set block of memory with blanks + * + * Calculating MINSET, assuming 10% cache-miss on non-loop code: + * Overhead =~ 18 instructions => 28 (30) cycles + * Byte set =~ 12 (24) cycles/word for 08M44 (08V11) + * Word set =~ 3 (5) cycles/word for 08M44 (08V11) + * If I-cache-miss nears 0, MINSET ==> 4 bytes; otherwise, times are: + * breakeven (MEM) = 28 / (12 - 3) =~ 3 words + * breakeven (VME) = 30 / (24 - 5) =~ 1.5 words + * Since the overhead is pessimistic (worst-case alignment), and many calls + * will be for well-aligned data, and since Word-set at least leaves + * the set in the cache, we shade these values (6-12) down to 8 bytes + */ +#define MINSET 8 + +/* It turns out better to think of lwl/lwr and swl/swr as + smaller-vs-bigger address rather than left-vs-right. + Such a representation makes the code endian-independent. */ + +#define LWS lwr +#define LWB lwl +#define SWS swr +#define SWB swl + +LEAF_ENTRY(memset) + move v0,a0 # return first argument; BDSLOT + blt a2,MINSET,byteset + subu v1,zero,a0 # number of bytes til aligned; BDSLOT + beq a1,$0,1f # make memset(s, 0, n) faster + sll t0,a1,8 + or a1,t0 + sll t0,a1,16 + or a1,t0 +1: and v1,NBPW-1 + subu a2,v1 # adjust count; BDSLOT + beq v1,zero,blkset # already aligned + SWS a1,0(a0) + addu a0,v1 + +/* + * set 8 byte, aligned block (no point in unrolling further, + * since maximum write rate in M/500 is two cycles/word write) + */ +blkset: + and t0,a2,NBPW+NBPW-1 # count after by-8-byte loop done + subu a3,a2,t0 # total in 8 byte chunks; BDSLOT + beq a2,t0,wordset # less than 8 bytes to set + addu a3,a0 # dst endpoint +1: addu a0,NBPW+NBPW + sw a1,-NBPW-NBPW(a0) + sw a1,-NBPW(a0) + bne a0,a3,1b + move a2,t0 # set end-of loop count + +/* + * do a word (if required) this is not a loop since loop above + * guarantees that at most one word must be written here. + */ +wordset: + and t0,a2,NBPW # count after by-word non-loop done + subu a2,t0 # adjust count; BDSLOT + beq t0,zero,byteset # less than word to set + sw a1,0(a0) + addu a0,NBPW + +byteset: + addu a3,a2,a0 # dst endpoint; BDSLOT + ble a2,zero,setdone +1: addu a0,1 + sb a1,-1(a0) + bne a0,a3,1b +setdone: + j ra +.end memset diff --git a/private/crt32/string/mips/memsett.c b/private/crt32/string/mips/memsett.c new file mode 100644 index 000000000..c653803ab --- /dev/null +++ b/private/crt32/string/mips/memsett.c @@ -0,0 +1,20 @@ +char buffer[100]; +#include +#include + +void main() +{ + char *f = buffer; + char *g = buffer; + + printf("%8.8x\n", f); + f=(char*)memset(f,0x0a,12); + printf("%8.8x\n", f); + + if (f == g) { + int k = 12; + while (k--) + printf("%2.2x", *f++); + } +} + diff --git a/private/crt32/string/mips/strcatm.s b/private/crt32/string/mips/strcatm.s new file mode 100644 index 000000000..0c22c47de --- /dev/null +++ b/private/crt32/string/mips/strcatm.s @@ -0,0 +1,98 @@ +/* ------------------------------------------------------------------ */ +/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */ +/* | Reserved. This software contains proprietary and confidential | */ +/* | information of MIPS and its suppliers. Use, disclosure or | */ +/* | reproduction is prohibited without the prior express written | */ +/* | consent of MIPS. | */ +/* ------------------------------------------------------------------ */ +/* strcat.s 1.1 */ + +/* This function is an assembly-code replacement for the libc function + * strcat. 
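+ *
+ * In C the job is "find the terminator, then do a strcpy" (an
+ * illustrative sketch only; the name strcat_model is invented):
+ *
+ *      char *strcat_model(char *dst, const char *src)
+ *      {
+ *          char *d = dst;
+ *
+ *          while (*d)                  // the $findz loop below
+ *              d++;
+ *          while ((*d++ = *src++))     // the copy loop
+ *              ;
+ *          return dst;
+ *      }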
+ + * strcat and strcpy are very similar, but we waste about 40 words of + * code when both are used, so that they can be independently replaced. + + * There are one caveat to consider: this function is written in + * assembler code, and as such, cannot be merged using the U-code + * loader. */ + +/* Craig Hansen - 3-September-86 */ + +#include + +/* It turns out better to think of lwl/lwr and swl/swr as + smaller-vs-bigger address rather than left-vs-right. + Such a representation makes the code endian-independent. */ + +#define LWS lwr +#define LWB lwl +#define SWS swr +#define SWB swl + +.text + +LEAF_ENTRY(strcat) +.set noreorder + // a0/ destination + // a1/ source + move v0, a0 # a copy of destination address is returned +$findz: lb t0,0(a0) + nop + bne t0,0,$findz + add a0,1 + // go back over null byte + add a0,-1 + // start up first word + // adjust pointers so that a0 points to next word + // t7 = a1 adjusted by same amount minus one + // t0,t1,t2,t3 are filled with 4 consecutive bytes + // t4 is filled with the same 4 bytes in a single word + lb t0, 0(a1) + ori t5, a0, 3 # get an early start + beq t0, 0, $doch0 + sub t6, t5, a0 # number of char in 1st word of dest - 1 + lb t1, 1(a1) + add t7, a1, t6 # offset starting point for source string + beq t1, 0, $doch1 + nop + lb t2, 2(a1) + nop + beq t2, 0, $doch2 + LWS t4, 0(a1) # safe: always in same word as 0(a1) + lb t3, 3(a1) + LWB t4, 3(a1) # fill out word + beq t3, 0, $doch3 + SWS t4, 0(a0) # store entire or part word + addi a0, t5, 1-4 # adjust destination ptr + + // inner loop +1: lb t0, 1(t7) + addi t7, 4 + beq t0, 0, $doch0 + addi a0, 4 + lb t1, 1+1-4(t7) + nop + beq t1, 0, $doch1 + nop + lb t2, 2+1-4(t7) + nop + beq t2, 0, $doch2 + LWS t4, 0+1-4(t7) + lb t3, 3+1-4(t7) + LWB t4, 3+1-4(t7) + bne t3, 0, 1b + sw t4, 0(a0) + j ra + nop + + // store four bytes using swl/swr +$doch3: j ra + SWB t4, 3(a0) + // store up to three bytes, a byte at a time. +$doch2: sb t2, 2(a0) +$doch1: sb t1, 1(a0) +$doch0: j ra + sb t0, 0(a0) + +.end strcat diff --git a/private/crt32/string/mips/strchrm.s b/private/crt32/string/mips/strchrm.s new file mode 100644 index 000000000..b92f79fd6 --- /dev/null +++ b/private/crt32/string/mips/strchrm.s @@ -0,0 +1,26 @@ +/* ------------------------------------------------------------------ */ +/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */ +/* | Reserved. This software contains proprietary and confidential | */ +/* | information of MIPS and its suppliers. Use, disclosure or | */ +/* | reproduction is prohibited without the prior express written | */ +/* | consent of MIPS. | */ +/* ------------------------------------------------------------------ */ +#ident "$Header: /disks/bits/5.1isms/irix/lib/libc/src/strings/RCS/index.s,v 1.3 1992/03/07 15:37:04 jleong Exp $" + +/* + * Copyright 1985 by MIPS Computer Systems, Inc. 
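+ *
+ * The routine below tests for the target character before testing for
+ * end of string, so it also matches the terminating NUL itself:
+ * strchr(s, '\0') returns a pointer to the terminator.  C model (an
+ * illustrative sketch only; the name strchr_model is invented):
+ *
+ *      char *strchr_model(const char *s, int c)
+ *      {
+ *          for (;; s++) {
+ *              if (*s == (char)c)
+ *                  return (char *)s;
+ *              if (*s == '\0')
+ *                  return 0;
+ *          }
+ *      }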
+ */
+
+#include "kxmips.h"
+
+LEAF_ENTRY(strchr)
+1:      lbu     a2,0(a0)
+        addu    a0,1
+        beq     a2,a1,2f
+        bne     a2,zero,1b
+        move    v0,zero
+        j       ra
+
+2:      subu    v0,a0,1
+        j       ra
+.end strchr
diff --git a/private/crt32/string/mips/strchrt.c b/private/crt32/string/mips/strchrt.c
new file mode 100644
index 000000000..1da4e1d0d
--- /dev/null
+++ b/private/crt32/string/mips/strchrt.c
@@ -0,0 +1,20 @@
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+
+void main( int argc, char **argv )
+    {
+    int c;
+    unsigned char *pstr;
+    unsigned char string[100];
+
+    strcpy(string, "ABCDEFGHIJKLMNOPQRST");
+    for (c = 'a'; c <= UCHAR_MAX; c++)
+        {
+        string[9] = c;
+        pstr = strchr( string, c);
+        if (!pstr)
+            printf("Fail - Could not find %d in %s\n", c, string);
+        }
+    return;
+    }
diff --git a/private/crt32/string/mips/strcmpm.s b/private/crt32/string/mips/strcmpm.s
new file mode 100644
index 000000000..dfaaa9c39
--- /dev/null
+++ b/private/crt32/string/mips/strcmpm.s
@@ -0,0 +1,50 @@
+/* ------------------------------------------------------------------ */
+/* | Copyright Unpublished, MIPS Computer Systems, Inc.  All Rights | */
+/* | Reserved.  This software contains proprietary and confidential | */
+/* | information of MIPS and its suppliers.  Use, disclosure or     | */
+/* | reproduction is prohibited without the prior express written   | */
+/* | consent of MIPS.                                               | */
+/* ------------------------------------------------------------------ */
+/* strcmp.s 1.1 */
+
+/* This function is an assembly-code replacement for
+   the libc function "strcmp." */
+/* Libc currently has a mips-specific C version that uses 7 instructions/byte.
+   (It claims to use 6 cycles/byte, but is wrong!)
+   This function uses an unrolled loop, which uses 5 instructions per byte.
+
+   Under some circumstances more characters are read than are
+   required for determining the collating order, but it
+   never reads beyond the end of either string.
+
+   There is one caveat to consider: this function is written
+   in assembler code, and as such, cannot be merged
+   using the U-code loader. */
+
+/* Craig Hansen - 6-June-86 */
+
+#include "kxmips.h"
+
+        .text
+
+LEAF_ENTRY(strcmp)
+
+        .set    noreorder
+        lbu     t0,0(a0)
+1:      lbu     t1,0(a1)
+        beq     t0,0,2f
+        addi    a0,2
+        bne     t0,t1,3f
+        lbu     t2,-1(a0)       # ok to load since -2(a0)!=0
+        lbu     t1,1(a1)
+        beq     t2,0,2f
+        addi    a1,2
+        beq     t2,t1,1b
+        lbu     t0,0(a0)        # ok to load since -1(a0) != 0
+        j       ra
+        subu    v0,t2,t1
+2:      j       ra
+        subu    v0,zero,t1
+3:      j       ra
+        subu    v0,t0,t1
+        .end strcmp
diff --git a/private/crt32/string/mips/strcpym.s b/private/crt32/string/mips/strcpym.s
new file mode 100644
index 000000000..4f5bc416a
--- /dev/null
+++ b/private/crt32/string/mips/strcpym.s
@@ -0,0 +1,148 @@
+/* ------------------------------------------------------------------ */
+/* | Copyright Unpublished, MIPS Computer Systems, Inc.  All Rights | */
+/* | Reserved.  This software contains proprietary and confidential | */
+/* | information of MIPS and its suppliers.  Use, disclosure or     | */
+/* | reproduction is prohibited without the prior express written   | */
+/* | consent of MIPS.                                               | */
+/* ------------------------------------------------------------------ */
+/* strcpy.s 1.2 */
+
+/* This function is an assembly-code replacement for the libc function
+ * strcpy.  It uses the MIPS special instructions "lwl", "lwr", "swl",
+ * and "swr", which handle unaligned words.
+
+ * The standard C version of this function is a 5-instruction loop,
+ * working one byte at a time:
+
+ * Copy string s2 to s1.  s1 must be large enough.
diff --git a/private/crt32/string/mips/strcpym.s b/private/crt32/string/mips/strcpym.s
new file mode 100644
index 000000000..4f5bc416a
--- /dev/null
+++ b/private/crt32/string/mips/strcpym.s
@@ -0,0 +1,148 @@
+/* ------------------------------------------------------------------ */
+/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */
+/* | Reserved. This software contains proprietary and confidential | */
+/* | information of MIPS and its suppliers. Use, disclosure or | */
+/* | reproduction is prohibited without the prior express written | */
+/* | consent of MIPS. | */
+/* ------------------------------------------------------------------ */
+/* strcpy.s 1.2 */
+
+/* This function is an assembly-code replacement for the libc function
+ * strcpy. It uses the MIPS special instructions "lwl", "lwr", "swl",
+ * and "swr", which handle unaligned words.
+
+ * The standard C version of this function is a 5-instruction loop,
+ * working one byte at a time:
+
+ * Copy string s2 to s1. s1 must be large enough.
+ * return s1
+ * char *strcpy(s1, s2)
+ * register char *s1, *s2;
+ * {
+ *     register char *os1;
+ *     os1 = s1;
+ *     while (*s1++ = *s2++);
+ *     return(os1);
+ * }
+
+ * A better C version is 4 cycles/byte. Loop is unrolled once.
+ * char *
+ * strcpy(s1, s2)
+ * register char *s1, *s2;
+ * {
+ *     register char *os1 = s1;
+ *     while (1) {
+ *         register unsigned c;
+ *         c = s2[0];
+ *         s2 += 2;
+ *         s1[0] = c;
+ *         if (c == 0) break;
+ *         c = s2[1-2];
+ *         s1 += 2;
+ *         s1[1-2] = c;
+ *         if (c == 0) break;
+ *     }
+ *     return(os1);
+ * }
+
+ * This function starts with an unrolled loop, which uses 5
+ * instructions per byte (including the store bytes at the end) for
+ * the first few bytes.
+
+ * After filling a word, the first word or portion of a word is saved
+ * using a "swl" instruction. If the start of destination string is at
+ * a word boundary, this leaves the result valid in the cache. Because
+ * this replaces up to 4 store byte instructions, we are still near 3
+ * instructions per byte, but there is only one write.
+
+ * The inner loop moves 4 bytes in 16 cycles, an average of 4 cycles
+ * per byte. This is 1 cycle faster than the standard C code, the
+ * same speed as the unrolled version, and it also leaves the result
+ * valid in the cache.
+
+ * Finally, when a zero byte is found, the end of the string is stored
+ * using store byte instructions. This adds one instruction per byte
+ * for as many as three bytes, but eliminates the up to four cycles of
+ * overhead we counted before.
+
+ * The end result is that this function is never slower than the C
+ * function, is faster by up to 30% in instruction count, uses up to
+ * 75% fewer writes, and leaves most of the result valid in the cache.
+
+ * There is one caveat to consider: this function is written in
+ * assembler code, and as such, cannot be merged using the U-code
+ * loader. */
+
+/* Craig Hansen - 3-September-86 */
+
+#include <kxmips.h>
+
+/* It turns out better to think of lwl/lwr and swl/swr as
+   smaller-vs-bigger address rather than left-vs-right.
+   Such a representation makes the code endian-independent. */
+
+#define LWS lwr
+#define LWB lwl
+#define SWS swr
+#define SWB swl
+
+.text
+
+LEAF_ENTRY(strcpy)
+.set noreorder
+ // a0/ destination
+ // a1/ source
+ move v0, a0 # a copy of destination address is returned
+ // start up first word
+ // adjust pointers so that a0 points to next word
+ // t7 = a1 adjusted by same amount minus one
+ // t0,t1,t2,t3 are filled with 4 consecutive bytes
+ // t4 is filled with the same 4 bytes in a single word
+ lb t0, 0(a1)
+ ori t5, a0, 3 # get an early start
+ beq t0, 0, $doch0
+ sub t6, t5, a0 # number of char in 1st word of dest - 1
+ lb t1, 1(a1)
+ add t7, a1, t6 # offset starting point for source string
+ beq t1, 0, $doch1
+ nop
+ lb t2, 2(a1)
+ nop
+ beq t2, 0, $doch2
+ LWS t4, 0(a1) # safe: always in same word as 0(a1)
+ lb t3, 3(a1)
+ LWB t4, 3(a1) # fill out word
+ beq t3, 0, $doch3
+ SWS t4, 0(a0) # store entire or part word
+ addi a0, t5, 1-4 # adjust destination ptr
+
+ // inner loop
+1: lb t0, 1(t7)
+ addi t7, 4
+ beq t0, 0, $doch0
+ addi a0, 4
+ lb t1, 1+1-4(t7)
+ nop
+ beq t1, 0, $doch1
+ nop
+ lb t2, 2+1-4(t7)
+ nop
+ beq t2, 0, $doch2
+ LWS t4, 0+1-4(t7)
+ lb t3, 3+1-4(t7)
+ LWB t4, 3+1-4(t7)
+ bne t3, 0, 1b
+ sw t4, 0(a0)
+ j ra
+ nop
+
+ // store four bytes using swl/swr
+$doch3: j ra
+ SWB t4, 3(a0)
+ // store up to three bytes, a byte at a time.
+$doch2: sb t2, 2(a0)
+$doch1: sb t1, 1(a0)
+$doch0: j ra
+ sb t0, 0(a0)
+
+.end strcpy
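[Not part of the patch: a C restatement of the strategy the strcpy.s comment describes. Four source bytes are gathered; if none is the terminator they are stored as one aligned word, otherwise the tail is finished with byte stores. The helper name strcpy_ref is hypothetical, and the word store is shown as four byte stores for portability.]

    /* Reference for the word-at-a-time loop and $doch exits above. */
    char *strcpy_ref(char *s1, const char *s2)
    {
        char *os1 = s1;

        for (;;) {
            unsigned char b0 = s2[0], b1, b2, b3;
            if (b0 == 0) break;                                      /* $doch0 */
            b1 = s2[1];
            if (b1 == 0) { s1[0] = b0; s1 += 1; break; }             /* $doch1 */
            b2 = s2[2];
            if (b2 == 0) { s1[0] = b0; s1[1] = b1; s1 += 2; break; } /* $doch2 */
            b3 = s2[3];
            s1[0] = b0; s1[1] = b1; s1[2] = b2;  /* on MIPS: one sw or swl */
            if (b3 == 0) { s1 += 3; break; }                         /* $doch3 */
            s1[3] = b3;
            s1 += 4; s2 += 4;                    /* inner loop */
        }
        *s1 = 0;                                 /* store the terminator */
        return os1;
    }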
diff --git a/private/crt32/string/mips/strcpyt.c b/private/crt32/string/mips/strcpyt.c
new file mode 100644
index 000000000..4d0d99279
--- /dev/null
+++ b/private/crt32/string/mips/strcpyt.c
@@ -0,0 +1,23 @@
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+
+#define SRCLEN 21 /* to avoid complicating errors */
+
+void main( int argc, char **argv )
+{
+    int c;
+    unsigned char *psrc, *pdst;
+    unsigned char src[SRCLEN] = "ABCDEFGHIJKLMNOPQRST";
+    unsigned char dst[100];
+
+    for (c = 'a'; c <= UCHAR_MAX; c++) {
+        src[9] = c;
+        strcpy( dst, src);
+        for (psrc = src, pdst = dst; *psrc; psrc++, pdst++) {
+            if (*psrc != *pdst) {
+                printf("Fail - Could not find '%c' 0x%x in %s\n", c, c, src);
+                break;
+            }
+        }
+    }
+}
diff --git a/private/crt32/string/mips/strlenm.s b/private/crt32/string/mips/strlenm.s
new file mode 100644
index 000000000..24027e0fc
--- /dev/null
+++ b/private/crt32/string/mips/strlenm.s
@@ -0,0 +1,19 @@
+/* ------------------------------------------------------------------ */
+/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */
+/* | Reserved. This software contains proprietary and confidential | */
+/* | information of MIPS and its suppliers. Use, disclosure or | */
+/* | reproduction is prohibited without the prior express written | */
+/* | consent of MIPS. | */
+/* ------------------------------------------------------------------ */
+/* strlen.s 1.1 */
+
+#include <kxmips.h>
+
+LEAF_ENTRY(strlen)
+ subu v0,a0,1
+1: lbu v1,1(v0)
+ add v0,1
+ bne v1,zero,1b
+ subu v0,v0,a0
+ j ra
+ .end strlen
diff --git a/private/crt32/string/mips/strrchrm.s b/private/crt32/string/mips/strrchrm.s
new file mode 100644
index 000000000..feb1f7945
--- /dev/null
+++ b/private/crt32/string/mips/strrchrm.s
@@ -0,0 +1,24 @@
+/* ------------------------------------------------------------------ */
+/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */
+/* | Reserved. This software contains proprietary and confidential | */
+/* | information of MIPS and its suppliers. Use, disclosure or | */
+/* | reproduction is prohibited without the prior express written | */
+/* | consent of MIPS. | */
+/* ------------------------------------------------------------------ */
+#ident "$Header: /disks/bits/5.1isms/irix/lib/libc/src/strings/RCS/rindex.s,v 1.3 1992/03/07 15:37:36 jleong Exp $"
+
+/*
+ * Copyright 1985 by MIPS Computer Systems, Inc.
+ */
+
+#include "kxmips.h"
+
+LEAF_ENTRY(strrchr)
+ move v0,zero
+1: lbu a3,0(a0)
+ addu a0,1
+ bne a3,a1,2f
+ subu v0,a0,1
+2: bne a3,zero,1b
+ j ra
+.end strrchr
diff --git a/private/crt32/string/mips/strrchrt.c b/private/crt32/string/mips/strrchrt.c
new file mode 100644
index 000000000..f608c2fc6
--- /dev/null
+++ b/private/crt32/string/mips/strrchrt.c
@@ -0,0 +1,20 @@
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+
+void main( int argc, char **argv )
+    {
+    int c;
+    unsigned char *pstr;
+    unsigned char string[100];
+
+    strcpy(string, "ABCDEFGHIJKLMNOPQRST");
+    for (c = 'a'; c <= UCHAR_MAX; c++)
+        {
+        string[9] = c;
+        pstr = strrchr( string, c);
+        if (!pstr)
+            printf("Fail - Could not find %d in %s\n", c, string);
+        }
+    return;
+    }
diff --git a/private/crt32/string/mips/wcscmpm.s b/private/crt32/string/mips/wcscmpm.s
new file mode 100644
index 000000000..d3997945d
--- /dev/null
+++ b/private/crt32/string/mips/wcscmpm.s
@@ -0,0 +1,67 @@
+/*******************************************************************************
+ * wcscmpm.s - contains wcscmp()
+ *
+ * ------------------------------------------------------------------
+ * | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights |
+ * | Reserved. This software contains proprietary and confidential |
+ * | information of MIPS and its suppliers. Use, disclosure or |
+ * | reproduction is prohibited without the prior express written |
+ * | consent of MIPS. |
+ * ------------------------------------------------------------------
+ * strcmp.s 1.1
+ *
+ * Purpose:
+ * wcscmp() compares two wide-character strings and returns an integer
+ * to indicate whether the first is less than the second, the two are
+ * equal, or whether the first is greater than the second.
+ *
+ * Comparison is done wchar_t by wchar_t on an UNSIGNED basis, which is to
+ * say that Null wchar_t(0) is less than any other character.
+ *
+ * This function is a MIPS assembly-code replacement for the C version.
+ *
+ * Entry:
+ *
+ * const wchar_t * src - string for left-hand side of comparison
+ * const wchar_t * dst - string for right-hand side of comparison
+ *
+ *Exit:
+ * returns -1 if src < dst
+ * returns 0 if src == dst
+ * returns +1 if src > dst
+ *
+ *Exceptions:
+ *
+ *Revision History:
+ * Craig Hansen (MIPS) 06-June-86 Created.
+ * Roger Lanser (MS) 02-April-94 Cloned for Wide Characters (16-bits).
+ *
+ ******************************************************************************/
+
+#include <kxmips.h>
+
+ .text
+
+LEAF_ENTRY(wcscmp)
+
+ lhu t0,0(a0)
+1: lhu t1,0(a1)
+ addi a0,4
+ beq t0,0,2f
+ lhu t2,-2(a0) # ok to load since -4(a0)!=0
+ bne t0,t1,2f
+ lhu t1,2(a1)
+ addi a1,4
+ beq t2,0,2f
+ lhu t0,0(a0) # ok to load since -2(a0) != 0
+ beq t2,t1,1b
+ move v0,zero
+ j ra // source1 == source2, return 0
+2:
+ sltu v0,t1,t0 // compare source1 to source2
+ beq v0,zero,3f
+ j ra // source1 > source2, return 1
+3:
+ li v0,-1
+ j ra // source1 < source2, return -1
+ .end wcscmp
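[Not part of the patch: a C reference for the contract documented above, comparing wchar_t by wchar_t on an unsigned basis and returning -1, 0, or +1. The helper name wcscmp_ref is hypothetical; unsigned short stands in for this CRT's 16-bit wchar_t.]

    /* Reference for the comparison above. */
    int wcscmp_ref(const unsigned short *src, const unsigned short *dst)
    {
        while (*src != 0 && *src == *dst) {
            src++;
            dst++;
        }
        if (*src == *dst)
            return 0;                    /* both strings ended together */
        return (*src < *dst) ? -1 : 1;   /* unsigned compare, as sltu */
    }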
+ *
+ ******************************************************************************/
+
+#include <kxmips.h>
+
+ .text
+
+LEAF_ENTRY(wcscmp)
diff --git a/private/crt32/string/mips/wcscmpt.c b/private/crt32/string/mips/wcscmpt.c
new file mode 100644
index 000000000..629f61ca5
--- /dev/null
+++ b/private/crt32/string/mips/wcscmpt.c
@@ -0,0 +1,62 @@
+char buffer[100];
+#include <stdio.h>
+#include <string.h>
+
+#define NTUL 7
+
+void main()
+{
+    int i, k;
+    int rc;
+
+    unsigned long source1[4] = {
+        0x30003000,
+        0x30003000,
+        0x30003000,
+        0x36003000
+    };
+
+    unsigned long source2[4] = {
+        0x30003000,
+        0x30003000,
+        0x30003000,
+        0x00000000
+    };
+
+    unsigned long tul[NTUL] = {
+        0x35004600,
+        0x37004600,
+        0x36002f00,
+        0x37002f00,
+        0x30004600,
+        0x30003000,
+        0x36003000
+    };
+
+
+    for (k = 0; k < NTUL; k++) {
+        unsigned short *s1 = (unsigned short *)source1;
+        unsigned short *s2 = (unsigned short *)source2;
+
+        source2[3] = tul[k];
+
+        printf("source1 = ");
+        for (i = 0; i < 4*sizeof(unsigned long); i++)
+            printf("%2.2x ", ((char *)source1)[i]);
+        printf("\n");
+
+        printf("source2 = ");
+        for (i = 0; i < 4*sizeof(unsigned long); i++)
+            printf("%2.2x ", ((char *)source2)[i]);
+
+        rc = wcscmp(source1,source2);
+        if (rc < 0) {
+            printf(" source1 < source2\n");
+        } else if (rc > 0) {
+            printf(" source1 > source2\n");
+        } else {
+            printf(" source1 == source2\n");
+        }
+        printf("Return Code = %d\n",rc);
+    }
+}
diff --git a/private/crt32/string/mips/wcscpym.s b/private/crt32/string/mips/wcscpym.s
new file mode 100644
index 000000000..41b3544ec
--- /dev/null
+++ b/private/crt32/string/mips/wcscpym.s
@@ -0,0 +1,139 @@
+/*******************************************************************************
+ * wcscpym.s - contains wcscpy()
+ *
+ * Copyright (c) 1994, Microsoft Corporation. All rights reserved.
+ *
+ * Purpose:
+ * wcscpy() copies one wchar_t string into another.
+ *
+ * wcscpy() copies the source string to the destination string
+ * assuming no overlap and enough room in the destination. The
+ * destination string is returned. Strings are wide-character
+ * strings.
+ *
+ * This function is a MIPS assembly-code replacement for the C version.
+ * The only thing that this code tries to do is to produce a loop that
+ * uses a lw/sw pair versus running a lhu/sh loop twice. A small
+ * penalty will be paid for very short wide-character strings due
+ * to the setup tests.
+ *
+ * Entry:
+ *
+ * wchar_t *wcscpy(dst, src)
+ * wchar_t * dst - wchar_t string over which "src" is to be copied
+ * const wchar_t * src - wchar_t string to be copied over "dst"
+ *
+ *Exit:
+ * The address of "dst".
+ *
+ *Exceptions:
+ *
+ *Revision History:
+ * 02-08-97 RDL Created initial version.
+ *
+ ******************************************************************************/
+
+#include <kxmips.h>
+
+.text
+
+LEAF_ENTRY(wcscat)
+
+ .set noreorder
+
+ // a0 destination
+ // a1 source
+
+ move v0, a0 // a copy of destination address is returned
+1: lhu t2,0(a0)
+ bnel zero,t2,1b
+ addiu a0,a0,2
+ b 2f
+ nop
+
+ALTERNATE_ENTRY(wcscpy)
+
+ // a0 destination
+ // a1 source
+
+ move v0, a0 // a copy of destination address is returned
+
+2: andi t1,a1,2 // assume at least halfword alignment
+3: andi t0,a0,2 // assume at least halfword alignment
+5: bne t0,t1,30f
+ nop
+
+10: // buffers start on same alignment
+ beq zero,t0,20f
+ nop
+ // halfword alignment
+ lhu t1,0(a1)
+ addiu a0,2
+ addiu a1,2
+ beq zero,t1,99f
+ sh t1,-2(a0)
+
+20: // word alignment
+ lw t0,0(a1)
+ addiu a0,4
+ addiu a1,4
+ andi t1,t0,0xffff
+ beq zero,t1,92f
+ srl t2,t0,16
+ bne zero,t2,20b
+ sw t0,-4(a0)
+ j ra
+ nop
+
+30: // buffers start on different alignment
+ beq zero,t1,40f
+ nop
+ // destination on word boundary, source on halfword boundary
+ lhu t0,0(a1)
+ addiu a1,2
+35: beq zero,t0,92f
+ addiu a0,4
+ lw t1,0(a1)
+ addiu a1,4
+ srl t2,t1,16
+ andi t1,0xffff
+ sll t3,t1,16
+ or t0,t0,t3
+ sw t0,-4(a0)
+ bne zero,t1,35b
+ or t0,zero,t2
+ j ra
+ nop
+
+40: // destination on halfword boundary, source on word boundary
+ lw t3,0(a1)
+ addiu a0,2
+ addiu a1,4
+ srl t2,t3,16
+ andi t0,t3,0xffff
+ beq zero,t0,99f
+ sh t0,-2(a0)
+45: lw t3,0(a1)
+ addiu a0,4
+ addiu a1,4
+ srl t1,t3,16
+ sll t3,t3,16
+ beq zero,t3,94f
+ or t0,t2,t3
+ sw t0,-4(a0)
+ bne zero,t1,45b
+ or t2,t1,zero
+ j ra
+ sh t1,0(a0)
+
+92: j ra
+ sh t0,-4(a0)
+
+94: j ra
+ sw t0,-4(a0)
+
+99: j ra
+ nop
+ .set reorder
+
+ .end wcscat
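[Not part of the patch: a C restatement of the case analysis above. When source and destination are both word-aligned, two 16-bit characters move per 32-bit load/store (the label-20 path); the mixed-alignment paths (labels 30 and 40) are folded into a plain halfword loop here for brevity. Names are hypothetical; little-endian byte order and a 16-bit wchar_t are assumed, and memcpy stands in for lw/sw.]

    #include <stdint.h>
    #include <string.h>

    unsigned short *wcscpy_ref(unsigned short *dst, const unsigned short *src)
    {
        unsigned short *ret = dst;

        if ((((uintptr_t)dst | (uintptr_t)src) & 3) == 0) {
            /* both word-aligned: the lw/sw path */
            for (;;) {
                uint32_t w;
                memcpy(&w, src, 4);              /* lw: two chars at once */
                if ((w & 0xffffu) == 0)
                    break;                       /* low char is the null */
                memcpy(dst, &w, 4);              /* sw: two chars at once */
                if ((w >> 16) == 0)
                    return ret;                  /* high char was the null */
                src += 2;
                dst += 2;
            }
        }
        while ((*dst++ = *src++) != 0)           /* lhu/sh fallback loop */
            ;
        return ret;
    }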
diff --git a/private/crt32/string/mips/wcslenm.s b/private/crt32/string/mips/wcslenm.s
new file mode 100644
index 000000000..ef45c114e
--- /dev/null
+++ b/private/crt32/string/mips/wcslenm.s
@@ -0,0 +1,45 @@
+/*******************************************************************************
+ * wcslenm.s - contains wcslen()
+ *
+ * ------------------------------------------------------------------
+ * | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights |
+ * | Reserved. This software contains proprietary and confidential |
+ * | information of MIPS and its suppliers. Use, disclosure or |
+ * | reproduction is prohibited without the prior express written |
+ * | consent of MIPS. |
+ * ------------------------------------------------------------------
+ * strlen.s 1.1
+ *
+ * Purpose:
+ * Finds the length in wchar_t's of the given string, not including
+ * the final null wchar_t (wide-characters).
+ *
+ * This function is a MIPS assembly-code replacement for the C version.
+ *
+ * Entry:
+ *
+ * size_t wcslen(wcs)
+ * wchar_t * wcs - wchar_t string
+ *
+ *Exit:
+ * The "length" of wcs in wchar_t's.
+ *
+ *Exceptions:
+ *
+ *Revision History:
+ * Craig Hansen (MIPS) 06-June-86 Created.
+ * Roger Lanser (MS) 02-April-94 Cloned for Wide Characters (16-bits).
+ *
+ ******************************************************************************/
+
+#include <kxmips.h>
+
+LEAF_ENTRY(wcslen)
+ subu v0,a0,2
+1: lhu v1,2(v0)
+ addiu v0,v0,2
+ bne v1,zero,1b
+ subu v0,v0,a0
+ srl v0,v0,1
+ j ra
+ .end wcslen
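[Not part of the patch: a C reference for wcslen above, counting 16-bit characters up to, but not including, the terminating null. The helper name wcslen_ref is hypothetical.]

    #include <stddef.h>

    size_t wcslen_ref(const unsigned short *wcs)
    {
        const unsigned short *p = wcs;

        while (*p != 0)
            p++;
        return (size_t)(p - wcs);   /* assembly: byte difference >> 1 */
    }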