Diffstat (limited to 'private/crt32/string/mips/memcpym.s')
-rw-r--r--   private/crt32/string/mips/memcpym.s   298
1 file changed, 298 insertions, 0 deletions
diff --git a/private/crt32/string/mips/memcpym.s b/private/crt32/string/mips/memcpym.s
new file mode 100644
index 000000000..ca0f8fe78
--- /dev/null
+++ b/private/crt32/string/mips/memcpym.s
@@ -0,0 +1,298 @@
+/*
+ * Fast bcopy code which supports overlapped copies.
+ * Not fully optimized yet.
+ *
+ * Written by: Kipp Hickman
+ *
+ * $Source: /proj/sherwood/isms/irix/lib/libc/src/strings/RCS/bcopy.s,v $
+ * $Revision: 1.7 $
+ * $Date: 1993/11/20 19:23:11 $
+ */
+
+#include <kxmips.h>
+
+/*
+ * char *bcopy(from, to, count);
+ * unsigned char *from, *to;
+ * unsigned long count;
+ *
+ * OR
+ *
+ * void *memcpy/memmove(to, from, count);
+ * void *to, *from;
+ * unsigned long count;
+ *
+ * Both functions return "to"
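+ *
+ * Illustrative use (not part of this source): memmove is the
+ * overlap-safe entry point:
+ *
+ *     char buf[8] = "abcdefg";
+ *     memmove(buf + 1, buf, 6);    => buf becomes "aabcdef"
+ *
+ * memcpy shares the implementation here, so it too handles overlap,
+ * though portable callers should not rely on that.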
+ */
+
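+/*
+ * Copies shorter than MINCOPY bytes skip the alignment games and go
+ * byte at a time; 16 is presumably about where the word-copy setup
+ * starts paying for itself.
+ */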
+#define MINCOPY 16
+
+/* registers used */
+
+#define to a0
+#define from a1
+#define count a2
+
+LEAF_ENTRY(memcpy)
+ALTERNATE_ENTRY(memmove)
+ move a3,to # Save to in a3
+ beq count,zero,ret # Test for zero count
+ beq from,to,ret # Test for from == to
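+	/* from == to needs no work: without this early out it would
+	   take the descending path below and copy in place */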
+
+ /* use backwards copying code if the from and to regions overlap */
+ blt to,from,goforwards # If to < from then use forwards copy
+ add v0,from,count # v0 := from + count
+	bge	to,v0,goforwards	# If to >= from + count, no overlap
+ b gobackwards # Oh well, go backwards
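+
+/*
+ * A descending copy is required only when the destination starts
+ * inside the source, i.e. from < to < from + count; every other
+ * arrangement is safe to copy ascending.
+ */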
+
+/*****************************************************************************/
+
+/*
+ * Forward copy code. Check for pointer alignment and try to get both
+ * pointers aligned on a long boundary.
+ */
+goforwards:
+ /* small byte counts use byte at a time copy */
+ blt count,MINCOPY,forwards_bytecopy
+ and v0,from,3 # v0 := from & 3
+ and v1,to,3 # v1 := to & 3
+ beq v0,v1,forwalignable # low bits are identical
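+
+/*
+ * If (from & 3) != (to & 3), no number of leading byte copies can
+ * word align both pointers at once, so such copies stay byte at a
+ * time the whole way.
+ */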
+/*
+ * Byte at a time copy code. This is used when the pointers are not
+ * alignable, when the byte count is small, or when cleaning up any
+ * remaining bytes on a larger transfer.
+ */
+forwards_bytecopy:
+ beq count,zero,ret # If count is zero, then we are done
+ addu v1,from,count # v1 := from + count
+
+99: lb v0,0(from) # v0 = *from
+ addu from,1 # advance pointer
+ sb v0,0(to) # Store byte
+ addu to,1 # advance pointer
+ bne from,v1,99b # Loop until done
+ret: move v0,a3 # Set v0 to old "to" pointer
+ j ra # return to caller
+
+/*
+ * Pointers are alignable, and may be aligned. Since v0 == v1, we need only
+ * check what value v0 has to see how to get aligned. Also, since we have
+ * eliminated tiny copies, we know that the count is large enough to
+ * encompass the alignment copies.
+ */
+forwalignable:
+ beq v0,zero,forwards # If v0==v1 && v0==0 then aligned
+ beq v0,1,forw_copy3 # Need to copy 3 bytes to get aligned
+ beq v0,2,forw_copy2 # Need to copy 2 bytes to get aligned
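+
+/*
+ * Otherwise v0 == 3 and one byte suffices; in every case
+ * 4 - (from & 3) bytes bring both pointers to a word boundary.
+ */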
+
+/* need to copy 1 byte */
+ lb v0,0(from) # get one byte
+ addu from,1 # advance pointer
+ sb v0,0(to) # store one byte
+ addu to,1 # advance pointer
+ subu count,1 # and reduce count
+ b forwards # Now pointers are aligned
+
+/* need to copy 2 bytes */
+forw_copy2:
+ lh v0,0(from) # get one short
+ addu from,2 # advance pointer
+ sh v0,0(to) # store one short
+ addu to,2 # advance pointer
+ subu count,2 # and reduce count
+ b forwards
+
+/* need to copy 3 bytes */
+forw_copy3:
+ lb v0,0(from) # get one byte
+ lh v1,1(from) # and one short
+ addu from,3 # advance pointer
+ sb v0,0(to) # store one byte
+ sh v1,1(to) # and one short
+ addu to,3 # advance pointer
+ subu count,3 # and reduce count
+ /* FALLTHROUGH */
+/*
+ * Once we are here, the pointers are aligned on long boundaries.
+ * Begin copying in large chunks.
+ */
+forwards:
+
+/* 32 byte at a time loop */
+forwards_32:
+ blt count,32,forwards_16 # do 16 bytes at a time
+ lw v0,0(from)
+ lw v1,4(from)
+ lw t0,8(from)
+ lw t1,12(from)
+ lw t2,16(from)
+ lw t3,20(from)
+ lw t4,24(from)
+ lw t5,28(from) # Fetch 8*4 bytes
+ addu from,32 # advance from pointer now
+ sw v0,0(to)
+ sw v1,4(to)
+ sw t0,8(to)
+ sw t1,12(to)
+ sw t2,16(to)
+ sw t3,20(to)
+ sw t4,24(to)
+ sw t5,28(to) # Store 8*4 bytes
+ addu to,32 # advance to pointer now
+ subu count,32 # Reduce count
+ b forwards_32 # Try some more
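+
+/*
+ * The loads above are grouped ahead of the stores, presumably so
+ * each load's delay slot is covered by the next load instead of
+ * costing a stall (or an assembler nop) before the matching store.
+ */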
+
+/* 16 byte at a time loop */
+forwards_16:
+ blt count,16,forwards_4 # Do rest in words
+ lw v0,0(from)
+ lw v1,4(from)
+ lw t0,8(from)
+ lw t1,12(from)
+ addu from,16 # advance from pointer now
+ sw v0,0(to)
+ sw v1,4(to)
+ sw t0,8(to)
+ sw t1,12(to)
+ addu to,16 # advance to pointer now
+ subu count,16 # Reduce count
+ b forwards_16 # Try some more
+
+/* 4 byte at a time loop */
+forwards_4:
+ blt count,4,forwards_bytecopy # Do rest
+ lw v0,0(from)
+ addu from,4 # advance pointer
+ sw v0,0(to)
+ addu to,4 # advance pointer
+ subu count,4
+ b forwards_4
+
+/*****************************************************************************/
+
+/*
+ * Backward copy code. Check for pointer alignment and try to get both
+ * pointers aligned on a long boundary.
+ */
+gobackwards:
+ add from,count # Advance to end + 1
+ add to,count # Advance to end + 1
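+
+	/* both pointers now sit one byte past their regions; all the
+	   descending code below uses negative offsets from these ends */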
+
+ /* small byte counts use byte at a time copy */
+ blt count,MINCOPY,backwards_bytecopy
+ and v0,from,3 # v0 := from & 3
+ and v1,to,3 # v1 := to & 3
+ beq v0,v1,backalignable # low bits are identical
+/*
+ * Byte at a time copy code. This is used when the pointers are not
+ * alignable, when the byte count is small, or when cleaning up any
+ * remaining bytes on a larger transfer.
+ */
+backwards_bytecopy:
+ beq count,zero,ret # If count is zero quit
+ subu from,1 # Reduce by one (point at byte)
+ subu to,1 # Reduce by one (point at byte)
+ subu v1,from,count # v1 := original from - 1
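+
+/*
+ * The loop decrements "from" after each load, so from == v1 exactly
+ * when the lowest addressed byte has been copied.
+ */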
+
+99: lb v0,0(from) # v0 = *from
+ subu from,1 # backup pointer
+ sb v0,0(to) # Store byte
+ subu to,1 # backup pointer
+ bne from,v1,99b # Loop until done
+ move v0,a3 # Set v0 to old "to" pointer
+ j ra # return to caller
+
+/*
+ * Pointers are alignable, and may be aligned. Since v0 == v1, we need only
+ * check what value v0 has to see how to get aligned. Also, since we have
+ * eliminated tiny copies, we know that the count is large enough to
+ * encompass the alignment copies.
+ */
+backalignable:
+ beq v0,zero,backwards # If v0==v1 && v0==0 then aligned
+ beq v0,3,back_copy3 # Need to copy 3 bytes to get aligned
+ beq v0,2,back_copy2 # Need to copy 2 bytes to get aligned
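+
+/*
+ * Otherwise v0 == 1 and one byte suffices; in every case
+ * from & 3 trailing bytes bring both end pointers to a word
+ * boundary.
+ */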
+
+/* need to copy 1 byte */
+ lb v0,-1(from) # get one byte
+ subu from,1 # backup pointer
+ sb v0,-1(to) # store one byte
+ subu to,1 # backup pointer
+ subu count,1 # and reduce count
+ b backwards # Now pointers are aligned
+
+/* need to copy 2 bytes */
+back_copy2:
+ lh v0,-2(from) # get one short
+ subu from,2 # backup pointer
+ sh v0,-2(to) # store one short
+ subu to,2 # backup pointer
+ subu count,2 # and reduce count
+ b backwards
+
+/* need to copy 3 bytes */
+back_copy3:
+ lb v0,-1(from) # get one byte
+ lh v1,-3(from) # and one short
+ subu from,3 # backup pointer
+ sb v0,-1(to) # store one byte
+ sh v1,-3(to) # and one short
+ subu to,3 # backup pointer
+ subu count,3 # and reduce count
+ /* FALLTHROUGH */
+/*
+ * Once we are here, the pointers are aligned on long boundaries.
+ * Begin copying in large chunks.
+ */
+backwards:
+
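+/*
+ * These descending loops mirror the ascending ones: loads are again
+ * grouped ahead of stores, but run at negative offsets while the
+ * pointers count down.
+ */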
+/* 32 byte at a time loop */
+backwards_32:
+ blt count,32,backwards_16 # do 16 bytes at a time
+ lw v0,-4(from)
+ lw v1,-8(from)
+ lw t0,-12(from)
+ lw t1,-16(from)
+ lw t2,-20(from)
+ lw t3,-24(from)
+ lw t4,-28(from)
+ lw t5,-32(from) # Fetch 8*4 bytes
+ subu from,32 # backup from pointer now
+ sw v0,-4(to)
+ sw v1,-8(to)
+ sw t0,-12(to)
+ sw t1,-16(to)
+ sw t2,-20(to)
+ sw t3,-24(to)
+ sw t4,-28(to)
+ sw t5,-32(to) # Store 8*4 bytes
+ subu to,32 # backup to pointer now
+ subu count,32 # Reduce count
+ b backwards_32 # Try some more
+
+/* 16 byte at a time loop */
+backwards_16:
+ blt count,16,backwards_4 # Do rest in words
+ lw v0,-4(from)
+ lw v1,-8(from)
+ lw t0,-12(from)
+ lw t1,-16(from)
+ subu from,16 # backup from pointer now
+ sw v0,-4(to)
+ sw v1,-8(to)
+ sw t0,-12(to)
+ sw t1,-16(to)
+ subu to,16 # backup to pointer now
+ subu count,16 # Reduce count
+ b backwards_16 # Try some more
+
+/* 4 byte at a time loop */
+backwards_4:
+ blt count,4,backwards_bytecopy # Do rest
+ lw v0,-4(from)
+ subu from,4 # backup from pointer
+ sw v0,-4(to)
+ subu to,4 # backup to pointer
+ subu count,4 # Reduce count
+ b backwards_4
+.end memcpy