Diffstat (limited to 'private/crt32/string/mips/memcpym.s')
-rw-r--r-- | private/crt32/string/mips/memcpym.s | 298
1 files changed, 298 insertions, 0 deletions
diff --git a/private/crt32/string/mips/memcpym.s b/private/crt32/string/mips/memcpym.s
new file mode 100644
index 000000000..ca0f8fe78
--- /dev/null
+++ b/private/crt32/string/mips/memcpym.s
@@ -0,0 +1,298 @@
+/*
+ * Fast bcopy code which supports overlapped copies.
+ * Not fully optimized yet.
+ *
+ * Written by: Kipp Hickman
+ *
+ * $Source: /proj/sherwood/isms/irix/lib/libc/src/strings/RCS/bcopy.s,v $
+ * $Revision: 1.7 $
+ * $Date: 1993/11/20 19:23:11 $
+ */
+
+#include <kxmips.h>
+
+/*
+ * char *bcopy(from, to, count);
+ *      unsigned char *from, *to;
+ *      unsigned long count;
+ *
+ * OR
+ *
+ * void *memcpy/memmove(to, from, count);
+ *      void *to, *from;
+ *      unsigned long count;
+ *
+ * Both functions return "to"
+ */
+
+#define MINCOPY 16
+
+/* registers used */
+
+#define to      a0
+#define from    a1
+#define count   a2
+
+LEAF_ENTRY(memcpy)
+ALTERNATE_ENTRY(memmove)
+        move    a3,to                   # Save to in a3
+        beq     count,zero,ret          # Test for zero count
+        beq     from,to,ret             # Test for from == to
+
+        /* use backwards copying code if the from and to regions overlap */
+        blt     to,from,goforwards      # If to < from then use forwards copy
+        add     v0,from,count           # v0 := from + count
+        bge     to,v0,goforwards        # If to >= from + count; no overlap
+        b       gobackwards             # Oh well, go backwards
+
+/*****************************************************************************/
+
+/*
+ * Forward copy code. Check for pointer alignment and try to get both
+ * pointers aligned on a long boundary.
+ */
+goforwards:
+        /* small byte counts use byte at a time copy */
+        blt     count,MINCOPY,forwards_bytecopy
+        and     v0,from,3               # v0 := from & 3
+        and     v1,to,3                 # v1 := to & 3
+        beq     v0,v1,forwalignable     # low bits are identical
+/*
+ * Byte at a time copy code. This is used when the pointers are not
+ * alignable, when the byte count is small, or when cleaning up any
+ * remaining bytes on a larger transfer.
+ */
+forwards_bytecopy:
+        beq     count,zero,ret          # If count is zero, then we are done
+        addu    v1,from,count           # v1 := from + count
+
+99:     lb      v0,0(from)              # v0 = *from
+        addu    from,1                  # advance pointer
+        sb      v0,0(to)                # Store byte
+        addu    to,1                    # advance pointer
+        bne     from,v1,99b             # Loop until done
+ret:    move    v0,a3                   # Set v0 to old "to" pointer
+        j       ra                      # return to caller
+
+/*
+ * Pointers are alignable, and may be aligned. Since v0 == v1, we need only
+ * check what value v0 has to see how to get aligned. Also, since we have
+ * eliminated tiny copies, we know that the count is large enough to
+ * encompass the alignment copies.
+ */
+forwalignable:
+        beq     v0,zero,forwards        # If v0==v1 && v0==0 then aligned
+        beq     v0,1,forw_copy3         # Need to copy 3 bytes to get aligned
+        beq     v0,2,forw_copy2         # Need to copy 2 bytes to get aligned
+
+/* need to copy 1 byte */
+        lb      v0,0(from)              # get one byte
+        addu    from,1                  # advance pointer
+        sb      v0,0(to)                # store one byte
+        addu    to,1                    # advance pointer
+        subu    count,1                 # and reduce count
+        b       forwards                # Now pointers are aligned
+
+/* need to copy 2 bytes */
+forw_copy2:
+        lh      v0,0(from)              # get one short
+        addu    from,2                  # advance pointer
+        sh      v0,0(to)                # store one short
+        addu    to,2                    # advance pointer
+        subu    count,2                 # and reduce count
+        b       forwards
+
+/* need to copy 3 bytes */
+forw_copy3:
+        lb      v0,0(from)              # get one byte
+        lh      v1,1(from)              # and one short
+        addu    from,3                  # advance pointer
+        sb      v0,0(to)                # store one byte
+        sh      v1,1(to)                # and one short
+        addu    to,3                    # advance pointer
+        subu    count,3                 # and reduce count
+        /* FALLTHROUGH */
+/*
+ * Once we are here, the pointers are aligned on long boundaries.
+ * Begin copying in large chunks.
+ */
+forwards:
+
+/* 32 byte at a time loop */
+forwards_32:
+        blt     count,32,forwards_16    # do 16 bytes at a time
+        lw      v0,0(from)
+        lw      v1,4(from)
+        lw      t0,8(from)
+        lw      t1,12(from)
+        lw      t2,16(from)
+        lw      t3,20(from)
+        lw      t4,24(from)
+        lw      t5,28(from)             # Fetch 8*4 bytes
+        addu    from,32                 # advance from pointer now
+        sw      v0,0(to)
+        sw      v1,4(to)
+        sw      t0,8(to)
+        sw      t1,12(to)
+        sw      t2,16(to)
+        sw      t3,20(to)
+        sw      t4,24(to)
+        sw      t5,28(to)               # Store 8*4 bytes
+        addu    to,32                   # advance to pointer now
+        subu    count,32                # Reduce count
+        b       forwards_32             # Try some more
+
+/* 16 byte at a time loop */
+forwards_16:
+        blt     count,16,forwards_4     # Do rest in words
+        lw      v0,0(from)
+        lw      v1,4(from)
+        lw      t0,8(from)
+        lw      t1,12(from)
+        addu    from,16                 # advance from pointer now
+        sw      v0,0(to)
+        sw      v1,4(to)
+        sw      t0,8(to)
+        sw      t1,12(to)
+        addu    to,16                   # advance to pointer now
+        subu    count,16                # Reduce count
+        b       forwards_16             # Try some more
+
+/* 4 bytes at a time loop */
+forwards_4:
+        blt     count,4,forwards_bytecopy # Do rest
+        lw      v0,0(from)
+        addu    from,4                  # advance pointer
+        sw      v0,0(to)
+        addu    to,4                    # advance pointer
+        subu    count,4
+        b       forwards_4
+
+/*****************************************************************************/
+
+/*
+ * Backward copy code. Check for pointer alignment and try to get both
+ * pointers aligned on a long boundary.
+ */
+gobackwards:
+        add     from,count              # Advance to end + 1
+        add     to,count                # Advance to end + 1
+
+        /* small byte counts use byte at a time copy */
+        blt     count,MINCOPY,backwards_bytecopy
+        and     v0,from,3               # v0 := from & 3
+        and     v1,to,3                 # v1 := to & 3
+        beq     v0,v1,backalignable     # low bits are identical
+/*
+ * Byte at a time copy code. This is used when the pointers are not
+ * alignable, when the byte count is small, or when cleaning up any
+ * remaining bytes on a larger transfer.
+ */
+backwards_bytecopy:
+        beq     count,zero,ret          # If count is zero quit
+        subu    from,1                  # Reduce by one (point at byte)
+        subu    to,1                    # Reduce by one (point at byte)
+        subu    v1,from,count           # v1 := original from - 1
+
+99:     lb      v0,0(from)              # v0 = *from
+        subu    from,1                  # backup pointer
+        sb      v0,0(to)                # Store byte
+        subu    to,1                    # backup pointer
+        bne     from,v1,99b             # Loop until done
+        move    v0,a3                   # Set v0 to old "to" pointer
+        j       ra                      # return to caller
+
+/*
+ * Pointers are alignable, and may be aligned. Since v0 == v1, we need only
+ * check what value v0 has to see how to get aligned. Also, since we have
+ * eliminated tiny copies, we know that the count is large enough to
+ * encompass the alignment copies.
+ */
+backalignable:
+        beq     v0,zero,backwards       # If v0==v1 && v0==0 then aligned
+        beq     v0,3,back_copy3         # Need to copy 3 bytes to get aligned
+        beq     v0,2,back_copy2         # Need to copy 2 bytes to get aligned
+
+/* need to copy 1 byte */
+        lb      v0,-1(from)             # get one byte
+        subu    from,1                  # backup pointer
+        sb      v0,-1(to)               # store one byte
+        subu    to,1                    # backup pointer
+        subu    count,1                 # and reduce count
+        b       backwards               # Now pointers are aligned
+
+/* need to copy 2 bytes */
+back_copy2:
+        lh      v0,-2(from)             # get one short
+        subu    from,2                  # backup pointer
+        sh      v0,-2(to)               # store one short
+        subu    to,2                    # backup pointer
+        subu    count,2                 # and reduce count
+        b       backwards
+
+/* need to copy 3 bytes */
+back_copy3:
+        lb      v0,-1(from)             # get one byte
+        lh      v1,-3(from)             # and one short
+        subu    from,3                  # backup pointer
+        sb      v0,-1(to)               # store one byte
+        sh      v1,-3(to)               # and one short
+        subu    to,3                    # backup pointer
+        subu    count,3                 # and reduce count
+        /* FALLTHROUGH */
+/*
+ * Once we are here, the pointers are aligned on long boundaries.
+ * Begin copying in large chunks.
+ */
+backwards:
+
+/* 32 byte at a time loop */
+backwards_32:
+        blt     count,32,backwards_16   # do 16 bytes at a time
+        lw      v0,-4(from)
+        lw      v1,-8(from)
+        lw      t0,-12(from)
+        lw      t1,-16(from)
+        lw      t2,-20(from)
+        lw      t3,-24(from)
+        lw      t4,-28(from)
+        lw      t5,-32(from)            # Fetch 8*4 bytes
+        subu    from,32                 # backup from pointer now
+        sw      v0,-4(to)
+        sw      v1,-8(to)
+        sw      t0,-12(to)
+        sw      t1,-16(to)
+        sw      t2,-20(to)
+        sw      t3,-24(to)
+        sw      t4,-28(to)
+        sw      t5,-32(to)              # Store 8*4 bytes
+        subu    to,32                   # backup to pointer now
+        subu    count,32                # Reduce count
+        b       backwards_32            # Try some more
+
+/* 16 byte at a time loop */
+backwards_16:
+        blt     count,16,backwards_4    # Do rest in words
+        lw      v0,-4(from)
+        lw      v1,-8(from)
+        lw      t0,-12(from)
+        lw      t1,-16(from)
+        subu    from,16                 # backup from pointer now
+        sw      v0,-4(to)
+        sw      v1,-8(to)
+        sw      t0,-12(to)
+        sw      t1,-16(to)
+        subu    to,16                   # backup to pointer now
+        subu    count,16                # Reduce count
+        b       backwards_16            # Try some more
+
+/* 4 byte at a time loop */
+backwards_4:
+        blt     count,4,backwards_bytecopy # Do rest
+        lw      v0,-4(from)
+        subu    from,4                  # backup from pointer
+        sw      v0,-4(to)
+        subu    to,4                    # backup to pointer
+        subu    count,4                 # Reduce count
+        b       backwards_4
+.end memcpy
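For reference, the control flow above maps onto a short C sketch of the same strategy: copy forwards when the regions cannot clobber each other (to < from, or to >= from + count), otherwise copy backwards from the end; when both pointers share the same low two bits, align them to a word boundary and move word-sized chunks, finishing with a byte loop. This is a minimal sketch, not the CRT source: sketch_memmove is a hypothetical name, uint32_t loads and stores stand in for the MIPS lw/sw sequences, and the 32- and 16-byte unrolled loops are collapsed into a single word loop.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Minimal sketch of the copy strategy above; names are illustrative. */
static void *sketch_memmove(void *dst, const void *src, size_t count)
{
    unsigned char *to = dst;
    const unsigned char *from = src;

    if (count == 0 || to == from)
        return dst;                         /* zero count or from == to */

    if (to < from || to >= from + count) {  /* no destructive overlap: forwards */
        /* alignable? both pointers must share the same low two bits */
        if (count >= 16 && ((uintptr_t)to & 3) == ((uintptr_t)from & 3)) {
            while ((uintptr_t)from & 3) {   /* byte copies up to a word boundary */
                *to++ = *from++;
                count--;
            }
            while (count >= 4) {            /* word loop (the asm unrolls 8x/4x) */
                *(uint32_t *)to = *(const uint32_t *)from;
                to += 4; from += 4; count -= 4;
            }
        }
        while (count--)                     /* byte cleanup */
            *to++ = *from++;
    } else {                                /* dest overlaps the source tail */
        to += count;                        /* point one past the end */
        from += count;
        if (count >= 16 && ((uintptr_t)to & 3) == ((uintptr_t)from & 3)) {
            while ((uintptr_t)from & 3) {   /* align downward */
                *--to = *--from;
                count--;
            }
            while (count >= 4) {            /* backward word loop */
                to -= 4; from -= 4; count -= 4;
                *(uint32_t *)to = *(const uint32_t *)from;
            }
        }
        while (count--)
            *--to = *--from;
    }
    return dst;                             /* both entry points return "to" */
}

int main(void)
{
    char a[] = "abcdefghijklmnopqrstuvwxyz";
    sketch_memmove(a + 2, a, 24);           /* to > from: backwards copy */
    printf("%s\n", a);                      /* ababcdefghijklmnopqrstuvwx */

    char b[] = "abcdefghijklmnopqrstuvwxyz";
    sketch_memmove(b, b + 2, 24);           /* to < from: forwards copy */
    printf("%s\n", b);                      /* cdefghijklmnopqrstuvwxyzyz */
    return 0;
}

The direction test is what makes the memmove entry point safe on overlap: a forward copy would read bytes it had already overwritten whenever the destination starts inside the source region, so the assembly walks backwards from the end in exactly that case.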