summaryrefslogtreecommitdiffstats
path: root/private/crt32/string/mips/memsetm.s
diff options
context:
space:
mode:
Diffstat (limited to 'private/crt32/string/mips/memsetm.s')
-rw-r--r--private/crt32/string/mips/memsetm.s105
1 files changed, 105 insertions, 0 deletions
diff --git a/private/crt32/string/mips/memsetm.s b/private/crt32/string/mips/memsetm.s
new file mode 100644
index 000000000..a53f8f0a1
--- /dev/null
+++ b/private/crt32/string/mips/memsetm.s
@@ -0,0 +1,105 @@
+/* --------------------------------------------------- */
+/* | Copyright (c) 1986 MIPS Computer Systems, Inc. | */
+/* | All Rights Reserved. | */
+/* --------------------------------------------------- */
+/* $Revision: 1.3 $ */
+
+/*
+ * char *
+ * memset(s, c, n)
+ * register char * s;
+ * register c, n;
+ * {
+ * register char * p = s;
+ *
+ * while (--n >= 0)
+ * *s++ = c;
+ *
+ * return (p);
+ * }
+ */
+
+/*
+ * Copyright 1986 by MIPS Computer Systems, Inc.
+ */
+
+#include <kxmips.h>
+
+#define NBPW 4 /* number of bytes per word; used for alignment masks and sw offsets */
+
+/*
+ * memset(dst, c, bcount)
+ * set bcount bytes of memory starting at dst to the byte value c
+ *
+ * Calculating MINSET, assuming 10% cache-miss on non-loop code:
+ * Overhead =~ 18 instructions => 28 (30) cycles
+ * Byte set =~ 12 (24) cycles/word for 08M44 (08V11)
+ * Word set =~ 3 (5) cycles/word for 08M44 (08V11)
+ * If I-cache-miss nears 0, MINSET ==> 4 bytes; otherwise, times are:
+ * breakeven (MEM) = 28 / (12 - 3) =~ 3 words
+ * breakeven (VME) = 30 / (24 - 5) =~ 1.5 words
+ * Since the overhead is pessimistic (worst-case alignment), and many calls
+ * will be for well-aligned data, and since Word-set at least leaves
+ * the set in the cache, we shade these values (6-12) down to 8 bytes
+ */
+#define MINSET 8 /* counts below this go straight to the byte loop */
+
+/* It turns out better to think of lwl/lwr and swl/swr as smaller-vs-bigger address
+ rather than left-vs-right. Such a representation makes the code endian-independent;
+ NOTE(review): the fixed mapping below (lwr/swr = smaller address) looks little-endian only - confirm. */
+
+#define LWS lwr /* load word, smaller-address part */
+#define LWB lwl /* load word, bigger-address part */
+#define SWS swr /* store word, smaller-address part */
+#define SWB swl /* store word, bigger-address part */
+
+/*
+ * memset(dst, c, bcount) - fill bcount bytes at dst with (unsigned char)c.
+ *
+ * In:   a0 = dst, a1 = c, a2 = bcount
+ * Out:  v0 = original dst
+ * Uses: v1, t0, a3 as scratch; a0, a1, a2 are modified
+ *
+ * Instructions are written in logical order; no ".set noreorder" is
+ * visible in this file, so the assembler is expected to schedule branch
+ * delay slots. "BDSLOT" marks instructions intended as slot candidates.
+ *
+ * Only the low 8 bits of c are used. They are isolated before being
+ * replicated into a word, so a sign-extended char (e.g. 0xFFFFFF80) or a
+ * value above 0xFF cannot leak high bits into the word stores.
+ *
+ * NOTE(review): the count comparisons (blt/ble) are signed, matching the
+ * reference C above; a count with the top bit set writes nothing.
+ */
+LEAF_ENTRY(memset)
+ move v0,a0 # return first argument; BDSLOT
+ blt a2,MINSET,byteset # short fill: byte loop only (sb stores just the low byte)
+ subu v1,zero,a0 # number of bytes til aligned; BDSLOT
+ and a1,0xff # fill value is (unsigned char)c; drop high bits before replicating
+ beq a1,$0,1f # make memset(s, 0, n) faster
+ sll t0,a1,8
+ or a1,t0 # a1 = fill byte in the low two bytes
+ sll t0,a1,16
+ or a1,t0 # a1 = fill byte in all four bytes
+1: and v1,NBPW-1 # v1 = (-dst) mod NBPW = bytes up to the next word boundary
+ subu a2,v1 # adjust count; BDSLOT
+ beq v1,zero,blkset # already aligned
+ SWS a1,0(a0) # partial-word store covering dst up to the word boundary
+ addu a0,v1 # a0 is now word aligned
+
+/*
+ * set 8 byte, aligned block (no point in unrolling further,
+ * since maximum write rate in M/500 is two cycles/word write)
+ */
+blkset:
+ and t0,a2,NBPW+NBPW-1 # count after by-8-byte loop done
+ subu a3,a2,t0 # total in 8 byte chunks; BDSLOT
+ beq a2,t0,wordset # less than 8 bytes to set
+ addu a3,a0 # dst endpoint
+1: addu a0,NBPW+NBPW
+ sw a1,-NBPW-NBPW(a0) # first word of the chunk just stepped over
+ sw a1,-NBPW(a0) # second word of the chunk
+ bne a0,a3,1b
+ move a2,t0 # set end-of loop count
+
+/*
+ * do a word (if required) this is not a loop since loop above
+ * guarantees that at most one word must be written here.
+ */
+wordset:
+ and t0,a2,NBPW # count after by-word non-loop done
+ subu a2,t0 # adjust count; BDSLOT
+ beq t0,zero,byteset # less than word to set
+ sw a1,0(a0)
+ addu a0,NBPW
+
+/*
+ * byte loop: handles the whole request when bcount < MINSET, otherwise
+ * the bytes left over after the word stores.
+ */
+byteset:
+ addu a3,a2,a0 # dst endpoint; BDSLOT
+ ble a2,zero,setdone # nothing left to set
+1: addu a0,1
+ sb a1,-1(a0) # store the byte just stepped over
+ bne a0,a3,1b
+setdone:
+ j ra # v0 still holds the original dst
+.end memset