diff options
Diffstat (limited to 'private/crt32/string/mips/memsetm.s')
-rw-r--r-- | private/crt32/string/mips/memsetm.s | 105 |
1 files changed, 105 insertions, 0 deletions
diff --git a/private/crt32/string/mips/memsetm.s b/private/crt32/string/mips/memsetm.s new file mode 100644 index 000000000..a53f8f0a1 --- /dev/null +++ b/private/crt32/string/mips/memsetm.s @@ -0,0 +1,105 @@ +/* --------------------------------------------------- */ +/* | Copyright (c) 1986 MIPS Computer Systems, Inc. | */ +/* | All Rights Reserved. | */ +/* --------------------------------------------------- */ +/* $Revision: 1.3 $ */ + +/* + * char * + * memset(s, c, n) + * register char * s; + * register c, n; + * { + * register char * p = s; + * + * while (--n >= 0) + * *s++ = c; + * + * return (p); + * } + */ + +/* + * Copyright 1986 by MIPS Computer Systems, Inc. + */ + +#include <kxmips.h> + +#define NBPW 4 + +/* + * memset(dst, c, bcount) + * set block of memory with blanks + * + * Calculating MINSET, assuming 10% cache-miss on non-loop code: + * Overhead =~ 18 instructions => 28 (30) cycles + * Byte set =~ 12 (24) cycles/word for 08M44 (08V11) + * Word set =~ 3 (5) cycles/word for 08M44 (08V11) + * If I-cache-miss nears 0, MINSET ==> 4 bytes; otherwise, times are: + * breakeven (MEM) = 28 / (12 - 3) =~ 3 words + * breakeven (VME) = 30 / (24 - 5) =~ 1.5 words + * Since the overhead is pessimistic (worst-case alignment), and many calls + * will be for well-aligned data, and since Word-set at least leaves + * the set in the cache, we shade these values (6-12) down to 8 bytes + */ +#define MINSET 8 + +/* It turns out better to think of lwl/lwr and swl/swr as + smaller-vs-bigger address rather than left-vs-right. + Such a representation makes the code endian-independent. 
*/

#define LWS lwr
#define LWB lwl
#define SWS swr
#define SWB swl

/*
 * memset(dst, c, bcount)
 *	Fill bcount bytes starting at dst with the byte value c and
 *	return dst.
 *
 * Register use:
 *	a0	dst; advanced as bytes are written
 *	a1	c; replicated into all four bytes for the word stores
 *	a2	bcount; reduced as each phase consumes bytes
 *	a3	end address of the loop currently running
 *	v0	return value (the dst passed in)
 *	v1	bytes needed to reach word alignment
 *	t0	scratch
 *
 * Phases: fills shorter than MINSET go straight to the byte loop.
 * Otherwise dst is word aligned with one partial-word store, then
 * filled 8 bytes per iteration, then at most one more word, then at
 * most NBPW-1 trailing bytes.
 *
 * "BDSLOT" marks an instruction the author expected the assembler to
 * schedule into the delay slot of the adjacent branch; the routine
 * relies on the assembler to fill delay slots.
 *
 * NOTE(review): the count tests (blt/ble) are signed, matching the
 * reference C loop "while (--n >= 0)"; a count with the top bit set
 * fills nothing.
 */
LEAF_ENTRY(memset)
	move	v0,a0			# return first argument; BDSLOT
	blt	a2,MINSET,byteset	# too short to be worth aligning
	/*
	 * Use only the low 8 bits of c, as C requires (c is converted
	 * to unsigned char).  Without this mask, the replication below
	 * smears stray upper bits into the word stores: a sign-extended
	 * negative char such as -2 would fill FE FF FF FF, and 0x100
	 * would fill 00 01 01 01.  The byte loop does not need the mask
	 * because sb stores only the low byte.
	 */
	and	a1,0xff
	subu	v1,zero,a0		# number of bytes til aligned; BDSLOT
	beq	a1,$0,1f		# make memset(s, 0, n) faster
	sll	t0,a1,8			# replicate the byte into both halfword bytes
	or	a1,t0
	sll	t0,a1,16		# replicate the halfword into the full word
	or	a1,t0
1:	and	v1,NBPW-1		# v1 = (-dst) mod NBPW
	subu	a2,v1			# adjust count; BDSLOT
	beq	v1,zero,blkset		# already aligned
	/*
	 * One partial-word store covers the v1 bytes from a0 up to the
	 * next word boundary.  All four bytes of a1 are identical, so it
	 * does not matter which register bytes land in memory.
	 */
	SWS	a1,0(a0)
	addu	a0,v1			# a0 is now word aligned

/*
 * set 8 byte, aligned block (no point in unrolling further,
 * since maximum write rate in M/500 is two cycles/word write)
 */
blkset:
	and	t0,a2,NBPW+NBPW-1	# count after by-8-byte loop done
	subu	a3,a2,t0		# total in 8 byte chunks; BDSLOT
	beq	a2,t0,wordset		# less than 8 bytes to set
	addu	a3,a0			# dst endpoint
1:	addu	a0,NBPW+NBPW
	sw	a1,-NBPW-NBPW(a0)
	sw	a1,-NBPW(a0)
	bne	a0,a3,1b
	move	a2,t0			# set end-of loop count

/*
 * do a word (if required) this is not a loop since loop above
 * guarantees that at most one word must be written here.
 */
wordset:
	and	t0,a2,NBPW		# count after by-word non-loop done
	subu	a2,t0			# adjust count; BDSLOT
	beq	t0,zero,byteset		# less than word to set
	sw	a1,0(a0)
	addu	a0,NBPW

/*
 * set the remaining bytes one at a time; this is also the whole job
 * for fills shorter than MINSET.
 */
byteset:
	addu	a3,a2,a0		# dst endpoint; BDSLOT
	ble	a2,zero,setdone		# nothing left to set
1:	addu	a0,1
	sb	a1,-1(a0)
	bne	a0,a3,1b
setdone:
	j	ra
.end memset