1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
/* ------------------------------------------------------------------ */
/* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | */
/* | Reserved. This software contains proprietary and confidential | */
/* | information of MIPS and its suppliers. Use, disclosure or | */
/* | reproduction is prohibited without the prior express written | */
/* | consent of MIPS. | */
/* ------------------------------------------------------------------ */
#ident "$Header"
/*
* Copyright 1985 by MIPS Computer Systems, Inc.
*/
/* memcmp(s1, s2, n) */
#include "kxmips.h"
/*
* memcmp(src, dst, bcount)
*
* MINCMP is the minimum number of bytes for which it is worthwhile to
* try to align the compare into word transactions. Shorter compares
* are done entirely by the byte loop.
*
* Calculating MINCMP
* Overhead =~ 15 instructions => 90 cycles
* Byte cmp =~ 38 cycles/word
* Word cmp =~ 17 cycles/word
* Breakeven =~ 16 bytes
*/
#define MINCMP 16
/* NBPW: number of bytes per word; NBPW-1 serves as the alignment mask */
#define NBPW 4
/*
 * memcmp(s1, s2, n)
 *
 * Compare n bytes starting at s1 with n bytes starting at s2.
 *
 * Entry:   a0 = s1, a1 = s2, a2 = n
 * Return:  v0 = 0 when no byte differs; otherwise 1 if the first byte
 *          that differs is larger (unsigned) in s1, -1 if it is smaller
 * Scratch: v0, v1, a0-a3, t8
 *
 * Strategy: counts below MINCMP go straight to the byte loop. Longer
 * compares use a word loop - aligned lw on both sides when s1 and s2
 * share the same alignment, otherwise lwr/lwl on s1 against lw on s2 -
 * and the remaining tail is finished by the byte loop.
 *
 * A word mismatch only proves that some byte of the word differs; the
 * numeric order of two words is not the order of their first differing
 * bytes (with little-endian layout the last byte in memory is the most
 * significant one). Word mismatches therefore step back and rescan the
 * same bytes with the byte loop, so cmpne always receives a byte pair.
 *
 * The lwr 0 / lwl 3 pairing is the little-endian unaligned-load form,
 * so this routine presumably targets little-endian only.
 *
 * The count is tested signed: a count with the sign bit set takes the
 * short path and is reported as equal.
 *
 * Instructions are written in program order; the BDSLOT notes mark what
 * the assembler is expected to move into delay slots (assumes reorder
 * mode - TODO confirm against the build).
 */
LEAF_ENTRY(memcmp)
        xor     v0,a0,a1                # alignment bits that differ
        blt     a2,MINCMP,bytecmp       # too short, just byte cmp
        and     v0,NBPW-1
        subu    t8,zero,a0              # number of bytes til aligned
        bne     v0,zero,unalgncmp       # src and dst not alignable
/*
 * src and dst can be simultaneously word aligned
 */
        and     t8,NBPW-1
        subu    a2,t8
        beq     t8,zero,wordcmp         # already aligned
        move    v0,v1                   # lwr keeps the bytes it does not load
        lwr     v0,0(a0)
        lwr     v1,0(a1)
        addu    a0,t8
        addu    a1,t8
        bne     v0,v1,partne            # differs within the t8 lead-in bytes
/*
 * word cmp loop
 */
wordcmp:
        and     a3,a2,~(NBPW-1)         # a3 = bytes covered by whole words
        subu    a2,a3                   # a2 = tail bytes left for bytecmp
        beq     a3,zero,bytecmp
        addu    a3,a0                   # src1 endpoint
1:      lw      v0,0(a0)
        lw      v1,0(a1)
        addu    a0,NBPW                 # 1st BDSLOT
        addu    a1,NBPW                 # 2nd BDSLOT (asm does not move)
        bne     v0,v1,wordne
        bne     a0,a3,1b                # at least one more word
        b       bytecmp
/*
 * deal with simultaneously unalignable cmp by aligning one src
 */
unalgncmp:
        subu    a3,zero,a1              # calc byte cnt to get src2 aligned
        and     a3,NBPW-1
        subu    a2,a3
        beq     a3,zero,partaligncmp    # already aligned
        addu    a3,a0                   # src1 endpoint
1:      lbu     v0,0(a0)
        lbu     v1,0(a1)
        addu    a0,1
        addu    a1,1
        bne     v0,v1,cmpne
        bne     a0,a3,1b
/*
 * src unaligned, dst aligned loop
 */
partaligncmp:
        and     a3,a2,~(NBPW-1)         # a3 = bytes covered by whole words
        subu    a2,a3                   # a2 = tail bytes left for bytecmp
        beq     a3,zero,bytecmp
        addu    a3,a0                   # src1 endpoint
1:      lwr     v0,0(a0)                # unaligned word from src1
        lwl     v0,3(a0)
        lw      v1,0(a1)
        addu    a0,NBPW
        addu    a1,NBPW
        bne     v0,v1,wordne
        bne     a0,a3,1b
/*
 * brute force byte cmp loop
 */
bytecmp:
        addu    a3,a2,a0                # src1 endpoint; BDSLOT
        ble     a2,zero,cmpdone
1:      lbu     v0,0(a0)
        lbu     v1,0(a1)
        addu    a0,1
        addu    a1,1
        bne     v0,v1,cmpne
        bne     a0,a3,1b
cmpdone:
        move    v0,zero
        j       ra
/*
 * a word compare failed: step back over the bytes just compared and
 * let the byte loop find the first one that differs
 */
partne:
        move    a2,t8                   # lead-in covered t8 bytes
        b       rescan
wordne:
        li      a2,NBPW                 # word loops advance NBPW per pass
rescan:
        subu    a0,a2
        subu    a1,a2
        b       bytecmp
/*
 * v0 and v1 hold the first differing byte pair (zero extended by lbu)
 */
cmpne:
        sltu    a2,v1,v0                # a2 = 1 when the s1 byte is larger
        bne     a2,zero,9f
        li      v0,-1
        j       ra
9:
        li      v0,1
        j       ra
        .end    memcmp
|