summaryrefslogtreecommitdiffstats
path: root/private/crt32/misc/alpha/insv.s
blob: 7d09561031ef5c0776e5ff05416c3d12bdc4d117 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
 #+
 #  Copyright 1991, 1994 Digital Equipment Corporation
 # 
 #      int ots_insv(char *addr, int position, unsigned size, int value)
 # 
 #      Arbitrary bitfield insertion, longword granularity
 # 
 #      Special conventions: No stack space, r0-r1, r16-r19 and r26-r28 ONLY,
 #      no linkage pointer required.
 #      (Warning: The auto-loader potentially takes some regs across
 #      the call if this is being used in a shared lib. environment.)
 #
 #   See also: ots_ext[z]v
 #
 #   001           5 Sep 1991   KDG     Initial version
 #
 #   002          19 May 1992   KDG     Changes for common VMS/OSF sources
 #
 #   003          22 Sep 1992   KDG     Add case-sensitive name
 #
 #   004          26 Jan 1993   KDG     Add underscore
 #
 #   005          19 Apr 1994   kdg     Longword granularity version based on quadword
 #                              granularity version 004

#include        "ots_defs.hs"

 #   Totally general field insertion - arbitrary run-time field of 0-64 bits
 #   at an unknown alignment target.  Longword granularity.
 #
 #   Conceptually, this operation takes a 67 bit bit-address, which is the sum
 #   of a byte-aligned memory address and the bit offset (which is signed).
 #
 #   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 #   | | | | | | | | | | | | | | | | | | | | | | | | | |.|.|.|Q|L|W|B|
 #   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 #        | | | | | | | | | | | | | | | | | | | | | | | | | | |.|.|.|b|b|b|
 #         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 #
 #   Inputs:
 #      r16 - input address
 #      r17 - input bit offset
 #      r18 - input size
 #      r19 - input value
 #
 #   This is based on the original insert routine modified for longword
 #   granularity.  This routine could probably be improved.  (It does
 #   a "reasonable" job, but hasn't had as much attention as the quadword
 #   granularity version.  Fields contained in a single longword are
 #   roughly the same cost as the quadword granularity version.  Fields
 #   contained in two longwords in the same quadword are somewhat slower,
 #   while the two longword spanning a quadword case is roughly comparable.
 #   The 3 longword case is relatively slow [2 mispredicted branches,
 #   one unnecessary safe speculative load, could potentially use better
 #   scheduling too.])
 #
        .globl  _OtsFieldInsert
        .ent    _OtsFieldInsert
        #
        # _OtsFieldInsert - insert the low r18 bits of r19 into the bit field
        # that starts r17 bits (signed) away from byte address r16.  Memory is
        # only touched with aligned longword loads and stores (ldl/stl).
        #
        # Entry:    r16 = base byte address     r17 = signed bit offset
        #           r18 = field size in bits    r19 = value to insert
        #           r26 = return address
        # Effect:   one, two or three consecutive longwords are loaded,
        #           merged with the field and stored back.  Nothing is
        #           accessed when r18 <= 0.
        # Scratch:  r0, r1, r16, r17, r19, r27 and r28 may be overwritten.
        #           r18 and r26 are only read.  No path sets r0 to a result.
        # NOTE(review): sizes above 64 appear to be unsupported - the file
        #           header documents 0-64 bits and there is no path for a
        #           field spanning more than three longwords.
        #
_OtsFieldInsert:
        # r28 is used directly as a scratch register and the instruction
        # order below is hand-scheduled, hence noat/noreorder.
        .set    noat
        .set    noreorder
        ble     r18, noacc      # size <= 0: return without any memory access
        sra     r17, 3, r27     # r27 = byte part of bit offset (arithmetic shift keeps the sign)
        addq    r16, r27, r16   # r16 = address of the byte holding the first field bit
        and     r17, 7, r17     # r17 = bit-in-byte (0-7)
        and     r16, 3, r27     # r27 = byte-in-longword (0-3; must be clean for compares)
        bic     r16, 3, r16     # r16 = longword aligned address
        s8addq  r27, r17, r17   # r17 = 8*byte + bit = true bit offset in the longword (0-31)
        ldl     r28, (r16)      # r28 = first or only longword
        addq    r17, r18, r27   # r27 = bit offset of the bit following the field
        subq    r27, 32, r0     # r0 = r27 - 32; <= 0 means the field is contained in 1 longword
        bgt     r0, multiple    # handle multi-longword case if not
        # Common case of field in single LW - fall through.
        # Only bits <5:0> of a shift count are used, so a count of -n
        # shifts by 64-n.  Mask and value are pushed to the top of the
        # register (dropping value bits above the field size) and then
        # brought down so they occupy bits offset..end-1.
        negq    r27, r27        # r27 = -end: right shift count (64 - end)
        negq    r18, r0         # r0 = -size: left shift count (64 - size)
        not     r31, r1         # r1 = all ones
        sll     r1, r0, r1      # mask: size one-bits in the high bits
        sll     r19, r0, r19    # value: field bits in the high bits (hand interleaving for better sched)
        srl     r1, r27, r1     # mask into position
        srl     r19, r27, r19   # value into position
        bic     r28, r1, r28    # clear the field bits in the loaded longword
        bis     r28, r19, r28   # insert the new field bits
        stl     r28, (r16)      # put the longword back
noacc:  ret     r31, (r26)      # return (shared with the size <= 0 exit)

        # At this point:
        #  Field is known to be contained in at least 2 longwords
        #   r0 is bit position past end of field, -32
        #   r1 junk
        #   r16 is longword aligned
        #   r17 is bit offset in longword
        #   r18 is field size
        #   r19 is value to store
        #   r27 is bit position past end of field
        #   r28 first lw from memory
        #
multiple:
        subq    r0, 32, r27     # r27 = end - 64; <= 0 means the field is contained in 2 longwords
        ldl     r1, 4(r16)      # load the 2nd longword (safe: the field reaches into it)
        bgt     r27, three      # handle 3 longword case (rare...)
        # Two longword case.  First longword: everything from the start
        # bit upward is replaced (stl writes only the low 32 bits).
        # Second longword: its low r0 bits are replaced.
        not     r31, r27        # r27 = all ones
        sll     r27, r17, r27   # mask: ones from the start bit upward
        sll     r19, r17, r17   # r17 = value shifted up to the start bit (offset no longer needed)
        bic     r28, r27, r28   # clear bits in first longword
        bis     r28, r17, r28   # merge the low part of the field in
        srl     r1, r0, r1      # 2nd longword: shift its low r0 bits out...
        negq    r18, r27        # r27 = -size: left shift count (64 - size)
        sll     r19, r27, r19   # field bits to the high bits
        negq    r0, r27         # r27 = -r0: right shift count (64 - r0)
        srl     r19, r27, r19   # top r0 bits of the field now in bits 0..r0-1
        sll     r1, r0, r1      # ...and shift back, leaving the low r0 bits zero
        stl     r28, (r16)      # store the first longword
        bis     r1, r19, r1     # merge the high part of the field
        stl     r1, 4(r16)      # store back the second longword
        ret     r31, (r26)

        # At this point:
        #  Field is known to be contained in exactly 3 longwords
        #   r0 is bit position past end of field, -32
        #   r1 value loaded for 2nd longword (which will be totally overwritten - i.e. junk)
        #   r16 is longword aligned
        #   r17 is bit offset in longword
        #   r18 is field size
        #   r19 is value to store
        #   r27 is bit position past end of field -64
        #   r28 first lw from memory
        #
        # Three word case is roughly similar to two word case, except
        # the middle store is not a merge, just a real store, and the offsets
        # for the 3rd word need to be adjusted.  (This case has not
        # received much attention and could probably be improved by
        # at least a few instructions...)
        #
three:
        not     r31, r0         # r0 = all ones
        sll     r0, r17, r0     # mask: ones from the start bit upward
        sll     r19, r17, r1    # r1 = value shifted up to the start bit (old r1 was junk)
        bic     r28, r0, r28    # clear bits in first longword
        bis     r28, r1, r28    # merge the low part of the field in
        ldl     r1, 8(r16)      # load the 3rd longword
        lda     r0, 32(r31)     # r0 = 32
        stl     r28, (r16)      # store the first longword
        subq    r0, r17, r0     # r0 = 32 - offset = number of field bits held by the 1st longword
        srl     r19, r0, r28    # discard bits already stored; low 32 bits become the 2nd longword
        negq    r18, r0         # r0 = -size: left shift count (64 - size)
        srl     r1, r27, r1     # 3rd longword: shift its low r27 bits out...
        sll     r19, r0, r19    # field bits to the high bits
        negq    r27, r0         # r0 = -r27: right shift count (64 - r27)
        srl     r19, r0, r19    # top r27 bits of the field now in bits 0..r27-1
        sll     r1, r27, r1     # ...and shift back, leaving the low r27 bits zero
        stl     r28, 4(r16)     # store second complete longword
        bis     r1, r19, r1     # merge the high part of the field
        stl     r1, 8(r16)      # store back the third longword
        ret     r31, (r26)

        .set    at
        .set    reorder
        .end    _OtsFieldInsert