3 // Copyright 2018 The Go Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file.
// The comment lines inside these functions are asmcheck directives:
// for each listed architecture, backquoted regexps must match the
// generated assembly and -prefixed ones must not. The functions below
// verify that binary.LittleEndian reads are combined into single wide
// loads by the memcombine pass.
func load_le64(b []byte) uint64 {
	// amd64:`MOVQ\s\(.*\),`,-`MOV[BWL]\t[^$]`,-`OR`
	// s390x:`MOVDBR\s\(.*\),`
	// arm64:`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`
	// ppc64le:`MOVD\s`,-`MOV[BHW]Z`
	// ppc64:`MOVDBR\s`,-`MOV[BHW]Z`
	return binary.LittleEndian.Uint64(b)
func load_le64_idx(b []byte, idx int) uint64 {
	// amd64:`MOVQ\s\(.*\)\(.*\*1\),`,-`MOV[BWL]\t[^$]`,-`OR`
	// s390x:`MOVDBR\s\(.*\)\(.*\*1\),`
	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BHW]`
	// ppc64le:`MOVD\s`,-`MOV[BHW]Z\s`
	// ppc64:`MOVDBR\s`,-`MOV[BHW]Z\s`
	return binary.LittleEndian.Uint64(b[idx:])
func load_le32(b []byte) uint32 {
	// amd64:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR`
	// 386:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR`
	// s390x:`MOVWBR\s\(.*\),`
	// arm64:`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`
	// ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s`
	// ppc64:`MOVWBR\s`,-`MOV[BH]Z\s`
	return binary.LittleEndian.Uint32(b)
// load_le32_idx checks that an indexed little-endian 32-bit load is
// combined into one machine load. The ppc64 directive previously ended
// with a stray apostrophe instead of a closing backquote, which made
// the negative check pattern malformed.
func load_le32_idx(b []byte, idx int) uint32 {
	// amd64:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR`
	// 386:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR`
	// s390x:`MOVWBR\s\(.*\)\(.*\*1\),`
	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BH]`
	// ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s`
	// ppc64:`MOVWBR\s`,-`MOV[BH]Z\s`
	return binary.LittleEndian.Uint32(b[idx:])
}
// 16-bit little-endian and 64-bit big-endian load-combining checks
// (asmcheck directives; see note at top of file).
func load_le16(b []byte) uint16 {
	// amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR`
	// ppc64le:`MOVHZ\s`,-`MOVBZ`
	// arm64:`MOVHU\s\(R[0-9]+\),`,-`MOVB`
	// s390x:`MOVHBR\s\(.*\),`
	// ppc64:`MOVHBR\s`,-`MOVBZ`
	return binary.LittleEndian.Uint16(b)
func load_le16_idx(b []byte, idx int) uint16 {
	// amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR`
	// ppc64le:`MOVHZ\s`,-`MOVBZ`
	// ppc64:`MOVHBR\s`,-`MOVBZ`
	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
	// s390x:`MOVHBR\s\(.*\)\(.*\*1\),`
	return binary.LittleEndian.Uint16(b[idx:])
func load_be64(b []byte) uint64 {
	// amd64/v1,amd64/v2:`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
	// s390x:`MOVD\s\(.*\),`
	// arm64:`REV`,`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`,-`REVW`,-`REV16W`
	// ppc64le:`MOVDBR`,-`MOV[BHW]Z`
	// ppc64:`MOVD`,-`MOV[BHW]Z`
	return binary.BigEndian.Uint64(b)
func load_be64_idx(b []byte, idx int) uint64 {
	// amd64/v1,amd64/v2:`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
	// amd64/v3: `MOVBEQ\t\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
	// s390x:`MOVD\s\(.*\)\(.*\*1\),`
	// arm64:`REV`,`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[WHB]`,-`REVW`,-`REV16W`
	// ppc64le:`MOVDBR`,-`MOV[BHW]Z`
	// ppc64:`MOVD`,-`MOV[BHW]Z`
	return binary.BigEndian.Uint64(b[idx:])
// load_be32 checks that a big-endian 32-bit load compiles to one wide
// load plus a byte swap where required. The ppc64 negative pattern was
// missing its opening backquote (`-MOV[BH]Z``), leaving the directive
// malformed; restore it.
func load_be32(b []byte) uint32 {
	// amd64/v1,amd64/v2:`BSWAPL`,-`MOV[BW]`,-`OR`
	// s390x:`MOVWZ\s\(.*\),`
	// arm64:`REVW`,`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`,-`REV16W`
	// ppc64le:`MOVWBR`,-`MOV[BH]Z`
	// ppc64:`MOVWZ`,-`MOV[BH]Z`
	return binary.BigEndian.Uint32(b)
}
// load_be32_idx is the indexed variant of load_be32. As in load_be32,
// the ppc64 negative pattern lacked its opening backquote; restore it
// so the -`MOV[BH]Z` check is actually applied.
func load_be32_idx(b []byte, idx int) uint32 {
	// amd64/v1,amd64/v2:`BSWAPL`,-`MOV[BW]`,-`OR`
	// amd64/v3: `MOVBEL\t\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
	// s390x:`MOVWZ\s\(.*\)\(.*\*1\),`
	// arm64:`REVW`,`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[HB]`,-`REV16W`
	// ppc64le:`MOVWBR`,-`MOV[BH]Z`
	// ppc64:`MOVWZ`,-`MOV[BH]Z`
	return binary.BigEndian.Uint32(b[idx:])
}
// Big-endian 16-bit loads and explicit byte-wise little-endian
// composition (uint16(s[0]) | uint16(s[1])<<8 …) that memcombine
// should fuse into single loads.
func load_be16(b []byte) uint16 {
	// amd64:`ROLW\s\$8`,-`MOVB`,-`OR`
	// arm64:`REV16W`,`MOVHU\s\(R[0-9]+\),`,-`MOVB`
	// ppc64le:`MOVHBR`,-`MOVBZ`
	// ppc64:`MOVHZ`,-`MOVBZ`
	// s390x:`MOVHZ\s\(.*\),`,-`OR`,-`ORW`,-`SLD`,-`SLW`
	return binary.BigEndian.Uint16(b)
func load_be16_idx(b []byte, idx int) uint16 {
	// amd64:`ROLW\s\$8`,-`MOVB`,-`OR`
	// arm64:`REV16W`,`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
	// ppc64le:`MOVHBR`,-`MOVBZ`
	// ppc64:`MOVHZ`,-`MOVBZ`
	// s390x:`MOVHZ\s\(.*\)\(.*\*1\),`,-`OR`,-`ORW`,-`SLD`,-`SLW`
	return binary.BigEndian.Uint16(b[idx:])
func load_le_byte2_uint16(s []byte) uint16 {
	// arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB`
	// 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
	// amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
	// ppc64le:`MOVHZ\t\(R[0-9]+\)`,-`MOVBZ`
	// ppc64:`MOVHBR`,-`MOVBZ`
	return uint16(s[0]) | uint16(s[1])<<8
func load_le_byte2_uint16_inv(s []byte) uint16 {
	// arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB`
	// 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
	// amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
	// ppc64le:`MOVHZ\t\(R[0-9]+\)`,-`MOVBZ`
	// ppc64:`MOVHBR`,-`MOVBZ`
	return uint16(s[1])<<8 | uint16(s[0])
// load_le_byte4_uint32 checks that four byte loads OR-ed together in
// little-endian order are fused into one 32-bit load. The ppc64
// negative pattern was missing its opening backquote; restore it so
// -`MOV[BH]Z` is actually checked.
func load_le_byte4_uint32(s []byte) uint32 {
	// arm64:`MOVWU\t\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
	// 386:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
	// amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
	// ppc64le:`MOVWZ\t\(R[0-9]+\)`,-`MOV[BH]Z`
	// ppc64:`MOVWBR`,-`MOV[BH]Z`
	return uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 | uint32(s[3])<<24
}
// Same as load_le_byte4_uint32 but with the OR chain written in
// reverse order; memcombine must still recognize the pattern.
func load_le_byte4_uint32_inv(s []byte) uint32 {
	// arm64:`MOVWU\t\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
	// ppc64le:`MOVWZ`,-`MOV[BH]Z`
	// ppc64:`MOVWBR`,-`MOV[BH]Z`
	return uint32(s[3])<<24 | uint32(s[2])<<16 | uint32(s[1])<<8 | uint32(s[0])
// load_le_byte8_uint64 checks fusion of eight byte loads into one
// 64-bit load. The ppc64 negative pattern read `MOVW[WHB]Z`, which
// matches no real opcode (MOVWWZ/MOVWHZ/MOVWBZ do not exist), making
// the check vacuous; the intended pattern is `MOV[WHB]Z`.
func load_le_byte8_uint64(s []byte) uint64 {
	// arm64:`MOVD\t\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
	// amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,-`MOV[BWL]\t[^$]`,-`OR`
	// ppc64le:`MOVD\t\(R[0-9]+\)`,-`MOV[BHW]Z`
	// ppc64:`MOVDBR`,-`MOV[WHB]Z`
	return uint64(s[0]) | uint64(s[1])<<8 | uint64(s[2])<<16 | uint64(s[3])<<24 | uint64(s[4])<<32 | uint64(s[5])<<40 | uint64(s[6])<<48 | uint64(s[7])<<56
}
// Byte-wise load composition in every flavor: big-endian order,
// reversed OR chains (_inv), index-register addressing (_idx), and
// scaled-index addressing (_idx2/_idx4/_idx8). Each must collapse to a
// single wide load (plus REV* byte swaps for the big-endian forms).
func load_le_byte8_uint64_inv(s []byte) uint64 {
	// arm64:`MOVD\t\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
	// ppc64le:`MOVD`,-`MOV[WHB]Z`
	// ppc64:`MOVDBR`,-`MOV[WHB]Z`
	return uint64(s[7])<<56 | uint64(s[6])<<48 | uint64(s[5])<<40 | uint64(s[4])<<32 | uint64(s[3])<<24 | uint64(s[2])<<16 | uint64(s[1])<<8 | uint64(s[0])
func load_be_byte2_uint16(s []byte) uint16 {
	// arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
	// amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR`
	// ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ`
	// ppc64:`MOVHZ`,-`MOVBZ`
	return uint16(s[0])<<8 | uint16(s[1])
func load_be_byte2_uint16_inv(s []byte) uint16 {
	// arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
	// amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR`
	// ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ`
	// ppc64:`MOVHZ`,-`MOVBZ`
	return uint16(s[1]) | uint16(s[0])<<8
func load_be_byte4_uint32(s []byte) uint32 {
	// arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]`
	// ppc64le:`MOVWBR`,-`MOV[HB]Z`
	// ppc64:`MOVWZ`,-`MOV[HB]Z`
	return uint32(s[0])<<24 | uint32(s[1])<<16 | uint32(s[2])<<8 | uint32(s[3])
func load_be_byte4_uint32_inv(s []byte) uint32 {
	// arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]`
	// amd64/v1,amd64/v2:`MOVL\s\([A-Z]+\)`,`BSWAPL`,-`MOV[BW]`,-`OR`
	// amd64/v3: `MOVBEL`
	// ppc64le:`MOVWBR`,-`MOV[HB]Z`
	// ppc64:`MOVWZ`,-`MOV[HB]Z`
	return uint32(s[3]) | uint32(s[2])<<8 | uint32(s[1])<<16 | uint32(s[0])<<24
func load_be_byte8_uint64(s []byte) uint64 {
	// arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]`
	// ppc64le:`MOVDBR\t\(R[0-9]+\)`,-`MOV[BHW]Z`
	// ppc64:`MOVD`,-`MOV[WHB]Z`
	return uint64(s[0])<<56 | uint64(s[1])<<48 | uint64(s[2])<<40 | uint64(s[3])<<32 | uint64(s[4])<<24 | uint64(s[5])<<16 | uint64(s[6])<<8 | uint64(s[7])
func load_be_byte8_uint64_inv(s []byte) uint64 {
	// arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]`
	// amd64/v1,amd64/v2:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
	// amd64/v3: `MOVBEQ`
	// ppc64le:`MOVDBR\t\(R[0-9]+\)`,-`MOV[BHW]Z`
	// ppc64:`MOVD`,-`MOV[BHW]Z`
	return uint64(s[7]) | uint64(s[6])<<8 | uint64(s[5])<<16 | uint64(s[4])<<24 | uint64(s[3])<<32 | uint64(s[2])<<40 | uint64(s[1])<<48 | uint64(s[0])<<56
func load_le_byte2_uint16_idx(s []byte, idx int) uint16 {
	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB`
	// 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB`
	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
	// ppc64le:`MOVHZ`,-`MOVBZ`
	// ppc64:`MOVHBR`,-`MOVBZ`
	return uint16(s[idx]) | uint16(s[idx+1])<<8
func load_le_byte2_uint16_idx_inv(s []byte, idx int) uint16 {
	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB`
	// 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB`
	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
	// ppc64le:`MOVHZ`,-`MOVBZ`
	// ppc64:`MOVHBR`,-`MOVBZ`
	return uint16(s[idx+1])<<8 | uint16(s[idx])
func load_le_byte4_uint32_idx(s []byte, idx int) uint32 {
	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
	// amd64:`MOVL\s\([A-Z]+\)\([A-Z]+`,-`MOV[BW]`,-`OR`
	return uint32(s[idx]) | uint32(s[idx+1])<<8 | uint32(s[idx+2])<<16 | uint32(s[idx+3])<<24
func load_le_byte4_uint32_idx_inv(s []byte, idx int) uint32 {
	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
	return uint32(s[idx+3])<<24 | uint32(s[idx+2])<<16 | uint32(s[idx+1])<<8 | uint32(s[idx])
func load_le_byte8_uint64_idx(s []byte, idx int) uint64 {
	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
	// amd64:`MOVQ\s\([A-Z]+\)\([A-Z]+`,-`MOV[BWL]`,-`OR`
	return uint64(s[idx]) | uint64(s[idx+1])<<8 | uint64(s[idx+2])<<16 | uint64(s[idx+3])<<24 | uint64(s[idx+4])<<32 | uint64(s[idx+5])<<40 | uint64(s[idx+6])<<48 | uint64(s[idx+7])<<56
func load_le_byte8_uint64_idx_inv(s []byte, idx int) uint64 {
	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
	return uint64(s[idx+7])<<56 | uint64(s[idx+6])<<48 | uint64(s[idx+5])<<40 | uint64(s[idx+4])<<32 | uint64(s[idx+3])<<24 | uint64(s[idx+2])<<16 | uint64(s[idx+1])<<8 | uint64(s[idx])
func load_be_byte2_uint16_idx(s []byte, idx int) uint16 {
	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
	return uint16(s[idx])<<8 | uint16(s[idx+1])
func load_be_byte2_uint16_idx_inv(s []byte, idx int) uint16 {
	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
	return uint16(s[idx+1]) | uint16(s[idx])<<8
func load_be_byte4_uint32_idx(s []byte, idx int) uint32 {
	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,`REVW`,-`ORR`,-`MOV[BH]`,-`REV16W`
	return uint32(s[idx])<<24 | uint32(s[idx+1])<<16 | uint32(s[idx+2])<<8 | uint32(s[idx+3])
func load_be_byte8_uint64_idx(s []byte, idx int) uint64 {
	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,`REV`,-`ORR`,-`MOV[BHW]`,-`REVW`,-`REV16W`
	return uint64(s[idx])<<56 | uint64(s[idx+1])<<48 | uint64(s[idx+2])<<40 | uint64(s[idx+3])<<32 | uint64(s[idx+4])<<24 | uint64(s[idx+5])<<16 | uint64(s[idx+6])<<8 | uint64(s[idx+7])
func load_le_byte2_uint16_idx2(s []byte, idx int) uint16 {
	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`ORR`,-`MOVB`
	return uint16(s[idx<<1]) | uint16(s[(idx<<1)+1])<<8
func load_le_byte2_uint16_idx2_inv(s []byte, idx int) uint16 {
	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`ORR`,-`MOVB`
	return uint16(s[(idx<<1)+1])<<8 | uint16(s[idx<<1])
func load_le_byte4_uint32_idx4(s []byte, idx int) uint32 {
	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`ORR`,-`MOV[BH]`
	return uint32(s[idx<<2]) | uint32(s[(idx<<2)+1])<<8 | uint32(s[(idx<<2)+2])<<16 | uint32(s[(idx<<2)+3])<<24
func load_le_byte4_uint32_idx4_inv(s []byte, idx int) uint32 {
	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`ORR`,-`MOV[BH]`
	return uint32(s[(idx<<2)+3])<<24 | uint32(s[(idx<<2)+2])<<16 | uint32(s[(idx<<2)+1])<<8 | uint32(s[idx<<2])
func load_le_byte8_uint64_idx8(s []byte, idx int) uint64 {
	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`ORR`,-`MOV[BHW]`
	return uint64(s[idx<<3]) | uint64(s[(idx<<3)+1])<<8 | uint64(s[(idx<<3)+2])<<16 | uint64(s[(idx<<3)+3])<<24 | uint64(s[(idx<<3)+4])<<32 | uint64(s[(idx<<3)+5])<<40 | uint64(s[(idx<<3)+6])<<48 | uint64(s[(idx<<3)+7])<<56
func load_le_byte8_uint64_idx8_inv(s []byte, idx int) uint64 {
	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`ORR`,-`MOV[BHW]`
	return uint64(s[(idx<<3)+7])<<56 | uint64(s[(idx<<3)+6])<<48 | uint64(s[(idx<<3)+5])<<40 | uint64(s[(idx<<3)+4])<<32 | uint64(s[(idx<<3)+3])<<24 | uint64(s[(idx<<3)+2])<<16 | uint64(s[(idx<<3)+1])<<8 | uint64(s[idx<<3])
func load_be_byte2_uint16_idx2(s []byte, idx int) uint16 {
	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,`REV16W`,-`ORR`,-`MOVB`
	return uint16(s[idx<<1])<<8 | uint16(s[(idx<<1)+1])
func load_be_byte2_uint16_idx2_inv(s []byte, idx int) uint16 {
	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,`REV16W`,-`ORR`,-`MOVB`
	return uint16(s[(idx<<1)+1]) | uint16(s[idx<<1])<<8
func load_be_byte4_uint32_idx4(s []byte, idx int) uint32 {
	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,`REVW`,-`ORR`,-`MOV[BH]`,-`REV16W`
	return uint32(s[idx<<2])<<24 | uint32(s[(idx<<2)+1])<<16 | uint32(s[(idx<<2)+2])<<8 | uint32(s[(idx<<2)+3])
func load_be_byte8_uint64_idx8(s []byte, idx int) uint64 {
	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,`REV`,-`ORR`,-`MOV[BHW]`,-`REVW`,-`REV16W`
	return uint64(s[idx<<3])<<56 | uint64(s[(idx<<3)+1])<<48 | uint64(s[(idx<<3)+2])<<40 | uint64(s[(idx<<3)+3])<<32 | uint64(s[(idx<<3)+4])<<24 | uint64(s[(idx<<3)+5])<<16 | uint64(s[(idx<<3)+6])<<8 | uint64(s[(idx<<3)+7])
// Some tougher cases for the memcombine pass.
func reassoc_load_uint32(b []byte) uint32 {
	// Parenthesized differently from the canonical OR chain; memcombine
	// must reassociate before combining.
	// amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
	return (uint32(b[0]) | uint32(b[1])<<8) | (uint32(b[2])<<16 | uint32(b[3])<<24)
func extrashift_load_uint32(b []byte) uint32 {
	// All shifts offset by 2: expect one wide load plus a single SHLL.
	// amd64:`MOVL\s\([A-Z]+\)`,`SHLL\s[$]2`,-`MOV[BW]`,-`OR`
	return uint32(b[0])<<2 | uint32(b[1])<<10 | uint32(b[2])<<18 | uint32(b[3])<<26
func outoforder_load_uint32(b []byte) uint32 {
	// OR operands written out of byte order; must still combine.
	// amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`
	return uint32(b[0]) | uint32(b[2])<<16 | uint32(b[1])<<8 | uint32(b[3])<<24
func extraOr_load_uint32(b []byte, x, y uint32) uint32 {
	// amd64:`ORL\s\([A-Z]+\)`,-`MOV[BW]`
	return x | binary.LittleEndian.Uint32(b) | y
	// x | uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 | y
	// doesn't work because it associates in a way that memcombine can't detect it.
// Check load combining across function calls.
func fcall_byte(a [2]byte) [2]byte {
	return fcall_byte(fcall_byte(a)) // amd64:`MOVW`
func fcall_uint16(a [2]uint16) [2]uint16 {
	return fcall_uint16(fcall_uint16(a)) // amd64:`MOVL`
func fcall_uint32(a [2]uint32) [2]uint32 {
	return fcall_uint32(fcall_uint32(a)) // amd64:`MOVQ`
// We want to merge load+op in the first function, but not in the
// second. See Issue 19595.
func load_op_merge(p, q *int) {
	x := *p // amd64:`ADDQ\t\(`
	*q += x // The combined nilcheck and load would normally have this line number, but we want that combined operation to have the line number of the nil check instead (see #33724).
func load_op_no_merge(p, q *int) {
	// NOTE(review): the declaration of x appears to have been elided in
	// this excerpt; the loop body reads it each iteration, which is what
	// prevents the load+op merge here.
	for i := 0; i < 10; i++ {
		*q += x // amd64:`ADDQ\t[A-Z]`
// Make sure offsets are folded into loads and stores.
func offsets_fold(_, a [20]byte) (b [20]byte) {
	// arm64:`MOVD\tcommand-line-arguments\.a\+[0-9]+\(FP\), R[0-9]+`,`MOVD\tR[0-9]+, command-line-arguments\.b\+[0-9]+\(FP\)`
// Make sure we don't put pointers in SSE registers across safe
func safe_point(p, q *[2]*int) {
	a, b := p[0], p[1] // amd64:-`MOVUPS`
	q[0], q[1] = a, b // amd64:-`MOVUPS`
// store_le64 checks that a little-endian 64-bit PutUint64 becomes one
// wide store with no shift instructions. The ppc64 negative pattern
// was missing its opening backquote (`-MOVB\s``); restore it so the
// check is well-formed.
func store_le64(b []byte, x uint64) {
	// amd64:`MOVQ\s.*\(.*\)$`,-`SHR.`
	// arm64:`MOVD`,-`MOV[WBH]`
	// ppc64le:`MOVD\s`,-`MOV[BHW]\s`
	// ppc64:`MOVDBR`,-`MOVB\s`
	// s390x:`MOVDBR\s.*\(.*\)$`
	binary.LittleEndian.PutUint64(b, x)
}
// Little-endian store combining with index registers, constant
// operands, and store-of-load pairs.
func store_le64_idx(b []byte, x uint64, idx int) {
	// amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.`
	// arm64:`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`
	// ppc64le:`MOVD\s`,-`MOV[BHW]\s`
	// ppc64:`MOVDBR`,-`MOVBZ`
	// s390x:`MOVDBR\s.*\(.*\)\(.*\*1\)$`
	binary.LittleEndian.PutUint64(b[idx:], x)
func store_le64_idx2(dst []byte, d, length, offset int) []byte {
	a := dst[d : d+length]
	// NOTE(review): the line defining b (a subslice read below) is not
	// visible in this excerpt.
	// amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.`
	binary.LittleEndian.PutUint64(a, binary.LittleEndian.Uint64(b))
func store_le64_idx_const(b []byte, idx int) {
	// amd64:`MOVQ\s\$123, \(.*\)\(.*\*1\)$`
	binary.LittleEndian.PutUint64(b[idx:], 123)
func store_le64_load(b []byte, x *[8]byte) {
	// ppc64le:`MOVD\s`,-`MOV[BWH]Z`
	// s390x:-`MOVB`,-`MOV[WH]BR`
	binary.LittleEndian.PutUint64(b, binary.LittleEndian.Uint64(x[:]))
func store_le32(b []byte, x uint32) {
	// arm64:`MOVW`,-`MOV[BH]`
	// s390x:`MOVWBR\s.*\(.*\)$`
	binary.LittleEndian.PutUint32(b, x)
func store_le32_idx(b []byte, x uint32, idx int) {
	// arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`
	// s390x:`MOVWBR\s.*\(.*\)\(.*\*1\)$`
	binary.LittleEndian.PutUint32(b[idx:], x)
// store_le32_idx_const checks that a constant 32-bit store is emitted
// as a single immediate store. The ppc64x negative pattern was missing
// its opening backquote (`-MOV[HB]``); restore it.
func store_le32_idx_const(b []byte, idx int) {
	// amd64:`MOVL\s\$123, \(.*\)\(.*\*1\)$`
	// ppc64x:`MOVW\s`,-`MOV[HB]`
	binary.LittleEndian.PutUint32(b[idx:], 123)
}
// Store combining: 16-bit puts, big-endian puts (byte swap folded into
// the store), and explicit byte-wise stores (b[i], b[i+1], … = …)
// in plain, inverted, indexed, and scaled-index forms.
func store_le16(b []byte, x uint16) {
	// arm64:`MOVH`,-`MOVB`
	// s390x:`MOVHBR\s.*\(.*\)$`
	binary.LittleEndian.PutUint16(b, x)
func store_le16_idx(b []byte, x uint16, idx int) {
	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
	// s390x:`MOVHBR\s.*\(.*\)\(.*\*1\)$`
	binary.LittleEndian.PutUint16(b[idx:], x)
func store_le16_idx_const(b []byte, idx int) {
	// amd64:`MOVW\s\$123, \(.*\)\(.*\*1\)$`
	binary.LittleEndian.PutUint16(b[idx:], 123)
func store_be64(b []byte, x uint64) {
	// amd64/v1,amd64/v2:`BSWAPQ`,-`SHR.`
	// amd64/v3: `MOVBEQ`
	// arm64:`MOVD`,`REV`,-`MOV[WBH]`,-`REVW`,-`REV16W`
	// s390x:`MOVD\s.*\(.*\)$`,-`SRW\s`,-`SRD\s`
	binary.BigEndian.PutUint64(b, x)
func store_be64_idx(b []byte, x uint64, idx int) {
	// amd64/v1,amd64/v2:`BSWAPQ`,-`SHR.`
	// amd64/v3:`MOVBEQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
	// arm64:`REV`,`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`,-`REV16W`,-`REVW`
	// s390x:`MOVD\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
	binary.BigEndian.PutUint64(b[idx:], x)
func store_be32(b []byte, x uint32) {
	// amd64/v1,amd64/v2:`BSWAPL`,-`SHR.`
	// arm64:`MOVW`,`REVW`,-`MOV[BH]`,-`REV16W`
	// s390x:`MOVW\s.*\(.*\)$`,-`SRW\s`,-`SRD\s`
	binary.BigEndian.PutUint32(b, x)
func store_be64_load(b, x *[8]byte) {
	binary.BigEndian.PutUint64(b[:], binary.BigEndian.Uint64(x[:]))
func store_be32_load(b, x *[8]byte) {
	binary.BigEndian.PutUint32(b[:], binary.BigEndian.Uint32(x[:]))
func store_be32_idx(b []byte, x uint32, idx int) {
	// amd64/v1,amd64/v2:`BSWAPL`,-`SHR.`
	// amd64/v3:`MOVBEL\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
	// arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`,-`REV16W`
	// s390x:`MOVW\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
	binary.BigEndian.PutUint32(b[idx:], x)
func store_be16(b []byte, x uint16) {
	// amd64/v1,amd64/v2:`ROLW\s\$8`,-`SHR.`
	// amd64/v3:`MOVBEW`,-`ROLW`
	// arm64:`MOVH`,`REV16W`,-`MOVB`
	// s390x:`MOVH\s.*\(.*\)$`,-`SRW\s`,-`SRD\s`
	binary.BigEndian.PutUint16(b, x)
func store_be16_idx(b []byte, x uint16, idx int) {
	// amd64/v1,amd64/v2:`ROLW\s\$8`,-`SHR.`
	// amd64/v3:`MOVBEW\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`MOVB`
	// s390x:`MOVH\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
	binary.BigEndian.PutUint16(b[idx:], x)
func store_le_byte_2(b []byte, val uint16) {
	// arm64:`MOVH\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`
	// 386:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
	// amd64:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
	// ppc64le:`MOVH\s`,-`MOVB`
	// ppc64:`MOVHBR`,-`MOVB`
	b[1], b[2] = byte(val), byte(val>>8)
func store_le_byte_2_inv(b []byte, val uint16) {
	// 386:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
	// amd64:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
	// ppc64le:`MOVH\s`,-`MOVB`
	// ppc64:`MOVHBR`,-`MOVB`
	b[2], b[1] = byte(val>>8), byte(val)
func store_le_byte_4(b []byte, val uint32) {
	// arm64:`MOVW\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`
	// 386:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`
	// amd64:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`
	b[1], b[2], b[3], b[4] = byte(val), byte(val>>8), byte(val>>16), byte(val>>24)
func store_le_byte_8(b []byte, val uint64) {
	// arm64:`MOVD\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`MOVW`
	// amd64:`MOVQ\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`,-`MOVL`
	// ppc64le:`MOVD\s`,-`MOVW`
	b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8] = byte(val), byte(val>>8), byte(val>>16), byte(val>>24), byte(val>>32), byte(val>>40), byte(val>>48), byte(val>>56)
func store_be_byte_2(b []byte, val uint16) {
	// arm64:`REV16W`,`MOVH\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`
	// amd64/v1,amd64/v2:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
	// amd64/v3: `MOVBEW`
	b[1], b[2] = byte(val>>8), byte(val)
func store_be_byte_4(b []byte, val uint32) {
	// arm64:`REVW`,`MOVW\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`REV16W`
	// amd64/v1,amd64/v2:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`
	// amd64/v3:`MOVBEL\s[A-Z]+,\s1\([A-Z]+\)`
	b[1], b[2], b[3], b[4] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
func store_be_byte_8(b []byte, val uint64) {
	// arm64:`REV`,`MOVD\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`MOVW`,-`REV16W`,-`REVW`
	// amd64/v1,amd64/v2:`MOVQ\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`,-`MOVL`
	// amd64/v3:`MOVBEQ\s[A-Z]+,\s1\([A-Z]+\)`, -`MOVBEL`
	b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8] = byte(val>>56), byte(val>>48), byte(val>>40), byte(val>>32), byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
func store_le_byte_2_idx(b []byte, idx int, val uint16) {
	_, _ = b[idx+0], b[idx+1]
	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
	// 386:`MOVW\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+`,-`MOVB`
	b[idx+1], b[idx+0] = byte(val>>8), byte(val)
func store_le_byte_2_idx_inv(b []byte, idx int, val uint16) {
	_, _ = b[idx+0], b[idx+1]
	// 386:`MOVW\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+`,-`MOVB`
	b[idx+0], b[idx+1] = byte(val), byte(val>>8)
func store_le_byte_4_idx(b []byte, idx int, val uint32) {
	_, _, _, _ = b[idx+0], b[idx+1], b[idx+2], b[idx+3]
	// arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`,-`MOVH`
	b[idx+3], b[idx+2], b[idx+1], b[idx+0] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
func store_be_byte_2_idx(b []byte, idx int, val uint16) {
	_, _ = b[idx+0], b[idx+1]
	// arm64:`REV16W`,`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
	b[idx+0], b[idx+1] = byte(val>>8), byte(val)
func store_be_byte_4_idx(b []byte, idx int, val uint32) {
	_, _, _, _ = b[idx+0], b[idx+1], b[idx+2], b[idx+3]
	// arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`REV16W`
	b[idx+0], b[idx+1], b[idx+2], b[idx+3] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
func store_be_byte_2_idx2(b []byte, idx int, val uint16) {
	_, _ = b[(idx<<1)+0], b[(idx<<1)+1]
	// arm64:`REV16W`,`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
	b[(idx<<1)+0], b[(idx<<1)+1] = byte(val>>8), byte(val)
func store_le_byte_2_idx2(b []byte, idx int, val uint16) {
	_, _ = b[(idx<<1)+0], b[(idx<<1)+1]
	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
	b[(idx<<1)+1], b[(idx<<1)+0] = byte(val>>8), byte(val)
func store_be_byte_4_idx4(b []byte, idx int, val uint32) {
	_, _, _, _ = b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3]
	// arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`MOVB`,-`MOVH`,-`REV16W`
	b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
func store_le_byte_4_idx4_inv(b []byte, idx int, val uint32) {
	_, _, _, _ = b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3]
	// arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`MOVB`,-`MOVH`
	b[(idx<<2)+3], b[(idx<<2)+2], b[(idx<<2)+1], b[(idx<<2)+0] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
// Check that zero stores are combined into larger stores
func zero_byte_2(b1, b2 []byte) {
	// bounds checks to guarantee safety of writes below
	// NOTE(review): the zeroing assignments themselves appear to have
	// been elided from this excerpt; the directives annotate them.
	// arm64:"MOVH\tZR",-"MOVB"
	// amd64:`MOVW\s[$]0,\s\([A-Z]+\)`
	// 386:`MOVW\s[$]0,\s\([A-Z]+\)`
	// arm64:"MOVH\tZR",-"MOVB"
	// 386:`MOVW\s[$]0,\s\([A-Z]+\)`
	// amd64:`MOVW\s[$]0,\s\([A-Z]+\)`
func zero_byte_4(b1, b2 []byte) {
	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
	// amd64:`MOVL\s[$]0,\s\([A-Z]+\)`
	// 386:`MOVL\s[$]0,\s\([A-Z]+\)`
	b1[0], b1[1], b1[2], b1[3] = 0, 0, 0, 0
	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
	b2[2], b2[3], b2[1], b2[0] = 0, 0, 0, 0
func zero_byte_8(b []byte) {
	b[0], b[1], b[2], b[3] = 0, 0, 0, 0
	b[4], b[5], b[6], b[7] = 0, 0, 0, 0 // arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
func zero_byte_16(b []byte) {
	b[0], b[1], b[2], b[3] = 0, 0, 0, 0
	b[4], b[5], b[6], b[7] = 0, 0, 0, 0
	b[8], b[9], b[10], b[11] = 0, 0, 0, 0
	b[12], b[13], b[14], b[15] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
func zero_byte_30(a *[30]byte) {
	*a = [30]byte{} // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
func zero_byte_39(a *[39]byte) {
	*a = [39]byte{} // arm64:"MOVD",-"MOVB",-"MOVH",-"MOVW"
func zero_byte_2_idx(b []byte, idx int) {
	_, _ = b[idx+0], b[idx+1]
	// arm64:`MOVH\sZR,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
	b[idx+0], b[idx+1] = 0, 0
func zero_byte_2_idx2(b []byte, idx int) {
	_, _ = b[(idx<<1)+0], b[(idx<<1)+1]
	// arm64:`MOVH\sZR,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
	b[(idx<<1)+0], b[(idx<<1)+1] = 0, 0
func zero_uint16_2(h1, h2 []uint16) {
	// NOTE(review): the zeroing assignments appear to have been elided
	// in this excerpt, as in zero_byte_2.
	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
	// amd64:`MOVL\s[$]0,\s\([A-Z]+\)`
	// 386:`MOVL\s[$]0,\s\([A-Z]+\)`
	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
	// amd64:`MOVL\s[$]0,\s\([A-Z]+\)`
	// 386:`MOVL\s[$]0,\s\([A-Z]+\)`
func zero_uint16_4(h1, h2 []uint16) {
	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
	// amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
	h1[0], h1[1], h1[2], h1[3] = 0, 0, 0, 0
	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
	h2[2], h2[3], h2[1], h2[0] = 0, 0, 0, 0
func zero_uint16_8(h []uint16) {
	h[0], h[1], h[2], h[3] = 0, 0, 0, 0
	h[4], h[5], h[6], h[7] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
func zero_uint32_2(w1, w2 []uint32) {
	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
	// amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
	// amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
func zero_uint32_4(w1, w2 []uint32) {
	w1[0], w1[1], w1[2], w1[3] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
	w2[2], w2[3], w2[1], w2[0] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
func zero_uint64_2(d1, d2 []uint64) {
	d1[0], d1[1] = 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
	d2[1], d2[0] = 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
func loadstore(p, q *[4]uint8) {
	// amd64:"MOVL",-"MOVB"
	// arm64:"MOVWU",-"MOVBU"
	x0, x1, x2, x3 := q[0], q[1], q[2], q[3]
	// amd64:"MOVL",-"MOVB"
	// arm64:"MOVW",-"MOVB"
	p[0], p[1], p[2], p[3] = x0, x1, x2, x3
853 func loadstore2(p, q *S1) {
854 // amd64:"MOVL",-"MOVWLZX"
855 // arm64:"MOVWU",-"MOVH"
857 // amd64:"MOVL",-"MOVW"
858 // arm64:"MOVW",-"MOVH"