3 // Copyright 2018 The Go Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file.
9 // This file contains codegen tests related to arithmetic
10 // simplifications and optimizations on integer types.
11 // For codegen tests on float types, see floats.go.
13 // ----------------- //
15 // ----------------- //
17 func AddLargeConst(a uint64, out []uint64) {
18 // ppc64x/power10:"ADD\t[$]4294967296,"
19 // ppc64x/power9:"MOVD\t[$]i64.0000000100000000[(]SB[)]", "ADD\tR[0-9]*"
20 // ppc64x/power8:"MOVD\t[$]i64.0000000100000000[(]SB[)]", "ADD\tR[0-9]*"
21 out[0] = a + 0x100000000
22 // ppc64x/power10:"ADD\t[$]-8589934592,"
23 // ppc64x/power9:"MOVD\t[$]i64.fffffffe00000000[(]SB[)]", "ADD\tR[0-9]*"
24 // ppc64x/power8:"MOVD\t[$]i64.fffffffe00000000[(]SB[)]", "ADD\tR[0-9]*"
25 out[1] = a + 0xFFFFFFFE00000000
28 // ----------------- //
30 // ----------------- //
34 func SubMem(arr []int, b, c, d int) int {
35 // 386:`SUBL\s[A-Z]+,\s8\([A-Z]+\)`
36 // amd64:`SUBQ\s[A-Z]+,\s16\([A-Z]+\)`
38 // 386:`SUBL\s[A-Z]+,\s12\([A-Z]+\)`
39 // amd64:`SUBQ\s[A-Z]+,\s24\([A-Z]+\)`
41 // 386:`DECL\s16\([A-Z]+\)`
43 // 386:`ADDL\s[$]-20,\s20\([A-Z]+\)`
45 // 386:`SUBL\s\([A-Z]+\)\([A-Z]+\*4\),\s[A-Z]+`
47 // 386:`SUBL\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+\*4\)`
49 // 386:`ADDL\s[$]-15,\s\([A-Z]+\)\([A-Z]+\*4\)`
51 // 386:`DECL\s\([A-Z]+\)\([A-Z]+\*4\)`
53 // amd64:`DECQ\s64\([A-Z]+\)`
57 return arr[0] - arr[1]
60 func SubFromConst(a int) int {
61 // ppc64x: `SUBC\tR[0-9]+,\s[$]40,\sR`
66 func SubFromConstNeg(a int) int {
67 // ppc64x: `ADD\t[$]40,\sR[0-9]+,\sR`
72 func SubSubFromConst(a int) int {
73 // ppc64x: `ADD\t[$]20,\sR[0-9]+,\sR`
78 func AddSubFromConst(a int) int {
79 // ppc64x: `SUBC\tR[0-9]+,\s[$]60,\sR`
84 func NegSubFromConst(a int) int {
85 // ppc64x: `ADD\t[$]-20,\sR[0-9]+,\sR`
90 func NegAddFromConstNeg(a int) int {
91 // ppc64x: `SUBC\tR[0-9]+,\s[$]40,\sR`
96 func SubSubNegSimplify(a, b int) int {
103 func SubAddSimplify(a, b int) int {
104 // amd64:-"SUBQ",-"ADDQ"
105 // ppc64x:-"SUB",-"ADD"
110 func SubAddSimplify2(a, b, c int) (int, int, int, int, int, int) {
112 r := (a + b) - (a + c)
114 r1 := (a + b) - (c + a)
116 r2 := (b + a) - (a + c)
118 r3 := (b + a) - (c + a)
120 r4 := (a - c) + (c + b)
122 r5 := (a - c) + (b + c)
123 return r, r1, r2, r3, r4, r5
126 func SubAddNegSimplify(a, b int) int {
127 // amd64:"NEGQ",-"ADDQ",-"SUBQ"
128 // ppc64x:"NEG",-"ADD",-"SUB"
133 func AddAddSubSimplify(a, b, c int) int {
136 r := a + (b + (c - a))
140 // -------------------- //
142 // -------------------- //
144 func Pow2Muls(n1, n2 int) (int, int) {
145 // amd64:"SHLQ\t[$]5",-"IMULQ"
146 // 386:"SHLL\t[$]5",-"IMULL"
147 // arm:"SLL\t[$]5",-"MUL"
148 // arm64:"LSL\t[$]5",-"MUL"
149 // ppc64x:"SLD\t[$]5",-"MUL"
152 // amd64:"SHLQ\t[$]6",-"IMULQ"
153 // 386:"SHLL\t[$]6",-"IMULL"
154 // arm:"SLL\t[$]6",-"MUL"
155 // arm64:`NEG\sR[0-9]+<<6,\sR[0-9]+`,-`LSL`,-`MUL`
156 // ppc64x:"SLD\t[$]6","NEG\\sR[0-9]+,\\sR[0-9]+",-"MUL"
162 func Mul_96(n int) int {
163 // amd64:`SHLQ\t[$]5`,`LEAQ\t\(.*\)\(.*\*2\),`,-`IMULQ`
164 // 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL`
165 // arm64:`LSL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
166 // arm:`SLL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
167 // s390x:`SLD\t[$]5`,`SLD\t[$]6`,-`MULLD`
171 func Mul_n120(n int) int {
172 // s390x:`SLD\t[$]3`,`SLD\t[$]7`,-`MULLD`
176 func MulMemSrc(a []uint32, b []float32) {
177 // 386:`IMULL\s4\([A-Z]+\),\s[A-Z]+`
179 // 386/sse2:`MULSS\s4\([A-Z]+\),\sX[0-9]+`
180 // amd64:`MULSS\s4\([A-Z]+\),\sX[0-9]+`
184 // Multiplications merging tests
186 func MergeMuls1(n int) int {
187 // amd64:"IMUL3Q\t[$]46"
188 // 386:"IMUL3L\t[$]46"
189 // ppc64x:"MULLD\t[$]46"
190 return 15*n + 31*n // 46n
193 func MergeMuls2(n int) int {
194 // amd64:"IMUL3Q\t[$]23","(ADDQ\t[$]29)|(LEAQ\t29)"
195 // 386:"IMUL3L\t[$]23","ADDL\t[$]29"
196 // ppc64x/power9:"MADDLD",-"MULLD\t[$]23",-"ADD\t[$]29"
197 // ppc64x/power8:"MULLD\t[$]23","ADD\t[$]29"
198 return 5*n + 7*(n+1) + 11*(n+2) // 23n + 29
201 func MergeMuls3(a, n int) int {
202 // amd64:"ADDQ\t[$]19",-"IMULQ\t[$]19"
203 // 386:"ADDL\t[$]19",-"IMULL\t[$]19"
204 // ppc64x:"ADD\t[$]19",-"MULLD\t[$]19"
205 return a*n + 19*n // (a+19)n
208 func MergeMuls4(n int) int {
209 // amd64:"IMUL3Q\t[$]14"
210 // 386:"IMUL3L\t[$]14"
211 // ppc64x:"MULLD\t[$]14"
212 return 23*n - 9*n // 14n
215 func MergeMuls5(a, n int) int {
216 // amd64:"ADDQ\t[$]-19",-"IMULQ\t[$]19"
217 // 386:"ADDL\t[$]-19",-"IMULL\t[$]19"
218 // ppc64x:"ADD\t[$]-19",-"MULLD\t[$]19"
219 return a*n - 19*n // (a-19)n
226 func DivMemSrc(a []float64) {
227 // 386/sse2:`DIVSD\s8\([A-Z]+\),\sX[0-9]+`
228 // amd64:`DIVSD\s8\([A-Z]+\),\sX[0-9]+`
232 func Pow2Divs(n1 uint, n2 int) (uint, int) {
233 // 386:"SHRL\t[$]5",-"DIVL"
234 // amd64:"SHRQ\t[$]5",-"DIVQ"
235 // arm:"SRL\t[$]5",-".*udiv"
236 // arm64:"LSR\t[$]5",-"UDIV"
238 a := n1 / 32 // unsigned
240 // amd64:"SARQ\t[$]6",-"IDIVQ"
241 // 386:"SARL\t[$]6",-"IDIVL"
242 // arm:"SRA\t[$]6",-".*udiv"
243 // arm64:"ASR\t[$]6",-"SDIV"
245 b := n2 / 64 // signed
250 // Check that constant divisions get turned into MULs
251 func ConstDivs(n1 uint, n2 int) (uint, int) {
252 // amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
253 // 386:"MOVL\t[$]-252645135","MULL",-"DIVL"
254 // arm64:`MOVD`,`UMULH`,-`DIV`
255 // arm:`MOVW`,`MUL`,-`.*udiv`
256 a := n1 / 17 // unsigned
258 // amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
259 // 386:"MOVL\t[$]-252645135","IMULL",-"IDIVL"
260 // arm64:`SMULH`,-`DIV`
261 // arm:`MOVW`,`MUL`,-`.*udiv`
262 b := n2 / 17 // signed
267 func FloatDivs(a []float32) float32 {
268 // amd64:`DIVSS\s8\([A-Z]+\),\sX[0-9]+`
269 // 386/sse2:`DIVSS\s8\([A-Z]+\),\sX[0-9]+`
273 func Pow2Mods(n1 uint, n2 int) (uint, int) {
274 // 386:"ANDL\t[$]31",-"DIVL"
275 // amd64:"ANDL\t[$]31",-"DIVQ"
276 // arm:"AND\t[$]31",-".*udiv"
277 // arm64:"AND\t[$]31",-"UDIV"
279 a := n1 % 32 // unsigned
281 // 386:"SHRL",-"IDIVL"
282 // amd64:"SHRQ",-"IDIVQ"
283 // arm:"SRA",-".*udiv"
284 // arm64:"ASR",-"REM"
286 b := n2 % 64 // signed
291 // Check that signed divisibility checks get converted to AND on low bits
292 func Pow2DivisibleSigned(n1, n2 int) (bool, bool) {
293 // 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
294 // amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
295 // arm:"AND\t[$]63",-".*udiv",-"SRA"
296 // arm64:"TST\t[$]63",-"UDIV",-"ASR",-"AND"
297 // ppc64x:"RLDICL",-"SRAD"
298 a := n1%64 == 0 // signed divisible
300 // 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
301 // amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
302 // arm:"AND\t[$]63",-".*udiv",-"SRA"
303 // arm64:"TST\t[$]63",-"UDIV",-"ASR",-"AND"
304 // ppc64x:"RLDICL",-"SRAD"
305 b := n2%64 != 0 // signed indivisible
310 // Check that constant modulo divs get turned into MULs
311 func ConstMods(n1 uint, n2 int) (uint, int) {
312 // amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
313 // 386:"MOVL\t[$]-252645135","MULL",-"DIVL"
314 // arm64:`MOVD`,`UMULH`,-`DIV`
315 // arm:`MOVW`,`MUL`,-`.*udiv`
316 a := n1 % 17 // unsigned
318 // amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
319 // 386:"MOVL\t[$]-252645135","IMULL",-"IDIVL"
320 // arm64:`SMULH`,-`DIV`
321 // arm:`MOVW`,`MUL`,-`.*udiv`
322 b := n2 % 17 // signed
327 // Check that divisibility checks x%c==0 are converted to MULs and rotates
328 func DivisibleU(n uint) (bool, bool) {
329 // amd64:"MOVQ\t[$]-6148914691236517205","IMULQ","ROLQ\t[$]63",-"DIVQ"
330 // 386:"IMUL3L\t[$]-1431655765","ROLL\t[$]31",-"DIVL"
331 // arm64:"MOVD\t[$]-6148914691236517205","MOVD\t[$]3074457345618258602","MUL","ROR",-"DIV"
332 // arm:"MUL","CMP\t[$]715827882",-".*udiv"
333 // ppc64x:"MULLD","ROTL\t[$]63"
336 // amd64:"MOVQ\t[$]-8737931403336103397","IMULQ",-"ROLQ",-"DIVQ"
337 // 386:"IMUL3L\t[$]678152731",-"ROLL",-"DIVL"
338 // arm64:"MOVD\t[$]-8737931403336103397","MUL",-"ROR",-"DIV"
339 // arm:"MUL","CMP\t[$]226050910",-".*udiv"
340 // ppc64x:"MULLD",-"ROTL"
346 func Divisible(n int) (bool, bool) {
347 // amd64:"IMULQ","ADD","ROLQ\t[$]63",-"DIVQ"
348 // 386:"IMUL3L\t[$]-1431655765","ADDL\t[$]715827882","ROLL\t[$]31",-"DIVL"
349 // arm64:"MOVD\t[$]-6148914691236517205","MOVD\t[$]3074457345618258602","MUL","ADD\tR","ROR",-"DIV"
350 // arm:"MUL","ADD\t[$]715827882",-".*udiv"
351 // ppc64x/power8:"MULLD","ADD","ROTL\t[$]63"
352 // ppc64x/power9:"MADDLD","ROTL\t[$]63"
355 // amd64:"IMULQ","ADD",-"ROLQ",-"DIVQ"
356 // 386:"IMUL3L\t[$]678152731","ADDL\t[$]113025455",-"ROLL",-"DIVL"
357 // arm64:"MUL","MOVD\t[$]485440633518672410","ADD",-"ROR",-"DIV"
358 // arm:"MUL","ADD\t[$]113025455",-".*udiv"
359 // ppc64x/power8:"MULLD","ADD",-"ROTL"
360 // ppc64x/power9:"MADDLD",-"ROTL"
366 // Check that fix-up code is not generated for divisions where it has been proven that
367 // the divisor is not -1 or that the dividend is > MinIntNN.
368 func NoFix64A(divr int64) (int64, int64) {
372 d /= divr // amd64:-"JMP"
373 e %= divr // amd64:-"JMP"
374 // The following statement is to avoid conflict between the above check
375 // and the normal JMP generated at the end of the block.
381 func NoFix64B(divd int64) (int64, int64) {
385 if divd > -9223372036854775808 {
386 d = divd / divr // amd64:-"JMP"
387 e = divd % divr // amd64:-"JMP"
393 func NoFix32A(divr int32) (int32, int32) {
408 func NoFix32B(divd int32) (int32, int32) {
412 if divd > -2147483648 {
424 func NoFix16A(divr int16) (int16, int16) {
439 func NoFix16B(divd int16) (int16, int16) {
455 // Check that len() and cap() calls divided by powers of two are
456 // optimized into shifts and ands
458 func LenDiv1(a []int) int {
460 // amd64:"SHRQ\t[$]10"
461 // arm64:"LSR\t[$]10",-"SDIV"
462 // arm:"SRL\t[$]10",-".*udiv"
463 // ppc64x:"SRD\t[$]10"
467 func LenDiv2(s string) int {
469 // amd64:"SHRQ\t[$]11"
470 // arm64:"LSR\t[$]11",-"SDIV"
471 // arm:"SRL\t[$]11",-".*udiv"
472 // ppc64x:"SRD\t[$]11"
473 return len(s) / (4097 >> 1)
476 func LenMod1(a []int) int {
477 // 386:"ANDL\t[$]1023"
478 // amd64:"ANDL\t[$]1023"
479 // arm64:"AND\t[$]1023",-"SDIV"
480 // arm/6:"AND",-".*udiv"
481 // arm/7:"BFC",-".*udiv",-"AND"
486 func LenMod2(s string) int {
487 // 386:"ANDL\t[$]2047"
488 // amd64:"ANDL\t[$]2047"
489 // arm64:"AND\t[$]2047",-"SDIV"
490 // arm/6:"AND",-".*udiv"
491 // arm/7:"BFC",-".*udiv",-"AND"
493 return len(s) % (4097 >> 1)
496 func CapDiv(a []int) int {
498 // amd64:"SHRQ\t[$]12"
499 // arm64:"LSR\t[$]12",-"SDIV"
500 // arm:"SRL\t[$]12",-".*udiv"
501 // ppc64x:"SRD\t[$]12"
502 return cap(a) / ((1 << 11) + 2048)
505 func CapMod(a []int) int {
506 // 386:"ANDL\t[$]4095"
507 // amd64:"ANDL\t[$]4095"
508 // arm64:"AND\t[$]4095",-"SDIV"
509 // arm/6:"AND",-".*udiv"
510 // arm/7:"BFC",-".*udiv",-"AND"
512 return cap(a) % ((1 << 11) + 2048)
515 func AddMul(x int) int {
520 func MULA(a, b, c uint32) (uint32, uint32, uint32) {
521 // arm:`MULA`,-`MUL\s`
522 // arm64:`MADDW`,-`MULW`
524 // arm:`MULA`,-`MUL\s`
525 // arm64:`MADDW`,-`MULW`
527 // arm:`ADD`,-`MULA`,-`MUL\s`
528 // arm64:`ADD`,-`MADD`,-`MULW`
529 // ppc64x:`ADD`,-`MULLD`
534 func MULS(a, b, c uint32) (uint32, uint32, uint32) {
535 // arm/7:`MULS`,-`MUL\s`
536 // arm/6:`SUB`,`MUL\s`,-`MULS`
537 // arm64:`MSUBW`,-`MULW`
539 // arm/7:`MULS`,-`MUL\s`
540 // arm/6:`SUB`,`MUL\s`,-`MULS`
541 // arm64:`MSUBW`,-`MULW`
543 // arm/7:`SUB`,-`MULS`,-`MUL\s`
544 // arm64:`SUB`,-`MSUBW`,-`MULW`
545 // ppc64x:`SUB`,-`MULLD`
550 func addSpecial(a, b, c uint32) (uint32, uint32, uint32) {
555 // amd64:`SUBL.*-128`
560 // Divide -> shift rules usually require fixup for negative inputs.
561 // If the input is non-negative, make sure the fixup is eliminated.
562 func divInt(v int64) int64 {
566 // amd64:-`.*SARQ.*63,`, -".*SHRQ", ".*SARQ.*[$]9,"
570 // The reassociate rules "x - (z + C) -> (x - z) - C" and
571 // "(z + C) - x -> C + (z - x)" can optimize the following cases.
572 func constantFold1(i0, j0, i1, j1, i2, j2, i3, j3 int) (int, int, int, int) {
573 // arm64:"SUB","ADD\t[$]2"
574 // ppc64x:"SUB","ADD\t[$]2"
575 r0 := (i0 + 3) - (j0 + 1)
576 // arm64:"SUB","SUB\t[$]4"
577 // ppc64x:"SUB","ADD\t[$]-4"
578 r1 := (i1 - 3) - (j1 + 1)
579 // arm64:"SUB","ADD\t[$]4"
580 // ppc64x:"SUB","ADD\t[$]4"
581 r2 := (i2 + 3) - (j2 - 1)
582 // arm64:"SUB","SUB\t[$]2"
583 // ppc64x:"SUB","ADD\t[$]-2"
584 r3 := (i3 - 3) - (j3 - 1)
585 return r0, r1, r2, r3
588 // The reassociate rules "x - (z + C) -> (x - z) - C" and
589 // "(C - z) - x -> C - (z + x)" can optimize the following cases.
590 func constantFold2(i0, j0, i1, j1 int) (int, int) {
591 // arm64:"ADD","MOVD\t[$]2","SUB"
592 // ppc64x: `SUBC\tR[0-9]+,\s[$]2,\sR`
593 r0 := (3 - i0) - (j0 + 1)
594 // arm64:"ADD","MOVD\t[$]4","SUB"
595 // ppc64x: `SUBC\tR[0-9]+,\s[$]4,\sR`
596 r1 := (3 - i1) - (j1 - 1)
600 func constantFold3(i, j int) int {
601 // arm64: "MOVD\t[$]30","MUL",-"ADD",-"LSL"
602 // ppc64x:"MULLD\t[$]30","MULLD"
603 r := (5 * i) * (6 * j)