cmd/compile: add late lower pass for last rules to run
author    eric fang <eric.fang@arm.com>
Wed, 17 Aug 2022 10:01:17 +0000 (10:01 +0000)
committer Eric Fang <eric.fang@arm.com>
Wed, 5 Oct 2022 02:40:56 +0000 (02:40 +0000)
Optimization rules usually have priorities: some need to run first,
some next, and some last, in order to produce the best code. But
currently our optimization rules have no priority, so this CL adds a
late lower pass that runs the rules that need to run last, such as
splitting unreasonable constant folding. This pass can be seen as a
second round of the lower pass.

For example:
func foo(a, b uint64) uint64 {
        d := a + 0x1234568
        d1 := b + 0x1234568
        return d & d1
}
The code generated by the master branch:
0x0004 00004        ADD     $19088744, R0, R2 // movz+movk+add
0x0010 00016        ADD     $19088744, R1, R1 // movz+movk+add
0x001c 00028        AND     R1, R2, R0

This is because the current constant folding rules do not take the
range of constants into account, so the constant gets loaded
repeatedly. This CL splits such unreasonable constant folding in the
late lower pass. With this CL, the generated code is:
0x0004 00004        MOVD    $19088744, R2 // movz+movk
0x000c 00012        ADD     R0, R2, R3
0x0010 00016        ADD     R1, R2, R1
0x0014 00020        AND     R1, R3, R0

This CL also adds a constant folding optimization for the ADDS
instruction.
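
For example (this mirrors the codegen test added to
test/codegen/bits.go in this CL; bits is math/bits), a carry-producing
add now keeps a small constant in the instruction's immediate field
instead of materializing it with MOVD:

func foldConst(x, y uint64) uint64 {
        // arm64: "ADDS\t[$]7",-"MOVD\t[$]7"
        d, b := bits.Add64(x, 7, 0)
        return b & d
}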

In addition, to avoid introducing codegen regressions, an optimization
rule is added that turns the addition of a negative number into the
subtraction of the corresponding positive number, as sketched below.
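
A minimal illustration (the function name is ours, not a test from
this CL): with the new rule the constant stays positive and encodable,
so the rewrite no longer has to wait for the assembler (the progedit
code removed from obj7.go below used to do this at the Prog level):

func addNeg(a int64) int64 {
        // Folds to (ADDconst [-8] a); the new rule
        // (ADDconst [c] y) && c < 0 => (SUBconst [-c] y)
        // turns it into (SUBconst [8] a), which assembles
        // directly as SUB $8.
        return a + (-8)
}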

go1 benchmarks:
name                     old time/op    new time/op    delta
BinaryTree17-8              1.22s ± 1%     1.24s ± 0%  +1.56%  (p=0.008 n=5+5)
Fannkuch11-8                1.54s ± 0%     1.53s ± 0%  -0.69%  (p=0.016 n=4+5)
FmtFprintfEmpty-8          14.1ns ± 0%    14.1ns ± 0%    ~     (p=0.079 n=4+5)
FmtFprintfString-8         26.0ns ± 0%    26.1ns ± 0%  +0.23%  (p=0.008 n=5+5)
FmtFprintfInt-8            32.3ns ± 0%    32.9ns ± 1%  +1.72%  (p=0.008 n=5+5)
FmtFprintfIntInt-8         54.5ns ± 0%    55.5ns ± 0%  +1.83%  (p=0.008 n=5+5)
FmtFprintfPrefixedInt-8    61.5ns ± 0%    62.0ns ± 0%  +0.93%  (p=0.008 n=5+5)
FmtFprintfFloat-8          72.0ns ± 0%    73.6ns ± 0%  +2.24%  (p=0.008 n=5+5)
FmtManyArgs-8               221ns ± 0%     224ns ± 0%  +1.22%  (p=0.008 n=5+5)
GobDecode-8                1.91ms ± 0%    1.93ms ± 0%  +0.98%  (p=0.008 n=5+5)
GobEncode-8                1.40ms ± 1%    1.39ms ± 0%  -0.79%  (p=0.032 n=5+5)
Gzip-8                      115ms ± 0%     117ms ± 1%  +1.17%  (p=0.008 n=5+5)
Gunzip-8                   19.4ms ± 1%    19.3ms ± 0%  -0.71%  (p=0.016 n=5+4)
HTTPClientServer-8         27.0µs ± 0%    27.3µs ± 0%  +0.80%  (p=0.008 n=5+5)
JSONEncode-8               3.36ms ± 1%    3.33ms ± 0%    ~     (p=0.056 n=5+5)
JSONDecode-8               17.5ms ± 2%    17.8ms ± 0%  +1.71%  (p=0.016 n=5+4)
Mandelbrot200-8            2.29ms ± 0%    2.29ms ± 0%    ~     (p=0.151 n=5+5)
GoParse-8                  1.35ms ± 1%    1.36ms ± 1%    ~     (p=0.056 n=5+5)
RegexpMatchEasy0_32-8      24.5ns ± 0%    24.5ns ± 0%    ~     (p=0.444 n=4+5)
RegexpMatchEasy0_1K-8       131ns ±11%     118ns ± 6%    ~     (p=0.056 n=5+5)
RegexpMatchEasy1_32-8      22.9ns ± 0%    22.9ns ± 0%    ~     (p=0.905 n=4+5)
RegexpMatchEasy1_1K-8       126ns ± 0%     127ns ± 0%    ~     (p=0.063 n=4+5)
RegexpMatchMedium_32-8      486ns ± 5%     483ns ± 0%    ~     (p=0.381 n=5+4)
RegexpMatchMedium_1K-8     15.4µs ± 1%    15.5µs ± 0%    ~     (p=0.151 n=5+5)
RegexpMatchHard_32-8        687ns ± 0%     686ns ± 0%    ~     (p=0.103 n=5+5)
RegexpMatchHard_1K-8       20.7µs ± 0%    20.7µs ± 1%    ~     (p=0.151 n=5+5)
Revcomp-8                   175ms ± 2%     176ms ± 3%    ~     (p=1.000 n=5+5)
Template-8                 20.4ms ± 6%    20.1ms ± 2%    ~     (p=0.151 n=5+5)
TimeParse-8                 112ns ± 0%     113ns ± 0%  +0.97%  (p=0.016 n=5+4)
TimeFormat-8                156ns ± 0%     145ns ± 0%  -7.14%  (p=0.029 n=4+4)

Change-Id: I3ced26e89041f873ac989586514ccc5ee09f13da
Reviewed-on: https://go-review.googlesource.com/c/go/+/425134
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Eric Fang <eric.fang@arm.com>

16 files changed:
src/cmd/compile/internal/ssa/_gen/ARM64.rules
src/cmd/compile/internal/ssa/_gen/ARM64latelower.rules [new file with mode: 0644]
src/cmd/compile/internal/ssa/_gen/main.go
src/cmd/compile/internal/ssa/_gen/rulegen.go
src/cmd/compile/internal/ssa/compile.go
src/cmd/compile/internal/ssa/config.go
src/cmd/compile/internal/ssa/lower.go
src/cmd/compile/internal/ssa/rewrite.go
src/cmd/compile/internal/ssa/rewriteARM64.go
src/cmd/compile/internal/ssa/rewriteARM64latelower.go [new file with mode: 0644]
src/cmd/dist/buildtool.go
src/cmd/internal/obj/arm64/obj7.go
test/codegen/arithmetic.go
test/codegen/bits.go
test/codegen/comparisons.go
test/codegen/switch.go

index 3392644e7ddd713ffcf2015b078be4a40b9b2439..8b3c0e72f6e39583c66b06b38c1f7ba1f73f5414 100644 (file)
 (ROR x (MOVDconst [c])) => (RORconst x [c&63])
 (RORW x (MOVDconst [c])) => (RORWconst x [c&31])
 
+(ADDSflags x (MOVDconst [c]))  => (ADDSconstflags [c] x)
+
+(ADDconst [c] y) && c < 0 => (SUBconst [-c] y)
+
 // Canonicalize the order of arguments to comparisons - helps with CSE.
 ((CMP|CMPW) x y) && canonLessThan(x,y) => (InvertFlags ((CMP|CMPW) y x))
 
diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64latelower.rules b/src/cmd/compile/internal/ssa/_gen/ARM64latelower.rules
new file mode 100644 (file)
index 0000000..d0c2099
--- /dev/null
@@ -0,0 +1,21 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file contains rules used by the lateLower pass.
+// These are often the exact inverse of rules in ARM64.rules.
+
+(ADDconst [c] x) && !isARM64addcon(c)  => (ADD x (MOVDconst [c]))
+(SUBconst [c] x) && !isARM64addcon(c)  => (SUB x (MOVDconst [c]))
+(ANDconst [c] x) && !isARM64bitcon(uint64(c)) => (AND x (MOVDconst [c]))
+(ORconst  [c] x) && !isARM64bitcon(uint64(c))  => (OR  x (MOVDconst [c]))
+(XORconst [c] x) && !isARM64bitcon(uint64(c))  => (XOR x (MOVDconst [c]))
+(TSTconst [c] x) && !isARM64bitcon(uint64(c))  => (TST x (MOVDconst [c]))
+(TSTWconst [c] x) && !isARM64bitcon(uint64(c)|uint64(c)<<32)  => (TSTW x (MOVDconst [int64(c)]))
+
+(CMPconst [c] x) && !isARM64addcon(c)  => (CMP x (MOVDconst [c]))
+(CMPWconst [c] x) && !isARM64addcon(int64(c))  => (CMPW x (MOVDconst [int64(c)]))
+(CMNconst [c] x) && !isARM64addcon(c)  => (CMN x (MOVDconst [c]))
+(CMNWconst [c] x) && !isARM64addcon(int64(c))  => (CMNW x (MOVDconst [int64(c)]))
+
+(ADDSconstflags [c] x) && !isARM64addcon(c)  => (ADDSflags x (MOVDconst [c]))
index 2fbd94df45fa4b0b8dc321faba1cb3f156a59a8b..b3fbe96ba7fa9d699e20e64bb9f771eea898b0bb 100644 (file)
@@ -159,6 +159,7 @@ func main() {
                tasks = append(tasks, func() {
                        genRules(a)
                        genSplitLoadRules(a)
+                       genLateLowerRules(a)
                })
        }
        var wg sync.WaitGroup
index 20b35dc448a631040909d4f3f3fa312295e88bcd..aa038255e160399a6a5e9be89aeb721dd4f8878c 100644 (file)
@@ -89,6 +89,7 @@ func (r Rule) parse() (match, cond, result string) {
 
 func genRules(arch arch)          { genRulesSuffix(arch, "") }
 func genSplitLoadRules(arch arch) { genRulesSuffix(arch, "splitload") }
+func genLateLowerRules(arch arch) { genRulesSuffix(arch, "latelower") }
 
 func genRulesSuffix(arch arch, suff string) {
        // Open input file.
index 3be2cc7c3738e55abf82312db27f4f095f79c221..2eaef724454236e7b48ee10206d7e8c50e6d52e3 100644 (file)
@@ -486,6 +486,7 @@ var passes = [...]pass{
        {name: "insert resched checks", fn: insertLoopReschedChecks,
                disabled: !buildcfg.Experiment.PreemptibleLoops}, // insert resched checks in loops.
        {name: "lower", fn: lower, required: true},
+       {name: "late lower", fn: lateLower, required: true},
        {name: "addressing modes", fn: addressingModes, required: false},
        {name: "lowered deadcode for cse", fn: deadcode}, // deadcode immediately before CSE avoids CSE making dead values live again
        {name: "lowered cse", fn: cse},
@@ -559,9 +560,14 @@ var passOrder = [...]constraint{
        {"critical", "regalloc"},
        // regalloc requires all the values in a block to be scheduled
        {"schedule", "regalloc"},
+       // the rules in late lower run after the general rules.
+       {"lower", "late lower"},
+       // late lower may generate some values that need to be CSEed.
+       {"late lower", "lowered cse"},
        // checkLower must run after lowering & subsequent dead code elim
        {"lower", "checkLower"},
        {"lowered deadcode", "checkLower"},
+       {"late lower", "checkLower"},
        // late nilcheck needs instructions to be scheduled.
        {"schedule", "late nilcheck"},
        // flagalloc needs instructions to be scheduled.
index d7a413268b6eb007946c8317eb0eac64305099c7..0ad2d94dce85b29abc216a63c478461e61d62e3d 100644 (file)
@@ -22,7 +22,8 @@ type Config struct {
        RegSize        int64  // 4 or 8; copy of cmd/internal/sys.Arch.RegSize
        Types          Types
        lowerBlock     blockRewriter  // lowering function
-       lowerValue     valueRewriter  // lowering function
+       lowerValue     valueRewriter  // lowering function, first round
+       lateLowerValue valueRewriter  // lowering function that needs to be run after the first round of lower function; only used on some architectures
        splitLoad      valueRewriter  // function for splitting merged load ops; only used on some architectures
        registers      []Register     // machine registers
        gpRegMask      regMask        // general purpose integer register mask
@@ -222,6 +223,7 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
                c.RegSize = 8
                c.lowerBlock = rewriteBlockARM64
                c.lowerValue = rewriteValueARM64
+               c.lateLowerValue = rewriteValueARM64latelower
                c.registers = registersARM64[:]
                c.gpRegMask = gpRegMaskARM64
                c.fpRegMask = fpRegMaskARM64
index fb4b7484136a8745ec518ee4abd7d85dd20eda3f..b850ac52e3d4075816092e0d304fe640193f7c98 100644 (file)
@@ -10,6 +10,14 @@ func lower(f *Func) {
        applyRewrite(f, f.Config.lowerBlock, f.Config.lowerValue, removeDeadValues)
 }
 
+// lateLower applies those rules that need to be run after the general lower rules.
+func lateLower(f *Func) {
+       // repeat rewrites until we find no more rewrites
+       if f.Config.lateLowerValue != nil {
+               applyRewrite(f, f.Config.lowerBlock, f.Config.lateLowerValue, removeDeadValues)
+       }
+}
+
 // checkLower checks for unlowered opcodes and fails if we find one.
 func checkLower(f *Func) {
        // Needs to be a separate phase because it must run after both
index c95d8734564c490c7fb392c990b8bd06cc157c67..77ff4b100e2c6470b20663115613e4626d942728 100644 (file)
@@ -1994,3 +1994,52 @@ func canRotate(c *Config, bits int64) bool {
                return false
        }
 }
+
+// isARM64bitcon reports whether a constant can be encoded into a logical instruction.
+func isARM64bitcon(x uint64) bool {
+       if x == 1<<64-1 || x == 0 {
+               return false
+       }
+       // determine the period and sign-extend a unit to 64 bits
+       switch {
+       case x != x>>32|x<<32:
+               // period is 64
+               // nothing to do
+       case x != x>>16|x<<48:
+               // period is 32
+               x = uint64(int64(int32(x)))
+       case x != x>>8|x<<56:
+               // period is 16
+               x = uint64(int64(int16(x)))
+       case x != x>>4|x<<60:
+               // period is 8
+               x = uint64(int64(int8(x)))
+       default:
+               // period is 4 or 2, always true
+               // 0001, 0010, 0100, 1000 -- 0001 rotate
+               // 0011, 0110, 1100, 1001 -- 0011 rotate
+               // 0111, 1011, 1101, 1110 -- 0111 rotate
+               // 0101, 1010             -- 01   rotate, repeat
+               return true
+       }
+       return sequenceOfOnes(x) || sequenceOfOnes(^x)
+}
+
+// sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
+func sequenceOfOnes(x uint64) bool {
+       y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
+       y += x
+       return (y-1)&y == 0
+}
+
+// isARM64addcon reports whether v can be encoded as the immediate value in an ADD or SUB instruction.
+func isARM64addcon(v int64) bool {
+       /* uimm12 or uimm24? */
+       if v < 0 {
+               return false
+       }
+       if (v & 0xFFF) == 0 {
+               v >>= 12
+       }
+       return v <= 0xFFF
+}
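
As a quick sanity check (an illustrative scratch program, not part of
this commit), the motivating constant 0x1234568 is rejected by
isARM64addcon, which is exactly why the late lower pass splits it out
of the ADD:

package main

import "fmt"

// isARM64addcon and sequenceOfOnes are copied verbatim from the hunk
// above so this compiles standalone.
func isARM64addcon(v int64) bool {
        if v < 0 {
                return false
        }
        if (v & 0xFFF) == 0 {
                v >>= 12
        }
        return v <= 0xFFF
}

func sequenceOfOnes(x uint64) bool {
        y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
        y += x
        return (y-1)&y == 0
}

func main() {
        fmt.Println(isARM64addcon(0x456))     // true: fits uimm12
        fmt.Println(isARM64addcon(0x456000))  // true: uimm12 shifted left by 12
        fmt.Println(isARM64addcon(0x1234568)) // false: needs movz+movk, so split it out
        fmt.Println(sequenceOfOnes(0x0ff0))   // true: one contiguous run of ones
        fmt.Println(sequenceOfOnes(0x0f0f))   // false: two separate runs
}
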
index b026532df3d4d2785221772a39dc11f974eca234..f8b6252d6040d1e44708f80554f5f7d823f9ac7f 100644 (file)
@@ -11,6 +11,8 @@ func rewriteValueARM64(v *Value) bool {
                return rewriteValueARM64_OpARM64ADCSflags(v)
        case OpARM64ADD:
                return rewriteValueARM64_OpARM64ADD(v)
+       case OpARM64ADDSflags:
+               return rewriteValueARM64_OpARM64ADDSflags(v)
        case OpARM64ADDconst:
                return rewriteValueARM64_OpARM64ADDconst(v)
        case OpARM64ADDshiftLL:
@@ -1360,6 +1362,27 @@ func rewriteValueARM64_OpARM64ADD(v *Value) bool {
        }
        return false
 }
+func rewriteValueARM64_OpARM64ADDSflags(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ADDSflags x (MOVDconst [c]))
+       // result: (ADDSconstflags [c] x)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpARM64MOVDconst {
+                               continue
+                       }
+                       c := auxIntToInt64(v_1.AuxInt)
+                       v.reset(OpARM64ADDSconstflags)
+                       v.AuxInt = int64ToAuxInt(c)
+                       v.AddArg(x)
+                       return true
+               }
+               break
+       }
+       return false
+}
 func rewriteValueARM64_OpARM64ADDconst(v *Value) bool {
        v_0 := v.Args[0]
        // match: (ADDconst [off1] (MOVDaddr [off2] {sym} ptr))
@@ -1382,6 +1405,20 @@ func rewriteValueARM64_OpARM64ADDconst(v *Value) bool {
                v.AddArg(ptr)
                return true
        }
+       // match: (ADDconst [c] y)
+       // cond: c < 0
+       // result: (SUBconst [-c] y)
+       for {
+               c := auxIntToInt64(v.AuxInt)
+               y := v_0
+               if !(c < 0) {
+                       break
+               }
+               v.reset(OpARM64SUBconst)
+               v.AuxInt = int64ToAuxInt(-c)
+               v.AddArg(y)
+               return true
+       }
        // match: (ADDconst [0] x)
        // result: x
        for {
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64latelower.go b/src/cmd/compile/internal/ssa/rewriteARM64latelower.go
new file mode 100644 (file)
index 0000000..d4890fe
--- /dev/null
@@ -0,0 +1,289 @@
+// Code generated from gen/ARM64latelower.rules; DO NOT EDIT.
+// generated with: cd gen; go run *.go
+
+package ssa
+
+func rewriteValueARM64latelower(v *Value) bool {
+       switch v.Op {
+       case OpARM64ADDSconstflags:
+               return rewriteValueARM64latelower_OpARM64ADDSconstflags(v)
+       case OpARM64ADDconst:
+               return rewriteValueARM64latelower_OpARM64ADDconst(v)
+       case OpARM64ANDconst:
+               return rewriteValueARM64latelower_OpARM64ANDconst(v)
+       case OpARM64CMNWconst:
+               return rewriteValueARM64latelower_OpARM64CMNWconst(v)
+       case OpARM64CMNconst:
+               return rewriteValueARM64latelower_OpARM64CMNconst(v)
+       case OpARM64CMPWconst:
+               return rewriteValueARM64latelower_OpARM64CMPWconst(v)
+       case OpARM64CMPconst:
+               return rewriteValueARM64latelower_OpARM64CMPconst(v)
+       case OpARM64ORconst:
+               return rewriteValueARM64latelower_OpARM64ORconst(v)
+       case OpARM64SUBconst:
+               return rewriteValueARM64latelower_OpARM64SUBconst(v)
+       case OpARM64TSTWconst:
+               return rewriteValueARM64latelower_OpARM64TSTWconst(v)
+       case OpARM64TSTconst:
+               return rewriteValueARM64latelower_OpARM64TSTconst(v)
+       case OpARM64XORconst:
+               return rewriteValueARM64latelower_OpARM64XORconst(v)
+       }
+       return false
+}
+func rewriteValueARM64latelower_OpARM64ADDSconstflags(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (ADDSconstflags [c] x)
+       // cond: !isARM64addcon(c)
+       // result: (ADDSflags x (MOVDconst [c]))
+       for {
+               c := auxIntToInt64(v.AuxInt)
+               x := v_0
+               if !(!isARM64addcon(c)) {
+                       break
+               }
+               v.reset(OpARM64ADDSflags)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(c)
+               v.AddArg2(x, v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64latelower_OpARM64ADDconst(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (ADDconst [c] x)
+       // cond: !isARM64addcon(c)
+       // result: (ADD x (MOVDconst [c]))
+       for {
+               c := auxIntToInt64(v.AuxInt)
+               x := v_0
+               if !(!isARM64addcon(c)) {
+                       break
+               }
+               v.reset(OpARM64ADD)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(c)
+               v.AddArg2(x, v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64latelower_OpARM64ANDconst(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (ANDconst [c] x)
+       // cond: !isARM64bitcon(uint64(c))
+       // result: (AND x (MOVDconst [c]))
+       for {
+               c := auxIntToInt64(v.AuxInt)
+               x := v_0
+               if !(!isARM64bitcon(uint64(c))) {
+                       break
+               }
+               v.reset(OpARM64AND)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(c)
+               v.AddArg2(x, v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64latelower_OpARM64CMNWconst(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (CMNWconst [c] x)
+       // cond: !isARM64addcon(int64(c))
+       // result: (CMNW x (MOVDconst [int64(c)]))
+       for {
+               c := auxIntToInt32(v.AuxInt)
+               x := v_0
+               if !(!isARM64addcon(int64(c))) {
+                       break
+               }
+               v.reset(OpARM64CMNW)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(int64(c))
+               v.AddArg2(x, v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64latelower_OpARM64CMNconst(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (CMNconst [c] x)
+       // cond: !isARM64addcon(c)
+       // result: (CMN x (MOVDconst [c]))
+       for {
+               c := auxIntToInt64(v.AuxInt)
+               x := v_0
+               if !(!isARM64addcon(c)) {
+                       break
+               }
+               v.reset(OpARM64CMN)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(c)
+               v.AddArg2(x, v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64latelower_OpARM64CMPWconst(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (CMPWconst [c] x)
+       // cond: !isARM64addcon(int64(c))
+       // result: (CMPW x (MOVDconst [int64(c)]))
+       for {
+               c := auxIntToInt32(v.AuxInt)
+               x := v_0
+               if !(!isARM64addcon(int64(c))) {
+                       break
+               }
+               v.reset(OpARM64CMPW)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(int64(c))
+               v.AddArg2(x, v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64latelower_OpARM64CMPconst(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (CMPconst [c] x)
+       // cond: !isARM64addcon(c)
+       // result: (CMP x (MOVDconst [c]))
+       for {
+               c := auxIntToInt64(v.AuxInt)
+               x := v_0
+               if !(!isARM64addcon(c)) {
+                       break
+               }
+               v.reset(OpARM64CMP)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(c)
+               v.AddArg2(x, v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64latelower_OpARM64ORconst(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (ORconst [c] x)
+       // cond: !isARM64bitcon(uint64(c))
+       // result: (OR x (MOVDconst [c]))
+       for {
+               c := auxIntToInt64(v.AuxInt)
+               x := v_0
+               if !(!isARM64bitcon(uint64(c))) {
+                       break
+               }
+               v.reset(OpARM64OR)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(c)
+               v.AddArg2(x, v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64latelower_OpARM64SUBconst(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (SUBconst [c] x)
+       // cond: !isARM64addcon(c)
+       // result: (SUB x (MOVDconst [c]))
+       for {
+               c := auxIntToInt64(v.AuxInt)
+               x := v_0
+               if !(!isARM64addcon(c)) {
+                       break
+               }
+               v.reset(OpARM64SUB)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(c)
+               v.AddArg2(x, v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64latelower_OpARM64TSTWconst(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (TSTWconst [c] x)
+       // cond: !isARM64bitcon(uint64(c)|uint64(c)<<32)
+       // result: (TSTW x (MOVDconst [int64(c)]))
+       for {
+               c := auxIntToInt32(v.AuxInt)
+               x := v_0
+               if !(!isARM64bitcon(uint64(c) | uint64(c)<<32)) {
+                       break
+               }
+               v.reset(OpARM64TSTW)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(int64(c))
+               v.AddArg2(x, v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64latelower_OpARM64TSTconst(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (TSTconst [c] x)
+       // cond: !isARM64bitcon(uint64(c))
+       // result: (TST x (MOVDconst [c]))
+       for {
+               c := auxIntToInt64(v.AuxInt)
+               x := v_0
+               if !(!isARM64bitcon(uint64(c))) {
+                       break
+               }
+               v.reset(OpARM64TST)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(c)
+               v.AddArg2(x, v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64latelower_OpARM64XORconst(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (XORconst [c] x)
+       // cond: !isARM64bitcon(uint64(c))
+       // result: (XOR x (MOVDconst [c]))
+       for {
+               c := auxIntToInt64(v.AuxInt)
+               x := v_0
+               if !(!isARM64bitcon(uint64(c))) {
+                       break
+               }
+               v.reset(OpARM64XOR)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(c)
+               v.AddArg2(x, v0)
+               return true
+       }
+       return false
+}
+func rewriteBlockARM64latelower(b *Block) bool {
+       return false
+}
index 828e93aa4cf65f4de190561454df33e3a4cac410..eee738b43c8ecf56d89e35b5bd12af282aa2136d 100644 (file)
@@ -266,6 +266,7 @@ func isUnneededSSARewriteFile(srcFile, goArch string) (archCaps string, unneeded
        archCaps = fileArch
        fileArch = strings.ToLower(fileArch)
        fileArch = strings.TrimSuffix(fileArch, "splitload")
+       fileArch = strings.TrimSuffix(fileArch, "latelower")
        if fileArch == goArch {
                return "", false
        }
index 7d20beb5d61694022ff8ebb1593b3efa3fedaa82..6c2cb63e9b4467ed41eb4e1b4fdd45ed607e03d9 100644 (file)
@@ -40,13 +40,6 @@ import (
        "math"
 )
 
-var complements = []obj.As{
-       AADD:  ASUB,
-       AADDW: ASUBW,
-       ASUB:  AADD,
-       ASUBW: AADDW,
-}
-
 // zrReplace is the set of instructions for which $0 in the From operand
 // should be replaced with REGZERO.
 var zrReplace = map[obj.As]bool{
@@ -375,21 +368,6 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
                break
        }
 
-       // Rewrite negative immediates as positive immediates with
-       // complementary instruction.
-       switch p.As {
-       case AADD, ASUB:
-               if p.From.Type == obj.TYPE_CONST && p.From.Offset < 0 && p.From.Offset != -1<<63 {
-                       p.From.Offset = -p.From.Offset
-                       p.As = complements[p.As]
-               }
-       case AADDW, ASUBW:
-               if p.From.Type == obj.TYPE_CONST && p.From.Offset < 0 && int32(p.From.Offset) != -1<<31 {
-                       p.From.Offset = -p.From.Offset
-                       p.As = complements[p.As]
-               }
-       }
-
        if c.ctxt.Flag_dynlink {
                c.rewriteToUseGot(p)
        }
index 3fb9ce646b81c371968ed3062f8f1467a6ef7b17..327be24db8125f7ea9aaa98556c62e40130e10f3 100644 (file)
@@ -319,7 +319,7 @@ func ConstMods(n1 uint, n2 int) (uint, int) {
 func Divisible(n1 uint, n2 int) (bool, bool, bool, bool) {
        // amd64:"MOVQ\t[$]-6148914691236517205","IMULQ","ROLQ\t[$]63",-"DIVQ"
        // 386:"IMUL3L\t[$]-1431655765","ROLL\t[$]31",-"DIVQ"
-       // arm64:"MOVD\t[$]-6148914691236517205","MUL","ROR",-"DIV"
+       // arm64:"MOVD\t[$]-6148914691236517205","MOVD\t[$]3074457345618258602","MUL","ROR",-"DIV"
        // arm:"MUL","CMP\t[$]715827882",-".*udiv"
        // ppc64:"MULLD","ROTL\t[$]63"
        // ppc64le:"MULLD","ROTL\t[$]63"
@@ -335,7 +335,7 @@ func Divisible(n1 uint, n2 int) (bool, bool, bool, bool) {
 
        // amd64:"IMULQ","ADD","ROLQ\t[$]63",-"DIVQ"
        // 386:"IMUL3L\t[$]-1431655765","ADDL\t[$]715827882","ROLL\t[$]31",-"DIVQ"
-       // arm64:"MUL","ADD\t[$]3074457345618258602","ROR",-"DIV"
+       // arm64:"MUL","ADD\tR","ROR",-"DIV"
        // arm:"MUL","ADD\t[$]715827882",-".*udiv"
        // ppc64/power8:"MULLD","ADD","ROTL\t[$]63"
        // ppc64le/power8:"MULLD","ADD","ROTL\t[$]63"
@@ -345,7 +345,7 @@ func Divisible(n1 uint, n2 int) (bool, bool, bool, bool) {
 
        // amd64:"IMULQ","ADD",-"ROLQ",-"DIVQ"
        // 386:"IMUL3L\t[$]678152731","ADDL\t[$]113025455",-"ROLL",-"DIVQ"
-       // arm64:"MUL","ADD\t[$]485440633518672410",-"ROR",-"DIV"
+       // arm64:"MUL","MOVD\t[$]485440633518672410","ADD",-"ROR",-"DIV"
        // arm:"MUL","ADD\t[$]113025455",-".*udiv"
        // ppc64/power8:"MULLD","ADD",-"ROTL"
        // ppc64/power9:"MADDLD",-"ROTL"
index e7826b8e658dc89d6ae97516b6f8a6339796f8b1..4f70627c258cedb31a5441d03f3eb8d5341e21f8 100644 (file)
@@ -363,3 +363,14 @@ func issue48467(x, y uint64) uint64 {
        d, borrow := bits.Sub64(x, y, 0)
        return x - d&(-borrow)
 }
+
+func foldConst(x, y uint64) uint64 {
+       // arm64: "ADDS\t[$]7",-"MOVD\t[$]7"
+       d, b := bits.Add64(x, 7, 0)
+       return b & d
+}
+
+func foldConstOutOfRange(a uint64) uint64 {
+       // arm64: "MOVD\t[$]19088744",-"ADD\t[$]19088744"
+       return a + 0x1234568
+}
index 4b66044804418e59561ef49c4ee1d47e962aa64e..5a54a960bc629ee32fac464af1030a989be29a66 100644 (file)
@@ -19,7 +19,7 @@ import "unsafe"
 
 func CompareString1(s string) bool {
        // amd64:`CMPW\t\(.*\), [$]`
-       // arm64:`MOVHU\t\(.*\), [R]`,`CMPW\t[$]`
+       // arm64:`MOVHU\t\(.*\), [R]`,`MOVD\t[$]`,`CMPW\tR`
        // ppc64le:`MOVHZ\t\(.*\), [R]`,`CMPW\t.*, [$]`
        // s390x:`MOVHBR\t\(.*\), [R]`,`CMPW\t.*, [$]`
        return s == "xx"
@@ -327,12 +327,12 @@ func CmpToZero_ex1(a int64, e int32) int {
                return 3
        }
 
-       // arm64:`CMP|CMN`,-`(ADD|SUB)`,`(BMI|BPL)`
+       // arm64:`SUB`,`TBZ`
        if a-11 >= 0 {
                return 4
        }
 
-       // arm64:`CMP|CMN`,-`(ADD|SUB)`,`BEQ`,`(BMI|BPL)`
+       // arm64:`SUB`,`CMP`,`BGT`
        if a-19 > 0 {
                return 4
        }
@@ -355,7 +355,7 @@ func CmpToZero_ex1(a int64, e int32) int {
                return 7
        }
 
-       // arm64:`CMPW|CMNW`,`(BMI|BPL)`
+       // arm64:`SUB`,`TBNZ`
        // arm:`CMP|CMN`, -`(ADD|SUB)`, `(BMI|BPL)`
        if e-11 >= 0 {
                return 8
index c3c24e2e11249757c3e50d4226db2cce887fd1d9..603e0befbbdc752df4a3ba2aee2c5ab3ef8654ce 100644 (file)
@@ -80,19 +80,19 @@ func mimetype(ext string) string {
        // arm64: `MOVB\s1\(R.*\), R.*$`, `CMPW\s\$104, R.*$`, -`cmpstring`
        switch ext {
        // amd64: `CMPL\s\(.*\), \$1836345390$`
-       // arm64: `CMPW\s\$1836345390, R.*$`
+       // arm64: `MOVD\s\$1836345390`, `CMPW\sR.*, R.*$`
        case ".htm":
                return "A"
        // amd64: `CMPL\s\(.*\), \$1953457454$`
-       // arm64: `CMPW\s\$1953457454, R.*$`
+       // arm64: `MOVD\s\$1953457454`, `CMPW\sR.*, R.*$`
        case ".eot":
                return "B"
        // amd64: `CMPL\s\(.*\), \$1735815982$`
-       // arm64: `CMPW\s\$1735815982, R.*$`
+       // arm64: `MOVD\s\$1735815982`, `CMPW\sR.*, R.*$`
        case ".svg":
                return "C"
        // amd64: `CMPL\s\(.*\), \$1718907950$`
-       // arm64: `CMPW\s\$1718907950, R.*$`
+       // arm64: `MOVD\s\$1718907950`, `CMPW\sR.*, R.*$`
        case ".ttf":
                return "D"
        default: