]> Cypherpunks.ru repositories - gostls13.git/commitdiff
cmd/compile: optimize absorbing InvertFlags into Noov comparisons for arm64
authoreric fang <eric.fang@arm.com>
Tue, 12 Sep 2023 03:01:33 +0000 (03:01 +0000)
committerEric Fang <eric.fang@arm.com>
Thu, 21 Sep 2023 02:36:06 +0000 (02:36 +0000)
Previously (LessThanNoov (InvertFlags x)) is lowered as:
CSET
CSET
BIC
With this CL it's lowered as:
CSET
CSEL
This saves one instruction.

Similarly (GreaterEqualNoov (InvertFlags x)) is now lowered as:
CSET
CSINC

$ benchstat old.bench new.bench
goos: linux
goarch: arm64
                       │  old.bench  │             new.bench              │
                       │   sec/op    │   sec/op     vs base               │
InvertLessThanNoov-160   2.249n ± 2%   2.190n ± 1%  -2.62% (p=0.003 n=10)

Change-Id: Idd8979b7f4fe466e74b1a201c4aba7f1b0cffb0b
Reviewed-on: https://go-review.googlesource.com/c/go/+/526237
Reviewed-by: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Eric Fang <eric.fang@arm.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
src/cmd/compile/internal/ssa/_gen/ARM64.rules
src/cmd/compile/internal/ssa/bench_test.go
src/cmd/compile/internal/ssa/rewriteARM64.go
test/codegen/comparisons.go

index 1719312fbd47b820cd4c99a1cb780939a268addf..227ec5d6104d56c5501c3fd41d1c24e95b511709 100644 (file)
 (LessEqualF       (InvertFlags x)) => (GreaterEqualF x)
 (GreaterThanF     (InvertFlags x)) => (LessThanF x)
 (GreaterEqualF    (InvertFlags x)) => (LessEqualF x)
-(LessThanNoov     (InvertFlags x)) => (BIC (GreaterEqualNoov <typ.Bool> x) (Equal <typ.Bool> x))
-(GreaterEqualNoov (InvertFlags x)) => (OR (LessThanNoov <typ.Bool> x) (Equal <typ.Bool> x))
+(LessThanNoov     (InvertFlags x)) => (CSEL0 [OpARM64NotEqual] (GreaterEqualNoov <typ.Bool> x) x)
+(GreaterEqualNoov (InvertFlags x)) => (CSINC [OpARM64NotEqual] (LessThanNoov <typ.Bool> x) (MOVDconst [0]) x)
 
 // Boolean-generating instructions (NOTE: NOT all boolean Values) always
 // zero upper bit of the register; no need to zero-extend
index 09716675071ee0e11c609a1c90134569054727fe..1dc733bf5597277610e3865d4f266ee7292ecd22 100644 (file)
@@ -30,3 +30,21 @@ func BenchmarkPhioptPass(b *testing.B) {
                }
        }
 }
+
+type Point struct {
+       X, Y int
+}
+
+//go:noinline
+func sign(p1, p2, p3 Point) bool {
+       return (p1.X-p3.X)*(p2.Y-p3.Y)-(p2.X-p3.X)*(p1.Y-p3.Y) < 0
+}
+
+func BenchmarkInvertLessThanNoov(b *testing.B) {
+       p1 := Point{1, 2}
+       p2 := Point{2, 3}
+       p3 := Point{3, 4}
+       for i := 0; i < b.N; i++ {
+               sign(p1, p2, p3)
+       }
+}
index caeed8b6b9e53555998e5b14739a5ce768b65996..d7752d3876c90401c6b4f2785b150d299d50af5a 100644 (file)
@@ -5974,18 +5974,19 @@ func rewriteValueARM64_OpARM64GreaterEqualNoov(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (GreaterEqualNoov (InvertFlags x))
-       // result: (OR (LessThanNoov <typ.Bool> x) (Equal <typ.Bool> x))
+       // result: (CSINC [OpARM64NotEqual] (LessThanNoov <typ.Bool> x) (MOVDconst [0]) x)
        for {
                if v_0.Op != OpARM64InvertFlags {
                        break
                }
                x := v_0.Args[0]
-               v.reset(OpARM64OR)
+               v.reset(OpARM64CSINC)
+               v.AuxInt = opToAuxInt(OpARM64NotEqual)
                v0 := b.NewValue0(v.Pos, OpARM64LessThanNoov, typ.Bool)
                v0.AddArg(x)
-               v1 := b.NewValue0(v.Pos, OpARM64Equal, typ.Bool)
-               v1.AddArg(x)
-               v.AddArg2(v0, v1)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+               v1.AuxInt = int64ToAuxInt(0)
+               v.AddArg3(v0, v1, x)
                return true
        }
        return false
@@ -6709,18 +6710,17 @@ func rewriteValueARM64_OpARM64LessThanNoov(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (LessThanNoov (InvertFlags x))
-       // result: (BIC (GreaterEqualNoov <typ.Bool> x) (Equal <typ.Bool> x))
+       // result: (CSEL0 [OpARM64NotEqual] (GreaterEqualNoov <typ.Bool> x) x)
        for {
                if v_0.Op != OpARM64InvertFlags {
                        break
                }
                x := v_0.Args[0]
-               v.reset(OpARM64BIC)
+               v.reset(OpARM64CSEL0)
+               v.AuxInt = opToAuxInt(OpARM64NotEqual)
                v0 := b.NewValue0(v.Pos, OpARM64GreaterEqualNoov, typ.Bool)
                v0.AddArg(x)
-               v1 := b.NewValue0(v.Pos, OpARM64Equal, typ.Bool)
-               v1.AddArg(x)
-               v.AddArg2(v0, v1)
+               v.AddArg2(v0, x)
                return true
        }
        return false
index 071b68facfce44d63d09c487ed1e97ca5f72ae82..4edf9303dfd29d93b96e7f62f2f762302709970a 100644 (file)
@@ -788,3 +788,16 @@ func cmp7() {
        cmp5[string]("") // force instantiation
        cmp6[string]("") // force instantiation
 }
+
+type Point struct {
+       X, Y int
+}
+
+// invertLessThanNoov checks (LessThanNoov (InvertFlags x)) is lowered as
+// CMP, CSET, CSEL instruction sequence. InvertFlags are only generated under
+// certain conditions, see canonLessThan, so if the code below does not
+// generate an InvertFlags OP, this check may fail.
+func invertLessThanNoov(p1, p2, p3 Point) bool {
+       // arm64:`CMP`,`CSET`,`CSEL`
+       return (p1.X-p3.X)*(p2.Y-p3.Y)-(p2.X-p3.X)*(p1.Y-p3.Y) < 0
+}