]> Cypherpunks.ru repositories - gostls13.git/commitdiff
cmd/compile/internal/ssa: strengthen phiopt pass
authorfanzha02 <fannie.zhang@arm.com>
Tue, 28 Jul 2020 02:29:06 +0000 (10:29 +0800)
committerfannie zhang <Fannie.Zhang@arm.com>
Mon, 29 Mar 2021 05:50:11 +0000 (05:50 +0000)
The current phiopt pass just transforms the following code
  x := false
  if b { x = true}
into
  x = b

But we find code in runtime.atoi like this:
  neg := false
  if s[0] == '-' {
    neg = true
    s = s[1:]
  }

The current phiopt pass does not covert it into code like:
  neg := s[0] == '-'
  if neg { s = s[1:] }

Therefore, this patch strengthens the phiopt pass so that the
boolean Phi value "neg" can be replaced with a copy of control
value "s[0] == '-'", thereby using "cmp+cset" instead of a branch.

But in some cases even replacing the boolean Phis cannot eliminate
this branch. In the following case, this patch replaces "d" with a
copy of "a<0", but the regalloc pass will insert the "Load {c}"
value into an empty block to split the live ranges, which causes
the branch to not be eliminated.

For example:
  func test(a, b, c int) (bool, int) {
    d := false
    if (a<0) {
      if (b<0) {
        c = c+1
      }
      d = true
    }
    return d, c
  }

The optimized assembly code:
  MOVD "".a(FP), R0
  TBZ $63, R0, 48
  MOVD "".c+16(FP), R1
  ADD $1, R1, R2
  MOVD "".b+8(FP), R3
  CMP ZR, R3
  CSEL LT, R2, R1, R1
  CMP ZR, R0
  CSET LT, R0
  MOVB R0, "".~r3+24(FP)
  MOVD R1, "".~r4+32(FP)
  RET (R30)
  MOVD "".c+16(FP), R1
  JMP 28

The benchmark:

name          old time/op            new time/op            delta
pkg:cmd/compile/internal/ssa goos:linux goarch:arm64
PhioptPass  117783.250000ns +- 1%  117219.111111ns +- 1%   ~  (p=0.074  n=8+9)

Statistical data from compilecmp tool:

compilecmp local/master -> HEAD
local/master (a826f7dc45): debug/dwarf: support DW_FORM_rnglistx aka formRnglistx
HEAD (e57e003c10): cmd/compile/internal/ssa: strengthen phiopt pass

benchstat -geomean  /tmp/2516644532 /tmp/1075915815
completed 50 of 50, estimated time remaining 0s (ETA 7:10PM)
name                      old time/op       new time/op       delta
Template                        554ms _ 3%        553ms _ 3%    ~     (p=0.986 n=49+48)
Unicode                         252ms _ 4%        249ms _ 4%  -1.33%  (p=0.002 n=47+49)
GoTypes                         3.16s _ 3%        3.18s _ 3%  +0.77%  (p=0.022 n=44+48)
Compiler                        257ms _ 4%        258ms _ 4%    ~     (p=0.121 n=50+49)
SSA                             24.2s _ 4%        24.2s _ 5%    ~     (p=0.694 n=49+50)
Flate                           338ms _ 4%        338ms _ 4%    ~     (p=0.592 n=43+46)
GoParser                        506ms _ 3%        507ms _ 3%    ~     (p=0.942 n=49+50)
Reflect                         1.37s _ 4%        1.37s _ 5%    ~     (p=0.408 n=50+50)
Tar                             486ms _ 3%        487ms _ 4%    ~     (p=0.911 n=47+50)
XML                             619ms _ 2%        619ms _ 3%    ~     (p=0.368 n=46+48)
LinkCompiler                    1.29s _31%        1.32s _23%    ~     (p=0.306 n=49+44)
ExternalLinkCompiler            3.39s _10%        3.36s _ 6%    ~     (p=0.311 n=48+46)
LinkWithoutDebugCompiler        846ms _37%        793ms _24%  -6.29%  (p=0.040 n=50+49)
[Geo mean]                      974ms             971ms       -0.36%

name                      old user-time/op  new user-time/op  delta
Template                        910ms _12%        893ms _13%    ~     (p=0.098 n=49+49)
Unicode                         495ms _28%        492ms _18%    ~     (p=0.562 n=50+46)
GoTypes                         4.42s _15%        4.39s _13%    ~     (p=0.684 n=49+50)
Compiler                        419ms _22%        422ms _16%    ~     (p=0.579 n=48+50)
SSA                             36.5s _ 7%        36.6s _ 8%    ~     (p=0.465 n=50+47)
Flate                           521ms _21%        523ms _16%    ~     (p=0.889 n=50+47)
GoParser                        810ms _12%        792ms _15%    ~     (p=0.149 n=50+50)
Reflect                         1.98s _13%        2.02s _13%    ~     (p=0.144 n=47+50)
Tar                             826ms _15%        806ms _19%    ~     (p=0.115 n=49+49)
XML                             988ms _14%       1003ms _14%    ~     (p=0.179 n=50+50)
LinkCompiler                    1.79s _ 8%        1.84s _11%  +2.81%  (p=0.001 n=49+49)
ExternalLinkCompiler            3.69s _ 4%        3.71s _ 3%    ~     (p=0.261 n=50+50)
LinkWithoutDebugCompiler        838ms _10%        827ms _11%    ~     (p=0.323 n=50+48)
[Geo mean]                      1.44s             1.44s       -0.05%

name                      old alloc/op      new alloc/op      delta
Template                       39.0MB _ 1%       39.0MB _ 1%    ~     (p=0.445 n=50+49)
Unicode                        28.5MB _ 0%       28.5MB _ 0%    ~     (p=0.460 n=50+50)
GoTypes                         169MB _ 1%        169MB _ 1%    ~     (p=0.092 n=48+50)
Compiler                       23.4MB _ 1%       23.4MB _ 1%  -0.19%  (p=0.032 n=50+49)
SSA                            1.54GB _ 0%       1.55GB _ 1%  +0.14%  (p=0.001 n=50+50)
Flate                          23.8MB _ 1%       23.8MB _ 2%    ~     (p=0.702 n=49+49)
GoParser                       35.4MB _ 1%       35.4MB _ 1%    ~     (p=0.786 n=50+50)
Reflect                        85.3MB _ 1%       85.3MB _ 1%    ~     (p=0.298 n=50+50)
Tar                            34.6MB _ 2%       34.6MB _ 2%    ~     (p=0.683 n=50+50)
XML                            44.5MB _ 3%       44.0MB _ 2%  -1.05%  (p=0.000 n=50+46)
LinkCompiler                    136MB _ 0%        136MB _ 0%  +0.01%  (p=0.005 n=50+50)
ExternalLinkCompiler            128MB _ 0%        128MB _ 0%    ~     (p=0.179 n=50+50)
LinkWithoutDebugCompiler       84.3MB _ 0%       84.3MB _ 0%  +0.01%  (p=0.006 n=50+50)
[Geo mean]                     70.7MB            70.6MB       -0.07%

name                      old allocs/op     new allocs/op     delta
Template                         410k _ 0%         410k _ 0%    ~     (p=0.606 n=48+49)
Unicode                          310k _ 0%         310k _ 0%    ~     (p=0.674 n=50+50)
GoTypes                         1.81M _ 0%        1.81M _ 0%    ~     (p=0.674 n=50+50)
Compiler                         202k _ 0%         202k _ 0%  +0.02%  (p=0.046 n=50+50)
SSA                             16.3M _ 0%        16.3M _ 0%  +0.10%  (p=0.000 n=50+50)
Flate                            244k _ 0%         244k _ 0%    ~     (p=0.834 n=49+50)
GoParser                         380k _ 0%         380k _ 0%    ~     (p=0.410 n=50+50)
Reflect                         1.08M _ 0%        1.08M _ 0%    ~     (p=0.782 n=48+50)
Tar                              368k _ 0%         368k _ 0%    ~     (p=0.585 n=50+49)
XML                              453k _ 0%         453k _ 0%  -0.01%  (p=0.025 n=49+49)
LinkCompiler                     713k _ 0%         713k _ 0%  +0.01%  (p=0.044 n=50+50)
ExternalLinkCompiler             794k _ 0%         794k _ 0%  +0.01%  (p=0.000 n=50+49)
LinkWithoutDebugCompiler         251k _ 0%         251k _ 0%    ~     (p=0.092 n=47+50)
[Geo mean]                       615k              615k       +0.01%

name                      old maxRSS/op     new maxRSS/op     delta
Template                        37.0M _ 4%        37.2M _ 3%    ~     (p=0.062 n=48+48)
Unicode                         36.9M _ 5%        37.3M _ 4%  +1.10%  (p=0.021 n=50+47)
GoTypes                         94.3M _ 3%        94.9M _ 4%  +0.69%  (p=0.022 n=45+46)
Compiler                        33.4M _ 3%        33.4M _ 5%    ~     (p=0.964 n=49+50)
SSA                              741M _ 3%         738M _ 3%    ~     (p=0.164 n=50+50)
Flate                           28.5M _ 6%        28.8M _ 4%  +1.07%  (p=0.009 n=50+49)
GoParser                        35.0M _ 3%        35.3M _ 4%  +0.83%  (p=0.010 n=50+48)
Reflect                         57.2M _ 6%        57.1M _ 4%    ~     (p=0.815 n=50+49)
Tar                             34.9M _ 3%        35.0M _ 3%    ~     (p=0.134 n=49+48)
XML                             39.5M _ 5%        40.0M _ 3%  +1.35%  (p=0.001 n=50+48)
LinkCompiler                     220M _ 2%         220M _ 2%    ~     (p=0.547 n=49+48)
ExternalLinkCompiler             235M _ 2%         236M _ 2%    ~     (p=0.538 n=47+44)
LinkWithoutDebugCompiler         179M _ 1%         179M _ 1%    ~     (p=0.775 n=50+50)
[Geo mean]                      74.9M             75.2M       +0.43%

name                      old text-bytes    new text-bytes    delta
HelloSize                       784kB _ 0%        784kB _ 0%  +0.01%  (p=0.000 n=50+50)

name                      old data-bytes    new data-bytes    delta
HelloSize                      13.1kB _ 0%       13.1kB _ 0%    ~     (all equal)

name                      old bss-bytes     new bss-bytes     delta
HelloSize                       206kB _ 0%        206kB _ 0%    ~     (all equal)

name                      old exe-bytes     new exe-bytes     delta
HelloSize                      1.28MB _ 0%       1.28MB _ 0%  +0.00%  (p=0.000 n=50+50)

file      before    after     _       %
addr2line 4006300   4004484   -1816   -0.045%
api       5029956   5029324   -632    -0.013%
asm       4936311   4939423   +3112   +0.063%
buildid   2595059   2595291   +232    +0.009%
cgo       4401029   4397333   -3696   -0.084%
compile   22246677  22246863  +186    +0.001%
cover     4443825   4443065   -760    -0.017%
dist      3366078   3365838   -240    -0.007%
doc       3776391   3776615   +224    +0.006%
fix       3218800   3218648   -152    -0.005%
link      6365321   6365345   +24     +0.000%
nm        3923625   3923857   +232    +0.006%
objdump   4295569   4295041   -528    -0.012%
pack      2390745   2389217   -1528   -0.064%
pprof     12870094  12866942  -3152   -0.024%
test2json 2587265   2587073   -192    -0.007%
trace     9612629   9613981   +1352   +0.014%
vet       6791008   6792072   +1064   +0.016%
total     106856682 106850412 -6270   -0.006%

Update #37608

Change-Id: Ic6206b22fd1faf570be9fd3c2511aa6c4ce38cdb
Reviewed-on: https://go-review.googlesource.com/c/go/+/252937
Trust: fannie zhang <Fannie.Zhang@arm.com>
Run-TryBot: fannie zhang <Fannie.Zhang@arm.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/ssa/bench_test.go [new file with mode: 0644]
src/cmd/compile/internal/ssa/phiopt.go
src/cmd/compile/internal/ssa/sparsetree.go
test/phiopt.go

diff --git a/src/cmd/compile/internal/ssa/bench_test.go b/src/cmd/compile/internal/ssa/bench_test.go
new file mode 100644 (file)
index 0000000..0971667
--- /dev/null
@@ -0,0 +1,32 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+package ssa
+
+import (
+       "math/rand"
+       "testing"
+)
+
+var d int
+
+//go:noinline
+func fn(a, b int) bool {
+       c := false
+       if a > 0 {
+               if b < 0 {
+                       d = d + 1
+               }
+               c = true
+       }
+       return c
+}
+
+func BenchmarkPhioptPass(b *testing.B) {
+       for i := 0; i < b.N; i++ {
+               a := rand.Perm(i/10 + 10)
+               for i := 1; i < len(a)/2; i++ {
+                       fn(a[i]-a[i-1], a[i+len(a)/2-2]-a[i+len(a)/2-1])
+               }
+       }
+}
index db7b02275cf934c62d66a9e9434bc61d041fff38..ee583d022522127638f569d32349edf89de3e33e 100644 (file)
@@ -46,7 +46,6 @@ func phiopt(f *Func) {
                        continue
                }
                // b0 is the if block giving the boolean value.
-
                // reverse is the predecessor from which the truth value comes.
                var reverse int
                if b0.Succs[0].b == pb0 && b0.Succs[1].b == pb1 {
@@ -120,6 +119,141 @@ func phiopt(f *Func) {
                        }
                }
        }
+       // strengthen phi optimization.
+       // Main use case is to transform:
+       //   x := false
+       //   if c {
+       //     x = true
+       //     ...
+       //   }
+       // into
+       //   x := c
+       //   if x { ... }
+       //
+       // For example, in SSA code a case appears as
+       // b0
+       //   If c -> b, sb0
+       // sb0
+       //   If d -> sd0, sd1
+       // sd1
+       //   ...
+       // sd0
+       //   Plain -> b
+       // b
+       //   x = (OpPhi (ConstBool [true]) (ConstBool [false]))
+       //
+       // In this case we can also replace x with a copy of c.
+       //
+       // The optimization idea:
+       // 1. block b has a phi value x, x = OpPhi (ConstBool [true]) (ConstBool [false]),
+       //    and len(b.Preds) is equal to 2.
+       // 2. find the common dominator(b0) of the predecessors(pb0, pb1) of block b, and the
+       //    dominator(b0) is a If block.
+       //    Special case: one of the predecessors(pb0 or pb1) is the dominator(b0).
+       // 3. the successors(sb0, sb1) of the dominator need to dominate the predecessors(pb0, pb1)
+       //    of block b respectively.
+       // 4. replace this boolean Phi based on dominator block.
+       //
+       //     b0(pb0)            b0(pb1)          b0
+       //    |  \               /  |             /  \
+       //    |  sb1           sb0  |           sb0  sb1
+       //    |  ...           ...  |           ...   ...
+       //    |  pb1           pb0  |           pb0  pb1
+       //    |  /               \  |            \   /
+       //     b                   b               b
+       //
+       var lca *lcaRange
+       for _, b := range f.Blocks {
+               if len(b.Preds) != 2 || len(b.Values) == 0 {
+                       // TODO: handle more than 2 predecessors, e.g. a || b || c.
+                       continue
+               }
+
+               for _, v := range b.Values {
+                       // find a phi value v = OpPhi (ConstBool [true]) (ConstBool [false]).
+                       // TODO: v = OpPhi (ConstBool [true]) (Arg <bool> {value})
+                       if v.Op != OpPhi {
+                               continue
+                       }
+                       if v.Args[0].Op != OpConstBool || v.Args[1].Op != OpConstBool {
+                               continue
+                       }
+                       if v.Args[0].AuxInt == v.Args[1].AuxInt {
+                               continue
+                       }
+
+                       pb0 := b.Preds[0].b
+                       pb1 := b.Preds[1].b
+                       if pb0.Kind == BlockIf && pb0 == sdom.Parent(b) {
+                               // special case: pb0 is the dominator block b0.
+                               //     b0(pb0)
+                               //    |  \
+                               //    |  sb1
+                               //    |  ...
+                               //    |  pb1
+                               //    |  /
+                               //     b
+                               // if another successor sb1 of b0(pb0) dominates pb1, do replace.
+                               ei := b.Preds[0].i
+                               sb1 := pb0.Succs[1-ei].b
+                               if sdom.IsAncestorEq(sb1, pb1) {
+                                       convertPhi(pb0, v, ei)
+                                       break
+                               }
+                       } else if pb1.Kind == BlockIf && pb1 == sdom.Parent(b) {
+                               // special case: pb1 is the dominator block b0.
+                               //       b0(pb1)
+                               //     /   |
+                               //    sb0  |
+                               //    ...  |
+                               //    pb0  |
+                               //      \  |
+                               //        b
+                               // if another successor sb0 of b0(pb0) dominates pb0, do replace.
+                               ei := b.Preds[1].i
+                               sb0 := pb1.Succs[1-ei].b
+                               if sdom.IsAncestorEq(sb0, pb0) {
+                                       convertPhi(pb1, v, ei-1)
+                                       break
+                               }
+                       } else {
+                               //      b0
+                               //     /   \
+                               //    sb0  sb1
+                               //    ...  ...
+                               //    pb0  pb1
+                               //      \   /
+                               //        b
+                               //
+                               // Build data structure for fast least-common-ancestor queries.
+                               if lca == nil {
+                                       lca = makeLCArange(f)
+                               }
+                               b0 := lca.find(pb0, pb1)
+                               if b0.Kind != BlockIf {
+                                       break
+                               }
+                               sb0 := b0.Succs[0].b
+                               sb1 := b0.Succs[1].b
+                               var reverse int
+                               if sdom.IsAncestorEq(sb0, pb0) && sdom.IsAncestorEq(sb1, pb1) {
+                                       reverse = 0
+                               } else if sdom.IsAncestorEq(sb1, pb0) && sdom.IsAncestorEq(sb0, pb1) {
+                                       reverse = 1
+                               } else {
+                                       break
+                               }
+                               if len(sb0.Preds) != 1 || len(sb1.Preds) != 1 {
+                                       // we can not replace phi value x in the following case.
+                                       //   if gp == nil || sp < lo { x = true}
+                                       //   if a || b { x = true }
+                                       // so the if statement can only have one condition.
+                                       break
+                               }
+                               convertPhi(b0, v, reverse)
+                       }
+               }
+       }
 }
 
 func phioptint(v *Value, b0 *Block, reverse int) {
@@ -174,3 +308,16 @@ func phioptint(v *Value, b0 *Block, reverse int) {
                f.Warnl(v.Block.Pos, "converted OpPhi bool -> int%d", v.Type.Size()*8)
        }
 }
+
+// b is the If block giving the boolean value.
+// v is the phi value v = (OpPhi (ConstBool [true]) (ConstBool [false])).
+// reverse is the predecessor from which the truth value comes.
+func convertPhi(b *Block, v *Value, reverse int) {
+       f := b.Func
+       ops := [2]Op{OpNot, OpCopy}
+       v.reset(ops[v.Args[reverse].AuxInt])
+       v.AddArg(b.Controls[0])
+       if f.pass.debug > 0 {
+               f.Warnl(b.Pos, "converted OpPhi to %v", v.Op)
+       }
+}
index 1be20b2cdaedca53759d6eae787d8839f5b3e81f..be914c8644de42a0d70fc1474bb267a5fa5ed1d2 100644 (file)
@@ -178,6 +178,12 @@ func (t SparseTree) Child(x *Block) *Block {
        return t[x.ID].child
 }
 
+// Parent returns the parent of x in the dominator tree, or
+// nil if x is the function's entry.
+func (t SparseTree) Parent(x *Block) *Block {
+       return t[x.ID].parent
+}
+
 // isAncestorEq reports whether x is an ancestor of or equal to y.
 func (t SparseTree) IsAncestorEq(x, y *Block) bool {
        if x == y {
index 98a7b75d10911c1fd811e0919af5d70d75d85837..e04373eb72371e7979707fd2cc1629a8a177c762 100644 (file)
@@ -1,4 +1,4 @@
-// +build amd64 s390x
+// +build amd64 s390x arm64
 // errorcheck -0 -d=ssa/phiopt/debug=3
 
 // Copyright 2016 The Go Authors. All rights reserved.
@@ -104,5 +104,29 @@ func f7and(a bool, b bool) bool {
        return a && b // ERROR "converted OpPhi to AndB$"
 }
 
+//go:noinline
+func f8(s string) (string, bool) {
+       neg := false
+       if s[0] == '-' {    // ERROR "converted OpPhi to Copy$"
+               neg = true
+               s = s[1:]
+       }
+       return s, neg
+}
+
+var d int
+
+//go:noinline
+func f9(a, b int) bool {
+       c := false
+       if a < 0 {          // ERROR "converted OpPhi to Copy$"
+               if b < 0 {
+                       d = d + 1
+               }
+               c = true
+       }
+       return c
+}
+
 func main() {
 }