]> Cypherpunks.ru repositories - gostls13.git/commitdiff
cmd/compile: prioritize non-CALL struct member comparisons
authorDerek Parker <parkerderek86@gmail.com>
Mon, 8 May 2023 21:28:43 +0000 (21:28 +0000)
committerGopher Robot <gobot@golang.org>
Wed, 24 May 2023 21:55:14 +0000 (21:55 +0000)
This patch optimizes reflectdata.geneq to pick apart structs in array
equality and prioritize non-CALL comparisons over those which involve
a runtime function call. This is similar to how arrays of strings
operate currently. Instead of looping over the entire array of structs
once, if there are any comparisons which involve a runtime function
call we instead loop twice. The first loop is all simple, quick
comparisons. If no inequality is found in the first loop the second loop
calls runtime functions for larger memory comparison, which is more
expensive.

For the benchmarks added in this change:

Old:

```
goos: linux
goarch: amd64
pkg: cmd/compile/internal/reflectdata
cpu: AMD Ryzen 9 3950X 16-Core Processor
BenchmarkEqArrayOfStructsEq
BenchmarkEqArrayOfStructsEq-32            797196              1497 ns/op
BenchmarkEqArrayOfStructsEq-32            758332              1581 ns/op
BenchmarkEqArrayOfStructsEq-32            764871              1599 ns/op
BenchmarkEqArrayOfStructsEq-32            760706              1558 ns/op
BenchmarkEqArrayOfStructsEq-32            763112              1476 ns/op
BenchmarkEqArrayOfStructsEq-32            747696              1547 ns/op
BenchmarkEqArrayOfStructsEq-32            756526              1562 ns/op
BenchmarkEqArrayOfStructsEq-32            768829              1486 ns/op
BenchmarkEqArrayOfStructsEq-32            764248              1477 ns/op
BenchmarkEqArrayOfStructsEq-32            752767              1545 ns/op
BenchmarkEqArrayOfStructsNotEq
BenchmarkEqArrayOfStructsNotEq-32         757194              1542 ns/op
BenchmarkEqArrayOfStructsNotEq-32         748942              1552 ns/op
BenchmarkEqArrayOfStructsNotEq-32         766687              1554 ns/op
BenchmarkEqArrayOfStructsNotEq-32         732069              1541 ns/op
BenchmarkEqArrayOfStructsNotEq-32         759163              1576 ns/op
BenchmarkEqArrayOfStructsNotEq-32         796402              1629 ns/op
BenchmarkEqArrayOfStructsNotEq-32         726610              1570 ns/op
BenchmarkEqArrayOfStructsNotEq-32         735770              1584 ns/op
BenchmarkEqArrayOfStructsNotEq-32         745255              1610 ns/op
BenchmarkEqArrayOfStructsNotEq-32         743872              1591 ns/op
PASS
ok      cmd/compile/internal/reflectdata        35.446s
```

New:

```
goos: linux
goarch: amd64
pkg: cmd/compile/internal/reflectdata
cpu: AMD Ryzen 9 3950X 16-Core Processor
BenchmarkEqArrayOfStructsEq
BenchmarkEqArrayOfStructsEq-32            618379              1827 ns/op
BenchmarkEqArrayOfStructsEq-32            619368              1922 ns/op
BenchmarkEqArrayOfStructsEq-32            616023              1910 ns/op
BenchmarkEqArrayOfStructsEq-32            617575              1905 ns/op
BenchmarkEqArrayOfStructsEq-32            610399              1889 ns/op
BenchmarkEqArrayOfStructsEq-32            615378              1823 ns/op
BenchmarkEqArrayOfStructsEq-32            613732              1883 ns/op
BenchmarkEqArrayOfStructsEq-32            613924              1894 ns/op
BenchmarkEqArrayOfStructsEq-32            657799              1876 ns/op
BenchmarkEqArrayOfStructsEq-32            665580              1873 ns/op
BenchmarkEqArrayOfStructsNotEq
BenchmarkEqArrayOfStructsNotEq-32        1834915               627.4 ns/op
BenchmarkEqArrayOfStructsNotEq-32        1806370               660.5 ns/op
BenchmarkEqArrayOfStructsNotEq-32        1828075               625.5 ns/op
BenchmarkEqArrayOfStructsNotEq-32        1819741               641.6 ns/op
BenchmarkEqArrayOfStructsNotEq-32        1813128               632.3 ns/op
BenchmarkEqArrayOfStructsNotEq-32        1865250               643.7 ns/op
BenchmarkEqArrayOfStructsNotEq-32        1828617               632.8 ns/op
BenchmarkEqArrayOfStructsNotEq-32        1862748               633.6 ns/op
BenchmarkEqArrayOfStructsNotEq-32        1825432               638.7 ns/op
BenchmarkEqArrayOfStructsNotEq-32        1804382               628.8 ns/op
PASS
ok      cmd/compile/internal/reflectdata        36.571s
```

Benchstat comparison:

```
name                      old time/op  new time/op  delta
EqArrayOfStructsEq-32     1.53µs ± 4%  1.88µs ± 3%  +22.66%  (p=0.000 n=10+10)
EqArrayOfStructsNotEq-32  1.57µs ± 3%  0.64µs ± 4%  -59.59%  (p=0.000 n=10+10)
```

So, the equal case is a bit slower (unrolling the loop helps with that),
but the non-equal case is now much faster.

Change-Id: I05d776456c79c48a3d6d74b18c45246e58ffbea6
GitHub-Last-Rev: f57ee07d053ec4269a6d7d9109c845d8c862cba1
GitHub-Pull-Request: golang/go#59409
Reviewed-on: https://go-review.googlesource.com/c/go/+/481895
Auto-Submit: Dmitri Shuralyov <dmitshur@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Dmitri Shuralyov <dmitshur@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Heschi Kreinick <heschi@google.com>
src/cmd/compile/internal/compare/compare.go
src/cmd/compile/internal/reflectdata/alg.go
src/cmd/compile/internal/reflectdata/alg_test.go
src/cmd/compile/internal/walk/compare.go
test/fixedbugs/issue8606.go

index 0e78013cf3a934c9b105ae1d06594ad98633025b..1674065556401f257a87142d95cb2e559b64c212 100644 (file)
@@ -166,7 +166,10 @@ func calculateCostForType(t *types.Type) int64 {
 // It works by building a list of boolean conditions to satisfy.
 // Conditions must be evaluated in the returned order and
 // properly short-circuited by the caller.
-func EqStruct(t *types.Type, np, nq ir.Node) []ir.Node {
+// The first return value is the flattened list of conditions,
+// the second value is a boolean indicating whether any of the
+// comparisons could panic.
+func EqStruct(t *types.Type, np, nq ir.Node) ([]ir.Node, bool) {
        // The conditions are a list-of-lists. Conditions are reorderable
        // within each inner list. The outer lists must be evaluated in order.
        var conds [][]ir.Node
@@ -187,9 +190,11 @@ func EqStruct(t *types.Type, np, nq ir.Node) []ir.Node {
                        continue
                }
 
+               typeCanPanic := EqCanPanic(f.Type)
+
                // Compare non-memory fields with field equality.
                if !IsRegularMemory(f.Type) {
-                       if EqCanPanic(f.Type) {
+                       if typeCanPanic {
                                // Enforce ordering by starting a new set of reorderable conditions.
                                conds = append(conds, []ir.Node{})
                        }
@@ -203,7 +208,7 @@ func EqStruct(t *types.Type, np, nq ir.Node) []ir.Node {
                        default:
                                and(ir.NewBinaryExpr(base.Pos, ir.OEQ, p, q))
                        }
-                       if EqCanPanic(f.Type) {
+                       if typeCanPanic {
                                // Also enforce ordering after something that can panic.
                                conds = append(conds, []ir.Node{})
                        }
@@ -238,7 +243,7 @@ func EqStruct(t *types.Type, np, nq ir.Node) []ir.Node {
                })
                flatConds = append(flatConds, c...)
        }
-       return flatConds
+       return flatConds, len(conds) > 1
 }
 
 // EqString returns the nodes
index 10240b2f1fb120833fd183c2c392a41403b35c71..69de685ca0555268d179dad6354e5bb203bb52e4 100644 (file)
@@ -14,6 +14,7 @@ import (
        "cmd/compile/internal/typecheck"
        "cmd/compile/internal/types"
        "cmd/internal/obj"
+       "cmd/internal/src"
 )
 
 // AlgType returns the fixed-width AMEMxx variants instead of the general
@@ -507,7 +508,66 @@ func eqFunc(t *types.Type) *ir.Func {
                                // p[i] == q[i]
                                return ir.NewBinaryExpr(base.Pos, ir.OEQ, pi, qi)
                        })
-               // TODO: pick apart structs, do them piecemeal too
+               case types.TSTRUCT:
+                       isCall := func(n ir.Node) bool {
+                               return n.Op() == ir.OCALL || n.Op() == ir.OCALLFUNC
+                       }
+                       var expr ir.Node
+                       var hasCallExprs bool
+                       allCallExprs := true
+                       and := func(cond ir.Node) {
+                               if expr == nil {
+                                       expr = cond
+                               } else {
+                                       expr = ir.NewLogicalExpr(base.Pos, ir.OANDAND, expr, cond)
+                               }
+                       }
+
+                       var tmpPos src.XPos
+                       pi := ir.NewIndexExpr(tmpPos, np, ir.NewInt(tmpPos, 0))
+                       pi.SetBounded(true)
+                       pi.SetType(t.Elem())
+                       qi := ir.NewIndexExpr(tmpPos, nq, ir.NewInt(tmpPos, 0))
+                       qi.SetBounded(true)
+                       qi.SetType(t.Elem())
+                       flatConds, canPanic := compare.EqStruct(t.Elem(), pi, qi)
+                       for _, c := range flatConds {
+                               if isCall(c) {
+                                       hasCallExprs = true
+                               } else {
+                                       allCallExprs = false
+                               }
+                       }
+                       if !hasCallExprs || allCallExprs || canPanic {
+                               checkAll(1, true, func(pi, qi ir.Node) ir.Node {
+                                       // p[i] == q[i]
+                                       return ir.NewBinaryExpr(base.Pos, ir.OEQ, pi, qi)
+                               })
+                       } else {
+                               checkAll(4, false, func(pi, qi ir.Node) ir.Node {
+                                       expr = nil
+                                       flatConds, _ := compare.EqStruct(t.Elem(), pi, qi)
+                                       if len(flatConds) == 0 {
+                                               return ir.NewBool(base.Pos, true)
+                                       }
+                                       for _, c := range flatConds {
+                                               if !isCall(c) {
+                                                       and(c)
+                                               }
+                                       }
+                                       return expr
+                               })
+                               checkAll(2, true, func(pi, qi ir.Node) ir.Node {
+                                       expr = nil
+                                       flatConds, _ := compare.EqStruct(t.Elem(), pi, qi)
+                                       for _, c := range flatConds {
+                                               if isCall(c) {
+                                                       and(c)
+                                               }
+                                       }
+                                       return expr
+                               })
+                       }
                default:
                        checkAll(1, true, func(pi, qi ir.Node) ir.Node {
                                // p[i] == q[i]
@@ -516,7 +576,7 @@ func eqFunc(t *types.Type) *ir.Func {
                }
 
        case types.TSTRUCT:
-               flatConds := compare.EqStruct(t, np, nq)
+               flatConds, _ := compare.EqStruct(t, np, nq)
                if len(flatConds) == 0 {
                        fn.Body.Append(ir.NewAssignStmt(base.Pos, nr, ir.NewBool(base.Pos, true)))
                } else {
index a1fc8c590cb309dd9999e9bcdfb3cd653a859a08..38fb974f6197ba39502a24a0806b14b83c0f7394 100644 (file)
@@ -4,7 +4,9 @@
 
 package reflectdata_test
 
-import "testing"
+import (
+       "testing"
+)
 
 func BenchmarkEqArrayOfStrings5(b *testing.B) {
        var a [5]string
@@ -75,6 +77,56 @@ func BenchmarkEqArrayOfFloats1024(b *testing.B) {
        }
 }
 
+func BenchmarkEqArrayOfStructsEq(b *testing.B) {
+       type T2 struct {
+               a string
+               b int
+       }
+       const size = 1024
+       var (
+               str1 = "foobar"
+
+               a [size]T2
+               c [size]T2
+       )
+
+       for i := 0; i < size; i++ {
+               a[i].a = str1
+               c[i].a = str1
+       }
+
+       b.ResetTimer()
+       for j := 0; j < b.N; j++ {
+               _ = a == c
+       }
+}
+
+func BenchmarkEqArrayOfStructsNotEq(b *testing.B) {
+       type T2 struct {
+               a string
+               b int
+       }
+       const size = 1024
+       var (
+               str1 = "foobar"
+               str2 = "foobarz"
+
+               a [size]T2
+               c [size]T2
+       )
+
+       for i := 0; i < size; i++ {
+               a[i].a = str1
+               c[i].a = str1
+       }
+       c[len(c)-1].a = str2
+
+       b.ResetTimer()
+       for j := 0; j < b.N; j++ {
+               _ = a == c
+       }
+}
+
 const size = 16
 
 type T1 struct {
index 58d6b574969370b442dbe7d3e981955899a7307c..625cfecee0c8feadaeec0421fe35ddd82c49af82 100644 (file)
@@ -228,7 +228,7 @@ func walkCompare(n *ir.BinaryExpr, init *ir.Nodes) ir.Node {
        cmpl = safeExpr(cmpl, init)
        cmpr = safeExpr(cmpr, init)
        if t.IsStruct() {
-               conds := compare.EqStruct(t, cmpl, cmpr)
+               conds, _ := compare.EqStruct(t, cmpl, cmpr)
                if n.Op() == ir.OEQ {
                        for _, cond := range conds {
                                and(cond)
index 8c850696954746f911d1a8b2e3dc4e8a9df43978..6bac02a1da71f271dc6b16c610431c6c2a8fe08b 100644 (file)
@@ -30,7 +30,17 @@ func main() {
                s string
                j interface{}
        }
+       type S3 struct {
+               f any
+               i int
+       }
+       type S4 struct {
+               a [1000]byte
+               b any
+       }
        b := []byte{1}
+       s1 := S3{func() {}, 0}
+       s2 := S3{func() {}, 1}
 
        for _, test := range []struct {
                panic bool
@@ -64,6 +74,9 @@ func main() {
                {false, T3{s: "foo", j: b}, T3{s: "bar", j: b}},
                {true, T3{i: b, s: "fooz"}, T3{i: b, s: "bar"}},
                {false, T3{s: "fooz", j: b}, T3{s: "bar", j: b}},
+               {true, A{s1, s2}, A{s2, s1}},
+               {true, s1, s2},
+               {false, S4{[1000]byte{0}, func() {}}, S4{[1000]byte{1}, func() {}}},
        } {
                f := func() {
                        defer func() {