cmd/compile: new inline heuristic for struct compares

author Derek Parker <parkerderek86@gmail.com>

Tue, 30 Aug 2022 21:48:17 +0000 (21:48 +0000)

committer Gopher Robot <gobot@golang.org>

Fri, 2 Sep 2022 17:48:22 +0000 (17:48 +0000)
author Derek Parker <parkerderek86@gmail.com>
Tue, 30 Aug 2022 21:48:17 +0000 (21:48 +0000)
committer Gopher Robot <gobot@golang.org>
Fri, 2 Sep 2022 17:48:22 +0000 (17:48 +0000)
diff --git a/src/cmd/compile/internal/compare/compare.go b/src/cmd/compile/internal/compare/compare.go

index c0017b1b729347e3f293424113660f5194724ac7..512ad25237805a06669bd033339b17c62e434780 100644 (file)
--- a/src/cmd/compile/internal/compare/compare.go
+++ b/src/cmd/compile/internal/compare/compare.go
@@ -79,10 +79,93 @@ func EqCanPanic(t *types.Type) bool {
         }
  }
  
+// EqStructCost returns the cost of an equality comparison of two structs.
+//
+// The cost is determined using an algorithm which takes into consideration
+// the size of the registers in the current architecture and the size of the
+// memory-only fields in the struct.
+func EqStructCost(t *types.Type) int64 {
+       cost := int64(0)
+
+       for i, fields := 0, t.FieldSlice(); i < len(fields); {
+               f := fields[i]
+
+               // Skip blank-named fields.
+               if f.Sym.IsBlank() {
+                       i++
+                       continue
+               }
+
+               n, _, next := eqStructFieldCost(t, i)
+
+               cost += n
+               i = next
+       }
+
+       return cost
+}
+
+// eqStructFieldCost returns the cost of an equality comparison of two struct fields.
+// t is the parent struct type, and i is the index of the field in the parent struct type.
+// eqStructFieldCost may compute the cost of several adjacent fields at once. It returns
+// the cost, the size of the set of fields it computed the cost for (in bytes), and the
+// index of the first field not part of the set of fields for which the cost
+// has already been calculated.
+func eqStructFieldCost(t *types.Type, i int) (int64, int64, int) {
+       var (
+               cost    = int64(0)
+               regSize = int64(types.RegSize)
+
+               size int64
+               next int
+       )
+
+       if base.Ctxt.Arch.CanMergeLoads {
+               // If we can merge adjacent loads then we can calculate the cost of the
+               // comparison using the size of the memory run and the size of the registers.
+               size, next = Memrun(t, i)
+               cost = size / regSize
+               if size%regSize != 0 {
+                       cost++
+               }
+               return cost, size, next
+       }
+
+       // If we cannot merge adjacent loads then we have to use the size of the
+       // field and take into account the type to determine how many loads and compares
+       // are needed.
+       ft := t.Field(i).Type
+       size = ft.Size()
+       next = i + 1
+
+       return calculateCostForType(ft), size, next
+}
+
+func calculateCostForType(t *types.Type) int64 {
+       var cost int64
+       switch t.Kind() {
+       case types.TSTRUCT:
+               return EqStructCost(t)
+       case types.TSLICE:
+               // Slices are not comparable.
+               base.Fatalf("eqStructFieldCost: unexpected slice type")
+       case types.TARRAY:
+               elemCost := calculateCostForType(t.Elem())
+               cost = t.NumElem() * elemCost
+       case types.TSTRING, types.TINTER, types.TCOMPLEX64, types.TCOMPLEX128:
+               cost = 2
+       case types.TINT64, types.TUINT64:
+               cost = 8 / int64(types.RegSize)
+       default:
+               cost = 1
+       }
+       return cost
+}
+
  // EqStruct compares two structs np and nq for equality.
  // It works by building a list of boolean conditions to satisfy.
  // Conditions must be evaluated in the returned order and
-// properly short circuited by the caller.
+// properly short-circuited by the caller.
  func EqStruct(t *types.Type, np, nq ir.Node) []ir.Node {
         // The conditions are a list-of-lists. Conditions are reorderable
         // within each inner list. The outer lists must be evaluated in order.
@@ -128,18 +211,15 @@ func EqStruct(t *types.Type, np, nq ir.Node) []ir.Node {
                         continue
                 }
  
-               // Find maximal length run of memory-only fields.
-               size, next := Memrun(t, i)
-
-               // TODO(rsc): All the calls to newname are wrong for
-               // cross-package unexported fields.
-               if s := fields[i:next]; len(s) <= 2 {
-                       // Two or fewer fields: use plain field equality.
+               cost, size, next := eqStructFieldCost(t, i)
+               if cost <= 4 {
+                       // Cost of 4 or less: use plain field equality.
+                       s := fields[i:next]
                         for _, f := range s {
                                 and(eqfield(np, nq, ir.OEQ, f.Sym))
                         }
                 } else {
-                       // More than two fields: use memequal.
+                       // Higher cost: use memequal.
                         cc := eqmem(np, nq, f.Sym, size)
                         and(cc)
                 }
diff --git a/src/cmd/compile/internal/compare/compare_test.go b/src/cmd/compile/internal/compare/compare_test.go

new file mode 100644 (file)

index 0000000..85c11bf
--- /dev/null
+++ b/src/cmd/compile/internal/compare/compare_test.go
@@ -0,0 +1,178 @@
+package compare
+
+import (
+       "cmd/compile/internal/base"
+       "cmd/compile/internal/typecheck"
+       "cmd/compile/internal/types"
+       "cmd/internal/obj"
+       "cmd/internal/src"
+       "cmd/internal/sys"
+       "testing"
+)
+
+type typefn func() *types.Type
+
+func init() {
+       // These are the few constants that need to be initialized in order to use
+       // the types package without using the typecheck package by calling
+       // typecheck.InitUniverse() (the normal way to initialize the types package).
+       types.PtrSize = 8
+       types.RegSize = 8
+       types.MaxWidth = 1 << 50
+       typecheck.InitUniverse()
+       base.Ctxt = &obj.Link{Arch: &obj.LinkArch{Arch: &sys.Arch{Alignment: 1, CanMergeLoads: true}}}
+}
+
+func TestEqStructCost(t *testing.T) {
+       newByteField := func(parent *types.Type, offset int64) *types.Field {
+               f := types.NewField(src.XPos{}, parent.Sym(), types.ByteType)
+               f.Offset = offset
+               return f
+       }
+       newArrayField := func(parent *types.Type, offset int64, len int64, kind types.Kind) *types.Field {
+               f := types.NewField(src.XPos{}, parent.Sym(), types.NewArray(types.Types[kind], len))
+               // Call Type.Size here to force the size calculation to be done. If not done here the size returned later is incorrect.
+               f.Type.Size()
+               f.Offset = offset
+               return f
+       }
+       newField := func(parent *types.Type, offset int64, kind types.Kind) *types.Field {
+               f := types.NewField(src.XPos{}, parent.Sym(), types.Types[kind])
+               f.Offset = offset
+               return f
+       }
+       tt := []struct {
+               name             string
+               cost             int64
+               nonMergeLoadCost int64
+               tfn              typefn
+       }{
+               {"struct without fields", 0, 0,
+                       func() *types.Type {
+                               return types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
+                       }},
+               {"struct with 1 byte field", 1, 1,
+                       func() *types.Type {
+                               parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
+                               fields := []*types.Field{
+                                       newByteField(parent, 0),
+                               }
+                               parent.SetFields(fields)
+                               return parent
+                       },
+               },
+               {"struct with 8 byte fields", 1, 8,
+                       func() *types.Type {
+                               parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
+                               fields := make([]*types.Field, 8)
+                               for i := range fields {
+                                       fields[i] = newByteField(parent, int64(i))
+                               }
+                               parent.SetFields(fields)
+                               return parent
+                       },
+               },
+               {"struct with 16 byte fields", 2, 16,
+                       func() *types.Type {
+                               parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
+                               fields := make([]*types.Field, 16)
+                               for i := range fields {
+                                       fields[i] = newByteField(parent, int64(i))
+                               }
+                               parent.SetFields(fields)
+                               return parent
+                       },
+               },
+               {"struct with 32 byte fields", 4, 32,
+                       func() *types.Type {
+                               parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
+                               fields := make([]*types.Field, 32)
+                               for i := range fields {
+                                       fields[i] = newByteField(parent, int64(i))
+                               }
+                               parent.SetFields(fields)
+                               return parent
+                       },
+               },
+               {"struct with 2 int32 fields", 1, 2,
+                       func() *types.Type {
+                               parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
+                               fields := make([]*types.Field, 2)
+                               for i := range fields {
+                                       fields[i] = newField(parent, int64(i*4), types.TINT32)
+                               }
+                               parent.SetFields(fields)
+                               return parent
+                       },
+               },
+               {"struct with 2 int32 fields and 1 int64", 2, 3,
+                       func() *types.Type {
+                               parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
+                               fields := make([]*types.Field, 3)
+                               fields[0] = newField(parent, int64(0), types.TINT32)
+                               fields[1] = newField(parent, int64(4), types.TINT32)
+                               fields[2] = newField(parent, int64(8), types.TINT64)
+                               parent.SetFields(fields)
+                               return parent
+                       },
+               },
+               {"struct with 1 int field and 1 string", 3, 3,
+                       func() *types.Type {
+                               parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
+                               fields := make([]*types.Field, 2)
+                               fields[0] = newField(parent, int64(0), types.TINT64)
+                               fields[1] = newField(parent, int64(8), types.TSTRING)
+                               parent.SetFields(fields)
+                               return parent
+                       },
+               },
+               {"struct with 2 strings", 4, 4,
+                       func() *types.Type {
+                               parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
+                               fields := make([]*types.Field, 2)
+                               fields[0] = newField(parent, int64(0), types.TSTRING)
+                               fields[1] = newField(parent, int64(8), types.TSTRING)
+                               parent.SetFields(fields)
+                               return parent
+                       },
+               },
+               {"struct with 1 large byte array field", 26, 101,
+                       func() *types.Type {
+                               parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
+                               fields := []*types.Field{
+                                       newArrayField(parent, 0, 101, types.TUINT16),
+                               }
+                               parent.SetFields(fields)
+                               return parent
+                       },
+               },
+               {"struct with string array field", 4, 4,
+                       func() *types.Type {
+                               parent := types.NewStruct(types.NewPkg("main", ""), []*types.Field{})
+                               fields := []*types.Field{
+                                       newArrayField(parent, 0, 2, types.TSTRING),
+                               }
+                               parent.SetFields(fields)
+                               return parent
+                       },
+               },
+       }
+
+       for _, tc := range tt {
+               t.Run(tc.name, func(t *testing.T) {
+                       want := tc.cost
+                       base.Ctxt.Arch.CanMergeLoads = true
+                       actual := EqStructCost(tc.tfn())
+                       if actual != want {
+                               t.Errorf("CanMergeLoads=true EqStructCost(%v) = %d, want %d", tc.tfn, actual, want)
+                       }
+
+                       base.Ctxt.Arch.CanMergeLoads = false
+                       want = tc.nonMergeLoadCost
+                       actual = EqStructCost(tc.tfn())
+                       if actual != want {
+                               t.Errorf("CanMergeLoads=false EqStructCost(%v) = %d, want %d", tc.tfn, actual, want)
+                       }
+               })
+       }
+}
diff --git a/src/cmd/compile/internal/reflectdata/alg_test.go b/src/cmd/compile/internal/reflectdata/alg_test.go

index 1e57b913fdab3a52624bf14197ae9aa02482aa63..a1fc8c590cb309dd9999e9bcdfb3cd653a859a08 100644 (file)
--- a/src/cmd/compile/internal/reflectdata/alg_test.go
+++ b/src/cmd/compile/internal/reflectdata/alg_test.go
@@ -74,3 +74,22 @@ func BenchmarkEqArrayOfFloats1024(b *testing.B) {
                 _ = a == c
         }
  }
+
+const size = 16
+
+type T1 struct {
+       a [size]byte
+}
+
+func BenchmarkEqStruct(b *testing.B) {
+       x, y := T1{}, T1{}
+       x.a = [size]byte{1, 2, 3, 4, 5, 6, 7, 8}
+       y.a = [size]byte{2, 3, 4, 5, 6, 7, 8, 9}
+
+       for i := 0; i < b.N; i++ {
+               f := x == y
+               if f {
+                       println("hello")
+               }
+       }
+}
diff --git a/src/cmd/compile/internal/walk/compare.go b/src/cmd/compile/internal/walk/compare.go

index 8a8f9b6d93beb389c104623282a6b84256ff9fe7..fe9c5d883339ac81160d0039ef9fc3486eee3374 100644 (file)
--- a/src/cmd/compile/internal/walk/compare.go
+++ b/src/cmd/compile/internal/walk/compare.go
@@ -167,7 +167,7 @@ func walkCompare(n *ir.BinaryExpr, init *ir.Nodes) ir.Node {
                 // We can compare several elements at once with 2/4/8 byte integer compares
                 inline = t.NumElem() <= 1 || (types.IsSimple[t.Elem().Kind()] && (t.NumElem() <= 4 || t.Elem().Size()*t.NumElem() <= maxcmpsize))
         case types.TSTRUCT:
-               inline = t.NumComponents(types.IgnoreBlankFields) <= 4
+               inline = compare.EqStructCost(t) <= 4
         }
  
         cmpl := n.X
diff --git a/test/codegen/comparisons.go b/test/codegen/comparisons.go

index 7e9d4745f1c2696785781ac480324f876696b310..b1dba2482f20cf1a2a3504f3bd58c42bdcd3baa3 100644 (file)
--- a/test/codegen/comparisons.go
+++ b/test/codegen/comparisons.go
@@ -84,6 +84,51 @@ func CompareArray6(a, b unsafe.Pointer) bool {
         return *((*[4]byte)(a)) != *((*[4]byte)(b))
  }
  
+// Check that some structs generate 2/4/8 byte compares.
+
+type T1 struct {
+       a [8]byte
+}
+
+func CompareStruct1(s1, s2 T1) bool {
+       // amd64:`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+       // amd64:-`CALL`
+       return s1 == s2
+}
+
+type T2 struct {
+       a [16]byte
+}
+
+func CompareStruct2(s1, s2 T2) bool {
+       // amd64:`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+       // amd64:-`CALL`
+       return s1 == s2
+}
+
+// Assert that a memequal call is still generated when
+// inlining would increase binary size too much.
+
+type T3 struct {
+       a [24]byte
+}
+
+func CompareStruct3(s1, s2 T3) bool {
+       // amd64:-`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+       // amd64:`CALL`
+       return s1 == s2
+}
+
+type T4 struct {
+       a [32]byte
+}
+
+func CompareStruct4(s1, s2 T4) bool {
+       // amd64:-`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+       // amd64:`CALL`
+       return s1 == s2
+}
+
  // -------------- //
  //    Ordering    //
  // -------------- //
author	Derek Parker <parkerderek86@gmail.com>
	Tue, 30 Aug 2022 21:48:17 +0000 (21:48 +0000)
committer	Gopher Robot <gobot@golang.org>
	Fri, 2 Sep 2022 17:48:22 +0000 (17:48 +0000)
src/cmd/compile/internal/compare/compare.go		patch \| blob \| history
src/cmd/compile/internal/compare/compare_test.go	[new file with mode: 0644]	patch \| blob
src/cmd/compile/internal/reflectdata/alg_test.go		patch \| blob \| history
src/cmd/compile/internal/walk/compare.go		patch \| blob \| history
test/codegen/comparisons.go		patch \| blob \| history