]> Cypherpunks.ru repositories - gostls13.git/commitdiff
cmd/compile: make memcombine pass a bit more robust to reassociation of exprs
authorKeith Randall <khr@golang.org>
Tue, 9 May 2023 20:25:40 +0000 (13:25 -0700)
committerKeith Randall <khr@golang.org>
Tue, 16 May 2023 19:13:26 +0000 (19:13 +0000)
Be more liberal about expanding the OR tree. Handle any tree shape
instead of a fully left or right associative tree.

Also remove tail feature, it isn't ever needed.

Change-Id: If16bebef94b952a604d6069e9be3d9129994cb6f
Reviewed-on: https://go-review.googlesource.com/c/go/+/494056
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Ryan Berger <ryanbberger@gmail.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/compile/internal/ssa/memcombine.go
test/codegen/memcombine.go

index fc0b665b3478c291810474d2edecb6c696a80994..b2c5fe3abf8728ee8ed021387789f088a51dd87e 100644 (file)
@@ -141,27 +141,26 @@ func combineLoads(root *Value, n int64) bool {
 
        // Find n values that are ORed together with the above op.
        a := make([]*Value, 0, 8)
-       v := root
-       for int64(len(a)) < n {
-               if v.Args[0].Op == orOp {
-                       a = append(a, v.Args[1])
-                       v = v.Args[0]
-               } else if v.Args[1].Op == orOp {
-                       a = append(a, v.Args[0])
-                       v = v.Args[1]
-               } else if int64(len(a)) == n-2 {
-                       a = append(a, v.Args[0])
-                       a = append(a, v.Args[1])
-                       v = nil
-               } else {
+       a = append(a, root)
+       for i := 0; i < len(a) && int64(len(a)) < n; i++ {
+               v := a[i]
+               if v.Uses != 1 && v != root {
+                       // Something in this subtree is used somewhere else.
                        return false
                }
+               if v.Op == orOp {
+                       a[i] = v.Args[0]
+                       a = append(a, v.Args[1])
+                       i--
+               }
+       }
+       if int64(len(a)) != n {
+               return false
        }
-       tail := v // Value to OR in beyond the ones we're working with (or nil if none).
 
        // Check that the first entry to see what ops we're looking for.
        // All the entries should be of the form shift(extend(load)), maybe with no shift.
-       v = a[0]
+       v := a[0]
        if v.Op == shiftOp {
                v = v.Args[0]
        }
@@ -317,15 +316,9 @@ func combineLoads(root *Value, n int64) bool {
                v = leftShift(loadBlock, pos, v, shift0-(n-1)*8)
        }
 
-       // Install. If there's a tail, make the root (OR v tail).
-       // If not, do (Copy v).
-       if tail != nil {
-               root.SetArg(0, v)
-               root.SetArg(1, tail)
-       } else {
-               root.reset(OpCopy)
-               root.AddArg(v)
-       }
+       // Install with (Copy v).
+       root.reset(OpCopy)
+       root.AddArg(v)
 
        // Clobber the loads, just to prevent additional work being done on
        // subtrees (which are now unreachable).
index c7a2c7e5ace04ea22f976195e22793a85752a3c1..0d1c390dfc33076cbd16742081231458a451199c 100644 (file)
@@ -338,6 +338,32 @@ func load_be_byte8_uint64_idx8(s []byte, idx int) uint64 {
        return uint64(s[idx<<3])<<56 | uint64(s[(idx<<3)+1])<<48 | uint64(s[(idx<<3)+2])<<40 | uint64(s[(idx<<3)+3])<<32 | uint64(s[(idx<<3)+4])<<24 | uint64(s[(idx<<3)+5])<<16 | uint64(s[(idx<<3)+6])<<8 | uint64(s[(idx<<3)+7])
 }
 
+// Some tougher cases for the memcombine pass.
+
+func reassoc_load_uint32(b []byte) uint32 {
+       // amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
+       return (uint32(b[0]) | uint32(b[1])<<8) | (uint32(b[2])<<16 | uint32(b[3])<<24)
+}
+
+func extrashift_load_uint32(b []byte) uint32 {
+       // amd64:`MOVL\s\([A-Z]+\)`,`SHLL\s[$]2`,-`MOV[BW]`,-`OR`
+       return uint32(b[0])<<2 | uint32(b[1])<<10 | uint32(b[2])<<18 | uint32(b[3])<<26
+
+}
+
+func outoforder_load_uint32(b []byte) uint32 {
+       // amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
+       return uint32(b[0]) | uint32(b[2])<<16 | uint32(b[1])<<8 | uint32(b[3])<<24
+}
+
+func extraOr_load_uint32(b []byte, x, y uint32) uint32 {
+       // amd64:`ORL\s\([A-Z]+\)`,-`MOV[BW]`
+       return x | binary.LittleEndian.Uint32(b) | y
+       // TODO: Note that
+       //   x | uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 | y
+       // doesn't work because it associates in a way that memcombine can't detect it.
+}
+
 // Check load combining across function calls.
 
 func fcall_byte(a [2]byte) [2]byte {