// Find n values that are ORed together with the above op.
a := make([]*Value, 0, 8)
- v := root
- for int64(len(a)) < n {
- if v.Args[0].Op == orOp {
- a = append(a, v.Args[1])
- v = v.Args[0]
- } else if v.Args[1].Op == orOp {
- a = append(a, v.Args[0])
- v = v.Args[1]
- } else if int64(len(a)) == n-2 {
- a = append(a, v.Args[0])
- a = append(a, v.Args[1])
- v = nil
- } else {
+ a = append(a, root)
+ for i := 0; i < len(a) && int64(len(a)) < n; i++ {
+ v := a[i]
+ if v.Uses != 1 && v != root {
+ // Something in this subtree is used somewhere else.
return false
}
+ if v.Op == orOp {
+ a[i] = v.Args[0]
+ a = append(a, v.Args[1])
+ i--
+ }
+ }
+ if int64(len(a)) != n {
+ return false
}
- tail := v // Value to OR in beyond the ones we're working with (or nil if none).
// Check that the first entry to see what ops we're looking for.
// All the entries should be of the form shift(extend(load)), maybe with no shift.
- v = a[0]
+ v := a[0]
if v.Op == shiftOp {
v = v.Args[0]
}
v = leftShift(loadBlock, pos, v, shift0-(n-1)*8)
}
- // Install. If there's a tail, make the root (OR v tail).
- // If not, do (Copy v).
- if tail != nil {
- root.SetArg(0, v)
- root.SetArg(1, tail)
- } else {
- root.reset(OpCopy)
- root.AddArg(v)
- }
+ // Install with (Copy v).
+ root.reset(OpCopy)
+ root.AddArg(v)
// Clobber the loads, just to prevent additional work being done on
// subtrees (which are now unreachable).
return uint64(s[idx<<3])<<56 | uint64(s[(idx<<3)+1])<<48 | uint64(s[(idx<<3)+2])<<40 | uint64(s[(idx<<3)+3])<<32 | uint64(s[(idx<<3)+4])<<24 | uint64(s[(idx<<3)+5])<<16 | uint64(s[(idx<<3)+6])<<8 | uint64(s[(idx<<3)+7])
}
+// Some tougher cases for the memcombine pass.
+
+func reassoc_load_uint32(b []byte) uint32 {
+ // amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
+ return (uint32(b[0]) | uint32(b[1])<<8) | (uint32(b[2])<<16 | uint32(b[3])<<24)
+}
+
+func extrashift_load_uint32(b []byte) uint32 {
+ // amd64:`MOVL\s\([A-Z]+\)`,`SHLL\s[$]2`,-`MOV[BW]`,-`OR`
+ return uint32(b[0])<<2 | uint32(b[1])<<10 | uint32(b[2])<<18 | uint32(b[3])<<26
+
+}
+
+func outoforder_load_uint32(b []byte) uint32 {
+ // amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
+ return uint32(b[0]) | uint32(b[2])<<16 | uint32(b[1])<<8 | uint32(b[3])<<24
+}
+
+func extraOr_load_uint32(b []byte, x, y uint32) uint32 {
+ // amd64:`ORL\s\([A-Z]+\)`,-`MOV[BW]`
+ return x | binary.LittleEndian.Uint32(b) | y
+ // TODO: Note that
+ // x | uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 | y
+ // doesn't work because it associates in a way that memcombine can't detect it.
+}
+
// Check load combining across function calls.
func fcall_byte(a [2]byte) [2]byte {