var p *[2]uint32 = ...
p[0] = 0
p[1] = 0
When we combine these two 32-bit stores into a single 64-bit store,
use the line number of the first store, not the second one.
This differs from the default behavior: for most of the combining
the compiler does, we use the line number of the last instruction
in the combo (e.g. for load+add, we use the line number of the add).
This is the same behavior that gcc does in C (picking the line
number of the first of a set of combined stores).
Change-Id: Ie70bf6151755322d33ecd50e4d9caf62f7881784
Reviewed-on: https://go-review.googlesource.com/c/go/+/521678
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: David Chase <drchase@google.com>
&& sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off()))
&& a.Val() == 0
&& c.Val() == 0
+ && setPos(v, x.Pos)
&& clobber(x)
=> (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem)
(MOVQstoreconst [a] {s} p0 x:(MOVQstoreconst [c] {s} p1 mem))
&& sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off()))
&& a.Val() == 0
&& c.Val() == 0
+ && setPos(v, x.Pos)
&& clobber(x)
=> (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem)
(STP [16] dst (Select0 <typ.UInt64> (LDP [16] src mem)) (Select1 <typ.UInt64> (LDP [16] src mem))
(STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem))))
-(MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i+8] ptr mem)) && x.Uses == 1 && clobber(x) => (MOVQstorezero {s} [i] ptr mem)
-(MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i-8] ptr mem)) && x.Uses == 1 && clobber(x) => (MOVQstorezero {s} [i-8] ptr mem)
+(MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i+8] ptr mem)) && x.Uses == 1 && setPos(v, x.Pos) && clobber(x) => (MOVQstorezero {s} [i] ptr mem)
+(MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i-8] ptr mem)) && x.Uses == 1 && setPos(v, x.Pos) && clobber(x) => (MOVQstorezero {s} [i-8] ptr mem)
// strip off fractional word move
(Move [s] dst src mem) && s%16 != 0 && s%16 <= 8 && s > 16 =>
&& p.Op != OpSB
&& x.Uses == 1
&& is20Bit(int64(i)-4)
+ && setPos(v, x.Pos)
&& clobber(x)
=> (STM2 [i-4] {s} p w0 w1 mem)
(MOVWstore [i] {s} p w2 x:(STM2 [i-8] {s} p w0 w1 mem))
&& x.Uses == 1
&& is20Bit(int64(i)-8)
+ && setPos(v, x.Pos)
&& clobber(x)
=> (STM3 [i-8] {s} p w0 w1 w2 mem)
(MOVWstore [i] {s} p w3 x:(STM3 [i-12] {s} p w0 w1 w2 mem))
&& x.Uses == 1
&& is20Bit(int64(i)-12)
+ && setPos(v, x.Pos)
&& clobber(x)
=> (STM4 [i-12] {s} p w0 w1 w2 w3 mem)
(STM2 [i] {s} p w2 w3 x:(STM2 [i-8] {s} p w0 w1 mem))
&& x.Uses == 1
&& is20Bit(int64(i)-8)
+ && setPos(v, x.Pos)
&& clobber(x)
=> (STM4 [i-8] {s} p w0 w1 w2 w3 mem)
// 64-bit
&& p.Op != OpSB
&& x.Uses == 1
&& is20Bit(int64(i)-8)
+ && setPos(v, x.Pos)
&& clobber(x)
=> (STMG2 [i-8] {s} p w0 w1 mem)
(MOVDstore [i] {s} p w2 x:(STMG2 [i-16] {s} p w0 w1 mem))
&& x.Uses == 1
&& is20Bit(int64(i)-16)
+ && setPos(v, x.Pos)
&& clobber(x)
=> (STMG3 [i-16] {s} p w0 w1 w2 mem)
(MOVDstore [i] {s} p w3 x:(STMG3 [i-24] {s} p w0 w1 w2 mem))
&& x.Uses == 1
&& is20Bit(int64(i)-24)
+ && setPos(v, x.Pos)
&& clobber(x)
=> (STMG4 [i-24] {s} p w0 w1 w2 w3 mem)
(STMG2 [i] {s} p w2 w3 x:(STMG2 [i-16] {s} p w0 w1 mem))
&& x.Uses == 1
&& is20Bit(int64(i)-16)
+ && setPos(v, x.Pos)
&& clobber(x)
=> (STMG4 [i-16] {s} p w0 w1 w2 w3 mem)
}
// Before we sort, grab the memory arg the result should have.
mem := a[n-1].store.Args[2]
+ // Also grab position of first store (last in array = first in memory order).
+ pos := a[n-1].store.Pos
// Sort stores in increasing address order.
sort.Slice(a, func(i, j int) bool {
v := a[i].store
if v == root {
v.Aux = cv.Type // widen store type
+ v.Pos = pos
v.SetArg(0, ptr)
v.SetArg(1, cv)
v.SetArg(2, mem)
v := a[i].store
if v == root {
v.Aux = load.Type // widen store type
+ v.Pos = pos
v.SetArg(0, ptr)
v.SetArg(1, load)
v.SetArg(2, mem)
v := a[i].store
if v == root {
v.Aux = sv.Type // widen store type
+ v.Pos = pos
v.SetArg(0, ptr)
v.SetArg(1, sv)
v.SetArg(2, mem)
}
return v <= 0xFFF
}
+
+// setPos sets the position of v to pos, then returns true.
+// Useful for setting the result of a rewrite's position to
+// something other than the default.
+// Returning true lets it be chained into a rewrite rule's boolean
+// condition alongside checks like clobber(x).
+func setPos(v *Value, pos src.XPos) bool {
+ v.Pos = pos
+ return true
+}
return true
}
// match: (MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [a] {s} p0 mem))
- // cond: config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x)
+ // cond: config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && setPos(v, x.Pos) && clobber(x)
// result: (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem)
for {
c := auxIntToValAndOff(v.AuxInt)
}
mem := x.Args[1]
p0 := x.Args[0]
- if !(config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x)) {
+ if !(config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && setPos(v, x.Pos) && clobber(x)) {
break
}
v.reset(OpAMD64MOVOstoreconst)
return true
}
// match: (MOVQstoreconst [a] {s} p0 x:(MOVQstoreconst [c] {s} p1 mem))
- // cond: config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x)
+ // cond: config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && setPos(v, x.Pos) && clobber(x)
// result: (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem)
for {
a := auxIntToValAndOff(v.AuxInt)
}
mem := x.Args[1]
p1 := x.Args[0]
- if !(config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x)) {
+ if !(config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && setPos(v, x.Pos) && clobber(x)) {
break
}
v.reset(OpAMD64MOVOstoreconst)
b := v.Block
config := b.Func.Config
// match: (MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i+8] ptr mem))
- // cond: x.Uses == 1 && clobber(x)
+ // cond: x.Uses == 1 && setPos(v, x.Pos) && clobber(x)
// result: (MOVQstorezero {s} [i] ptr mem)
for {
i := auxIntToInt32(v.AuxInt)
break
}
mem := x.Args[1]
- if ptr != x.Args[0] || !(x.Uses == 1 && clobber(x)) {
+ if ptr != x.Args[0] || !(x.Uses == 1 && setPos(v, x.Pos) && clobber(x)) {
break
}
v.reset(OpARM64MOVQstorezero)
return true
}
// match: (MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i-8] ptr mem))
- // cond: x.Uses == 1 && clobber(x)
+ // cond: x.Uses == 1 && setPos(v, x.Pos) && clobber(x)
// result: (MOVQstorezero {s} [i-8] ptr mem)
for {
i := auxIntToInt32(v.AuxInt)
break
}
mem := x.Args[1]
- if ptr != x.Args[0] || !(x.Uses == 1 && clobber(x)) {
+ if ptr != x.Args[0] || !(x.Uses == 1 && setPos(v, x.Pos) && clobber(x)) {
break
}
v.reset(OpARM64MOVQstorezero)
return true
}
// match: (MOVDstore [i] {s} p w1 x:(MOVDstore [i-8] {s} p w0 mem))
- // cond: p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-8) && clobber(x)
+ // cond: p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-8) && setPos(v, x.Pos) && clobber(x)
// result: (STMG2 [i-8] {s} p w0 w1 mem)
for {
i := auxIntToInt32(v.AuxInt)
break
}
w0 := x.Args[1]
- if !(p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-8) && clobber(x)) {
+ if !(p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-8) && setPos(v, x.Pos) && clobber(x)) {
break
}
v.reset(OpS390XSTMG2)
return true
}
// match: (MOVDstore [i] {s} p w2 x:(STMG2 [i-16] {s} p w0 w1 mem))
- // cond: x.Uses == 1 && is20Bit(int64(i)-16) && clobber(x)
+ // cond: x.Uses == 1 && is20Bit(int64(i)-16) && setPos(v, x.Pos) && clobber(x)
// result: (STMG3 [i-16] {s} p w0 w1 w2 mem)
for {
i := auxIntToInt32(v.AuxInt)
}
w0 := x.Args[1]
w1 := x.Args[2]
- if !(x.Uses == 1 && is20Bit(int64(i)-16) && clobber(x)) {
+ if !(x.Uses == 1 && is20Bit(int64(i)-16) && setPos(v, x.Pos) && clobber(x)) {
break
}
v.reset(OpS390XSTMG3)
return true
}
// match: (MOVDstore [i] {s} p w3 x:(STMG3 [i-24] {s} p w0 w1 w2 mem))
- // cond: x.Uses == 1 && is20Bit(int64(i)-24) && clobber(x)
+ // cond: x.Uses == 1 && is20Bit(int64(i)-24) && setPos(v, x.Pos) && clobber(x)
// result: (STMG4 [i-24] {s} p w0 w1 w2 w3 mem)
for {
i := auxIntToInt32(v.AuxInt)
w0 := x.Args[1]
w1 := x.Args[2]
w2 := x.Args[3]
- if !(x.Uses == 1 && is20Bit(int64(i)-24) && clobber(x)) {
+ if !(x.Uses == 1 && is20Bit(int64(i)-24) && setPos(v, x.Pos) && clobber(x)) {
break
}
v.reset(OpS390XSTMG4)
return true
}
// match: (MOVWstore [i] {s} p w1 x:(MOVWstore [i-4] {s} p w0 mem))
- // cond: p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-4) && clobber(x)
+ // cond: p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-4) && setPos(v, x.Pos) && clobber(x)
// result: (STM2 [i-4] {s} p w0 w1 mem)
for {
i := auxIntToInt32(v.AuxInt)
break
}
w0 := x.Args[1]
- if !(p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-4) && clobber(x)) {
+ if !(p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-4) && setPos(v, x.Pos) && clobber(x)) {
break
}
v.reset(OpS390XSTM2)
return true
}
// match: (MOVWstore [i] {s} p w2 x:(STM2 [i-8] {s} p w0 w1 mem))
- // cond: x.Uses == 1 && is20Bit(int64(i)-8) && clobber(x)
+ // cond: x.Uses == 1 && is20Bit(int64(i)-8) && setPos(v, x.Pos) && clobber(x)
// result: (STM3 [i-8] {s} p w0 w1 w2 mem)
for {
i := auxIntToInt32(v.AuxInt)
}
w0 := x.Args[1]
w1 := x.Args[2]
- if !(x.Uses == 1 && is20Bit(int64(i)-8) && clobber(x)) {
+ if !(x.Uses == 1 && is20Bit(int64(i)-8) && setPos(v, x.Pos) && clobber(x)) {
break
}
v.reset(OpS390XSTM3)
return true
}
// match: (MOVWstore [i] {s} p w3 x:(STM3 [i-12] {s} p w0 w1 w2 mem))
- // cond: x.Uses == 1 && is20Bit(int64(i)-12) && clobber(x)
+ // cond: x.Uses == 1 && is20Bit(int64(i)-12) && setPos(v, x.Pos) && clobber(x)
// result: (STM4 [i-12] {s} p w0 w1 w2 w3 mem)
for {
i := auxIntToInt32(v.AuxInt)
w0 := x.Args[1]
w1 := x.Args[2]
w2 := x.Args[3]
- if !(x.Uses == 1 && is20Bit(int64(i)-12) && clobber(x)) {
+ if !(x.Uses == 1 && is20Bit(int64(i)-12) && setPos(v, x.Pos) && clobber(x)) {
break
}
v.reset(OpS390XSTM4)
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (STM2 [i] {s} p w2 w3 x:(STM2 [i-8] {s} p w0 w1 mem))
- // cond: x.Uses == 1 && is20Bit(int64(i)-8) && clobber(x)
+ // cond: x.Uses == 1 && is20Bit(int64(i)-8) && setPos(v, x.Pos) && clobber(x)
// result: (STM4 [i-8] {s} p w0 w1 w2 w3 mem)
for {
i := auxIntToInt32(v.AuxInt)
}
w0 := x.Args[1]
w1 := x.Args[2]
- if !(x.Uses == 1 && is20Bit(int64(i)-8) && clobber(x)) {
+ if !(x.Uses == 1 && is20Bit(int64(i)-8) && setPos(v, x.Pos) && clobber(x)) {
break
}
v.reset(OpS390XSTM4)
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (STMG2 [i] {s} p w2 w3 x:(STMG2 [i-16] {s} p w0 w1 mem))
- // cond: x.Uses == 1 && is20Bit(int64(i)-16) && clobber(x)
+ // cond: x.Uses == 1 && is20Bit(int64(i)-16) && setPos(v, x.Pos) && clobber(x)
// result: (STMG4 [i-16] {s} p w0 w1 w2 w3 mem)
for {
i := auxIntToInt32(v.AuxInt)
}
w0 := x.Args[1]
w1 := x.Args[2]
- if !(x.Uses == 1 && is20Bit(int64(i)-16) && clobber(x)) {
+ if !(x.Uses == 1 && is20Bit(int64(i)-16) && setPos(v, x.Pos) && clobber(x)) {
break
}
v.reset(OpS390XSTMG4)
func zero_byte_8(b []byte) {
_ = b[7]
- b[0], b[1], b[2], b[3] = 0, 0, 0, 0
- b[4], b[5], b[6], b[7] = 0, 0, 0, 0 // arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
+ b[0], b[1], b[2], b[3] = 0, 0, 0, 0 // arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
+ b[4], b[5], b[6], b[7] = 0, 0, 0, 0
}
func zero_byte_16(b []byte) {
_ = b[15]
- b[0], b[1], b[2], b[3] = 0, 0, 0, 0
+ b[0], b[1], b[2], b[3] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
b[4], b[5], b[6], b[7] = 0, 0, 0, 0
b[8], b[9], b[10], b[11] = 0, 0, 0, 0
- b[12], b[13], b[14], b[15] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
+ b[12], b[13], b[14], b[15] = 0, 0, 0, 0
}
func zero_byte_30(a *[30]byte) {
func zero_uint16_8(h []uint16) {
_ = h[7]
- h[0], h[1], h[2], h[3] = 0, 0, 0, 0
- h[4], h[5], h[6], h[7] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
+ h[0], h[1], h[2], h[3] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
+ h[4], h[5], h[6], h[7] = 0, 0, 0, 0
}
func zero_uint32_2(w1, w2 []uint32) {
// arm64:"MOVW",-"MOVH"
q.a, q.b = a, b
}
+
+// wideStore checks that when two adjacent 64-bit zero stores are
+// combined into one wide store, the combined instruction is attributed
+// to the line of the first store (so the asm pattern appears on p[0]'s
+// line and is absent from p[1]'s line).
+func wideStore(p *[8]uint64) {
+ if p == nil {
+ return
+ }
+
+ // amd64:"MOVUPS",-"MOVQ"
+ // arm64:"STP",-"MOVD"
+ p[0] = 0
+ // amd64:-"MOVUPS",-"MOVQ"
+ // arm64:-"STP",-"MOVD"
+ p[1] = 0
+}
+
+// wideStore2 is like wideStore but with non-zero values: it checks that
+// the s390x store-multiple (STMG) combining attributes the combined
+// instruction to the first store's line.
+func wideStore2(p *[8]uint64, x, y uint64) {
+ if p == nil {
+ return
+ }
+
+ // s390x:"STMG"
+ p[0] = x
+ // s390x:-"STMG",-"MOVD"
+ p[1] = y
+}