From: Keith Randall Date: Mon, 21 Aug 2023 22:55:35 +0000 (-0700) Subject: cmd/compile: when combining stores, use line number of first store X-Git-Tag: go1.22rc1~612 X-Git-Url: http://www.git.cypherpunks.ru/?a=commitdiff_plain;h=657c885fb9278f03d5b18bfb7eeca98c25ef67ac;p=gostls13.git cmd/compile: when combining stores, use line number of first store var p *[2]uint32 = ... p[0] = 0 p[1] = 0 When we combine these two 32-bit stores into a single 64-bit store, use the line number of the first store, not the second one. This differs from the default behavior because usually with the combining that the compiler does, we use the line number of the last instruction in the combo (e.g. load+add, we use the line number of the add). This is the same behavior that gcc does in C (picking the line number of the first of a set of combined stores). Change-Id: Ie70bf6151755322d33ecd50e4d9caf62f7881784 Reviewed-on: https://go-review.googlesource.com/c/go/+/521678 TryBot-Result: Gopher Robot Run-TryBot: Keith Randall Reviewed-by: Keith Randall Reviewed-by: David Chase --- diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules index 7840600ef6..aac6873d28 100644 --- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules @@ -1468,6 +1468,7 @@ && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 + && setPos(v, x.Pos) && clobber(x) => (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem) (MOVQstoreconst [a] {s} p0 x:(MOVQstoreconst [c] {s} p1 mem)) @@ -1476,6 +1477,7 @@ && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 + && setPos(v, x.Pos) && clobber(x) => (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem) diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules index 227ec5d610..c5ee0285d9 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules @@ -485,8 +485,8 @@ (STP [16] dst (Select0 (LDP [16] src mem)) (Select1 (LDP [16] src mem)) (STP dst (Select0 (LDP src mem)) (Select1 (LDP src mem)) mem)))) -(MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i+8] ptr mem)) && x.Uses == 1 && clobber(x) => (MOVQstorezero {s} [i] ptr mem) -(MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i-8] ptr mem)) && x.Uses == 1 && clobber(x) => (MOVQstorezero {s} [i-8] ptr mem) +(MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i+8] ptr mem)) && x.Uses == 1 && setPos(v, x.Pos) && clobber(x) => (MOVQstorezero {s} [i] ptr mem) +(MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i-8] ptr mem)) && x.Uses == 1 && setPos(v, x.Pos) && clobber(x) => (MOVQstorezero {s} [i-8] ptr mem) // strip off fractional word move (Move [s] dst src mem) && s%16 != 0 && s%16 <= 8 && s > 16 => diff --git a/src/cmd/compile/internal/ssa/_gen/S390X.rules b/src/cmd/compile/internal/ssa/_gen/S390X.rules index a9d62c79ce..2a6d7e737c 100644 --- a/src/cmd/compile/internal/ssa/_gen/S390X.rules +++ b/src/cmd/compile/internal/ssa/_gen/S390X.rules @@ -1300,21 +1300,25 @@ && p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-4) + && setPos(v, x.Pos) && clobber(x) => (STM2 [i-4] {s} p w0 w1 mem) (MOVWstore [i] {s} p w2 x:(STM2 [i-8] {s} p w0 w1 mem)) && x.Uses == 1 && is20Bit(int64(i)-8) + && setPos(v, x.Pos) && clobber(x) => (STM3 [i-8] {s} p w0 w1 w2 mem) (MOVWstore [i] {s} p w3 x:(STM3 [i-12] {s} p w0 w1 w2 mem)) && x.Uses == 1 && is20Bit(int64(i)-12) + && setPos(v, x.Pos) && clobber(x) => (STM4 [i-12] {s} p w0 w1 w2 w3 mem) (STM2 [i] {s} p w2 w3 x:(STM2 [i-8] {s} p w0 w1 mem)) && x.Uses == 1 && is20Bit(int64(i)-8) + && setPos(v, x.Pos) && clobber(x) => (STM4 [i-8] {s} p w0 w1 w2 w3 mem) // 64-bit @@ -1322,21 +1326,25 @@ && p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-8) + && setPos(v, x.Pos) && clobber(x) => (STMG2 [i-8] {s} p w0 w1 mem) (MOVDstore [i] {s} p w2 x:(STMG2 [i-16] {s} p w0 w1 mem)) && x.Uses == 1 && is20Bit(int64(i)-16) + && setPos(v, x.Pos) && clobber(x) => (STMG3 [i-16] {s} p w0 w1 w2 mem) (MOVDstore [i] {s} p w3 x:(STMG3 [i-24] {s} p w0 w1 w2 mem)) && x.Uses == 1 && is20Bit(int64(i)-24) + && setPos(v, x.Pos) && clobber(x) => (STMG4 [i-24] {s} p w0 w1 w2 w3 mem) (STMG2 [i] {s} p w2 w3 x:(STMG2 [i-16] {s} p w0 w1 mem)) && x.Uses == 1 && is20Bit(int64(i)-16) + && setPos(v, x.Pos) && clobber(x) => (STMG4 [i-16] {s} p w0 w1 w2 w3 mem) diff --git a/src/cmd/compile/internal/ssa/memcombine.go b/src/cmd/compile/internal/ssa/memcombine.go index 8e3db5a378..848b1e57a7 100644 --- a/src/cmd/compile/internal/ssa/memcombine.go +++ b/src/cmd/compile/internal/ssa/memcombine.go @@ -512,6 +512,8 @@ func combineStores(root *Value, n int64) bool { } // Before we sort, grab the memory arg the result should have. mem := a[n-1].store.Args[2] + // Also grab position of first store (last in array = first in memory order). + pos := a[n-1].store.Pos // Sort stores in increasing address order. sort.Slice(a, func(i, j int) bool { @@ -564,6 +566,7 @@ func combineStores(root *Value, n int64) bool { v := a[i].store if v == root { v.Aux = cv.Type // widen store type + v.Pos = pos v.SetArg(0, ptr) v.SetArg(1, cv) v.SetArg(2, mem) @@ -632,6 +635,7 @@ func combineStores(root *Value, n int64) bool { v := a[i].store if v == root { v.Aux = load.Type // widen store type + v.Pos = pos v.SetArg(0, ptr) v.SetArg(1, load) v.SetArg(2, mem) @@ -703,6 +707,7 @@ func combineStores(root *Value, n int64) bool { v := a[i].store if v == root { v.Aux = sv.Type // widen store type + v.Pos = pos v.SetArg(0, ptr) v.SetArg(1, sv) v.SetArg(2, mem) diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index eebedea68c..c5bd7cf3a9 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -2146,3 +2146,11 @@ func isARM64addcon(v int64) bool { } return v <= 0xFFF } + +// setPos sets the position of v to pos, then returns true. +// Useful for setting the result of a rewrite's position to +// something other than the default. +func setPos(v *Value, pos src.XPos) bool { + v.Pos = pos + return true +} diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 5cf5425fdc..5332512f2a 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -12204,7 +12204,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool { return true } // match: (MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [a] {s} p0 mem)) - // cond: config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x) + // cond: config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && setPos(v, x.Pos) && clobber(x) // result: (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem) for { c := auxIntToValAndOff(v.AuxInt) @@ -12220,7 +12220,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool { } mem := x.Args[1] p0 := x.Args[0] - if !(config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x)) { + if !(config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && setPos(v, x.Pos) && clobber(x)) { break } v.reset(OpAMD64MOVOstoreconst) @@ -12230,7 +12230,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool { return true } // match: (MOVQstoreconst [a] {s} p0 x:(MOVQstoreconst [c] {s} p1 mem)) - // cond: config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x) + // cond: config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && setPos(v, x.Pos) && clobber(x) // result: (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem) for { a := auxIntToValAndOff(v.AuxInt) @@ -12246,7 +12246,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool { } mem := x.Args[1] p1 := x.Args[0] - if !(config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x)) { + if !(config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && setPos(v, x.Pos) && clobber(x)) { break } v.reset(OpAMD64MOVOstoreconst) diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index d7752d3876..f0a4425502 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -9867,7 +9867,7 @@ func rewriteValueARM64_OpARM64MOVDstorezero(v *Value) bool { b := v.Block config := b.Func.Config // match: (MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i+8] ptr mem)) - // cond: x.Uses == 1 && clobber(x) + // cond: x.Uses == 1 && setPos(v, x.Pos) && clobber(x) // result: (MOVQstorezero {s} [i] ptr mem) for { i := auxIntToInt32(v.AuxInt) @@ -9878,7 +9878,7 @@ func rewriteValueARM64_OpARM64MOVDstorezero(v *Value) bool { break } mem := x.Args[1] - if ptr != x.Args[0] || !(x.Uses == 1 && clobber(x)) { + if ptr != x.Args[0] || !(x.Uses == 1 && setPos(v, x.Pos) && clobber(x)) { break } v.reset(OpARM64MOVQstorezero) @@ -9888,7 +9888,7 @@ func rewriteValueARM64_OpARM64MOVDstorezero(v *Value) bool { return true } // match: (MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i-8] ptr mem)) - // cond: x.Uses == 1 && clobber(x) + // cond: x.Uses == 1 && setPos(v, x.Pos) && clobber(x) // result: (MOVQstorezero {s} [i-8] ptr mem) for { i := auxIntToInt32(v.AuxInt) @@ -9899,7 +9899,7 @@ func rewriteValueARM64_OpARM64MOVDstorezero(v *Value) bool { break } mem := x.Args[1] - if ptr != x.Args[0] || !(x.Uses == 1 && clobber(x)) { + if ptr != x.Args[0] || !(x.Uses == 1 && setPos(v, x.Pos) && clobber(x)) { break } v.reset(OpARM64MOVQstorezero) diff --git a/src/cmd/compile/internal/ssa/rewriteS390X.go b/src/cmd/compile/internal/ssa/rewriteS390X.go index a3d621898f..c2342c944d 100644 --- a/src/cmd/compile/internal/ssa/rewriteS390X.go +++ b/src/cmd/compile/internal/ssa/rewriteS390X.go @@ -9060,7 +9060,7 @@ func rewriteValueS390X_OpS390XMOVDstore(v *Value) bool { return true } // match: (MOVDstore [i] {s} p w1 x:(MOVDstore [i-8] {s} p w0 mem)) - // cond: p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-8) && clobber(x) + // cond: p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-8) && setPos(v, x.Pos) && clobber(x) // result: (STMG2 [i-8] {s} p w0 w1 mem) for { i := auxIntToInt32(v.AuxInt) @@ -9076,7 +9076,7 @@ func rewriteValueS390X_OpS390XMOVDstore(v *Value) bool { break } w0 := x.Args[1] - if !(p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-8) && clobber(x)) { + if !(p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-8) && setPos(v, x.Pos) && clobber(x)) { break } v.reset(OpS390XSTMG2) @@ -9086,7 +9086,7 @@ func rewriteValueS390X_OpS390XMOVDstore(v *Value) bool { return true } // match: (MOVDstore [i] {s} p w2 x:(STMG2 [i-16] {s} p w0 w1 mem)) - // cond: x.Uses == 1 && is20Bit(int64(i)-16) && clobber(x) + // cond: x.Uses == 1 && is20Bit(int64(i)-16) && setPos(v, x.Pos) && clobber(x) // result: (STMG3 [i-16] {s} p w0 w1 w2 mem) for { i := auxIntToInt32(v.AuxInt) @@ -9103,7 +9103,7 @@ func rewriteValueS390X_OpS390XMOVDstore(v *Value) bool { } w0 := x.Args[1] w1 := x.Args[2] - if !(x.Uses == 1 && is20Bit(int64(i)-16) && clobber(x)) { + if !(x.Uses == 1 && is20Bit(int64(i)-16) && setPos(v, x.Pos) && clobber(x)) { break } v.reset(OpS390XSTMG3) @@ -9113,7 +9113,7 @@ func rewriteValueS390X_OpS390XMOVDstore(v *Value) bool { return true } // match: (MOVDstore [i] {s} p w3 x:(STMG3 [i-24] {s} p w0 w1 w2 mem)) - // cond: x.Uses == 1 && is20Bit(int64(i)-24) && clobber(x) + // cond: x.Uses == 1 && is20Bit(int64(i)-24) && setPos(v, x.Pos) && clobber(x) // result: (STMG4 [i-24] {s} p w0 w1 w2 w3 mem) for { i := auxIntToInt32(v.AuxInt) @@ -9131,7 +9131,7 @@ func rewriteValueS390X_OpS390XMOVDstore(v *Value) bool { w0 := x.Args[1] w1 := x.Args[2] w2 := x.Args[3] - if !(x.Uses == 1 && is20Bit(int64(i)-24) && clobber(x)) { + if !(x.Uses == 1 && is20Bit(int64(i)-24) && setPos(v, x.Pos) && clobber(x)) { break } v.reset(OpS390XSTMG4) @@ -10595,7 +10595,7 @@ func rewriteValueS390X_OpS390XMOVWstore(v *Value) bool { return true } // match: (MOVWstore [i] {s} p w1 x:(MOVWstore [i-4] {s} p w0 mem)) - // cond: p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-4) && clobber(x) + // cond: p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-4) && setPos(v, x.Pos) && clobber(x) // result: (STM2 [i-4] {s} p w0 w1 mem) for { i := auxIntToInt32(v.AuxInt) @@ -10611,7 +10611,7 @@ func rewriteValueS390X_OpS390XMOVWstore(v *Value) bool { break } w0 := x.Args[1] - if !(p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-4) && clobber(x)) { + if !(p.Op != OpSB && x.Uses == 1 && is20Bit(int64(i)-4) && setPos(v, x.Pos) && clobber(x)) { break } v.reset(OpS390XSTM2) @@ -10621,7 +10621,7 @@ func rewriteValueS390X_OpS390XMOVWstore(v *Value) bool { return true } // match: (MOVWstore [i] {s} p w2 x:(STM2 [i-8] {s} p w0 w1 mem)) - // cond: x.Uses == 1 && is20Bit(int64(i)-8) && clobber(x) + // cond: x.Uses == 1 && is20Bit(int64(i)-8) && setPos(v, x.Pos) && clobber(x) // result: (STM3 [i-8] {s} p w0 w1 w2 mem) for { i := auxIntToInt32(v.AuxInt) @@ -10638,7 +10638,7 @@ func rewriteValueS390X_OpS390XMOVWstore(v *Value) bool { } w0 := x.Args[1] w1 := x.Args[2] - if !(x.Uses == 1 && is20Bit(int64(i)-8) && clobber(x)) { + if !(x.Uses == 1 && is20Bit(int64(i)-8) && setPos(v, x.Pos) && clobber(x)) { break } v.reset(OpS390XSTM3) @@ -10648,7 +10648,7 @@ func rewriteValueS390X_OpS390XMOVWstore(v *Value) bool { return true } // match: (MOVWstore [i] {s} p w3 x:(STM3 [i-12] {s} p w0 w1 w2 mem)) - // cond: x.Uses == 1 && is20Bit(int64(i)-12) && clobber(x) + // cond: x.Uses == 1 && is20Bit(int64(i)-12) && setPos(v, x.Pos) && clobber(x) // result: (STM4 [i-12] {s} p w0 w1 w2 w3 mem) for { i := auxIntToInt32(v.AuxInt) @@ -10666,7 +10666,7 @@ func rewriteValueS390X_OpS390XMOVWstore(v *Value) bool { w0 := x.Args[1] w1 := x.Args[2] w2 := x.Args[3] - if !(x.Uses == 1 && is20Bit(int64(i)-12) && clobber(x)) { + if !(x.Uses == 1 && is20Bit(int64(i)-12) && setPos(v, x.Pos) && clobber(x)) { break } v.reset(OpS390XSTM4) @@ -13107,7 +13107,7 @@ func rewriteValueS390X_OpS390XSTM2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (STM2 [i] {s} p w2 w3 x:(STM2 [i-8] {s} p w0 w1 mem)) - // cond: x.Uses == 1 && is20Bit(int64(i)-8) && clobber(x) + // cond: x.Uses == 1 && is20Bit(int64(i)-8) && setPos(v, x.Pos) && clobber(x) // result: (STM4 [i-8] {s} p w0 w1 w2 w3 mem) for { i := auxIntToInt32(v.AuxInt) @@ -13125,7 +13125,7 @@ func rewriteValueS390X_OpS390XSTM2(v *Value) bool { } w0 := x.Args[1] w1 := x.Args[2] - if !(x.Uses == 1 && is20Bit(int64(i)-8) && clobber(x)) { + if !(x.Uses == 1 && is20Bit(int64(i)-8) && setPos(v, x.Pos) && clobber(x)) { break } v.reset(OpS390XSTM4) @@ -13162,7 +13162,7 @@ func rewriteValueS390X_OpS390XSTMG2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (STMG2 [i] {s} p w2 w3 x:(STMG2 [i-16] {s} p w0 w1 mem)) - // cond: x.Uses == 1 && is20Bit(int64(i)-16) && clobber(x) + // cond: x.Uses == 1 && is20Bit(int64(i)-16) && setPos(v, x.Pos) && clobber(x) // result: (STMG4 [i-16] {s} p w0 w1 w2 w3 mem) for { i := auxIntToInt32(v.AuxInt) @@ -13180,7 +13180,7 @@ func rewriteValueS390X_OpS390XSTMG2(v *Value) bool { } w0 := x.Args[1] w1 := x.Args[2] - if !(x.Uses == 1 && is20Bit(int64(i)-16) && clobber(x)) { + if !(x.Uses == 1 && is20Bit(int64(i)-16) && setPos(v, x.Pos) && clobber(x)) { break } v.reset(OpS390XSTMG4) diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go index adad9c613d..1b8abc348a 100644 --- a/test/codegen/memcombine.go +++ b/test/codegen/memcombine.go @@ -748,16 +748,16 @@ func zero_byte_4(b1, b2 []byte) { func zero_byte_8(b []byte) { _ = b[7] - b[0], b[1], b[2], b[3] = 0, 0, 0, 0 - b[4], b[5], b[6], b[7] = 0, 0, 0, 0 // arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW" + b[0], b[1], b[2], b[3] = 0, 0, 0, 0 // arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW" + b[4], b[5], b[6], b[7] = 0, 0, 0, 0 } func zero_byte_16(b []byte) { _ = b[15] - b[0], b[1], b[2], b[3] = 0, 0, 0, 0 + b[0], b[1], b[2], b[3] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH",-"MOVW" b[4], b[5], b[6], b[7] = 0, 0, 0, 0 b[8], b[9], b[10], b[11] = 0, 0, 0, 0 - b[12], b[13], b[14], b[15] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH",-"MOVW" + b[12], b[13], b[14], b[15] = 0, 0, 0, 0 } func zero_byte_30(a *[30]byte) { @@ -809,8 +809,8 @@ func zero_uint16_4(h1, h2 []uint16) { func zero_uint16_8(h []uint16) { _ = h[7] - h[0], h[1], h[2], h[3] = 0, 0, 0, 0 - h[4], h[5], h[6], h[7] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH" + h[0], h[1], h[2], h[3] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH" + h[4], h[5], h[6], h[7] = 0, 0, 0, 0 } func zero_uint32_2(w1, w2 []uint32) { @@ -858,3 +858,27 @@ func loadstore2(p, q *S1) { // arm64:"MOVW",-"MOVH" q.a, q.b = a, b } + +func wideStore(p *[8]uint64) { + if p == nil { + return + } + + // amd64:"MOVUPS",-"MOVQ" + // arm64:"STP",-"MOVD" + p[0] = 0 + // amd64:-"MOVUPS",-"MOVQ" + // arm64:-"STP",-"MOVD" + p[1] = 0 +} + +func wideStore2(p *[8]uint64, x, y uint64) { + if p == nil { + return + } + + // s390x:"STMG" + p[0] = x + // s390x:-"STMG",-"MOVD" + p[1] = y +}