Generate RLDIC[LR] instead of MOVD mask, Rx; AND Rx, Ry, Rz.
This helps reduce code size, and reduces the latency caused
by the constant load.
Similarly, for smaller-than-register values, truncate constants
which exceed the range of the value's type to avoid needing to
load a constant.
Change-Id: I6019684795eb8962d4fd6d9585d08b17c15e7d64
Reviewed-on: https://go-review.googlesource.com/c/go/+/515576
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Run-TryBot: Paul Murphy <murp@ibm.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
(SETBCR [0] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [4] (MOVDconst [1]) cmp)
(SETBC [1] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [1] (MOVDconst [1]) cmp)
(SETBCR [1] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [5] (MOVDconst [1]) cmp)
+
+// The upper bits of the smaller than register values is undefined. Take advantage of that.
+(AND <t> x:(MOVDconst [m]) n) && t.Size() <= 2 => (Select0 (ANDCCconst [int64(int16(m))] n))
+
+// Convert simple bit masks to an equivalent rldic[lr] if possible.
+(AND x:(MOVDconst [m]) n) && isPPC64ValidShiftMask(m) => (RLDICL [encodePPC64RotateMask(0,m,64)] n)
+(AND x:(MOVDconst [m]) n) && m != 0 && isPPC64ValidShiftMask(^m) => (RLDICR [encodePPC64RotateMask(0,m,64)] n)
+
+// If the RLDICL does not rotate its value, a shifted value can be merged.
+(RLDICL [em] x:(SRDconst [s] a)) && (em&0xFF0000) == 0 => (RLDICL [mergePPC64RLDICLandSRDconst(em, s)] a)
+
+// Convert rotated 32 bit masks on 32 bit values into rlwinm. In general, this leaves the upper 32 bits in an undefined state.
+(AND <t> x:(MOVDconst [m]) n) && t.Size() == 4 && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(0,m,32)] n)
return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
}
+// Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
+// SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
+// RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
+// operations can be combined. This functions assumes the two opcodes can
+// be merged, and returns an encoded rotate+mask value of the combined RLDICL.
+func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
+ mb := s
+ r := 64 - s
+ // A larger mb is a smaller mask.
+ if (encoded>>8)&0xFF < mb {
+ encoded = (encoded &^ 0xFF00) | mb<<8
+ }
+ // The rotate is expected to be 0.
+ if (encoded & 0xFF0000) != 0 {
+ panic("non-zero rotate")
+ }
+ return encoded | r<<16
+}
+
// DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as
// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
package ssa
import "internal/buildcfg"
+import "cmd/compile/internal/types"
func rewriteValuePPC64latelower(v *Value) bool {
switch v.Op {
+ case OpPPC64AND:
+ return rewriteValuePPC64latelower_OpPPC64AND(v)
case OpPPC64ISEL:
return rewriteValuePPC64latelower_OpPPC64ISEL(v)
+ case OpPPC64RLDICL:
+ return rewriteValuePPC64latelower_OpPPC64RLDICL(v)
case OpPPC64SETBC:
return rewriteValuePPC64latelower_OpPPC64SETBC(v)
case OpPPC64SETBCR:
}
return false
}
+func rewriteValuePPC64latelower_OpPPC64AND(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (AND <t> x:(MOVDconst [m]) n)
+ // cond: t.Size() <= 2
+ // result: (Select0 (ANDCCconst [int64(int16(m))] n))
+ for {
+ t := v.Type
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ x := v_0
+ if x.Op != OpPPC64MOVDconst {
+ continue
+ }
+ m := auxIntToInt64(x.AuxInt)
+ n := v_1
+ if !(t.Size() <= 2) {
+ continue
+ }
+ v.reset(OpSelect0)
+ v0 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags))
+ v0.AuxInt = int64ToAuxInt(int64(int16(m)))
+ v0.AddArg(n)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (AND x:(MOVDconst [m]) n)
+ // cond: isPPC64ValidShiftMask(m)
+ // result: (RLDICL [encodePPC64RotateMask(0,m,64)] n)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ x := v_0
+ if x.Op != OpPPC64MOVDconst {
+ continue
+ }
+ m := auxIntToInt64(x.AuxInt)
+ n := v_1
+ if !(isPPC64ValidShiftMask(m)) {
+ continue
+ }
+ v.reset(OpPPC64RLDICL)
+ v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(0, m, 64))
+ v.AddArg(n)
+ return true
+ }
+ break
+ }
+ // match: (AND x:(MOVDconst [m]) n)
+ // cond: m != 0 && isPPC64ValidShiftMask(^m)
+ // result: (RLDICR [encodePPC64RotateMask(0,m,64)] n)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ x := v_0
+ if x.Op != OpPPC64MOVDconst {
+ continue
+ }
+ m := auxIntToInt64(x.AuxInt)
+ n := v_1
+ if !(m != 0 && isPPC64ValidShiftMask(^m)) {
+ continue
+ }
+ v.reset(OpPPC64RLDICR)
+ v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(0, m, 64))
+ v.AddArg(n)
+ return true
+ }
+ break
+ }
+ // match: (AND <t> x:(MOVDconst [m]) n)
+ // cond: t.Size() == 4 && isPPC64WordRotateMask(m)
+ // result: (RLWINM [encodePPC64RotateMask(0,m,32)] n)
+ for {
+ t := v.Type
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ x := v_0
+ if x.Op != OpPPC64MOVDconst {
+ continue
+ }
+ m := auxIntToInt64(x.AuxInt)
+ n := v_1
+ if !(t.Size() == 4 && isPPC64WordRotateMask(m)) {
+ continue
+ }
+ v.reset(OpPPC64RLWINM)
+ v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(0, m, 32))
+ v.AddArg(n)
+ return true
+ }
+ break
+ }
+ return false
+}
func rewriteValuePPC64latelower_OpPPC64ISEL(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
}
return false
}
+func rewriteValuePPC64latelower_OpPPC64RLDICL(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (RLDICL [em] x:(SRDconst [s] a))
+ // cond: (em&0xFF0000)==0
+ // result: (RLDICL [mergePPC64RLDICLandSRDconst(em, s)] a)
+ for {
+ em := auxIntToInt64(v.AuxInt)
+ x := v_0
+ if x.Op != OpPPC64SRDconst {
+ break
+ }
+ s := auxIntToInt64(x.AuxInt)
+ a := x.Args[0]
+ if !((em & 0xFF0000) == 0) {
+ break
+ }
+ v.reset(OpPPC64RLDICL)
+ v.AuxInt = int64ToAuxInt(mergePPC64RLDICLandSRDconst(em, s))
+ v.AddArg(a)
+ return true
+ }
+ return false
+}
func rewriteValuePPC64latelower_OpPPC64SETBC(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
return
}
+
+// Verify rotate and mask instructions, and further simplified instructions for small types
+func bitRotateAndMask(io64 [4]uint64, io32 [4]uint32, io16 [4]uint16, io8 [4]uint8) {
+ // ppc64x: "RLDICR\t[$]0, R[0-9]*, [$]47, R"
+ io64[0] = io64[0] & 0xFFFFFFFFFFFF0000
+ // ppc64x: "RLDICL\t[$]0, R[0-9]*, [$]16, R"
+ io64[1] = io64[1] & 0x0000FFFFFFFFFFFF
+ // ppc64x: -"SRD", -"AND", "RLDICL\t[$]60, R[0-9]*, [$]16, R"
+ io64[2] = (io64[2] >> 4) & 0x0000FFFFFFFFFFFF
+ // ppc64x: -"SRD", -"AND", "RLDICL\t[$]36, R[0-9]*, [$]28, R"
+ io64[3] = (io64[3] >> 28) & 0x0000FFFFFFFFFFFF
+
+ // ppc64x: "RLWNM\t[$]0, R[0-9]*, [$]4, [$]19, R"
+ io32[0] = io32[0] & 0x0FFFF000
+ // ppc64x: "RLWNM\t[$]0, R[0-9]*, [$]20, [$]3, R"
+ io32[1] = io32[1] & 0xF0000FFF
+ // ppc64x: -"RLWNM", MOVD, AND
+ io32[2] = io32[2] & 0xFFFF0002
+
+ var bigc uint32 = 0x12345678
+ // ppc64x: "ANDCC\t[$]22136"
+ io16[0] = io16[0] & uint16(bigc)
+
+ // ppc64x: "ANDCC\t[$]120"
+ io8[0] = io8[0] & uint8(bigc)
+}