cmd/compile/internal/ssa: improve masking codegen on PPC64
author    Paul E. Murphy <murp@ibm.com>
          Tue, 27 Jun 2023 22:17:33 +0000 (17:17 -0500)
committer Paul Murphy <murp@ibm.com>
          Wed, 6 Sep 2023 16:34:20 +0000 (16:34 +0000)
Generate RLDIC[LR] instead of MOVD mask, Rx; AND Rx, Ry, Rz.
This helps reduce code size and avoids the latency of the constant
load; a sketch of the affected Go code follows the file list below.

Similarly, for smaller-than-register values, truncate constants
which exceed the range of the value's type to avoid needing to
load a constant.

Change-Id: I6019684795eb8962d4fd6d9585d08b17c15e7d64
Reviewed-on: https://go-review.googlesource.com/c/go/+/515576
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Run-TryBot: Paul Murphy <murp@ibm.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
src/cmd/compile/internal/ssa/_gen/PPC64latelower.rules
src/cmd/compile/internal/ssa/rewrite.go
src/cmd/compile/internal/ssa/rewritePPC64latelower.go
test/codegen/bits.go
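
As a rough illustration of both points above (my own sketch, not taken from
the commit or its tests), the functions below show the kind of Go code that
benefits; the instruction sequences in the comments paraphrase the commit
message for these specific masks.

// Hypothetical examples for illustration only.
package sketch

// A contiguous 48-bit mask on a 64-bit value. Previously this needed the mask
// materialized in a register (the "MOVD mask, Rx; AND Rx, Ry, Rz" sequence
// from the commit message); with the new rules it can become a single
// RLDICL $0, Rx, $16, Ry.
func clearTop16(x uint64) uint64 {
	return x & 0x0000FFFFFFFFFFFF
}

// A smaller-than-register value masked with a constant wider than its type.
// The constant can be truncated to the type's width (0x5678 here), so an
// immediate-form ANDCC suffices and no constant load is needed.
func maskLow16(x uint16) uint16 {
	var wide uint32 = 0x12345678
	return x & uint16(wide)
}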

src/cmd/compile/internal/ssa/_gen/PPC64latelower.rules
index 00d898f783ced34c25cf834cc310eb34969a06e5..c4af55c328dc8a6e52990cad2a2fc43189109dda 100644 (file)
 (SETBCR [0] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [4] (MOVDconst [1]) cmp)
 (SETBC [1] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [1] (MOVDconst [1]) cmp)
 (SETBCR [1] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [5] (MOVDconst [1]) cmp)
+
+// The upper bits of smaller-than-register values are undefined. Take advantage of that.
+(AND <t> x:(MOVDconst [m]) n) && t.Size() <= 2 => (Select0 (ANDCCconst [int64(int16(m))] n))
+
+// Convert simple bit masks to an equivalent rldic[lr] if possible.
+(AND x:(MOVDconst [m]) n) && isPPC64ValidShiftMask(m) => (RLDICL [encodePPC64RotateMask(0,m,64)] n)
+(AND x:(MOVDconst [m]) n) && m != 0 && isPPC64ValidShiftMask(^m) => (RLDICR [encodePPC64RotateMask(0,m,64)] n)
+
+// If the RLDICL does not rotate its value, a shifted value can be merged.
+(RLDICL [em] x:(SRDconst [s] a)) && (em&0xFF0000) == 0 => (RLDICL [mergePPC64RLDICLandSRDconst(em, s)] a)
+
+// Convert rotated 32 bit masks on 32 bit values into rlwinm. In general, this leaves the upper 32 bits in an undefined state.
+(AND <t> x:(MOVDconst [m]) n) && t.Size() == 4 && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(0,m,32)] n)
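
For context (my reading, not part of the diff): isPPC64ValidShiftMask and
isPPC64WordRotateMask are existing helpers in rewrite.go, and the rules above
only fire when the set bits of the mask form a single (possibly wrapping, in
the 32-bit case) contiguous run. A minimal sketch of the shapes involved,
assuming the helpers behave as their names suggest:

package sketch

// Sketch of the mask shapes accepted above (assumed semantics of the existing
// rewrite.go helpers, which are not shown in this diff):
//
//   isPPC64ValidShiftMask(m)  - ones anchored at bit 0 (0...01...1),
//                               e.g. 0x0000FFFFFFFFFFFF -> RLDICL (clear left)
//   isPPC64ValidShiftMask(^m) - ones anchored at the top (1...10...0),
//                               e.g. 0xFFFFFFFFFFFF0000 -> RLDICR (clear right)
//   isPPC64WordRotateMask(m)  - a contiguous, possibly wrapping, 32-bit run,
//                               e.g. 0xF0000FFF -> RLWINM
//   anything else, e.g. 0xFFFF0002 (two separate runs) - keep MOVD+AND
//
// A plausible check for the first shape: a nonzero m whose bits form
// 0...01...1 satisfies (m+1)&m == 0.
func isLowRunMask(m int64) bool { // hypothetical helper, for illustration only
	return m != 0 && (m+1)&m == 0
}
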
src/cmd/compile/internal/ssa/rewrite.go
index 6ee661dbbd1ed0ce1f33a9ec2cff74e70e34744d..efbaae4d465e55fec7d9f7844fb1aef096d1810d 100644 (file)
@@ -1499,6 +1499,25 @@ func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
        return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
 }
 
+// Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
+// SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
+// RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
+// operations can be combined. This function assumes the two opcodes can
+// be merged, and returns an encoded rotate+mask value of the combined RLDICL.
+func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
+       mb := s
+       r := 64 - s
+       // A larger mb is a smaller mask.
+       if (encoded>>8)&0xFF < mb {
+               encoded = (encoded &^ 0xFF00) | mb<<8
+       }
+       // The rotate is expected to be 0.
+       if (encoded & 0xFF0000) != 0 {
+               panic("non-zero rotate")
+       }
+       return encoded | r<<16
+}
+
 // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
 // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
 func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
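
To make the RLDICL/SRDconst merge concrete, here is a worked example of mine
(not from the commit) for (x >> 4) & 0x0000FFFFFFFFFFFF, the case the codegen
test below asserts as RLDICL $60, ..., $16:

package main

import "fmt"

// SRDconst [4] is the extended mnemonic for RLDICL $60, src, $4 (rotate left
// by 64-4, then clear the 4 high bits); the masking RLDICL for
// 0x0000FFFFFFFFFFFF has rotate 0 and mb 16. Merging keeps the stricter mb
// and folds the shift into the rotate field.
func main() {
	const s = 4     // shift amount of the SRDconst
	mb := int64(16) // mb of the masking RLDICL (clears the 16 high bits)
	if mb < s {     // a larger mb is a smaller mask; keep the stricter one
		mb = s
	}
	rotate := int64(64 - s) // fold the shift into the rotate field
	// Prints "RLDICL $60, Rx, $16, Ry", matching the
	// RLDICL\t[$]60, R[0-9]*, [$]16 check in test/codegen/bits.go below.
	fmt.Printf("RLDICL $%d, Rx, $%d, Ry\n", rotate, mb)
}
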
src/cmd/compile/internal/ssa/rewritePPC64latelower.go
index 56acbe403bc2fa09e19230a595a7c6f2a73e31ee..28e124d9e118c00679451482641da8cae3547422 100644 (file)
@@ -3,11 +3,16 @@
 package ssa
 
 import "internal/buildcfg"
+import "cmd/compile/internal/types"
 
 func rewriteValuePPC64latelower(v *Value) bool {
        switch v.Op {
+       case OpPPC64AND:
+               return rewriteValuePPC64latelower_OpPPC64AND(v)
        case OpPPC64ISEL:
                return rewriteValuePPC64latelower_OpPPC64ISEL(v)
+       case OpPPC64RLDICL:
+               return rewriteValuePPC64latelower_OpPPC64RLDICL(v)
        case OpPPC64SETBC:
                return rewriteValuePPC64latelower_OpPPC64SETBC(v)
        case OpPPC64SETBCR:
@@ -15,6 +20,101 @@ func rewriteValuePPC64latelower(v *Value) bool {
        }
        return false
 }
+func rewriteValuePPC64latelower_OpPPC64AND(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (AND <t> x:(MOVDconst [m]) n)
+       // cond: t.Size() <= 2
+       // result: (Select0 (ANDCCconst [int64(int16(m))] n))
+       for {
+               t := v.Type
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if x.Op != OpPPC64MOVDconst {
+                               continue
+                       }
+                       m := auxIntToInt64(x.AuxInt)
+                       n := v_1
+                       if !(t.Size() <= 2) {
+                               continue
+                       }
+                       v.reset(OpSelect0)
+                       v0 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags))
+                       v0.AuxInt = int64ToAuxInt(int64(int16(m)))
+                       v0.AddArg(n)
+                       v.AddArg(v0)
+                       return true
+               }
+               break
+       }
+       // match: (AND x:(MOVDconst [m]) n)
+       // cond: isPPC64ValidShiftMask(m)
+       // result: (RLDICL [encodePPC64RotateMask(0,m,64)] n)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if x.Op != OpPPC64MOVDconst {
+                               continue
+                       }
+                       m := auxIntToInt64(x.AuxInt)
+                       n := v_1
+                       if !(isPPC64ValidShiftMask(m)) {
+                               continue
+                       }
+                       v.reset(OpPPC64RLDICL)
+                       v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(0, m, 64))
+                       v.AddArg(n)
+                       return true
+               }
+               break
+       }
+       // match: (AND x:(MOVDconst [m]) n)
+       // cond: m != 0 && isPPC64ValidShiftMask(^m)
+       // result: (RLDICR [encodePPC64RotateMask(0,m,64)] n)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if x.Op != OpPPC64MOVDconst {
+                               continue
+                       }
+                       m := auxIntToInt64(x.AuxInt)
+                       n := v_1
+                       if !(m != 0 && isPPC64ValidShiftMask(^m)) {
+                               continue
+                       }
+                       v.reset(OpPPC64RLDICR)
+                       v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(0, m, 64))
+                       v.AddArg(n)
+                       return true
+               }
+               break
+       }
+       // match: (AND <t> x:(MOVDconst [m]) n)
+       // cond: t.Size() == 4 && isPPC64WordRotateMask(m)
+       // result: (RLWINM [encodePPC64RotateMask(0,m,32)] n)
+       for {
+               t := v.Type
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if x.Op != OpPPC64MOVDconst {
+                               continue
+                       }
+                       m := auxIntToInt64(x.AuxInt)
+                       n := v_1
+                       if !(t.Size() == 4 && isPPC64WordRotateMask(m)) {
+                               continue
+                       }
+                       v.reset(OpPPC64RLWINM)
+                       v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(0, m, 32))
+                       v.AddArg(n)
+                       return true
+               }
+               break
+       }
+       return false
+}
 func rewriteValuePPC64latelower_OpPPC64ISEL(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
@@ -49,6 +149,29 @@ func rewriteValuePPC64latelower_OpPPC64ISEL(v *Value) bool {
        }
        return false
 }
+func rewriteValuePPC64latelower_OpPPC64RLDICL(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (RLDICL [em] x:(SRDconst [s] a))
+       // cond: (em&0xFF0000)==0
+       // result: (RLDICL [mergePPC64RLDICLandSRDconst(em, s)] a)
+       for {
+               em := auxIntToInt64(v.AuxInt)
+               x := v_0
+               if x.Op != OpPPC64SRDconst {
+                       break
+               }
+               s := auxIntToInt64(x.AuxInt)
+               a := x.Args[0]
+               if !((em & 0xFF0000) == 0) {
+                       break
+               }
+               v.reset(OpPPC64RLDICL)
+               v.AuxInt = int64ToAuxInt(mergePPC64RLDICLandSRDconst(em, s))
+               v.AddArg(a)
+               return true
+       }
+       return false
+}
 func rewriteValuePPC64latelower_OpPPC64SETBC(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
test/codegen/bits.go
index 88d5ebe9cf0949b0e9eae80323bbd934b5dacfaa..67daf12d6237bb372771b2fcc5474fb914eac782 100644 (file)
@@ -394,3 +394,29 @@ func zeroextendAndMask8to64(a int8, b int16) (x, y uint64) {
        return
 
 }
+
+// Verify rotate and mask instructions, and further simplified instructions for small types
+func bitRotateAndMask(io64 [4]uint64, io32 [4]uint32, io16 [4]uint16, io8 [4]uint8) {
+       // ppc64x: "RLDICR\t[$]0, R[0-9]*, [$]47, R"
+       io64[0] = io64[0] & 0xFFFFFFFFFFFF0000
+       // ppc64x: "RLDICL\t[$]0, R[0-9]*, [$]16, R"
+       io64[1] = io64[1] & 0x0000FFFFFFFFFFFF
+       // ppc64x: -"SRD", -"AND", "RLDICL\t[$]60, R[0-9]*, [$]16, R"
+       io64[2] = (io64[2] >> 4) & 0x0000FFFFFFFFFFFF
+       // ppc64x: -"SRD", -"AND", "RLDICL\t[$]36, R[0-9]*, [$]28, R"
+       io64[3] = (io64[3] >> 28) & 0x0000FFFFFFFFFFFF
+
+       // ppc64x: "RLWNM\t[$]0, R[0-9]*, [$]4, [$]19, R"
+       io32[0] = io32[0] & 0x0FFFF000
+       // ppc64x: "RLWNM\t[$]0, R[0-9]*, [$]20, [$]3, R"
+       io32[1] = io32[1] & 0xF0000FFF
+       // ppc64x: -"RLWNM", MOVD, AND
+       io32[2] = io32[2] & 0xFFFF0002
+
+       var bigc uint32 = 0x12345678
+       // ppc64x: "ANDCC\t[$]22136"
+       io16[0] = io16[0] & uint16(bigc)
+
+       // ppc64x: "ANDCC\t[$]120"
+       io8[0] = io8[0] & uint8(bigc)
+}
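
One informal way to see the new selection in action (a suggestion of mine,
not something the commit documents) is to cross-compile a small file
containing masking expressions like the ones in bitRotateAndMask above and
read the assembly dump:

// Hypothetical check, not part of the commit. Inspect the output of, e.g.:
//
//   GOOS=linux GOARCH=ppc64le go build -gcflags=-S mask.go
//
// looking for RLDICL/RLDICR/RLWNM (and ANDCC with a truncated immediate)
// where a MOVD of the mask followed by AND used to appear.
package mask

// Expect a single RLWNM $0, Rx, $4, $19, Ry rather than loading 0x0FFFF000
// into a register and ANDing.
func WordMask(x uint32) uint32 { return x & 0x0FFFF000 }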