]> Cypherpunks.ru repositories - gostls13.git/commitdiff
cmd/compile: add prefetch intrinsic support on PPC64
authorArchana R <aravind5@in.ibm.com>
Mon, 4 Oct 2021 09:16:50 +0000 (04:16 -0500)
committerLynn Boger <laboger@linux.vnet.ibm.com>
Tue, 5 Oct 2021 21:25:51 +0000 (21:25 +0000)
This CL enables intrinsic support to emit the following prefetch
instructions for PPC64 platform that are already emitted on other
platforms
1. Prefetch - prefetches data from memory address to cache;
2. PrefetchStreamed - prefetches data from memory address, with a
hint that this data is being streamed.

Benchmarks picked from go/test/bench/garbage
Parameters tested with:
GOMAXPROCS=8
tree2 -heapsize=1000000000 -cpus=8
tree -n=18
parser
peano

Performance results with this change on POWER9

name                 old time/op  new time/op  delta
Tree2-8              75.3ms ± 2%  65.0ms ± 6%  -13.61%  (p=0.003 n=5+7)
Tree-8               576ms ± 2%   576ms ± 1%   ~     (p=0.756 n=11+10)
Parser-8             3.60s ± 2%   3.59s ± 1%   ~     (p=0.818 n=6+6)
Peano-8              84.8ms ± 1%  84.6ms ± 1%   ~     (p=0.180 n=6+6)

Results on POWER8 and POWER10 are similar

Change-Id: If4ac95a85aaa7b2266014e1f8fb7cd7440cbf906
Reviewed-on: https://go-review.googlesource.com/c/go/+/353730
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Trust: Michael Knyszek <mknyszek@google.com>

src/cmd/compile/internal/ppc64/ssa.go
src/cmd/compile/internal/ssa/gen/PPC64.rules
src/cmd/compile/internal/ssa/gen/PPC64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewritePPC64.go
src/cmd/compile/internal/ssagen/ssa.go

index c0f58e60b2c95c24fc0828a0176f545bf8f3d3b3..98316c16fab7f9b6756fa7c0cadd6f1e891c3af3 100644 (file)
@@ -901,6 +901,13 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                p.To.Type = obj.TYPE_REG
                p.To.Reg = v.Reg()
 
+       case ssa.OpPPC64DCBT:
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_MEM
+               p.From.Reg = v.Args[0].Reg()
+               p.To.Type = obj.TYPE_CONST
+               p.To.Offset = v.AuxInt
+
        case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
                p := s.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_REG
index 0393feb125662c93b82f676ee451a0c1aaff55df..4c766df4b3ade3f01e3d10b44d82312b850a9488 100644 (file)
         && clobber(call)
         => (Move [sz] dst src mem)
 
+// Prefetch instructions (aux is option: 0 - DCBT ; 8 - DCBT stream)
+(PrefetchCache ptr mem)          => (DCBT ptr mem [0])
+(PrefetchCacheStreamed ptr mem)  => (DCBT ptr mem [8])
+
index 5f84290002cc2bd3bb6b79bacd668d49e24f272e..ff9ce64e187edd0bee17e682dadf8126222a436a 100644 (file)
@@ -149,6 +149,7 @@ func init() {
                crgp21      = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
                gpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
                gploadidx   = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
+               prefreg     = regInfo{inputs: []regMask{gp | sp | sb}}
                gpstore     = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
                gpstoreidx  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}}
                gpstorezero = regInfo{inputs: []regMask{gp | sp | sb}} // ppc64.REGZERO is reserved zero value
@@ -336,6 +337,10 @@ func init() {
                {name: "FMOVDloadidx", argLength: 3, reg: fploadidx, asm: "FMOVD", typ: "Float64"},
                {name: "FMOVSloadidx", argLength: 3, reg: fploadidx, asm: "FMOVS", typ: "Float32"},
 
+               // Prefetch instruction
+               // Do prefetch of address generated with arg0 and arg1 with option aux. arg0=addr,arg1=memory, aux=option.
+               {name: "DCBT", argLength: 2, aux: "Int64", reg: prefreg, asm: "DCBT", hasSideEffects: true},
+
                // Store bytes in the reverse endian order of the arch into arg0.
                // These are indexed stores with no offset field in the instruction so the auxint fields are not used.
                {name: "MOVDBRstore", argLength: 3, reg: gpstore, asm: "MOVDBR", aux: "Sym", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes reverse order
index 6266092f6f16ef662e3f589c6b7e45bccbef78df..09006c8c853549d321841cd508c631a2f9dc22a8 100644 (file)
@@ -1994,6 +1994,7 @@ const (
        OpPPC64MOVDBRloadidx
        OpPPC64FMOVDloadidx
        OpPPC64FMOVSloadidx
+       OpPPC64DCBT
        OpPPC64MOVDBRstore
        OpPPC64MOVWBRstore
        OpPPC64MOVHBRstore
@@ -26715,6 +26716,18 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:           "DCBT",
+               auxType:        auxInt64,
+               argLen:         2,
+               hasSideEffects: true,
+               asm:            ppc64.ADCBT,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+               },
+       },
        {
                name:           "MOVDBRstore",
                auxType:        auxSym,
index 450ea861f34450e990378501000a3585ccbf4208..b278a4cb4405dfff21e06a6325d71326fa6169a2 100644 (file)
@@ -639,6 +639,10 @@ func rewriteValuePPC64(v *Value) bool {
                return true
        case OpPopCount8:
                return rewriteValuePPC64_OpPopCount8(v)
+       case OpPrefetchCache:
+               return rewriteValuePPC64_OpPrefetchCache(v)
+       case OpPrefetchCacheStreamed:
+               return rewriteValuePPC64_OpPrefetchCacheStreamed(v)
        case OpRotateLeft16:
                return rewriteValuePPC64_OpRotateLeft16(v)
        case OpRotateLeft32:
@@ -14005,6 +14009,34 @@ func rewriteValuePPC64_OpPopCount8(v *Value) bool {
                return true
        }
 }
+func rewriteValuePPC64_OpPrefetchCache(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (PrefetchCache ptr mem)
+       // result: (DCBT ptr mem [0])
+       for {
+               ptr := v_0
+               mem := v_1
+               v.reset(OpPPC64DCBT)
+               v.AuxInt = int64ToAuxInt(0)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+}
+func rewriteValuePPC64_OpPrefetchCacheStreamed(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (PrefetchCacheStreamed ptr mem)
+       // result: (DCBT ptr mem [8])
+       for {
+               ptr := v_0
+               mem := v_1
+               v.reset(OpPPC64DCBT)
+               v.AuxInt = int64ToAuxInt(8)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+}
 func rewriteValuePPC64_OpRotateLeft16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
index 2d8e21ee054447027287f0ba4f3a0457e4d14e0c..08114b78282d0a09f3131e164afed7a1eba903a0 100644 (file)
@@ -3897,9 +3897,9 @@ func InitTables() {
        // Make Prefetch intrinsics for supported platforms
        // On the unsupported platforms stub function will be eliminated
        addF("runtime/internal/sys", "Prefetch", makePrefetchFunc(ssa.OpPrefetchCache),
-               sys.AMD64, sys.ARM64)
+               sys.AMD64, sys.ARM64, sys.PPC64)
        addF("runtime/internal/sys", "PrefetchStreamed", makePrefetchFunc(ssa.OpPrefetchCacheStreamed),
-               sys.AMD64, sys.ARM64)
+               sys.AMD64, sys.ARM64, sys.PPC64)
 
        /******** runtime/internal/atomic ********/
        addF("runtime/internal/atomic", "Load",