Add SSA rules to intrinsify Mul/Mul64 on ppc64x.
benchmark old ns/op new ns/op delta
BenchmarkMul-40 8.80 0.93 -89.43%
BenchmarkMul32-40 1.39 1.39 +0.00%
BenchmarkMul64-40 5.39 0.93 -82.75%
Updates #24813
Change-Id: I6e95bfbe976a2278bd17799df184a7fbc0e57829
Reviewed-on: https://go-review.googlesource.com/138917
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
addF("math/bits", "OnesCount",
makeOnesCountAMD64(ssa.OpPopCount64, ssa.OpPopCount32),
sys.AMD64)
- alias("math/bits", "Mul", "math/bits", "Mul64", sys.ArchAMD64, sys.ArchARM64)
+ alias("math/bits", "Mul", "math/bits", "Mul64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64)
addF("math/bits", "Mul64",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1])
},
- sys.AMD64, sys.ARM64)
+ sys.AMD64, sys.ARM64, sys.PPC64)
/******** sync/atomic ********/
p.To.Reg = y
}
+ case ssa.OpPPC64LoweredMuluhilo:
+ // MULHDU Rarg1, Rarg0, Reg0
+ // MULLD Rarg1, Rarg0, Reg1
+ r0 := v.Args[0].Reg()
+ r1 := v.Args[1].Reg()
+ p := s.Prog(ppc64.AMULHDU)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = r1
+ p.Reg = r0
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = v.Reg0()
+ p1 := s.Prog(ppc64.AMULLD)
+ p1.From.Type = obj.TYPE_REG
+ p1.From.Reg = r1
+ p1.Reg = r0
+ p1.To.Type = obj.TYPE_REG
+ p1.To.Reg = v.Reg1()
+
case ssa.OpPPC64LoweredAtomicAnd8,
ssa.OpPPC64LoweredAtomicOr8:
// LWSYNC
(Mul64 x y) -> (MULLD x y)
(Mul(32|16|8) x y) -> (MULLW x y)
+(Mul64uhilo x y) -> (LoweredMuluhilo x y)
(Div64 x y) -> (DIVD x y)
(Div64u x y) -> (DIVDU x y)
gp01 = regInfo{inputs: nil, outputs: []regMask{gp}}
gp11 = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
gp21 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
+ gp22 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
gp1cr = regInfo{inputs: []regMask{gp | sp | sb}}
gp2cr = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
crgp = regInfo{inputs: nil, outputs: []regMask{gp}}
{name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true}, // (arg0 * arg1) >> 32, signed
{name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned
{name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
+ {name: "LoweredMuluhilo", argLength: 2, reg: gp22, resultNotInArgs: true}, // arg0 * arg1, returns (hi, lo)
{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true}, // arg0*arg1
{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1
OpPPC64MULHW
OpPPC64MULHDU
OpPPC64MULHWU
+ OpPPC64LoweredMuluhilo
OpPPC64FMUL
OpPPC64FMULS
OpPPC64FMADD
},
},
},
+ {
+ name: "LoweredMuluhilo",
+ argLen: 2,
+ resultNotInArgs: true,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ outputs: []outputInfo{
+ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ {1, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ },
+ },
{
name: "FMUL",
argLen: 2,
return rewriteValuePPC64_OpMul64_0(v)
case OpMul64F:
return rewriteValuePPC64_OpMul64F_0(v)
+ case OpMul64uhilo:
+ return rewriteValuePPC64_OpMul64uhilo_0(v)
case OpMul8:
return rewriteValuePPC64_OpMul8_0(v)
case OpNeg16:
return true
}
}
+func rewriteValuePPC64_OpMul64uhilo_0(v *Value) bool {
+ // match: (Mul64uhilo x y)
+ // cond:
+ // result: (LoweredMuluhilo x y)
+ for {
+ _ = v.Args[1]
+ x := v.Args[0]
+ y := v.Args[1]
+ v.reset(OpPPC64LoweredMuluhilo)
+ v.AddArg(x)
+ v.AddArg(y)
+ return true
+ }
+}
func rewriteValuePPC64_OpMul8_0(v *Value) bool {
// match: (Mul8 x y)
// cond:
func Mul(x, y uint) (hi, lo uint) {
// amd64:"MULQ"
// arm64:"UMULH","MUL"
+ // ppc64: "MULHDU", "MULLD"
return bits.Mul(x, y)
}
func Mul64(x, y uint64) (hi, lo uint64) {
// amd64:"MULQ"
// arm64:"UMULH","MUL"
+ // ppc64: "MULHDU", "MULLD"
return bits.Mul64(x, y)
}