Cypherpunks.ru repositories - gostls13.git/commitdiff
cmd/compile: combine multiply/add into maddld on ppc64le/power9
author: Paul E. Murphy <murp@ibm.com>
Mon, 17 Aug 2020 21:14:48 +0000 (16:14 -0500)
committer: Lynn Boger <laboger@linux.vnet.ibm.com>
Tue, 18 Aug 2020 21:09:30 +0000 (21:09 +0000)
Add a new lowering rule to match and replace such instances
with the MADDLD instruction available on power9 where
possible.

Likewise, this plumbs in a new ppc64 ssa opcode to house
the newly generated MADDLD instructions.

When testing ed25519, this reduced binary size by 936B.
Similarly, MADDLD combination occurs in a few other less
obvious cases such as division by constant.

Testing of golang.org/x/crypto/ed25519 shows a non-trivial
speedup during key generation:

name           old time/op  new time/op  delta
KeyGeneration  65.2µs ± 0%  63.1µs ± 0%  -3.19%
Signing        64.3µs ± 0%  64.4µs ± 0%  +0.16%
Verification    147µs ± 0%   147µs ± 0%  +0.11%

Similarly, this test binary has shrunk by 66488B.

Change-Id: I077aeda7943119b41f07e4e62e44a648f16e4ad0
Reviewed-on: https://go-review.googlesource.com/c/go/+/248723
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
src/cmd/compile/internal/ppc64/ssa.go
src/cmd/compile/internal/ssa/gen/PPC64.rules
src/cmd/compile/internal/ssa/gen/PPC64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewritePPC64.go
test/codegen/arithmetic.go

index 0efdd710fb2cab1ee343b92fdd95b2286ba772e8..4d2ad48135f6654d4600f5254967ee8cd287a58d 100644 (file)
@@ -601,6 +601,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.To.Type = obj.TYPE_REG
                p.To.Reg = v.Reg()
 
+       case ssa.OpPPC64MADDLD:
+               r := v.Reg()
+               r1 := v.Args[0].Reg()
+               r2 := v.Args[1].Reg()
+               r3 := v.Args[2].Reg()
+               // r = r1*r2 + r3
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = r1
+               p.Reg = r2
+               p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r3})
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = r
+
        case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
                r := v.Reg()
                r1 := v.Args[0].Reg()
index fd28e10098b1ec8b6b44c2531896696a3202ed12..14942d50f96579697c6aa7dbef6d7e2b48de0eac 100644 (file)
@@ -11,6 +11,9 @@
 (Sub32F ...) => (FSUBS ...)
 (Sub64F ...) => (FSUB ...)
 
+// Combine 64 bit integer multiply and adds
+(ADD l:(MULLD x y) z) && objabi.GOPPC64 >= 9 && l.Uses == 1 && clobber(l) => (MADDLD x y z)
+
 (Mod16 x y) => (Mod32 (SignExt16to32 x) (SignExt16to32 y))
 (Mod16u x y) => (Mod32u (ZeroExt16to32 x) (ZeroExt16to32 y))
 (Mod8 x y) => (Mod32 (SignExt8to32 x) (SignExt8to32 y))
index 0261dc283b970743a2d460dccbc2f4322c08e9e4..825d0faf3434a2c720d2ebd0e358b18328dbb6dd 100644 (file)
@@ -137,6 +137,7 @@ func init() {
                gp01        = regInfo{inputs: nil, outputs: []regMask{gp}}
                gp11        = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
                gp21        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
+               gp31        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
                gp22        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
                gp32        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
                gp1cr       = regInfo{inputs: []regMask{gp | sp | sb}}
@@ -179,6 +180,7 @@ func init() {
 
                {name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
                {name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
+               {name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"},                  // (arg0*arg1)+arg2 (signed 64-bit)
 
                {name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true},   // (arg0 * arg1) >> 64, signed
                {name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true},   // (arg0 * arg1) >> 32, signed
index df2a27368bb438dc72c90e73395e2fe9ee0b651a..4cd72799e85292b2fe3c23d899f1b60f7f1eaed6 100644 (file)
@@ -1832,6 +1832,7 @@ const (
        OpPPC64FSUBS
        OpPPC64MULLD
        OpPPC64MULLW
+       OpPPC64MADDLD
        OpPPC64MULHD
        OpPPC64MULHW
        OpPPC64MULHDU
@@ -24374,6 +24375,21 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "MADDLD",
+               argLen: 3,
+               asm:    ppc64.AMADDLD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                               {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                               {2, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+                       outputs: []outputInfo{
+                               {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+               },
+       },
        {
                name:        "MULHD",
                argLen:      2,
index 37b75cc58a03b2aadb3208aa9b7516efa08c3368..7704b80dc6d06f16cffee006c8219abf1f74cbfa 100644 (file)
@@ -3852,6 +3852,27 @@ func rewriteValuePPC64_OpPPC64ADD(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
+       // match: (ADD l:(MULLD x y) z)
+       // cond: objabi.GOPPC64 >= 9 && l.Uses == 1 && clobber(l)
+       // result: (MADDLD x y z)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       l := v_0
+                       if l.Op != OpPPC64MULLD {
+                               continue
+                       }
+                       y := l.Args[1]
+                       x := l.Args[0]
+                       z := v_1
+                       if !(objabi.GOPPC64 >= 9 && l.Uses == 1 && clobber(l)) {
+                               continue
+                       }
+                       v.reset(OpPPC64MADDLD)
+                       v.AddArg3(x, y, z)
+                       return true
+               }
+               break
+       }
        // match: (ADD (SLDconst x [c]) (SRDconst x [d]))
        // cond: d == 64-c
        // result: (ROTLconst [c] x)
index 9f30ec8ce42c2956931b4b4b7d51d913571faf41..45fdb68903fb016a8b9af7b4b3780df9f0011a34 100644 (file)
@@ -253,16 +253,20 @@ func Divisible(n1 uint, n2 int) (bool, bool, bool, bool) {
        // 386:"IMUL3L\t[$]-1431655765","ADDL\t[$]715827882","ROLL\t[$]31",-"DIVQ"
        // arm64:"MUL","ADD\t[$]3074457345618258602","ROR",-"DIV"
        // arm:"MUL","ADD\t[$]715827882",-".*udiv"
-       // ppc64:"MULLD","ADD","ROTL\t[$]63"
-       // ppc64le:"MULLD","ADD","ROTL\t[$]63"
+       // ppc64/power8:"MULLD","ADD","ROTL\t[$]63"
+       // ppc64le/power8:"MULLD","ADD","ROTL\t[$]63"
+       // ppc64/power9:"MADDLD","ROTL\t[$]63"
+       // ppc64le/power9:"MADDLD","ROTL\t[$]63"
        evenS := n2%6 == 0
 
        // amd64:"IMULQ","ADD",-"ROLQ",-"DIVQ"
        // 386:"IMUL3L\t[$]678152731","ADDL\t[$]113025455",-"ROLL",-"DIVQ"
        // arm64:"MUL","ADD\t[$]485440633518672410",-"ROR",-"DIV"
        // arm:"MUL","ADD\t[$]113025455",-".*udiv"
-       // ppc64:"MULLD","ADD",-"ROTL"
-       // ppc64le:"MULLD","ADD",-"ROTL"
+       // ppc64/power8:"MULLD","ADD",-"ROTL"
+       // ppc64/power9:"MADDLD",-"ROTL"
+       // ppc64le/power8:"MULLD","ADD",-"ROTL"
+       // ppc64le/power9:"MADDLD",-"ROTL"
        oddS := n2%19 == 0
 
        return evenU, oddU, evenS, oddS