]> Cypherpunks.ru repositories - gostls13.git/commitdiff
cmd/compile,cmd/internal/obj/ppc64: use mod instructions on power9
authorLynn Boger <laboger@linux.vnet.ibm.com>
Thu, 23 Apr 2020 19:36:28 +0000 (15:36 -0400)
committerLynn Boger <laboger@linux.vnet.ibm.com>
Wed, 29 Apr 2020 14:45:56 +0000 (14:45 +0000)
This updates the PPC64.rules file to use the MOD instructions
that are available in power9. Prior to power9 this is done
using a longer sequence with multiply and divide.

Included in this change is removal of the REM* opcode variations
that set the CC or OV bits since their settings are based
on the DIV and are not appropriate for the REM.

Change-Id: Iceed9ce33e128e1911c15592ee674276ce8ba3fa
Reviewed-on: https://go-review.googlesource.com/c/go/+/229761
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
src/cmd/asm/internal/asm/testdata/ppc64enc.s
src/cmd/compile/internal/ppc64/ssa.go
src/cmd/compile/internal/ssa/gen/PPC64.rules
src/cmd/compile/internal/ssa/gen/PPC64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewritePPC64.go
src/cmd/internal/obj/ppc64/a.out.go
src/cmd/internal/obj/ppc64/anames.go
src/cmd/internal/obj/ppc64/asm9.go

index 0fa26d14cc97e4e1999c6dd587284421d17d05d0..35464f37fdebdc90eff5b6669ae3bbe0d6bf51d6 100644 (file)
@@ -236,6 +236,11 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
        DIVDEU R3, R4, R5               // 7ca41b12
        DIVDEUCC R3, R4, R5             // 7ca41b13
 
+       REM R3, R4, R5                  // 7fe41bd67fff19d67cbf2050
+       REMU R3, R4, R5                 // 7fe41b967fff19d67bff00287cbf2050
+       REMD R3, R4, R5                 // 7fe41bd27fff19d27cbf2050
+       REMDU R3, R4, R5                // 7fe41b927fff19d27cbf2050
+
        MODUD R3, R4, R5                // 7ca41a12
        MODUW R3, R4, R5                // 7ca41a16
        MODSD R3, R4, R5                // 7ca41e12
index efb940b7d9a5d47ba3e803fe641a50db30a39f0e..0efdd710fb2cab1ee343b92fdd95b2286ba772e8 100644 (file)
@@ -571,7 +571,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
                ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
                ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
-               ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV:
+               ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
+               ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
                r := v.Reg()
                r1 := v.Args[0].Reg()
                r2 := v.Args[1].Reg()
index e59bd3f90f810d9c0047dd62cb12538eb16b3ba8..d8041e810fb7492ca23eb6e19599f131e2ce61c7 100644 (file)
 (Mod16u x y) => (Mod32u (ZeroExt16to32 x) (ZeroExt16to32 y))
 (Mod8 x y) => (Mod32 (SignExt8to32 x) (SignExt8to32 y))
 (Mod8u x y) => (Mod32u (ZeroExt8to32 x) (ZeroExt8to32 y))
-(Mod64 x y) => (SUB x (MULLD y (DIVD x y)))
-(Mod64u x y) => (SUB x (MULLD y (DIVDU x y)))
-(Mod32 x y) => (SUB x (MULLW y (DIVW x y)))
-(Mod32u x y) => (SUB x (MULLW y (DIVWU x y)))
+(Mod64 x y) && objabi.GOPPC64 >=9 => (MODSD x y)
+(Mod64 x y) && objabi.GOPPC64 <=8 => (SUB x (MULLD y (DIVD x y)))
+(Mod64u x y) && objabi.GOPPC64 >= 9 => (MODUD x y)
+(Mod64u x y) && objabi.GOPPC64 <= 8 => (SUB x (MULLD y (DIVDU x y)))
+(Mod32 x y) && objabi.GOPPC64 >= 9 => (MODSW x y)
+(Mod32 x y) && objabi.GOPPC64 <= 8 => (SUB x (MULLW y (DIVW x y)))
+(Mod32u x y) && objabi.GOPPC64 >= 9 => (MODUW x y)
+(Mod32u x y) && objabi.GOPPC64 <= 8 => (SUB x (MULLW y (DIVWU x y)))
 
 // (x + y) / 2 with x>=y => (x - y) / 2 + y
 (Avg64u <t> x y) => (ADD (SRDconst <t> (SUB <t> x y) [1]) y)
index 0199c8f7137d0da04bd1639252f826d96affcf3b..63e0b9366718f851060c86f37441aa94fb4e9dfa 100644 (file)
@@ -236,6 +236,10 @@ func init() {
                {name: "DIVDU", argLength: 2, reg: gp21, asm: "DIVDU", typ: "Int64"}, // arg0/arg1 (unsigned 64-bit)
                {name: "DIVWU", argLength: 2, reg: gp21, asm: "DIVWU", typ: "Int32"}, // arg0/arg1 (unsigned 32-bit)
 
+               {name: "MODUD", argLength: 2, reg: gp21, asm: "MODUD", typ: "UInt64"}, // arg0 % arg1 (unsigned 64-bit)
+               {name: "MODSD", argLength: 2, reg: gp21, asm: "MODSD", typ: "Int64"},  // arg0 % arg1 (signed 64-bit)
+               {name: "MODUW", argLength: 2, reg: gp21, asm: "MODUW", typ: "UInt32"}, // arg0 % arg1 (unsigned 32-bit)
+               {name: "MODSW", argLength: 2, reg: gp21, asm: "MODSW", typ: "Int32"},  // arg0 % arg1 (signed 32-bit)
                // MOD is implemented as rem := arg0 - (arg0/arg1) * arg1
 
                // Conversions are all float-to-float register operations.  "Integer" refers to encoding in the FP register.
index d708c8480fd2bd50467d7175c4f6218fa60bd62a..981be13200c7bf7fcbcb76f51da7b01446df4322 100644 (file)
@@ -1777,6 +1777,10 @@ const (
        OpPPC64DIVW
        OpPPC64DIVDU
        OpPPC64DIVWU
+       OpPPC64MODUD
+       OpPPC64MODSD
+       OpPPC64MODUW
+       OpPPC64MODSW
        OpPPC64FCTIDZ
        OpPPC64FCTIWZ
        OpPPC64FCFID
@@ -23379,6 +23383,62 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "MODUD",
+               argLen: 2,
+               asm:    ppc64.AMODUD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                               {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+                       outputs: []outputInfo{
+                               {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+               },
+       },
+       {
+               name:   "MODSD",
+               argLen: 2,
+               asm:    ppc64.AMODSD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                               {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+                       outputs: []outputInfo{
+                               {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+               },
+       },
+       {
+               name:   "MODUW",
+               argLen: 2,
+               asm:    ppc64.AMODUW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                               {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+                       outputs: []outputInfo{
+                               {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+               },
+       },
+       {
+               name:   "MODSW",
+               argLen: 2,
+               asm:    ppc64.AMODSW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                               {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+                       outputs: []outputInfo{
+                               {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+               },
+       },
        {
                name:   "FCTIDZ",
                argLen: 1,
index 13c188e78d4315785e9aaa78b6045eb48f5eab50..0b798c6a722ddcad27b74a463274b9592e687837 100644 (file)
@@ -3231,10 +3231,27 @@ func rewriteValuePPC64_OpMod32(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Mod32 x y)
+       // cond: objabi.GOPPC64 >= 9
+       // result: (MODSW x y)
+       for {
+               x := v_0
+               y := v_1
+               if !(objabi.GOPPC64 >= 9) {
+                       break
+               }
+               v.reset(OpPPC64MODSW)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Mod32 x y)
+       // cond: objabi.GOPPC64 <= 8
        // result: (SUB x (MULLW y (DIVW x y)))
        for {
                x := v_0
                y := v_1
+               if !(objabi.GOPPC64 <= 8) {
+                       break
+               }
                v.reset(OpPPC64SUB)
                v0 := b.NewValue0(v.Pos, OpPPC64MULLW, typ.Int32)
                v1 := b.NewValue0(v.Pos, OpPPC64DIVW, typ.Int32)
@@ -3243,6 +3260,7 @@ func rewriteValuePPC64_OpMod32(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
+       return false
 }
 func rewriteValuePPC64_OpMod32u(v *Value) bool {
        v_1 := v.Args[1]
@@ -3250,10 +3268,27 @@ func rewriteValuePPC64_OpMod32u(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Mod32u x y)
+       // cond: objabi.GOPPC64 >= 9
+       // result: (MODUW x y)
+       for {
+               x := v_0
+               y := v_1
+               if !(objabi.GOPPC64 >= 9) {
+                       break
+               }
+               v.reset(OpPPC64MODUW)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Mod32u x y)
+       // cond: objabi.GOPPC64 <= 8
        // result: (SUB x (MULLW y (DIVWU x y)))
        for {
                x := v_0
                y := v_1
+               if !(objabi.GOPPC64 <= 8) {
+                       break
+               }
                v.reset(OpPPC64SUB)
                v0 := b.NewValue0(v.Pos, OpPPC64MULLW, typ.Int32)
                v1 := b.NewValue0(v.Pos, OpPPC64DIVWU, typ.Int32)
@@ -3262,6 +3297,7 @@ func rewriteValuePPC64_OpMod32u(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
+       return false
 }
 func rewriteValuePPC64_OpMod64(v *Value) bool {
        v_1 := v.Args[1]
@@ -3269,10 +3305,27 @@ func rewriteValuePPC64_OpMod64(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Mod64 x y)
+       // cond: objabi.GOPPC64 >=9
+       // result: (MODSD x y)
+       for {
+               x := v_0
+               y := v_1
+               if !(objabi.GOPPC64 >= 9) {
+                       break
+               }
+               v.reset(OpPPC64MODSD)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Mod64 x y)
+       // cond: objabi.GOPPC64 <=8
        // result: (SUB x (MULLD y (DIVD x y)))
        for {
                x := v_0
                y := v_1
+               if !(objabi.GOPPC64 <= 8) {
+                       break
+               }
                v.reset(OpPPC64SUB)
                v0 := b.NewValue0(v.Pos, OpPPC64MULLD, typ.Int64)
                v1 := b.NewValue0(v.Pos, OpPPC64DIVD, typ.Int64)
@@ -3281,6 +3334,7 @@ func rewriteValuePPC64_OpMod64(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
+       return false
 }
 func rewriteValuePPC64_OpMod64u(v *Value) bool {
        v_1 := v.Args[1]
@@ -3288,10 +3342,27 @@ func rewriteValuePPC64_OpMod64u(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
        // match: (Mod64u x y)
+       // cond: objabi.GOPPC64 >= 9
+       // result: (MODUD x y)
+       for {
+               x := v_0
+               y := v_1
+               if !(objabi.GOPPC64 >= 9) {
+                       break
+               }
+               v.reset(OpPPC64MODUD)
+               v.AddArg2(x, y)
+               return true
+       }
+       // match: (Mod64u x y)
+       // cond: objabi.GOPPC64 <= 8
        // result: (SUB x (MULLD y (DIVDU x y)))
        for {
                x := v_0
                y := v_1
+               if !(objabi.GOPPC64 <= 8) {
+                       break
+               }
                v.reset(OpPPC64SUB)
                v0 := b.NewValue0(v.Pos, OpPPC64MULLD, typ.Int64)
                v1 := b.NewValue0(v.Pos, OpPPC64DIVDU, typ.Int64)
@@ -3300,6 +3371,7 @@ func rewriteValuePPC64_OpMod64u(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
+       return false
 }
 func rewriteValuePPC64_OpMod8(v *Value) bool {
        v_1 := v.Args[1]
index 6642f25f89b040a7e0e830d97c43f9dd3afca11f..8b32692778e8c1b2958b1e9a63f1b0764a940716 100644 (file)
@@ -569,13 +569,7 @@ const (
        AORNCC
        AORIS
        AREM
-       AREMCC
-       AREMV
-       AREMVCC
        AREMU
-       AREMUCC
-       AREMUV
-       AREMUVCC
        ARFI
        ARLWMI
        ARLWMICC
@@ -741,13 +735,7 @@ const (
        /* 64-bit pseudo operation */
        ADWORD
        AREMD
-       AREMDCC
-       AREMDV
-       AREMDVCC
        AREMDU
-       AREMDUCC
-       AREMDUV
-       AREMDUVCC
 
        /* more 64-bit operations */
        AHRFID
index 392356885a68fedd329e2dfab6707e1a862443e4..287011877c857d64dc986770f08fdbc36673c36a 100644 (file)
@@ -174,13 +174,7 @@ var Anames = []string{
        "ORNCC",
        "ORIS",
        "REM",
-       "REMCC",
-       "REMV",
-       "REMVCC",
        "REMU",
-       "REMUCC",
-       "REMUV",
-       "REMUVCC",
        "RFI",
        "RLWMI",
        "RLWMICC",
@@ -335,13 +329,7 @@ var Anames = []string{
        "TD",
        "DWORD",
        "REMD",
-       "REMDCC",
-       "REMDV",
-       "REMDVCC",
        "REMDU",
-       "REMDUCC",
-       "REMDUV",
-       "REMDUVCC",
        "HRFID",
        "POPCNTD",
        "POPCNTW",
index 7b73b9f7863504c4a0e3906024c2af01444fc9ea..0fd0744a42df9bb3ca88f46f63259f2a1ea184a5 100644 (file)
@@ -378,8 +378,6 @@ var optab = []Optab{
        {AREMU, C_REG, C_REG, C_NONE, C_REG, 50, 16, 0},
        {AREMD, C_REG, C_NONE, C_NONE, C_REG, 51, 12, 0},
        {AREMD, C_REG, C_REG, C_NONE, C_REG, 51, 12, 0},
-       {AREMDU, C_REG, C_NONE, C_NONE, C_REG, 51, 12, 0},
-       {AREMDU, C_REG, C_REG, C_NONE, C_REG, 51, 12, 0},
        {AMTFSB0, C_SCON, C_NONE, C_NONE, C_NONE, 52, 4, 0},
        {AMOVFL, C_FPSCR, C_NONE, C_NONE, C_FREG, 53, 4, 0},
        {AMOVFL, C_FREG, C_NONE, C_NONE, C_FPSCR, 64, 4, 0},
@@ -1265,31 +1263,16 @@ func buildop(ctxt *obj.Link) {
                        opset(ASTWCCC, r0)
                        opset(ASTHCCC, r0)
                        opset(ASTBCCC, r0)
-
                        opset(ASTDCCC, r0)
 
                case AREM: /* macro */
-                       opset(AREMCC, r0)
-
-                       opset(AREMV, r0)
-                       opset(AREMVCC, r0)
+                       opset(AREM, r0)
 
                case AREMU:
                        opset(AREMU, r0)
-                       opset(AREMUCC, r0)
-                       opset(AREMUV, r0)
-                       opset(AREMUVCC, r0)
 
                case AREMD:
-                       opset(AREMDCC, r0)
-                       opset(AREMDV, r0)
-                       opset(AREMDVCC, r0)
-
-               case AREMDU:
                        opset(AREMDU, r0)
-                       opset(AREMDUCC, r0)
-                       opset(AREMDUV, r0)
-                       opset(AREMDUVCC, r0)
 
                case ADIVW: /* op Rb[,Ra],Rd */
                        opset(AMULHW, r0)
@@ -3253,6 +3236,9 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                o1 = AOP_RRR(v&^t, REGTMP, uint32(r), uint32(p.From.Reg))
                o2 = AOP_RRR(OP_MULLD, REGTMP, REGTMP, uint32(p.From.Reg))
                o3 = AOP_RRR(OP_SUBF|t, uint32(p.To.Reg), REGTMP, uint32(r))
+               /* cases 50,51: removed; can be reused. */
+
+               /* cases 50,51: removed; can be reused. */
 
        case 52: /* mtfsbNx cr(n) */
                v := c.regoff(&p.From) & 31
@@ -3922,35 +3908,34 @@ func (c *ctxt9) oprrr(a obj.As) uint32 {
        case AMODSW:
                return OPVCC(31, 779, 0, 0) /* modsw - v3.0 */
 
-       // TODO: Should REMs be here?
-       case AREM, ADIVW:
+       case ADIVW, AREM:
                return OPVCC(31, 491, 0, 0)
 
-       case AREMCC, ADIVWCC:
+       case ADIVWCC:
                return OPVCC(31, 491, 0, 1)
 
-       case AREMV, ADIVWV:
+       case ADIVWV:
                return OPVCC(31, 491, 1, 0)
 
-       case AREMVCC, ADIVWVCC:
+       case ADIVWVCC:
                return OPVCC(31, 491, 1, 1)
 
-       case AREMU, ADIVWU:
+       case ADIVWU, AREMU:
                return OPVCC(31, 459, 0, 0)
 
-       case AREMUCC, ADIVWUCC:
+       case ADIVWUCC:
                return OPVCC(31, 459, 0, 1)
 
-       case AREMUV, ADIVWUV:
+       case ADIVWUV:
                return OPVCC(31, 459, 1, 0)
 
-       case AREMUVCC, ADIVWUVCC:
+       case ADIVWUVCC:
                return OPVCC(31, 459, 1, 1)
 
-       case AREMD, ADIVD:
+       case ADIVD, AREMD:
                return OPVCC(31, 489, 0, 0)
 
-       case AREMDCC, ADIVDCC:
+       case ADIVDCC:
                return OPVCC(31, 489, 0, 1)
 
        case ADIVDE:
@@ -3965,22 +3950,22 @@ func (c *ctxt9) oprrr(a obj.As) uint32 {
        case ADIVDEUCC:
                return OPVCC(31, 393, 0, 1)
 
-       case AREMDV, ADIVDV:
+       case ADIVDV:
                return OPVCC(31, 489, 1, 0)
 
-       case AREMDVCC, ADIVDVCC:
+       case ADIVDVCC:
                return OPVCC(31, 489, 1, 1)
 
-       case AREMDU, ADIVDU:
+       case ADIVDU, AREMDU:
                return OPVCC(31, 457, 0, 0)
 
-       case AREMDUCC, ADIVDUCC:
+       case ADIVDUCC:
                return OPVCC(31, 457, 0, 1)
 
-       case AREMDUV, ADIVDUV:
+       case ADIVDUV:
                return OPVCC(31, 457, 1, 0)
 
-       case AREMDUVCC, ADIVDUVCC:
+       case ADIVDUVCC:
                return OPVCC(31, 457, 1, 1)
 
        case AEIEIO: