]> Cypherpunks.ru repositories - gostls13.git/commitdiff
cmd/compile: intrinsify Add64 on mips64
authorJunxian Zhu <zhujunxian@oss.cipunited.com>
Fri, 26 May 2023 05:26:51 +0000 (13:26 +0800)
committerJoel Sing <joel@sing.id.au>
Mon, 31 Jul 2023 03:58:42 +0000 (03:58 +0000)
This CL intrinsify Add64 on mips64.

pkg: math/bits
                  _   sec/op    _   sec/op     vs base               _
Add64-4             2.783n _ 0%   1.950n _ 0%  -29.93% (p=0.000 n=8)
Add64multiple-4     5.713n _ 0%   3.063n _ 0%  -46.38% (p=0.000 n=8)

pkg: crypto/elliptic
                                     _    sec/op    _   sec/op     vs base               _
ScalarBaseMult/P256-4                   353.7_ _ 0%   282.7_ _ 0%  -20.09% (p=0.000 n=8)
ScalarBaseMult/P224-4                   330.5_ _ 0%   250.0_ _ 0%  -24.37% (p=0.000 n=8)
ScalarBaseMult/P384-4                  1228.8_ _ 0%   791.5_ _ 0%  -35.59% (p=0.000 n=8)
ScalarBaseMult/P521-4                  15.412m _ 0%   2.438m _ 0%  -84.18% (p=0.000 n=8)
ScalarMult/P256-4                      1189.4_ _ 0%   904.2_ _ 0%  -23.98% (p=0.000 n=8)
ScalarMult/P224-4                      1138.8_ _ 0%   813.8_ _ 0%  -28.54% (p=0.000 n=8)
ScalarMult/P384-4                       4.419m _ 0%   2.692m _ 0%  -39.08% (p=0.000 n=8)
ScalarMult/P521-4                      59.768m _ 0%   8.773m _ 0%  -85.32% (p=0.000 n=8)
MarshalUnmarshal/P256/Uncompressed-4    8.697_ _ 1%   7.923_ _ 1%   -8.91% (p=0.000 n=8)
MarshalUnmarshal/P256/Compressed-4     104.75_ _ 0%   66.29_ _ 0%  -36.72% (p=0.000 n=8)
MarshalUnmarshal/P224/Uncompressed-4    8.728_ _ 1%   7.823_ _ 1%  -10.37% (p=0.000 n=8)
MarshalUnmarshal/P224/Compressed-4     1035.7_ _ 0%   676.5_ _ 2%  -34.69% (p=0.000 n=8)
MarshalUnmarshal/P384/Uncompressed-4    15.32_ _ 1%   11.81_ _ 1%  -22.90% (p=0.000 n=8)
MarshalUnmarshal/P384/Compressed-4      399.8_ _ 0%   217.4_ _ 0%  -45.62% (p=0.000 n=8)
MarshalUnmarshal/P521/Uncompressed-4    96.79_ _ 0%   20.32_ _ 0%  -79.01% (p=0.000 n=8)
MarshalUnmarshal/P521/Compressed-4     6640.4_ _ 0%   790.8_ _ 0%  -88.09% (p=0.000 n=8)

Change-Id: I8a0960b9665720c1d3e57dce36386e74db37fefa
Reviewed-on: https://go-review.googlesource.com/c/go/+/498496
Reviewed-by: Heschi Kreinick <heschi@google.com>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Joel Sing <joel@sing.id.au>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/ssa/_gen/MIPS64.rules
src/cmd/compile/internal/ssa/rewriteMIPS64.go
src/cmd/compile/internal/ssagen/ssa.go
test/codegen/mathbits.go

index 4628e2a02422edd45584cc23b9204439d4cb2c96..5c07abc79b36e348fff8b91a439e686a453ee5ca 100644 (file)
 (Mod8 x y) => (Select0 (DIVV (SignExt8to64 x) (SignExt8to64 y)))
 (Mod8u x y) => (Select0 (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y)))
 
+(Select0 <t> (Add64carry x y c)) => (ADDV (ADDV <t> x y) c)
+(Select1 <t> (Add64carry x y c)) =>
+       (OR (SGTU <t> x s:(ADDV <t> x y)) (SGTU <t> s (ADDV <t> s c)))
+
 // math package intrinsics
 (Abs ...) => (ABSD ...)
 
 (GEZ (MOVVconst [c]) yes no) && c >= 0 => (First yes no)
 (GEZ (MOVVconst [c]) yes no) && c <  0 => (First no yes)
 
+// SGT/SGTU with known outcomes.
+(SGT  x x) => (MOVVconst [0])
+(SGTU x x) => (MOVVconst [0])
+
 // fold readonly sym load
 (MOVBload [off] {sym} (SB) _) && symIsRO(sym) => (MOVVconst [int64(read8(sym, int64(off)))])
 (MOVHload [off] {sym} (SB) _) && symIsRO(sym) => (MOVVconst [int64(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))])
index de316e9678e58fb1466ec3fc24442a7d15bf2c7b..4e7b51b351422887d48978fc23480f682330f42e 100644 (file)
@@ -4797,6 +4797,17 @@ func rewriteValueMIPS64_OpMIPS64SGT(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (SGT x x)
+       // result: (MOVVconst [0])
+       for {
+               x := v_0
+               if x != v_1 {
+                       break
+               }
+               v.reset(OpMIPS64MOVVconst)
+               v.AuxInt = int64ToAuxInt(0)
+               return true
+       }
        return false
 }
 func rewriteValueMIPS64_OpMIPS64SGTU(v *Value) bool {
@@ -4819,6 +4830,17 @@ func rewriteValueMIPS64_OpMIPS64SGTU(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (SGTU x x)
+       // result: (MOVVconst [0])
+       for {
+               x := v_0
+               if x != v_1 {
+                       break
+               }
+               v.reset(OpMIPS64MOVVconst)
+               v.AuxInt = int64ToAuxInt(0)
+               return true
+       }
        return false
 }
 func rewriteValueMIPS64_OpMIPS64SGTUconst(v *Value) bool {
@@ -7315,6 +7337,22 @@ func rewriteValueMIPS64_OpSelect0(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (Select0 <t> (Add64carry x y c))
+       // result: (ADDV (ADDV <t> x y) c)
+       for {
+               t := v.Type
+               if v_0.Op != OpAdd64carry {
+                       break
+               }
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpMIPS64ADDV)
+               v0 := b.NewValue0(v.Pos, OpMIPS64ADDV, t)
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, c)
+               return true
+       }
        // match: (Select0 (DIVVU _ (MOVVconst [1])))
        // result: (MOVVconst [0])
        for {
@@ -7427,6 +7465,28 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool {
                v.AddArg2(v0, v2)
                return true
        }
+       // match: (Select1 <t> (Add64carry x y c))
+       // result: (OR (SGTU <t> x s:(ADDV <t> x y)) (SGTU <t> s (ADDV <t> s c)))
+       for {
+               t := v.Type
+               if v_0.Op != OpAdd64carry {
+                       break
+               }
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpMIPS64OR)
+               v0 := b.NewValue0(v.Pos, OpMIPS64SGTU, t)
+               s := b.NewValue0(v.Pos, OpMIPS64ADDV, t)
+               s.AddArg2(x, y)
+               v0.AddArg2(x, s)
+               v2 := b.NewValue0(v.Pos, OpMIPS64SGTU, t)
+               v3 := b.NewValue0(v.Pos, OpMIPS64ADDV, t)
+               v3.AddArg2(s, c)
+               v2.AddArg2(s, v3)
+               v.AddArg2(v0, v2)
+               return true
+       }
        // match: (Select1 (MULVU x (MOVVconst [-1])))
        // result: (NEGV x)
        for {
index 678e1ebc11e96ba7869d6acc6ed7a55e2c5674a3..09b20b726e90dff62c918e8db5ead83cde18e5e5 100644 (file)
@@ -4843,7 +4843,7 @@ func InitTables() {
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
                },
-               sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64)
+               sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64, sys.MIPS64)
        alias("math/bits", "Add", "math/bits", "Add64", p8...)
        addF("math/bits", "Sub64",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
index 8c971cf760c60cb0a783ef3272b07723bba3e6ad..85c7d24f7de7121d785cd9b45f37323ea309a241 100644 (file)
@@ -434,6 +434,7 @@ func AddC(x, ci uint) (r, co uint) {
        // loong64: "ADDV", "SGTU"
        // ppc64x: "ADDC", "ADDE", "ADDZE"
        // s390x:"ADDE","ADDC\t[$]-1,"
+       // mips64:"ADDV","SGTU"
        // riscv64: "ADD","SLTU"
        return bits.Add(x, 7, ci)
 }
@@ -444,6 +445,7 @@ func AddZ(x, y uint) (r, co uint) {
        // loong64: "ADDV", "SGTU"
        // ppc64x: "ADDC", -"ADDE", "ADDZE"
        // s390x:"ADDC",-"ADDC\t[$]-1,"
+       // mips64:"ADDV","SGTU"
        // riscv64: "ADD","SLTU"
        return bits.Add(x, y, 0)
 }
@@ -454,6 +456,7 @@ func AddR(x, y, ci uint) uint {
        // loong64: "ADDV", -"SGTU"
        // ppc64x: "ADDC", "ADDE", -"ADDZE"
        // s390x:"ADDE","ADDC\t[$]-1,"
+       // mips64:"ADDV",-"SGTU"
        // riscv64: "ADD",-"SLTU"
        r, _ := bits.Add(x, y, ci)
        return r
@@ -475,6 +478,7 @@ func Add64(x, y, ci uint64) (r, co uint64) {
        // loong64: "ADDV", "SGTU"
        // ppc64x: "ADDC", "ADDE", "ADDZE"
        // s390x:"ADDE","ADDC\t[$]-1,"
+       // mips64:"ADDV","SGTU"
        // riscv64: "ADD","SLTU"
        return bits.Add64(x, y, ci)
 }
@@ -485,6 +489,7 @@ func Add64C(x, ci uint64) (r, co uint64) {
        // loong64: "ADDV", "SGTU"
        // ppc64x: "ADDC", "ADDE", "ADDZE"
        // s390x:"ADDE","ADDC\t[$]-1,"
+       // mips64:"ADDV","SGTU"
        // riscv64: "ADD","SLTU"
        return bits.Add64(x, 7, ci)
 }
@@ -495,6 +500,7 @@ func Add64Z(x, y uint64) (r, co uint64) {
        // loong64: "ADDV", "SGTU"
        // ppc64x: "ADDC", -"ADDE", "ADDZE"
        // s390x:"ADDC",-"ADDC\t[$]-1,"
+       // mips64:"ADDV","SGTU"
        // riscv64: "ADD","SLTU"
        return bits.Add64(x, y, 0)
 }
@@ -505,6 +511,7 @@ func Add64R(x, y, ci uint64) uint64 {
        // loong64: "ADDV", -"SGTU"
        // ppc64x: "ADDC", "ADDE", -"ADDZE"
        // s390x:"ADDE","ADDC\t[$]-1,"
+       // mips64:"ADDV",-"SGTU"
        // riscv64: "ADD",-"SLTU"
        r, _ := bits.Add64(x, y, ci)
        return r