Cypherpunks.ru repositories - gostls13.git/commitdiff
cmd/compile: optimize 386's math.bits.TrailingZeros16
author Ben Shi <powerman1st@163.com>
Wed, 7 Aug 2019 05:20:32 +0000 (05:20 +0000)
committer Ben Shi <powerman1st@163.com>
Wed, 28 Aug 2019 02:29:54 +0000 (02:29 +0000)
This CL optimizes math.bits.TrailingZeros16 on 386 with
a pair of BSFL and ORL instructions.

The case TrailingZeros16-4 of the benchmark test in
math/bits shows a big improvement.
name               old time/op  new time/op  delta
TrailingZeros16-4  1.55ns ± 1%  0.87ns ± 1%  -43.87%  (p=0.000 n=50+49)

Change-Id: Ia899975b0e46f45dcd20223b713ed632bc32740b
Reviewed-on: https://go-review.googlesource.com/c/go/+/189277
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ssa/gen/386.rules
src/cmd/compile/internal/ssa/rewrite386.go
test/codegen/mathbits.go

index 9871e11a09934f393a9a35107944aa7c7d9059c6..069e2f6c9e96ad83ba775801cd9590d949c1ea95 100644 (file)
@@ -3370,7 +3370,7 @@ func init() {
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpCtz16, types.Types[TINT], args[0])
                },
-               sys.AMD64, sys.ARM, sys.ARM64, sys.Wasm)
+               sys.AMD64, sys.ARM, sys.I386, sys.ARM64, sys.Wasm)
        addF("math/bits", "TrailingZeros16",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        x := s.newValue1(ssa.OpZeroExt16to64, types.Types[TUINT64], args[0])
index d7cbba17186f54869b5b653043d871f711bbc828..395ebb085d9c18ccaf05df1d3607a9a0404f6aef 100644 (file)
@@ -66,6 +66,8 @@
 
 (Sqrt x) -> (SQRTSD x)
 
+(Ctz16 x) -> (BSFL (ORLconst <typ.UInt32> [0x10000] x))
+
 // Lowering extension
 (SignExt8to16  x) -> (MOVBLSX x)
 (SignExt8to32  x) -> (MOVBLSX x)
index 812aa9420d66f49b6e0241d59924704ac53b42f6..233656156009ae403952895775601811a36cd247 100644 (file)
@@ -360,6 +360,8 @@ func rewriteValue386(v *Value) bool {
                return rewriteValue386_OpConstBool_0(v)
        case OpConstNil:
                return rewriteValue386_OpConstNil_0(v)
+       case OpCtz16:
+               return rewriteValue386_OpCtz16_0(v)
        case OpCvt32Fto32:
                return rewriteValue386_OpCvt32Fto32_0(v)
        case OpCvt32Fto64F:
@@ -20783,6 +20785,22 @@ func rewriteValue386_OpConstNil_0(v *Value) bool {
                return true
        }
 }
+func rewriteValue386_OpCtz16_0(v *Value) bool {
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Ctz16 x)
+       // cond:
+       // result: (BSFL (ORLconst <typ.UInt32> [0x10000] x))
+       for {
+               x := v.Args[0]
+               v.reset(Op386BSFL)
+               v0 := b.NewValue0(v.Pos, Op386ORLconst, typ.UInt32)
+               v0.AuxInt = 0x10000
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+}
 func rewriteValue386_OpCvt32Fto32_0(v *Value) bool {
        // match: (Cvt32Fto32 x)
        // cond:
index 0d94bd1bc8f26f19d6f69e25e6fbee1f06be4e1a..779ea6e3227619cdfc36bcfd71525d79cdefcb40 100644 (file)
@@ -296,6 +296,7 @@ func TrailingZeros32(n uint32) int {
 
 func TrailingZeros16(n uint16) int {
        // amd64:"BSFL","BTSL\\t\\$16"
+       // 386:"BSFL\t"
        // arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
        // arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
        // s390x:"FLOGR","OR\t\\$65536"