]> Cypherpunks.ru repositories - gostls13.git/commitdiff
cmd/compile: use ANDL for small immediates
authorJake Ciolek <jakub@ciolek.dev>
Sun, 10 Oct 2021 15:56:16 +0000 (17:56 +0200)
committerJosh Bleecher Snyder <josharian@gmail.com>
Tue, 12 Oct 2021 22:02:39 +0000 (22:02 +0000)
We can rewrite ANDQ with an immediate fitting in 32bit with an ANDL, which is shorter to encode.

Looking at Go binary itself, before the change there was:

ANDL: 2337
ANDQ: 4476

After the change:

ANDL: 3790
ANDQ: 3024

So we got rid of 1452 ANDQs

This makes the Linux x86_64 binary 0.03% smaller.

There seems to be an impact on performance.

Intel Cascade Lake benchmarks (with perflock):

name                     old time/op    new time/op    delta
BinaryTree17-8              1.91s ± 1%     1.89s ± 1%  -1.22%  (p=0.000 n=21+18)
Fannkuch11-8                2.34s ± 0%     2.34s ± 0%    ~     (p=0.052 n=20+20)
FmtFprintfEmpty-8          27.7ns ± 1%    27.4ns ± 3%    ~     (p=0.497 n=21+21)
FmtFprintfString-8         53.2ns ± 0%    51.5ns ± 0%  -3.21%  (p=0.000 n=20+19)
FmtFprintfInt-8            57.3ns ± 0%    55.7ns ± 0%  -2.89%  (p=0.000 n=19+19)
FmtFprintfIntInt-8         92.3ns ± 0%    88.4ns ± 1%  -4.23%  (p=0.000 n=20+21)
FmtFprintfPrefixedInt-8     103ns ± 0%     103ns ± 0%  +0.23%  (p=0.000 n=20+21)
FmtFprintfFloat-8           147ns ± 0%     148ns ± 0%  +0.75%  (p=0.000 n=20+21)
FmtManyArgs-8               384ns ± 0%     381ns ± 0%  -0.63%  (p=0.000 n=21+21)
GobDecode-8                3.86ms ± 1%    3.88ms ± 1%  +0.52%  (p=0.000 n=20+21)
GobEncode-8                2.77ms ± 1%    2.77ms ± 0%    ~     (p=0.078 n=21+21)
Gzip-8                      168ms ± 1%     168ms ± 0%  +0.24%  (p=0.000 n=20+20)
Gunzip-8                   25.1ms ± 0%    24.3ms ± 0%  -3.03%  (p=0.000 n=21+21)
HTTPClientServer-8         61.4µs ± 8%    59.1µs ±10%    ~     (p=0.088 n=20+21)
JSONEncode-8               6.86ms ± 0%    6.70ms ± 0%  -2.29%  (p=0.000 n=20+19)
JSONDecode-8               30.8ms ± 1%    30.6ms ± 1%  -0.82%  (p=0.000 n=20+20)
Mandelbrot200-8            3.85ms ± 0%    3.85ms ± 0%    ~     (p=0.191 n=16+17)
GoParse-8                  2.61ms ± 2%    2.60ms ± 1%    ~     (p=0.561 n=21+20)
RegexpMatchEasy0_32-8      48.5ns ± 2%    45.9ns ± 3%  -5.26%  (p=0.000 n=20+21)
RegexpMatchEasy0_1K-8       139ns ± 0%     139ns ± 0%  +0.27%  (p=0.000 n=18+20)
RegexpMatchEasy1_32-8      41.3ns ± 0%    42.1ns ± 4%  +1.95%  (p=0.000 n=17+21)
RegexpMatchEasy1_1K-8       216ns ± 2%     216ns ± 0%  +0.17%  (p=0.020 n=21+19)
RegexpMatchMedium_32-8      790ns ± 7%     803ns ± 8%    ~     (p=0.178 n=21+21)
RegexpMatchMedium_1K-8     23.5µs ± 5%    23.7µs ± 5%    ~     (p=0.421 n=21+21)
RegexpMatchHard_32-8       1.09µs ± 1%    1.09µs ± 1%  -0.53%  (p=0.000 n=19+18)
RegexpMatchHard_1K-8       33.0µs ± 0%    33.0µs ± 0%    ~     (p=0.610 n=21+20)
Revcomp-8                   348ms ± 0%     353ms ± 0%  +1.38%  (p=0.000 n=17+18)
Template-8                 42.0ms ± 1%    41.9ms ± 1%  -0.30%  (p=0.049 n=20+20)
TimeParse-8                 185ns ± 0%     185ns ± 0%    ~     (p=0.387 n=20+18)
TimeFormat-8                237ns ± 1%     241ns ± 1%  +1.57%  (p=0.000 n=21+21)
[Geo mean]                 35.4µs         35.2µs       -0.66%

name                     old speed      new speed      delta
GobDecode-8               199MB/s ± 1%   198MB/s ± 1%  -0.52%  (p=0.000 n=20+21)
GobEncode-8               277MB/s ± 1%   277MB/s ± 0%    ~     (p=0.075 n=21+21)
Gzip-8                    116MB/s ± 1%   115MB/s ± 0%  -0.25%  (p=0.000 n=20+20)
Gunzip-8                  773MB/s ± 0%   797MB/s ± 0%  +3.12%  (p=0.000 n=21+21)
JSONEncode-8              283MB/s ± 0%   290MB/s ± 0%  +2.35%  (p=0.000 n=20+19)
JSONDecode-8             63.0MB/s ± 1%  63.5MB/s ± 1%  +0.82%  (p=0.000 n=20+20)
GoParse-8                22.2MB/s ± 2%  22.3MB/s ± 1%    ~     (p=0.539 n=21+20)
RegexpMatchEasy0_32-8     660MB/s ± 2%   697MB/s ± 3%  +5.57%  (p=0.000 n=20+21)
RegexpMatchEasy0_1K-8    7.36GB/s ± 0%  7.34GB/s ± 0%  -0.26%  (p=0.000 n=18+20)
RegexpMatchEasy1_32-8     775MB/s ± 0%   761MB/s ± 4%  -1.88%  (p=0.000 n=17+21)
RegexpMatchEasy1_1K-8    4.74GB/s ± 2%  4.74GB/s ± 0%  -0.18%  (p=0.020 n=21+19)
RegexpMatchMedium_32-8   40.6MB/s ± 7%  39.9MB/s ± 9%    ~     (p=0.191 n=21+21)
RegexpMatchMedium_1K-8   43.7MB/s ± 5%  43.2MB/s ± 5%    ~     (p=0.435 n=21+21)
RegexpMatchHard_32-8     29.3MB/s ± 1%  29.4MB/s ± 1%  +0.53%  (p=0.000 n=19+18)
RegexpMatchHard_1K-8     31.0MB/s ± 0%  31.0MB/s ± 0%    ~     (p=0.572 n=21+20)
Revcomp-8                 730MB/s ± 0%   720MB/s ± 0%  -1.36%  (p=0.000 n=17+18)
Template-8               46.2MB/s ± 1%  46.3MB/s ± 1%  +0.30%  (p=0.041 n=20+20)
[Geo mean]                204MB/s        205MB/s       +0.30%

Change-Id: Iac75d0ec184a515ce0e65e19559d5fe2e9840514
Reviewed-on: https://go-review.googlesource.com/c/go/+/354970
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
Trust: Josh Bleecher Snyder <josharian@gmail.com>
Trust: Keith Randall <khr@golang.org>
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Go Bot <gobot@golang.org>

src/cmd/compile/internal/amd64/ssa.go
test/codegen/arithmetic.go
test/codegen/bits.go
test/codegen/bool.go

index 78822098a78591012a65b50e66e1842146f40bb8..0e7457442249b95d4e762e95bdd9881824790cca 100644 (file)
@@ -618,8 +618,21 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                p.To.Reg = r
                p.SetFrom3Reg(v.Args[0].Reg())
 
+       case ssa.OpAMD64ANDQconst:
+               asm := v.Op.Asm()
+               // If the constant is positive and fits into 32 bits, use ANDL.
+               // This saves a few bytes of encoding.
+               if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
+                       asm = x86.AANDL
+               }
+               p := s.Prog(asm)
+               p.From.Type = obj.TYPE_CONST
+               p.From.Offset = v.AuxInt
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = v.Reg()
+
        case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
-               ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
+               ssa.OpAMD64ANDLconst,
                ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
                ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
                ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
index eb95416b6aabe38725a79ad3daf4615c4fc31991..8b8c8ed64b0c813e6515ecc141a350855fdeaa65 100644 (file)
@@ -241,7 +241,7 @@ func FloatDivs(a []float32) float32 {
 
 func Pow2Mods(n1 uint, n2 int) (uint, int) {
        // 386:"ANDL\t[$]31",-"DIVL"
-       // amd64:"ANDQ\t[$]31",-"DIVQ"
+       // amd64:"ANDL\t[$]31",-"DIVQ"
        // arm:"AND\t[$]31",-".*udiv"
        // arm64:"AND\t[$]31",-"UDIV"
        // ppc64:"ANDCC\t[$]31"
@@ -452,7 +452,7 @@ func LenDiv2(s string) int {
 
 func LenMod1(a []int) int {
        // 386:"ANDL\t[$]1023"
-       // amd64:"ANDQ\t[$]1023"
+       // amd64:"ANDL\t[$]1023"
        // arm64:"AND\t[$]1023",-"SDIV"
        // arm/6:"AND",-".*udiv"
        // arm/7:"BFC",-".*udiv",-"AND"
@@ -463,7 +463,7 @@ func LenMod1(a []int) int {
 
 func LenMod2(s string) int {
        // 386:"ANDL\t[$]2047"
-       // amd64:"ANDQ\t[$]2047"
+       // amd64:"ANDL\t[$]2047"
        // arm64:"AND\t[$]2047",-"SDIV"
        // arm/6:"AND",-".*udiv"
        // arm/7:"BFC",-".*udiv",-"AND"
@@ -484,7 +484,7 @@ func CapDiv(a []int) int {
 
 func CapMod(a []int) int {
        // 386:"ANDL\t[$]4095"
-       // amd64:"ANDQ\t[$]4095"
+       // amd64:"ANDL\t[$]4095"
        // arm64:"AND\t[$]4095",-"SDIV"
        // arm/6:"AND",-".*udiv"
        // arm/7:"BFC",-".*udiv",-"AND"
index 8e973d5726e4b52ad4536ae65541a50392ef1df3..e7826b8e658dc89d6ae97516b6f8a6339796f8b1 100644 (file)
@@ -332,7 +332,7 @@ func bitSetPowerOf2Test(x int) bool {
 }
 
 func bitSetTest(x int) bool {
-       // amd64:"ANDQ\t[$]9, AX"
+       // amd64:"ANDL\t[$]9, AX"
        // amd64:"CMPQ\tAX, [$]9"
        return x&9 == 9
 }
index 929b1b49b9bd012c5471e4d21a185fa765b7045d..a32423308e02c6ae36ee3c15d9ba2da1ff6a709e 100644 (file)
@@ -27,7 +27,7 @@ func convertNeq0L(x uint32, c bool) bool {
 }
 
 func convertNeq0Q(x uint64, c bool) bool {
-       // amd64:"ANDQ\t[$]1",-"SETB"
+       // amd64:"ANDL\t[$]1",-"SETB"
        b := x&1 != 0
        return c && b
 }