]> Cypherpunks.ru repositories - gostls13.git/commitdiff
internal/bytealg: fix alignment code in equal_riscv64.s
authorMark Ryan <markdryan@rivosinc.com>
Fri, 2 Jun 2023 11:13:29 +0000 (13:13 +0200)
committerM Zhuo <mzh@golangcn.org>
Thu, 29 Jun 2023 02:34:59 +0000 (02:34 +0000)
The riscv64 implementation of equal has an optimization that is
applied when both pointers share the same alignment but that alignment
is not 8 bytes.  In this case it tries to align both pointers to an 8 byte boundaries,
by individually comparing the first few bytes of each buffer.  Unfortunately,
the existing code is incorrect.  It adjusts the pointers by the wrong number
of bytes resulting, in most cases, in pointers that are not 8 byte aligned.

This commit fixes the issue by individually comparing the first
(8 - (pointer & 7)) bytes of each buffer rather than the first
(pointer & 7) bytes.

This particular optimization is not covered by any of the existing
benchmarks so a new benchmark, BenchmarkEqualBothUnaligned,
is provided.  The benchmark tests the case where both pointers have
the same alignment but may not be 8 byte aligned.  Results of the
new benchmark along with some of the existing benchmarks generated on
a SiFive HiFive Unmatched A00 with 16GB of RAM running Ubuntu 23.04
are presented below.

Equal/0-4                                 3.356n ± 0%    3.357n ± 0%        ~ (p=0.840 n=10)
Equal/1-4                                 63.91n ± 7%    65.97n ± 5%   +3.22% (p=0.029 n=10)
Equal/6-4                                 72.94n ± 5%    76.09n ± 4%        ~ (p=0.075 n=10)
Equal/9-4                                 84.61n ± 7%    85.83n ± 3%        ~ (p=0.315 n=10)
Equal/15-4                                103.7n ± 2%    102.9n ± 4%        ~ (p=0.739 n=10)
Equal/16-4                                89.14n ± 3%   100.40n ± 4%  +12.64% (p=0.000 n=10)
Equal/20-4                                107.8n ± 3%    106.8n ± 3%        ~ (p=0.725 n=10)
Equal/32-4                                63.95n ± 8%    67.79n ± 7%        ~ (p=0.089 n=10)
Equal/4K-4                                1.256µ ± 1%    1.254µ ± 0%        ~ (p=0.925 n=10)
Equal/4M-4                                1.231m ± 0%    1.230m ± 0%   -0.04% (p=0.011 n=10)
Equal/64M-4                               19.77m ± 0%    19.78m ± 0%        ~ (p=0.052 n=10)
EqualBothUnaligned/64_0-4                 43.70n ± 4%    44.40n ± 5%        ~ (p=0.529 n=10)
EqualBothUnaligned/64_1-4                6957.5n ± 0%    105.9n ± 1%  -98.48% (p=0.000 n=10)
EqualBothUnaligned/64_4-4                 100.1n ± 2%    101.5n ± 4%        ~ (p=0.149 n=10)
EqualBothUnaligned/64_7-4               6965.00n ± 0%    95.60n ± 4%  -98.63% (p=0.000 n=10)
EqualBothUnaligned/4096_0-4               1.233µ ± 1%    1.225µ ± 0%   -0.65% (p=0.015 n=10)
EqualBothUnaligned/4096_1-4             584.226µ ± 0%    1.277µ ± 0%  -99.78% (p=0.000 n=10)
EqualBothUnaligned/4096_4-4               1.270µ ± 1%    1.268µ ± 0%        ~ (p=0.105 n=10)
EqualBothUnaligned/4096_7-4             584.944µ ± 0%    1.266µ ± 1%  -99.78% (p=0.000 n=10)
EqualBothUnaligned/4194304_0-4            1.241m ± 0%    1.236m ± 0%   -0.38% (p=0.035 n=10)
EqualBothUnaligned/4194304_1-4          600.956m ± 0%    1.238m ± 0%  -99.79% (p=0.000 n=10)
EqualBothUnaligned/4194304_4-4            1.239m ± 0%    1.241m ± 0%   +0.22% (p=0.007 n=10)
EqualBothUnaligned/4194304_7-4          601.036m ± 0%    1.239m ± 0%  -99.79% (p=0.000 n=10)
EqualBothUnaligned/67108864_0-4           19.79m ± 0%    19.78m ± 0%        ~ (p=0.393 n=10)
EqualBothUnaligned/67108864_1-4         9616.61m ± 0%    19.82m ± 0%  -99.79% (p=0.000 n=10)
EqualBothUnaligned/67108864_4-4           19.82m ± 0%    19.82m ± 0%        ~ (p=0.971 n=10)
EqualBothUnaligned/67108864_7-4         9616.34m ± 0%    19.86m ± 0%  -99.79% (p=0.000 n=10)
geomean                                   38.38µ         7.194µ       -81.26%

Change-Id: I4caab6c3450bd7e2773426b08b70bbc37fbe4e5f
Reviewed-on: https://go-review.googlesource.com/c/go/+/500855
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
src/bytes/bytes_test.go
src/internal/bytealg/equal_riscv64.s

index 05c0090b6173fe3e58819836b4acbd66c9ee00cf..f0733edd3f447eb6d78ae1274f20f72191d2053c 100644 (file)
@@ -652,6 +652,38 @@ func bmEqual(equal func([]byte, []byte) bool) func(b *testing.B, n int) {
        }
 }
 
+func BenchmarkEqualBothUnaligned(b *testing.B) {
+       sizes := []int{64, 4 << 10}
+       if !isRaceBuilder {
+               sizes = append(sizes, []int{4 << 20, 64 << 20}...)
+       }
+       maxSize := 2 * (sizes[len(sizes)-1] + 8)
+       if len(bmbuf) < maxSize {
+               bmbuf = make([]byte, maxSize)
+       }
+
+       for _, n := range sizes {
+               for _, off := range []int{0, 1, 4, 7} {
+                       buf1 := bmbuf[off : off+n]
+                       buf2Start := (len(bmbuf) / 2) + off
+                       buf2 := bmbuf[buf2Start : buf2Start+n]
+                       buf1[n-1] = 'x'
+                       buf2[n-1] = 'x'
+                       b.Run(fmt.Sprint(n, off), func(b *testing.B) {
+                               b.SetBytes(int64(n))
+                               for i := 0; i < b.N; i++ {
+                                       eq := Equal(buf1, buf2)
+                                       if !eq {
+                                               b.Fatal("bad equal")
+                                       }
+                               }
+                       })
+                       buf1[n-1] = '\x00'
+                       buf2[n-1] = '\x00'
+               }
+       }
+}
+
 func BenchmarkIndex(b *testing.B) {
        benchBytes(b, indexSizes, func(b *testing.B, n int) {
                buf := bmbuf[0:n]
index 3834083ec94da9f28e885d5e898c1bc18ea906ac..503aac575198fb9fccfd94db132acaa611341967 100644 (file)
@@ -37,6 +37,8 @@ TEXT memequal<>(SB),NOSPLIT|NOFRAME,$0
        BEQZ    X9, loop32_check
 
        // Check one byte at a time until we reach 8 byte alignment.
+       SUB     X9, X0, X9
+       ADD     $8, X9, X9
        SUB     X9, X12, X12
 align:
        ADD     $-1, X9