]> Cypherpunks.ru repositories - gostls13.git/commitdiff
utf16: reduce utf16.Decode allocations
authorqmuntal <quimmuntal@gmail.com>
Fri, 25 Nov 2022 13:22:36 +0000 (14:22 +0100)
committerQuim Muntal <quimmuntal@gmail.com>
Mon, 23 Jan 2023 20:59:01 +0000 (20:59 +0000)
This CL avoids allocating in utf16.Decode for code point sequences
with less than 64 elements. It does so by splitting the function in two,
one that can be inlined that preallocates a buffer and the other that
does the heavy-lifting.

The mid-stack inliner will allocate the buffer in the caller stack,
and in many cases this will be enough to avoid the allocation.

unicode/utf16 benchmarks:

name                         old time/op    new time/op    delta
DecodeValidASCII-12            60.1ns ± 3%    16.0ns ±20%   -73.40%  (p=0.000 n=8+10)
DecodeValidJapaneseChars-12    61.3ns ±10%    14.9ns ±39%   -75.71%  (p=0.000 n=10+10)

name                         old alloc/op   new alloc/op   delta
DecodeValidASCII-12             48.0B ± 0%      0.0B       -100.00%  (p=0.000 n=10+10)
DecodeValidJapaneseChars-12     48.0B ± 0%      0.0B       -100.00%  (p=0.000 n=10+10)

name                         old allocs/op  new allocs/op  delta
DecodeValidASCII-12              1.00 ± 0%      0.00       -100.00%  (p=0.000 n=10+10)
DecodeValidJapaneseChars-12      1.00 ± 0%      0.00       -100.00%  (p=0.000 n=10+10)

I've also benchmarked os.File.ReadDir with this change applied
to demonstrate that it does make a difference in the caller site, in this
case via syscall.UTF16ToString:

name        old time/op    new time/op    delta
ReadDir-12     592µs ± 8%     620µs ±16%     ~     (p=0.280 n=10+10)

name        old alloc/op   new alloc/op   delta
ReadDir-12    30.4kB ± 0%    22.4kB ± 0%  -26.10%  (p=0.000 n=8+10)

name        old allocs/op  new allocs/op  delta
ReadDir-12       402 ± 0%       272 ± 0%  -32.34%  (p=0.000 n=10+10)

Change-Id: I65cf5caa3fd3b3a466c0ed837a50a96e975bbe6b
Reviewed-on: https://go-review.googlesource.com/c/go/+/453415
Reviewed-by: Damien Neil <dneil@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Alex Brainman <alex.brainman@gmail.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Quim Muntal <quimmuntal@gmail.com>

src/cmd/compile/internal/test/inl_test.go
src/unicode/utf16/utf16.go
src/unicode/utf16/utf16_test.go

index 201f5773e97669f2e2aed9ca680d367f9a9ec30d..e59104df531c38abd08023bb7829634547ef1eda 100644 (file)
@@ -123,6 +123,9 @@ func TestIntendedInlining(t *testing.T) {
                        "AppendRune",
                        "ValidRune",
                },
+               "unicode/utf16": {
+                       "Decode",
+               },
                "reflect": {
                        "Value.Bool",
                        "Value.Bytes",
index 38d8be6060293ee01986cc4bf01a2d0eff201a44..1c6d2c66c30c40f14bc20266468da429563fbcac 100644 (file)
@@ -103,23 +103,31 @@ func AppendRune(a []uint16, r rune) []uint16 {
 // Decode returns the Unicode code point sequence represented
 // by the UTF-16 encoding s.
 func Decode(s []uint16) []rune {
-       a := make([]rune, len(s))
-       n := 0
+       // Preallocate capacity to hold up to 64 runes.
+       // Decode inlines, so the allocation can live on the stack.
+       buf := make([]rune, 0, 64)
+       return decode(s, buf)
+}
+
+// decode appends to buf the Unicode code point sequence represented
+// by the UTF-16 encoding s and return the extended buffer.
+func decode(s []uint16, buf []rune) []rune {
        for i := 0; i < len(s); i++ {
+               var ar rune
                switch r := s[i]; {
                case r < surr1, surr3 <= r:
                        // normal rune
-                       a[n] = rune(r)
+                       ar = rune(r)
                case surr1 <= r && r < surr2 && i+1 < len(s) &&
                        surr2 <= s[i+1] && s[i+1] < surr3:
                        // valid surrogate sequence
-                       a[n] = DecodeRune(rune(r), rune(s[i+1]))
+                       ar = DecodeRune(rune(r), rune(s[i+1]))
                        i++
                default:
                        // invalid surrogate sequence
-                       a[n] = replacementChar
+                       ar = replacementChar
                }
-               n++
+               buf = append(buf, ar)
        }
-       return a[:n]
+       return buf
 }
index be339b1fdf137b54a7097a4295c8a65caefce959..a5a503d3874bb214a6733582c4e3d303b156d5c5 100644 (file)
@@ -5,6 +5,7 @@
 package utf16_test
 
 import (
+       "internal/testenv"
        "reflect"
        "testing"
        "unicode"
@@ -103,6 +104,22 @@ var decodeTests = []decodeTest{
        {[]uint16{0xdfff}, []rune{0xfffd}},
 }
 
+func TestAllocationsDecode(t *testing.T) {
+       testenv.SkipIfOptimizationOff(t)
+
+       for _, tt := range decodeTests {
+               allocs := testing.AllocsPerRun(10, func() {
+                       out := Decode(tt.in)
+                       if out == nil {
+                               t.Errorf("Decode(%x) = nil", tt.in)
+                       }
+               })
+               if allocs > 0 {
+                       t.Errorf("Decode allocated %v times", allocs)
+               }
+       }
+}
+
 func TestDecode(t *testing.T) {
        for _, tt := range decodeTests {
                out := Decode(tt.in)