]> Cypherpunks.ru repositories - gostls13.git/commitdiff
runtime: make alloc headers footers instead
authorMichael Anthony Knyszek <mknyszek@google.com>
Tue, 31 Oct 2023 04:50:26 +0000 (04:50 +0000)
committerMichael Knyszek <mknyszek@google.com>
Thu, 9 Nov 2023 19:58:18 +0000 (19:58 +0000)
The previous CL in this series (CL 437955) adds the allocation headers
experiment. However, this experiment puts the headers at the beginning
of each allocation, which decreases the default allocator alignment that
users can rely upon. Historically, Go's memory allocator has implicitly
provided 16-byte alignment (except for sizes where it doesn't make
sense, like 8 or 24 bytes), so it's not unlikely that users are
depending on it. It also complicates other changes that want higher
alignment. For example, the sync/atomic.Uint64Pair proposal would
(hypothetically; it's not yet accepted) introduce a type with 16-byte
alignment. The malloc fast path will require extra code to consider
alignment and will waste memory for any value containing such a type.

This change moves the allocation header to the end of the span's
allocation slot instead of the beginning. This means worse locality for
the GC when scanning, but it's still an overall win. It also means that
objects will still have the 16-byte alignment we've provided thus far.

This is broken out in a separate change just becauase it ended up that
way during development. But I've chosen to leave it this way in case we
want to try and move allocation headers to the front of objects again.

Below are the benchmark results of this CL series, comparing the
performance of this CL with GOEXPERIMENT=allocheaders vs. without this
CL series.

name                  old time/op            new time/op            delta
BiogoIgor                        12.5s ± 0%             12.4s ± 2%    ~     (p=0.079 n=9+10)
BiogoKrishna                     12.8s ±10%             12.4s ±10%    ~     (p=0.182 n=9+10)
BleveIndexBatch100               4.54s ± 3%             4.60s ± 3%    ~     (p=0.050 n=9+9)
EtcdPut                         21.1ms ± 2%            21.3ms ± 4%    ~     (p=0.669 n=7+10)
EtcdSTM                          107ms ± 3%             108ms ± 2%    ~     (p=0.497 n=9+10)
GoBuildKubelet                   34.1s ± 3%             33.1s ± 2%  -3.08%  (p=0.000 n=10+10)
GoBuildKubeletLink               7.94s ± 2%             7.95s ± 2%    ~     (p=0.631 n=10+10)
GoBuildIstioctl                  33.2s ± 1%             31.7s ± 0%  -4.37%  (p=0.000 n=9+9)
GoBuildIstioctlLink              8.07s ± 1%             8.05s ± 1%    ~     (p=0.356 n=9+10)
GoBuildFrontend                  12.1s ± 0%             11.5s ± 1%  -4.43%  (p=0.000 n=8+10)
GoBuildFrontendLink              1.20s ± 2%             1.20s ± 2%    ~     (p=0.905 n=9+10)
GopherLuaKNucleotide             19.9s ± 0%             19.5s ± 1%  -1.95%  (p=0.000 n=9+10)
MarkdownRenderXHTML              194ms ± 5%             194ms ± 2%    ~     (p=0.931 n=9+9)
Tile38QueryLoad                  518µs ± 1%             508µs ± 1%  -1.93%  (p=0.000 n=9+8)

name                  old average-RSS-bytes  new average-RSS-bytes  delta
BiogoIgor                       66.2MB ± 3%            65.6MB ± 1%    ~     (p=0.156 n=10+9)
BiogoKrishna                    4.34GB ± 2%            4.34GB ± 1%    ~     (p=0.315 n=10+9)
BleveIndexBatch100               189MB ± 3%             186MB ± 3%    ~     (p=0.052 n=10+10)
EtcdPut                          105MB ± 5%             107MB ± 6%    ~     (p=0.579 n=10+10)
EtcdSTM                         92.1MB ± 5%            93.2MB ± 4%    ~     (p=0.353 n=10+10)
GoBuildKubelet                  2.07GB ± 1%            2.07GB ± 1%    ~     (p=0.436 n=10+10)
GoBuildIstioctl                 1.44GB ± 1%            1.46GB ± 1%  +0.96%  (p=0.001 n=10+10)
GoBuildFrontend                  522MB ± 1%             512MB ± 2%  -1.98%  (p=0.000 n=10+10)
GopherLuaKNucleotide            37.4MB ± 5%            36.4MB ± 4%  -2.53%  (p=0.035 n=10+10)
MarkdownRenderXHTML             21.2MB ± 1%            20.9MB ± 3%  -1.53%  (p=0.003 n=8+10)
Tile38QueryLoad                 6.39GB ± 2%            6.24GB ± 2%  -2.40%  (p=0.000 n=10+10)

name                  old peak-RSS-bytes     new peak-RSS-bytes     delta
BiogoIgor                       88.5MB ± 4%            88.4MB ± 3%    ~     (p=0.971 n=10+10)
BiogoKrishna                    4.48GB ± 0%            4.42GB ± 0%  -1.49%  (p=0.000 n=10+10)
BleveIndexBatch100               268MB ± 3%             265MB ± 4%    ~     (p=0.315 n=9+10)
EtcdPut                          147MB ± 9%             146MB ± 5%    ~     (p=0.853 n=10+10)
EtcdSTM                          119MB ± 6%             120MB ± 5%    ~     (p=0.796 n=10+10)
GopherLuaKNucleotide            43.1MB ±17%            40.7MB ±12%    ~     (p=0.075 n=10+10)
MarkdownRenderXHTML             21.2MB ± 1%            21.1MB ± 3%    ~     (p=0.511 n=9+10)
Tile38QueryLoad                 6.65GB ± 4%            6.52GB ± 2%  -1.93%  (p=0.009 n=10+10)

name                  old peak-VM-bytes      new peak-VM-bytes      delta
BiogoIgor                       1.33GB ± 0%            1.33GB ± 0%  -0.16%  (p=0.000 n=10+10)
BiogoKrishna                    5.77GB ± 0%            5.69GB ± 0%  -1.23%  (p=0.000 n=10+10)
BleveIndexBatch100              2.62GB ± 0%            2.61GB ± 0%  -0.13%  (p=0.000 n=7+10)
EtcdPut                         12.1GB ± 0%            12.1GB ± 0%    ~     (p=0.160 n=8+10)
EtcdSTM                         12.1GB ± 0%            12.1GB ± 0%  -0.02%  (p=0.000 n=10+10)
GopherLuaKNucleotide            1.26GB ± 0%            1.26GB ± 0%  -0.09%  (p=0.000 n=10+10)
MarkdownRenderXHTML             1.26GB ± 0%            1.26GB ± 0%  -0.08%  (p=0.000 n=10+10)
Tile38QueryLoad                 7.89GB ± 4%            7.76GB ± 1%  -1.70%  (p=0.008 n=10+8)

name                  old p50-latency-ns     new p50-latency-ns     delta
EtcdPut                          20.1M ± 5%             20.2M ± 4%    ~     (p=0.529 n=10+10)
EtcdSTM                          79.8M ± 4%             79.9M ± 4%    ~     (p=0.971 n=10+10)
Tile38QueryLoad                   215k ± 1%              210k ± 3%  -2.04%  (p=0.021 n=8+10)

name                  old p90-latency-ns     new p90-latency-ns     delta
EtcdPut                          31.9M ± 6%             32.0M ± 7%    ~     (p=0.780 n=9+10)
EtcdSTM                           220M ± 6%              220M ± 2%    ~     (p=1.000 n=10+10)
Tile38QueryLoad                   622k ± 2%              646k ± 2%  +3.83%  (p=0.000 n=10+10)

name                  old p99-latency-ns     new p99-latency-ns     delta
EtcdPut                          47.6M ±32%             51.4M ±28%    ~     (p=0.529 n=10+10)
EtcdSTM                           452M ± 2%              457M ± 2%    ~     (p=0.182 n=9+10)
Tile38QueryLoad                  5.04M ± 2%             4.91M ± 3%  -2.56%  (p=0.001 n=9+9)

name                  old ops/s              new ops/s              delta
EtcdPut                          46.1k ± 2%             45.7k ± 4%    ~     (p=0.475 n=7+10)
EtcdSTM                          9.18k ± 5%             9.20k ± 3%    ~     (p=0.971 n=10+10)
Tile38QueryLoad                  17.4k ± 1%             17.7k ± 1%  +1.97%  (p=0.000 n=9+8)

Change-Id: I637f48fb9e8c181912db785ae9186d7f16769870
Reviewed-on: https://go-review.googlesource.com/c/go/+/537886
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
src/runtime/malloc.go
src/runtime/mbitmap_allocheaders.go
src/runtime/mfinal.go

index beff9043ece3290815404d4fd151b35b173bfac2..d9b4112ded24ca95e31bd45375bf70006ee6984e 100644 (file)
@@ -1149,8 +1149,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
                                memclrNoHeapPointers(x, size)
                        }
                        if goexperiment.AllocHeaders && hasHeader {
-                               header = (**_type)(x)
-                               x = add(x, mallocHeaderSize)
+                               header = (**_type)(unsafe.Pointer(uintptr(v) + size - mallocHeaderSize))
                                size -= mallocHeaderSize
                        }
                }
index 8a6b6a6564275567d58d8e9ad886b72baff4e0d3..9370d50b727058c0cb6eff9ccf0ac3d6c5472c41 100644 (file)
@@ -48,9 +48,9 @@
 // is zeroed, so the GC just observes nil pointers.
 // Note that this "tiled" bitmap isn't stored anywhere; it is generated on-the-fly.
 //
-// For objects without their own span, the type metadata is stored in the first
-// word before the object at the beginning of the allocation slot. For objects
-// with their own span, the type metadata is stored in the mspan.
+// For objects without their own span, the type metadata is stored in the last
+// word of the allocation slot. For objects with their own span, the type metadata
+// is stored in the mspan.
 //
 // The bitmap for small unallocated objects in scannable spans is not maintained
 // (can be junk).
@@ -191,9 +191,8 @@ func (span *mspan) typePointersOfUnchecked(addr uintptr) typePointers {
        // All of these objects have a header.
        var typ *_type
        if spc.sizeclass() != 0 {
-               // Pull the allocation header from the first word of the object.
-               typ = *(**_type)(unsafe.Pointer(addr))
-               addr += mallocHeaderSize
+               // Pull the allocation header from the last word of the object.
+               typ = *(**_type)(unsafe.Pointer(addr + span.elemsize - mallocHeaderSize))
        } else {
                typ = span.largeType
        }
index be501e6fcad4e2e6fe37e5306b7ba894cb6ce955..18cd93e77e96fcb80b64c33c7ae9c171a3074f38 100644 (file)
@@ -9,7 +9,6 @@ package runtime
 import (
        "internal/abi"
        "internal/goarch"
-       "internal/goexperiment"
        "runtime/internal/atomic"
        "runtime/internal/sys"
        "unsafe"
@@ -411,7 +410,7 @@ func SetFinalizer(obj any, finalizer any) {
        }
 
        // find the containing object
-       base, span, _ := findObject(uintptr(e.data), 0, 0)
+       base, _, _ := findObject(uintptr(e.data), 0, 0)
 
        if base == 0 {
                if isGoPointerWithoutSpan(e.data) {
@@ -420,11 +419,6 @@ func SetFinalizer(obj any, finalizer any) {
                throw("runtime.SetFinalizer: pointer not in allocated block")
        }
 
-       // Move base forward if we've got an allocation header.
-       if goexperiment.AllocHeaders && !span.spanclass.noscan() && !heapBitsInSpan(span.elemsize) && span.spanclass.sizeclass() != 0 {
-               base += mallocHeaderSize
-       }
-
        if uintptr(e.data) != base {
                // As an implementation detail we allow to set finalizers for an inner byte
                // of an object if it could come from tiny alloc (see mallocgc for details).