runtime: tweak bgsweep "low-priority" heuristic

author Michael Anthony Knyszek <mknyszek@google.com>
Thu, 8 Sep 2022 20:59:02 +0000 (20:59 +0000)
committer Gopher Robot <gobot@golang.org>
Fri, 16 Sep 2022 16:33:11 +0000 (16:33 +0000)
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go

index 0e2cfdc9c4ee856dd83957e1b830543d7c64c4be..3df9e5f392967c6ec48fb09e52f5e2fd6fa05297 100644 (file)
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -279,12 +279,34 @@ func bgsweep(c chan int) {
         goparkunlock(&sweep.lock, waitReasonGCSweepWait, traceEvGoBlock, 1)
  
         for {
+               // bgsweep attempts to be a "low priority" goroutine by intentionally
+               // yielding time. It's OK if it doesn't run, because goroutines allocating
+               // memory will sweep and ensure that all spans are swept before the next
+               // GC cycle. We really only want to run when we're idle.
+               //
+               // However, calling Gosched after each span swept produces a tremendous
+               // amount of tracing events, sometimes up to 50% of events in a trace. It's
+               // also inefficient to call into the scheduler so much because sweeping a
+               // single span is in general a very fast operation, taking as little as 30 ns
+               // on modern hardware. (See #54767.)
+               //
+               // As a result, bgsweep sweeps in batches, and only calls into the scheduler
+               // at the end of every batch. Furthermore, it only yields its time if there
+               // isn't spare idle time available on other cores. If there's available idle
+               // time, helping to sweep can reduce allocation latencies by getting ahead of
+               // the proportional sweeper and having spans ready to go for allocation.
+               const sweepBatchSize = 10
+               nSwept := 0
                 for sweepone() != ^uintptr(0) {
                         sweep.nbgsweep++
-                       Gosched()
+                       nSwept++
+                       if nSwept%sweepBatchSize == 0 {
+                               goschedIfBusy()
+                       }
                 }
                 for freeSomeWbufs(true) {
-                       Gosched()
+                       // N.B. freeSomeWbufs is already batched internally.
+                       goschedIfBusy()
                 }
                 lock(&sweep.lock)
                 if !isSweepDone() {
diff --git a/src/runtime/proc.go b/src/runtime/proc.go

index d7a8049f37c7209f8cf9ac112c8c7cc64e7dd446..2986a306094db521e82f311d95ae1adb64212316 100644 (file)
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -326,6 +326,18 @@ func goschedguarded() {
         mcall(goschedguarded_m)
  }
  
+// goschedIfBusy yields the processor like Gosched, but only when there are
+// no idle Ps. If any P is idle, there is freely available idle time, so it
+// returns immediately without calling into the scheduler.
+//
+//go:nosplit
+func goschedIfBusy() {
+       if sched.npidle.Load() > 0 {
+               return
+       }
+       mcall(gosched_m)
+}
+
  // Puts the current goroutine into a waiting state and calls unlockf on the
  // system stack.
  //
author	Michael Anthony Knyszek <mknyszek@google.com>
	Thu, 8 Sep 2022 20:59:02 +0000 (20:59 +0000)
committer	Gopher Robot <gobot@golang.org>
	Fri, 16 Sep 2022 16:33:11 +0000 (16:33 +0000)
src/runtime/mgcsweep.go		patch | blob | history
src/runtime/proc.go		patch | blob | history