// dedicated mark workers get started.
dedicatedMarkWorkersNeeded int64
+ // idleMarkWorkers is two packed int32 values in a single uint64.
+ // These two values are always updated simultaneously.
+ //
+ // The bottom int32 is the current number of idle mark workers executing.
+ //
+ // The top int32 is the maximum number of idle mark workers allowed to
+ // execute concurrently. Normally, this number is just gomaxprocs. However,
+ // during periodic GC cycles it is set to 1 because the system is idle
+ // anyway; there's no need to go full blast on all of GOMAXPROCS.
+ //
+ // The maximum number of idle mark workers is used to prevent new workers
+ // from starting, but it is not a hard maximum. It is possible (but
+ // exceedingly rare) for the current number of idle mark workers to
+ // transiently exceed the maximum. This could happen if the maximum changes
+ // just after a GC ends, and an M with no P.
+ //
+ // Note that the maximum may not be zero because idle-priority mark workers
+ // are vital to GC progress. Consider a situation in which goroutines
+ // block on the GC (such as via runtime.GOMAXPROCS) and only fractional
+ // mark workers are scheduled (e.g. GOMAXPROCS=1). Without idle-priority
+ // mark workers, the last running M might skip scheduling a fractional
+ // mark worker if its utilization goal is met, such that once it goes to
+ // sleep (because there's nothing to do), there will be nothing else to
+ // spin up a new M for the fractional worker in the future, stalling GC
+ // progress and causing a deadlock. However, idle-priority workers will
+ // *always* run when there is nothing left to do, ensuring the GC makes
+ // progress.
+ idleMarkWorkers atomic.Uint64
+
// assistWorkPerByte is the ratio of scan work to allocated
// bytes that should be performed by mutator assists. This is
// computed at the beginning of each cycle and updated every
// startCycle resets the GC controller's state and computes estimates
// for a new GC cycle. The caller must hold worldsema and the world
// must be stopped.
-func (c *gcControllerState) startCycle(markStartTime int64, procs int) {
+func (c *gcControllerState) startCycle(markStartTime int64, procs int, trigger gcTrigger) {
c.heapScanWork.Store(0)
c.stackScanWork.Store(0)
c.globalsScanWork.Store(0)
p.gcFractionalMarkTime = 0
}
+ if trigger.kind == gcTriggerTime {
+ // During a periodic GC cycle, avoid having more than
+ // one idle mark worker running at a time. We need to have
+ // at least one to ensure the GC makes progress, but more than
+ // one is unnecessary.
+ c.setMaxIdleMarkWorkers(1)
+ } else {
+ // N.B. gomaxprocs and dedicatedMarkWorkersNeeded is guaranteed not to
+ // change during a GC cycle.
+ c.setMaxIdleMarkWorkers(int32(procs) - int32(c.dedicatedMarkWorkersNeeded))
+ }
+
// Compute initial values for controls that are updated
// throughout the cycle.
c.revise()
}
}
-// logWorkTime updates mark work accounting in the controller by a duration of
-// work in nanoseconds.
+// markWorkerStop must be called whenever a mark worker stops executing.
+//
+// It updates mark work accounting in the controller by a duration of
+// work in nanoseconds and other bookkeeping.
//
// Safe to execute at any time.
-func (c *gcControllerState) logWorkTime(mode gcMarkWorkerMode, duration int64) {
+func (c *gcControllerState) markWorkerStop(mode gcMarkWorkerMode, duration int64) {
switch mode {
case gcMarkWorkerDedicatedMode:
atomic.Xaddint64(&c.dedicatedMarkTime, duration)
atomic.Xaddint64(&c.fractionalMarkTime, duration)
case gcMarkWorkerIdleMode:
atomic.Xaddint64(&c.idleMarkTime, duration)
+ c.removeIdleMarkWorker()
default:
- throw("logWorkTime: unknown mark worker mode")
+ throw("markWorkerStop: unknown mark worker mode")
}
}
func (c *piController) reset() {
c.errIntegral = 0
}
+
+// addIdleMarkWorker attempts to add a new idle mark worker.
+//
+// If this returns true, the caller must become an idle mark worker unless
+// there's no background mark worker goroutines in the pool. This case is
+// harmless because there are already background mark workers running.
+// If this returns false, the caller must NOT become an idle mark worker.
+//
+// nosplit because it may be called without a P.
+//go:nosplit
+func (c *gcControllerState) addIdleMarkWorker() bool {
+ for {
+ old := c.idleMarkWorkers.Load()
+ n, max := int32(old&uint64(^uint32(0))), int32(old>>32)
+ if n >= max {
+ // See the comment on idleMarkWorkers for why
+ // n > max is tolerated.
+ return false
+ }
+ if n < 0 {
+ print("n=", n, " max=", max, "\n")
+ throw("negative idle mark workers")
+ }
+ new := uint64(uint32(n+1)) | (uint64(max) << 32)
+ if c.idleMarkWorkers.CompareAndSwap(old, new) {
+ return true
+ }
+ }
+}
+
+// needIdleMarkWorker is a hint as to whether another idle mark worker is needed.
+//
+// The caller must still call addIdleMarkWorker to become one. This is mainly
+// useful for a quick check before an expensive operation.
+//
+// nosplit because it may be called without a P.
+//go:nosplit
+func (c *gcControllerState) needIdleMarkWorker() bool {
+ p := c.idleMarkWorkers.Load()
+ n, max := int32(p&uint64(^uint32(0))), int32(p>>32)
+ return n < max
+}
+
+// removeIdleMarkWorker must be called when an new idle mark worker stops executing.
+func (c *gcControllerState) removeIdleMarkWorker() {
+ for {
+ old := c.idleMarkWorkers.Load()
+ n, max := int32(old&uint64(^uint32(0))), int32(old>>32)
+ if n-1 < 0 {
+ print("n=", n, " max=", max, "\n")
+ throw("negative idle mark workers")
+ }
+ new := uint64(uint32(n-1)) | (uint64(max) << 32)
+ if c.idleMarkWorkers.CompareAndSwap(old, new) {
+ return
+ }
+ }
+}
+
+// setMaxIdleMarkWorkers sets the maximum number of idle mark workers allowed.
+//
+// This method is optimistic in that it does not wait for the number of
+// idle mark workers to reduce to max before returning; it assumes the workers
+// will deschedule themselves.
+func (c *gcControllerState) setMaxIdleMarkWorkers(max int32) {
+ for {
+ old := c.idleMarkWorkers.Load()
+ n := int32(old & uint64(^uint32(0)))
+ if n < 0 {
+ print("n=", n, " max=", max, "\n")
+ throw("negative idle mark workers")
+ }
+ new := uint64(uint32(n)) | (uint64(max) << 32)
+ if c.idleMarkWorkers.CompareAndSwap(old, new) {
+ return
+ }
+ }
+}
}
})
}
+
+func TestIdleMarkWorkerCount(t *testing.T) {
+ const workers = 10
+ c := NewGCController(100)
+ c.SetMaxIdleMarkWorkers(workers)
+ for i := 0; i < workers; i++ {
+ if !c.NeedIdleMarkWorker() {
+ t.Fatalf("expected to need idle mark workers: i=%d", i)
+ }
+ if !c.AddIdleMarkWorker() {
+ t.Fatalf("expected to be able to add an idle mark worker: i=%d", i)
+ }
+ }
+ if c.NeedIdleMarkWorker() {
+ t.Fatalf("expected to not need idle mark workers")
+ }
+ if c.AddIdleMarkWorker() {
+ t.Fatalf("expected to not be able to add an idle mark worker")
+ }
+ for i := 0; i < workers; i++ {
+ c.RemoveIdleMarkWorker()
+ if !c.NeedIdleMarkWorker() {
+ t.Fatalf("expected to need idle mark workers after removal: i=%d", i)
+ }
+ }
+ for i := 0; i < workers-1; i++ {
+ if !c.AddIdleMarkWorker() {
+ t.Fatalf("expected to be able to add idle mark workers after adding again: i=%d", i)
+ }
+ }
+ for i := 0; i < 10; i++ {
+ if !c.AddIdleMarkWorker() {
+ t.Fatalf("expected to be able to add idle mark workers interleaved: i=%d", i)
+ }
+ if c.AddIdleMarkWorker() {
+ t.Fatalf("expected to not be able to add idle mark workers interleaved: i=%d", i)
+ }
+ c.RemoveIdleMarkWorker()
+ }
+ // Support the max being below the count.
+ c.SetMaxIdleMarkWorkers(0)
+ if c.NeedIdleMarkWorker() {
+ t.Fatalf("expected to not need idle mark workers after capacity set to 0")
+ }
+ if c.AddIdleMarkWorker() {
+ t.Fatalf("expected to not be able to add idle mark workers after capacity set to 0")
+ }
+ for i := 0; i < workers-1; i++ {
+ c.RemoveIdleMarkWorker()
+ }
+ if c.NeedIdleMarkWorker() {
+ t.Fatalf("expected to not need idle mark workers after capacity set to 0")
+ }
+ if c.AddIdleMarkWorker() {
+ t.Fatalf("expected to not be able to add idle mark workers after capacity set to 0")
+ }
+ c.SetMaxIdleMarkWorkers(1)
+ if !c.NeedIdleMarkWorker() {
+ t.Fatalf("expected to need idle mark workers after capacity set to 1")
+ }
+ if !c.AddIdleMarkWorker() {
+ t.Fatalf("expected to be able to add idle mark workers after capacity set to 1")
+ }
+}
// We have nothing to do.
//
// If we're in the GC mark phase, can safely scan and blacken objects,
- // and have work to do, run idle-time marking rather than give up the
- // P.
- if gcBlackenEnabled != 0 && gcMarkWorkAvailable(_p_) {
+ // and have work to do, run idle-time marking rather than give up the P.
+ if gcBlackenEnabled != 0 && gcMarkWorkAvailable(_p_) && gcController.addIdleMarkWorker() {
node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop())
if node != nil {
_p_.gcMarkWorkerMode = gcMarkWorkerIdleMode
}
return gp, false
}
+ gcController.removeIdleMarkWorker()
}
// wasm only:
// returned. The returned P has not been wired yet.
func checkIdleGCNoP() (*p, *g) {
// N.B. Since we have no P, gcBlackenEnabled may change at any time; we
- // must check again after acquiring a P.
- if atomic.Load(&gcBlackenEnabled) == 0 {
+ // must check again after acquiring a P. As an optimization, we also check
+ // if an idle mark worker is needed at all. This is OK here, because if we
+ // observe that one isn't needed, at least one is currently running. Even if
+ // it stops running, its own journey into the scheduler should schedule it
+ // again, if need be (at which point, this check will pass, if relevant).
+ if atomic.Load(&gcBlackenEnabled) == 0 || !gcController.needIdleMarkWorker() {
return nil, nil
}
if !gcMarkWorkAvailable(nil) {
return nil, nil
}
- // Now that we own a P, gcBlackenEnabled can't change (as it requires
- // STW).
- if gcBlackenEnabled == 0 {
+ // Now that we own a P, gcBlackenEnabled can't change (as it requires STW).
+ if gcBlackenEnabled == 0 || !gcController.addIdleMarkWorker() {
pidleput(pp)
unlock(&sched.lock)
return nil, nil
if node == nil {
pidleput(pp)
unlock(&sched.lock)
+ gcController.removeIdleMarkWorker()
return nil, nil
}