1 // Copyright 2022 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
7 import "runtime/internal/atomic"
9 // gcCPULimiter is a mechanism to limit GC CPU utilization in situations
10 // where it might become excessive and inhibit application progress (e.g. a death spiral).
13 // The core of the limiter is a leaky bucket mechanism that fills with GC
14 // CPU time and drains with mutator time. Because the bucket fills and
15 // drains with time directly (i.e. without any weighting), this effectively
16 // sets a very conservative limit of 50%. This limit could be enforced directly;
17 // however, the purpose of the bucket is to accommodate spikes in GC CPU
18 // utilization without hurting throughput.
20 // Note that the bucket in the leaky bucket mechanism can never go negative,
21 // so the GC never gets credit for a lot of CPU time spent without the GC
22 // running. This is intentional, as an application that stays idle for, say,
23 // an entire day, could build up enough credit to fail to prevent a death
24 // spiral the following day. The bucket's capacity is the GC's only leeway.
26 // The capacity thus also sets the window the limiter considers. For example,
27 // if the capacity of the bucket is 1 cpu-second, then the limiter will not
28 // kick in until at least 1 full cpu-second in the last 2 cpu-second window
29 // is spent on GC CPU time.
30 var gcCPULimiter gcCPULimiterState
// gcCPULimiterState is the state of the GC CPU limiter: the leaky bucket
// itself plus the pools of assist and idle time that are folded into the
// bucket on each update.
32 type gcCPULimiterState struct {
43 // overflow is the cumulative amount of GC CPU time that we tried to fill the
44 // bucket with but exceeded its capacity.
47 // gcEnabled is an internal copy of gcBlackenEnabled that determines
48 // whether the limiter tracks total assist time.
50 // gcBlackenEnabled isn't used directly so as to keep this structure unit-testable.
54 // transitioning is true when the GC is in a STW and transitioning between
55 // the mark and sweep phases.
58 // assistTimePool is the accumulated assist time since the last update.
59 assistTimePool atomic.Int64
61 // idleMarkTimePool is the accumulated idle mark time since the last update.
// NOTE(review): no method visible in this file reads or writes this pool;
// limiterEvent.stop adds idle mark work to idleTimePool via addIdleTime
// instead. Confirm whether this field is still needed.
62 idleMarkTimePool atomic.Int64
64 // idleTimePool is the accumulated time Ps spent on the idle list since the last update.
// limiterEvent.stop also adds idle mark worker time to this pool.
65 idleTimePool atomic.Int64
67 // lastUpdate is the nanotime timestamp of the last time update was called.
69 // Updated under lock, but may be read concurrently.
70 lastUpdate atomic.Int64
72 // lastEnabledCycle is the GC cycle that last had the limiter enabled.
// It is stored as memstats.numgc + 1 whenever the limiter turns on.
73 lastEnabledCycle atomic.Uint32
75 // nprocs is an internal copy of gomaxprocs, used to determine total available CPU time.
78 // gomaxprocs isn't used directly so as to keep this structure unit-testable.
81 // test indicates whether this instance of the struct was made for testing purposes.
85 // limiting returns true if the CPU limiter is currently enabled, meaning the Go GC
86 // should take action to limit CPU utilization.
// It reports the current value of l.enabled, which accumulate and
// resetCapacity update.
88 // It is safe to call concurrently with other operations.
89 func (l *gcCPULimiterState) limiting() bool {
90 return l.enabled.Load()
93 // startGCTransition notifies the limiter of a GC transition.
// enableGC is the GC state being transitioned to. It must differ from the
// limiter's current gcEnabled value, otherwise this throws.
95 // This call takes ownership of the limiter and disables all other means of
96 // updating the limiter. Release ownership by calling finishGCTransition.
98 // It is safe to call concurrently with other operations.
99 func (l *gcCPULimiterState) startGCTransition(enableGC bool, now int64) {
101 // This must happen during a STW, so we can't fail to acquire the lock.
102 // If we did, something went wrong. Throw.
103 throw("failed to acquire lock to start a GC transition")
105 if l.gcEnabled == enableGC {
106 throw("transitioning GC to the same state as before?")
108 // Flush whatever was left between the last update and now.
// Record the new GC state and mark the transition as in progress.
// finishGCTransition checks and clears transitioning.
110 l.gcEnabled = enableGC
111 l.transitioning = true
112 // N.B. finishGCTransition releases the lock.
114 // We don't release here to increase the chance that if there's a failure
115 // to finish the transition, that we throw on failing to acquire the lock.
118 // finishGCTransition notifies the limiter that the GC transition is complete
119 // and releases ownership of it. It also accumulates STW time in the bucket.
120 // now must be the timestamp from the end of the STW pause.
// Throws if no transition was started with startGCTransition.
121 func (l *gcCPULimiterState) finishGCTransition(now int64) {
122 if !l.transitioning {
123 throw("finishGCTransition called without starting one?")
125 // Count the full nprocs set of CPU time because the world is stopped
126 // between startGCTransition and finishGCTransition. Even though the GC
127 // isn't running on all CPUs, it is preventing user code from doing so,
128 // so it might as well be.
// Only accumulate when now is not older than the last update, so the
// duration charged is never negative. The whole interval is charged as GC
// time, with zero mutator time.
129 if lastUpdate := l.lastUpdate.Load(); now >= lastUpdate {
130 l.accumulate(0, (now-lastUpdate)*int64(l.nprocs))
132 l.lastUpdate.Store(now)
133 l.transitioning = false
137 // gcCPULimiterUpdatePeriod dictates the maximum amount of wall-clock time
138 // we can go before updating the limiter.
// It is expressed in nanoseconds, matching the nanotime timestamps it is
// compared against in needUpdate.
139 const gcCPULimiterUpdatePeriod = 10e6 // 10ms
141 // needUpdate returns true if the limiter's maximum update period has been
142 // exceeded, and so would benefit from an update.
// now is compared against the timestamp of the last update; the result is
// true only when more than gcCPULimiterUpdatePeriod has elapsed since then.
143 func (l *gcCPULimiterState) needUpdate(now int64) bool {
144 return now-l.lastUpdate.Load() > gcCPULimiterUpdatePeriod
147 // addAssistTime notifies the limiter of additional assist time. It will be
148 // included in the next update.
// The time is only added to assistTimePool here; updateLocked drains the
// pool and counts it as GC time.
149 func (l *gcCPULimiterState) addAssistTime(t int64) {
150 l.assistTimePool.Add(t)
153 // addIdleTime notifies the limiter of additional time a P spent on the idle list. It will be
154 // subtracted from the total CPU time in the next update.
// The time is only added to idleTimePool here; updateLocked drains the pool.
// limiterEvent.stop also reports idle mark worker time through this method.
155 func (l *gcCPULimiterState) addIdleTime(t int64) {
156 l.idleTimePool.Add(t)
159 // update updates the bucket given runtime-specific information. now is the
160 // current monotonic time in nanoseconds.
162 // This is safe to call concurrently with other operations, except *GCTransition.
163 func (l *gcCPULimiterState) update(now int64) {
165 // We failed to acquire the lock, which means something else is currently
166 // updating. Just drop our update, the next one to update will include
167 // our total assist time.
// An update must never overlap a GC transition: startGCTransition keeps
// ownership of the limiter until finishGCTransition releases it.
171 throw("update during transition")
177 // updateLocked is the implementation of update. l.lock must be held.
// It takes the CPU time elapsed since the last update (wall-clock time
// scaled by nprocs), works out how much of it was GC time and how much was
// idle, and feeds the result to accumulate.
178 func (l *gcCPULimiterState) updateLocked(now int64) {
179 lastUpdate := l.lastUpdate.Load()
180 if now < lastUpdate {
181 // Defensively avoid overflow. This isn't even the latest update anyway.
184 windowTotalTime := (now - lastUpdate) * int64(l.nprocs)
185 l.lastUpdate.Store(now)
187 // Drain the pool of assist time.
// The amount read is subtracted rather than the pool being reset to zero,
// so time added concurrently after the Load is kept for the next update.
188 assistTime := l.assistTimePool.Load()
190 l.assistTimePool.Add(-assistTime)
193 // Drain the pool of idle time.
194 idleTime := l.idleTimePool.Load()
196 l.idleTimePool.Add(-idleTime)
200 // Consume time from in-flight events. Make sure we're not preemptible so allp can't change.
202 // The reason we do this instead of just waiting for those events to finish and push updates
203 // is to ensure that all the time we're accounting for happened sometime between lastUpdate
204 // and now. This dramatically simplifies reasoning about the limiter because we're not at
205 // risk of more time being accounted for in this window than actually happened in this window,
206 // leading to all sorts of weird transient behavior.
208 for _, pp := range allp {
209 typ, duration := pp.limiterEvent.consume(now)
211 case limiterEventIdleMarkWork:
213 case limiterEventIdle:
215 sched.idleTime.Add(duration)
216 case limiterEventMarkAssist:
218 case limiterEventScavengeAssist:
219 assistTime += duration
220 case limiterEventNone:
223 throw("invalid limiter event type found")
229 // Compute total GC time.
230 windowGCTime := assistTime
232 windowGCTime += int64(float64(windowTotalTime) * gcBackgroundUtilization)
235 // Subtract out all idle time from the total time. Do this after computing
236 // GC time, because the background utilization is dependent on the *real*
237 // total time, not the total time after idle time is subtracted.
239 // Idle time is counted as any time that a P is on the P idle list plus idle mark
240 // time. Idle mark workers soak up time that the application spends idle.
242 // On a heavily undersubscribed system, any additional idle time can skew GC CPU
243 // utilization, because the GC might be executing continuously and thrashing,
244 // yet the CPU utilization with respect to GOMAXPROCS will be quite low, so
245 // the limiter fails to turn on. By subtracting idle time, we're removing time that
246 // we know the application was idle, giving a more accurate picture of whether
247 // the GC is thrashing.
249 // Note that this can cause the limiter to turn on even if it's not needed. For
250 // instance, on a system with 32 Ps but only 1 running goroutine, each GC will have
251 // 8 dedicated GC workers. Assuming the GC cycle is half mark phase and half sweep
252 // phase, then the GC CPU utilization over that cycle, with idle time removed, will
253 // be 8/(8+2) = 80%. Even though the limiter turns on, though, assist should be
254 // unnecessary, as the GC has way more CPU time to outpace the 1 goroutine that's running.
256 windowTotalTime -= idleTime
// Whatever is left of the window after GC time is treated as mutator time.
258 l.accumulate(windowTotalTime-windowGCTime, windowGCTime)
261 // accumulate adds time to the bucket and signals whether the limiter is enabled.
// gcTime fills the bucket and mutatorTime drains it; only the difference
// between the two is applied.
263 // This is an internal function that deals just with the bucket. Prefer update.
264 // l.lock must be held.
265 func (l *gcCPULimiterState) accumulate(mutatorTime, gcTime int64) {
// headroom is how much more GC time the bucket can absorb. enabled records
// whether the bucket was already full on entry.
266 headroom := l.bucket.capacity - l.bucket.fill
267 enabled := headroom == 0
269 // Let's be careful about three things here:
270 // 1. The addition and subtraction, for the invariants.
// 2. Overflow (NOTE(review): presumably of the unsigned bucket arithmetic
// below; confirm).
272 // 3. Excessive mutation of l.enabled, which is accessed
273 // by all assists, potentially more than once.
274 change := gcTime - mutatorTime
276 // Handle limiting case.
// Net GC time that does not fit in the remaining headroom: the excess is
// recorded in overflow, the bucket is pinned at capacity, and the limiter is
// turned on for the cycle numbered memstats.numgc + 1.
277 if change > 0 && headroom <= uint64(change) {
278 l.overflow += uint64(change) - headroom
279 l.bucket.fill = l.bucket.capacity
281 l.enabled.Store(true)
282 l.lastEnabledCycle.Store(memstats.numgc + 1)
287 // Handle non-limiting cases.
// A net drain at least as large as the current fill is handled separately
// from the plain subtraction below.
288 if change < 0 && l.bucket.fill <= uint64(-change) {
293 l.bucket.fill -= uint64(-change)
// Only write l.enabled when the bucket was full on entry and actually
// changed, to avoid needless stores to a flag that assists read.
295 if change != 0 && enabled {
296 l.enabled.Store(false)
300 // tryLock attempts to lock l. Returns true on success.
// It never blocks: the lock is taken only if it is currently free, by
// swapping l.lock from 0 to 1.
301 func (l *gcCPULimiterState) tryLock() bool {
302 return l.lock.CompareAndSwap(0, 1)
305 // unlock releases the lock on l. Must be called if tryLock returns true.
// It resets l.lock to 0 and throws on a double unlock.
306 func (l *gcCPULimiterState) unlock() {
307 old := l.lock.Swap(0)
309 throw("double unlock")
313 // capacityPerProc is the limiter's bucket capacity for each P in GOMAXPROCS.
// resetCapacity multiplies it by the number of Ps to size the bucket.
314 const capacityPerProc = 1e9 // 1 second in nanoseconds
316 // resetCapacity updates the capacity based on GOMAXPROCS. Must not be called
317 // while the GC is enabled.
// nprocs is the new number of Ps; the bucket capacity becomes
// nprocs * capacityPerProc.
319 // It is safe to call concurrently with other operations.
320 func (l *gcCPULimiterState) resetCapacity(now int64, nprocs int32) {
322 // This must happen during a STW, so we can't fail to acquire the lock.
323 // If we did, something went wrong. Throw.
324 throw("failed to acquire lock to reset capacity")
326 // Flush the rest of the time for this period.
330 l.bucket.capacity = uint64(nprocs) * capacityPerProc
// If the capacity shrank below the current fill, clamp the fill and turn the
// limiter on. If there is headroom under the new capacity, turn it off. When
// the fill exactly equals the new capacity, l.enabled is left as it was.
331 if l.bucket.fill > l.bucket.capacity {
332 l.bucket.fill = l.bucket.capacity
333 l.enabled.Store(true)
334 l.lastEnabledCycle.Store(memstats.numgc + 1)
335 } else if l.bucket.fill < l.bucket.capacity {
336 l.enabled.Store(false)
341 // limiterEventType indicates the type of an event occurring on some P.
343 // These events represent the full set of events that the GC CPU limiter tracks
344 // to execute its function.
346 // This type may use no more than limiterEventBits bits of information.
// The type is packed into the top limiterEventBits bits of a
// limiterEventStamp (see makeLimiterEventStamp).
347 type limiterEventType uint8
// limiterEventNone is the zero value, so a zero limiterEventStamp decodes as
// "no event in flight".
350 limiterEventNone limiterEventType = iota // None of the following events.
351 limiterEventIdleMarkWork // Refers to an idle mark worker (see gcMarkWorkerMode).
352 limiterEventMarkAssist // Refers to mark assist (see gcAssistAlloc).
353 limiterEventScavengeAssist // Refers to a scavenge assist (see allocSpan).
354 limiterEventIdle // Refers to time a P spent on the idle list.
359 // limiterEventTypeMask is a mask for the bits in a limiterEventStamp that represent
360 // the event type. The rest of the bits of the stamp represent a timestamp.
362 limiterEventTypeMask = uint64((1<<limiterEventBits)-1) << (64 - limiterEventBits)
// limiterEventStampNone is the zero stamp. limiterEvent.stop stores it to
// clear the in-flight event.
363 limiterEventStampNone = limiterEventStamp(0)
366 // limiterEventStamp is a nanotime timestamp packed with a limiterEventType.
// The type occupies the top limiterEventBits bits and the timestamp the
// remaining low bits.
367 type limiterEventStamp uint64
369 // makeLimiterEventStamp creates a new stamp from the event type and the current timestamp.
// The top limiterEventBits bits of now are discarded to make room for typ, so
// the stamp carries only the low bits of the timestamp.
370 func makeLimiterEventStamp(typ limiterEventType, now int64) limiterEventStamp {
371 return limiterEventStamp(uint64(typ)<<(64-limiterEventBits) | (uint64(now) &^ limiterEventTypeMask))
374 // duration computes the difference between now and the start time stored in the stamp.
376 // Returns 0 if the difference is negative, which may happen if now is stale or if the
377 // before and after timestamps cross a 2^(64-limiterEventBits) boundary.
378 func (s limiterEventStamp) duration(now int64) int64 {
379 // The top limiterEventBits bits of the timestamp are derived from the current time
380 // when computing a duration.
// The stamp holds the event type in those bits rather than time, so the full
// start time is rebuilt from now's top bits and the stamp's low bits.
381 start := int64((uint64(now) & limiterEventTypeMask) | (uint64(s) &^ limiterEventTypeMask))
388 // typ extracts the event type from the stamp.
// The type is stored in the top limiterEventBits bits.
389 func (s limiterEventStamp) typ() limiterEventType {
390 return limiterEventType(s >> (64 - limiterEventBits))
393 // limiterEvent represents tracking state for an event tracked by the GC CPU limiter.
// A stamp whose type is limiterEventNone means no event is in flight.
394 type limiterEvent struct {
395 stamp atomic.Uint64 // Stores a limiterEventStamp.
398 // start begins tracking a new limiter event of the given type. If an event
399 // is already in flight, then a new event cannot begin because the current time is
400 // already being attributed to that event. In this case, this function returns false.
401 // Otherwise, it returns true.
403 // The caller must be non-preemptible until at least stop is called or this function
404 // returns false. Because this is trying to measure "on-CPU" time of some event, getting
405 // scheduled away during it can mean that whatever we're measuring isn't a reflection
406 // of "on-CPU" time. The OS could deschedule us at any time, but we want to maintain as
407 // close of an approximation as we can.
408 func (e *limiterEvent) start(typ limiterEventType, now int64) bool {
// Any type other than limiterEventNone in the slot means an event is
// already in flight.
409 if limiterEventStamp(e.stamp.Load()).typ() != limiterEventNone {
// Record the new event's type and start time in the slot.
412 e.stamp.Store(uint64(makeLimiterEventStamp(typ, now)))
416 // consume acquires the partial event CPU time from any in-flight event.
417 // It achieves this by storing the current time as the new event time.
419 // Returns the type of the in-flight event, as well as how long it's currently been
420 // executing for. Returns limiterEventNone if no event is active.
421 func (e *limiterEvent) consume(now int64) (typ limiterEventType, duration int64) {
422 // Read the limiter event timestamp and update it to now.
424 old := limiterEventStamp(e.stamp.Load())
426 if typ == limiterEventNone {
427 // There's no in-flight event, so just push that up.
430 duration = old.duration(now)
432 // We might have a stale now value, or this crossed the
433 // 2^(64-limiterEventBits) boundary in the clock readings.
435 return limiterEventNone, 0
// Restart the event at now, keeping its type, so the time just consumed is
// not counted again when the event stops. The swap only succeeds if the slot
// still holds the stamp read above.
437 new := makeLimiterEventStamp(typ, now)
438 if e.stamp.CompareAndSwap(uint64(old), uint64(new)) {
445 // stop stops the active limiter event. Throws if the in-flight event's type is not typ.
// The time the event ran for, measured up to now, is reported to
// gcCPULimiter as either idle time or assist time.
447 // The caller must be non-preemptible across the event. See start as to why.
448 func (e *limiterEvent) stop(typ limiterEventType, now int64) {
449 var stamp limiterEventStamp
451 stamp = limiterEventStamp(e.stamp.Load())
452 if stamp.typ() != typ {
453 print("runtime: want=", typ, " got=", stamp.typ(), "\n")
454 throw("limiterEvent.stop: found wrong event in p's limiter event slot")
// Clear the slot. The swap only succeeds if the stamp has not changed since
// it was loaded (consume may rewrite it concurrently).
456 if e.stamp.CompareAndSwap(uint64(stamp), uint64(limiterEventStampNone)) {
460 duration := stamp.duration(now)
462 // It's possible that we're missing time because we crossed a
463 // 2^(64-limiterEventBits) boundary between the start and end.
464 // In this case, we're dropping that information. This is OK because
465 // at worst it'll cause a transient hiccup that will quickly resolve
466 // itself as all new timestamps begin on the other side of the boundary.
467 // Such a hiccup should be incredibly rare.
470 // Account for the event.
472 case limiterEventIdleMarkWork:
473 gcCPULimiter.addIdleTime(duration)
474 case limiterEventIdle:
475 gcCPULimiter.addIdleTime(duration)
476 sched.idleTime.Add(duration)
477 case limiterEventMarkAssist:
479 case limiterEventScavengeAssist:
480 gcCPULimiter.addAssistTime(duration)
482 throw("limiterEvent.stop: invalid limiter event type found")