src/runtime/mgclimit.go

   1 // Copyright 2022 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package runtime
   6
   7 import "runtime/internal/atomic"
   8
   9 // gcCPULimiter is a mechanism to limit GC CPU utilization in situations
  10 // where it might become excessive and inhibit application progress (e.g.
  11 // a death spiral).
  12 //
  13 // The core of the limiter is a leaky bucket mechanism that fills with GC
  14 // CPU time and drains with mutator time. Because the bucket fills and
  15 // drains with time directly (i.e. without any weighting), this effectively
  16 // sets a very conservative limit of 50%. This limit could be enforced directly,
  17 // however, but the purpose of the bucket is to accommodate spikes in GC CPU
  18 // utilization without hurting throughput.
  19 //
  20 // Note that the bucket in the leaky bucket mechanism can never go negative,
  21 // so the GC never gets credit for a lot of CPU time spent without the GC
  22 // running. This is intentional, as an application that stays idle for, say,
  23 // an entire day, could build up enough credit to fail to prevent a death
  24 // spiral the following day. The bucket's capacity is the GC's only leeway.
  25 //
  26 // The capacity thus also sets the window the limiter considers. For example,
  27 // if the capacity of the bucket is 1 cpu-second, then the limiter will not
  28 // kick in until at least 1 full cpu-second in the last 2 cpu-second window
  29 // is spent on GC CPU time.
  30 var gcCPULimiter gcCPULimiterState
  31
  32 type gcCPULimiterState struct {
  33         lock atomic.Uint32
  34
  35         enabled atomic.Bool
  36         bucket  struct {
  37                 // Invariants:
  38                 // - fill >= 0
  39                 // - capacity >= 0
  40                 // - fill <= capacity
  41                 fill, capacity uint64
  42         }
  43         // overflow is the cumulative amount of GC CPU time that we tried to fill the
  44         // bucket with but exceeded its capacity.
  45         overflow uint64
  46
  47         // gcEnabled is an internal copy of gcBlackenEnabled that determines
  48         // whether the limiter tracks total assist time.
  49         //
  50         // gcBlackenEnabled isn't used directly so as to keep this structure
  51         // unit-testable.
  52         gcEnabled bool
  53
  54         // transitioning is true when the GC is in a STW and transitioning between
  55         // the mark and sweep phases.
  56         transitioning bool
  57
  58         // assistTimePool is the accumulated assist time since the last update.
  59         assistTimePool atomic.Int64
  60
  61         // idleMarkTimePool is the accumulated idle mark time since the last update.
  62         idleMarkTimePool atomic.Int64
  63
  64         // idleTimePool is the accumulated time Ps spent on the idle list since the last update.
  65         idleTimePool atomic.Int64
  66
  67         // lastUpdate is the nanotime timestamp of the last time update was called.
  68         //
  69         // Updated under lock, but may be read concurrently.
  70         lastUpdate atomic.Int64
  71
  72         // lastEnabledCycle is the GC cycle that last had the limiter enabled.
  73         lastEnabledCycle atomic.Uint32
  74
  75         // nprocs is an internal copy of gomaxprocs, used to determine total available
  76         // CPU time.
  77         //
  78         // gomaxprocs isn't used directly so as to keep this structure unit-testable.
  79         nprocs int32
  80
  81         // test indicates whether this instance of the struct was made for testing purposes.
  82         test bool
  83 }
  84
  85 // limiting returns true if the CPU limiter is currently enabled, meaning the Go GC
  86 // should take action to limit CPU utilization.
  87 //
  88 // It is safe to call concurrently with other operations.
  89 func (l *gcCPULimiterState) limiting() bool {
  90         return l.enabled.Load()
  91 }
  92
  93 // startGCTransition notifies the limiter of a GC transition.
  94 //
  95 // This call takes ownership of the limiter and disables all other means of
  96 // updating the limiter. Release ownership by calling finishGCTransition.
  97 //
  98 // It is safe to call concurrently with other operations.
  99 func (l *gcCPULimiterState) startGCTransition(enableGC bool, now int64) {
 100         if !l.tryLock() {
 101                 // This must happen during a STW, so we can't fail to acquire the lock.
 102                 // If we did, something went wrong. Throw.
 103                 throw("failed to acquire lock to start a GC transition")
 104         }
 105         if l.gcEnabled == enableGC {
 106                 throw("transitioning GC to the same state as before?")
 107         }
 108         // Flush whatever was left between the last update and now.
 109         l.updateLocked(now)
 110         l.gcEnabled = enableGC
 111         l.transitioning = true
 112         // N.B. finishGCTransition releases the lock.
 113         //
 114         // We don't release here to increase the chance that if there's a failure
 115         // to finish the transition, that we throw on failing to acquire the lock.
 116 }
 117
 118 // finishGCTransition notifies the limiter that the GC transition is complete
 119 // and releases ownership of it. It also accumulates STW time in the bucket.
 120 // now must be the timestamp from the end of the STW pause.
 121 func (l *gcCPULimiterState) finishGCTransition(now int64) {
 122         if !l.transitioning {
 123                 throw("finishGCTransition called without starting one?")
 124         }
 125         // Count the full nprocs set of CPU time because the world is stopped
 126         // between startGCTransition and finishGCTransition. Even though the GC
 127         // isn't running on all CPUs, it is preventing user code from doing so,
 128         // so it might as well be.
 129         if lastUpdate := l.lastUpdate.Load(); now >= lastUpdate {
 130                 l.accumulate(0, (now-lastUpdate)*int64(l.nprocs))
 131         }
 132         l.lastUpdate.Store(now)
 133         l.transitioning = false
 134         l.unlock()
 135 }
 136
 137 // gcCPULimiterUpdatePeriod dictates the maximum amount of wall-clock time
 138 // we can go before updating the limiter.
 139 const gcCPULimiterUpdatePeriod = 10e6 // 10ms
 140
 141 // needUpdate returns true if the limiter's maximum update period has been
 142 // exceeded, and so would benefit from an update.
 143 func (l *gcCPULimiterState) needUpdate(now int64) bool {
 144         return now-l.lastUpdate.Load() > gcCPULimiterUpdatePeriod
 145 }
 146
 147 // addAssistTime notifies the limiter of additional assist time. It will be
 148 // included in the next update.
 149 func (l *gcCPULimiterState) addAssistTime(t int64) {
 150         l.assistTimePool.Add(t)
 151 }
 152
 153 // addIdleTime notifies the limiter of additional time a P spent on the idle list. It will be
 154 // subtracted from the total CPU time in the next update.
 155 func (l *gcCPULimiterState) addIdleTime(t int64) {
 156         l.idleTimePool.Add(t)
 157 }
 158
 159 // update updates the bucket given runtime-specific information. now is the
 160 // current monotonic time in nanoseconds.
 161 //
 162 // This is safe to call concurrently with other operations, except *GCTransition.
 163 func (l *gcCPULimiterState) update(now int64) {
 164         if !l.tryLock() {
 165                 // We failed to acquire the lock, which means something else is currently
 166                 // updating. Just drop our update, the next one to update will include
 167                 // our total assist time.
 168                 return
 169         }
 170         if l.transitioning {
 171                 throw("update during transition")
 172         }
 173         l.updateLocked(now)
 174         l.unlock()
 175 }
 176
 177 // updateLocked is the implementation of update. l.lock must be held.
 178 func (l *gcCPULimiterState) updateLocked(now int64) {
 179         lastUpdate := l.lastUpdate.Load()
 180         if now < lastUpdate {
 181                 // Defensively avoid overflow. This isn't even the latest update anyway.
 182                 return
 183         }
 184         windowTotalTime := (now - lastUpdate) * int64(l.nprocs)
 185         l.lastUpdate.Store(now)
 186
 187         // Drain the pool of assist time.
 188         assistTime := l.assistTimePool.Load()
 189         if assistTime != 0 {
 190                 l.assistTimePool.Add(-assistTime)
 191         }
 192
 193         // Drain the pool of idle time.
 194         idleTime := l.idleTimePool.Load()
 195         if idleTime != 0 {
 196                 l.idleTimePool.Add(-idleTime)
 197         }
 198
 199         if !l.test {
 200                 // Consume time from in-flight events. Make sure we're not preemptible so allp can't change.
 201                 //
 202                 // The reason we do this instead of just waiting for those events to finish and push updates
 203                 // is to ensure that all the time we're accounting for happened sometime between lastUpdate
 204                 // and now. This dramatically simplifies reasoning about the limiter because we're not at
 205                 // risk of extra time being accounted for in this window than actually happened in this window,
 206                 // leading to all sorts of weird transient behavior.
 207                 mp := acquirem()
 208                 for _, pp := range allp {
 209                         typ, duration := pp.limiterEvent.consume(now)
 210                         switch typ {
 211                         case limiterEventIdleMarkWork:
 212                                 fallthrough
 213                         case limiterEventIdle:
 214                                 idleTime += duration
 215                                 sched.idleTime.Add(duration)
 216                         case limiterEventMarkAssist:
 217                                 fallthrough
 218                         case limiterEventScavengeAssist:
 219                                 assistTime += duration
 220                         case limiterEventNone:
 221                                 break
 222                         default:
 223                                 throw("invalid limiter event type found")
 224                         }
 225                 }
 226                 releasem(mp)
 227         }
 228
 229         // Compute total GC time.
 230         windowGCTime := assistTime
 231         if l.gcEnabled {
 232                 windowGCTime += int64(float64(windowTotalTime) * gcBackgroundUtilization)
 233         }
 234
 235         // Subtract out all idle time from the total time. Do this after computing
 236         // GC time, because the background utilization is dependent on the *real*
 237         // total time, not the total time after idle time is subtracted.
 238         //
 239         // Idle time is counted as any time that a P is on the P idle list plus idle mark
 240         // time. Idle mark workers soak up time that the application spends idle.
 241         //
 242         // On a heavily undersubscribed system, any additional idle time can skew GC CPU
 243         // utilization, because the GC might be executing continuously and thrashing,
 244         // yet the CPU utilization with respect to GOMAXPROCS will be quite low, so
 245         // the limiter fails to turn on. By subtracting idle time, we're removing time that
 246         // we know the application was idle giving a more accurate picture of whether
 247         // the GC is thrashing.
 248         //
 249         // Note that this can cause the limiter to turn on even if it's not needed. For
 250         // instance, on a system with 32 Ps but only 1 running goroutine, each GC will have
 251         // 8 dedicated GC workers. Assuming the GC cycle is half mark phase and half sweep
 252         // phase, then the GC CPU utilization over that cycle, with idle time removed, will
 253         // be 8/(8+2) = 80%. Even though the limiter turns on, though, assist should be
 254         // unnecessary, as the GC has way more CPU time to outpace the 1 goroutine that's
 255         // running.
 256         windowTotalTime -= idleTime
 257
 258         l.accumulate(windowTotalTime-windowGCTime, windowGCTime)
 259 }
 260
 261 // accumulate adds time to the bucket and signals whether the limiter is enabled.
 262 //
 263 // This is an internal function that deals just with the bucket. Prefer update.
 264 // l.lock must be held.
 265 func (l *gcCPULimiterState) accumulate(mutatorTime, gcTime int64) {
 266         headroom := l.bucket.capacity - l.bucket.fill
 267         enabled := headroom == 0
 268
 269         // Let's be careful about three things here:
 270         // 1. The addition and subtraction, for the invariants.
 271         // 2. Overflow.
 272         // 3. Excessive mutation of l.enabled, which is accessed
 273         //    by all assists, potentially more than once.
 274         change := gcTime - mutatorTime
 275
 276         // Handle limiting case.
 277         if change > 0 && headroom <= uint64(change) {
 278                 l.overflow += uint64(change) - headroom
 279                 l.bucket.fill = l.bucket.capacity
 280                 if !enabled {
 281                         l.enabled.Store(true)
 282                         l.lastEnabledCycle.Store(memstats.numgc + 1)
 283                 }
 284                 return
 285         }
 286
 287         // Handle non-limiting cases.
 288         if change < 0 && l.bucket.fill <= uint64(-change) {
 289                 // Bucket emptied.
 290                 l.bucket.fill = 0
 291         } else {
 292                 // All other cases.
 293                 l.bucket.fill -= uint64(-change)
 294         }
 295         if change != 0 && enabled {
 296                 l.enabled.Store(false)
 297         }
 298 }
 299
 300 // tryLock attempts to lock l. Returns true on success.
 301 func (l *gcCPULimiterState) tryLock() bool {
 302         return l.lock.CompareAndSwap(0, 1)
 303 }
 304
 305 // unlock releases the lock on l. Must be called if tryLock returns true.
 306 func (l *gcCPULimiterState) unlock() {
 307         old := l.lock.Swap(0)
 308         if old != 1 {
 309                 throw("double unlock")
 310         }
 311 }
 312
 313 // capacityPerProc is the limiter's bucket capacity for each P in GOMAXPROCS.
 314 const capacityPerProc = 1e9 // 1 second in nanoseconds
 315
 316 // resetCapacity updates the capacity based on GOMAXPROCS. Must not be called
 317 // while the GC is enabled.
 318 //
 319 // It is safe to call concurrently with other operations.
 320 func (l *gcCPULimiterState) resetCapacity(now int64, nprocs int32) {
 321         if !l.tryLock() {
 322                 // This must happen during a STW, so we can't fail to acquire the lock.
 323                 // If we did, something went wrong. Throw.
 324                 throw("failed to acquire lock to reset capacity")
 325         }
 326         // Flush the rest of the time for this period.
 327         l.updateLocked(now)
 328         l.nprocs = nprocs
 329
 330         l.bucket.capacity = uint64(nprocs) * capacityPerProc
 331         if l.bucket.fill > l.bucket.capacity {
 332                 l.bucket.fill = l.bucket.capacity
 333                 l.enabled.Store(true)
 334                 l.lastEnabledCycle.Store(memstats.numgc + 1)
 335         } else if l.bucket.fill < l.bucket.capacity {
 336                 l.enabled.Store(false)
 337         }
 338         l.unlock()
 339 }
 340
 341 // limiterEventType indicates the type of an event occurring on some P.
 342 //
 343 // These events represent the full set of events that the GC CPU limiter tracks
 344 // to execute its function.
 345 //
 346 // This type may use no more than limiterEventBits bits of information.
 347 type limiterEventType uint8
 348
 349 const (
 350         limiterEventNone           limiterEventType = iota // None of the following events.
 351         limiterEventIdleMarkWork                           // Refers to an idle mark worker (see gcMarkWorkerMode).
 352         limiterEventMarkAssist                             // Refers to mark assist (see gcAssistAlloc).
 353         limiterEventScavengeAssist                         // Refers to a scavenge assist (see allocSpan).
 354         limiterEventIdle                                   // Refers to time a P spent on the idle list.
 355
 356         limiterEventBits = 3
 357 )
 358
 359 // limiterEventTypeMask is a mask for the bits in p.limiterEventStart that represent
 360 // the event type. The rest of the bits of that field represent a timestamp.
 361 const (
 362         limiterEventTypeMask  = uint64((1<<limiterEventBits)-1) << (64 - limiterEventBits)
 363         limiterEventStampNone = limiterEventStamp(0)
 364 )
 365
 366 // limiterEventStamp is a nanotime timestamp packed with a limiterEventType.
 367 type limiterEventStamp uint64
 368
 369 // makeLimiterEventStamp creates a new stamp from the event type and the current timestamp.
 370 func makeLimiterEventStamp(typ limiterEventType, now int64) limiterEventStamp {
 371         return limiterEventStamp(uint64(typ)<<(64-limiterEventBits) | (uint64(now) &^ limiterEventTypeMask))
 372 }
 373
 374 // duration computes the difference between now and the start time stored in the stamp.
 375 //
 376 // Returns 0 if the difference is negative, which may happen if now is stale or if the
 377 // before and after timestamps cross a 2^(64-limiterEventBits) boundary.
 378 func (s limiterEventStamp) duration(now int64) int64 {
 379         // The top limiterEventBits bits of the timestamp are derived from the current time
 380         // when computing a duration.
 381         start := int64((uint64(now) & limiterEventTypeMask) | (uint64(s) &^ limiterEventTypeMask))
 382         if now < start {
 383                 return 0
 384         }
 385         return now - start
 386 }
 387
 388 // type extracts the event type from the stamp.
 389 func (s limiterEventStamp) typ() limiterEventType {
 390         return limiterEventType(s >> (64 - limiterEventBits))
 391 }
 392
 393 // limiterEvent represents tracking state for an event tracked by the GC CPU limiter.
 394 type limiterEvent struct {
 395         stamp atomic.Uint64 // Stores a limiterEventStamp.
 396 }
 397
 398 // start begins tracking a new limiter event of the current type. If an event
 399 // is already in flight, then a new event cannot begin because the current time is
 400 // already being attributed to that event. In this case, this function returns false.
 401 // Otherwise, it returns true.
 402 //
 403 // The caller must be non-preemptible until at least stop is called or this function
 404 // returns false. Because this is trying to measure "on-CPU" time of some event, getting
 405 // scheduled away during it can mean that whatever we're measuring isn't a reflection
 406 // of "on-CPU" time. The OS could deschedule us at any time, but we want to maintain as
 407 // close of an approximation as we can.
 408 func (e *limiterEvent) start(typ limiterEventType, now int64) bool {
 409         if limiterEventStamp(e.stamp.Load()).typ() != limiterEventNone {
 410                 return false
 411         }
 412         e.stamp.Store(uint64(makeLimiterEventStamp(typ, now)))
 413         return true
 414 }
 415
 416 // consume acquires the partial event CPU time from any in-flight event.
 417 // It achieves this by storing the current time as the new event time.
 418 //
 419 // Returns the type of the in-flight event, as well as how long it's currently been
 420 // executing for. Returns limiterEventNone if no event is active.
 421 func (e *limiterEvent) consume(now int64) (typ limiterEventType, duration int64) {
 422         // Read the limiter event timestamp and update it to now.
 423         for {
 424                 old := limiterEventStamp(e.stamp.Load())
 425                 typ = old.typ()
 426                 if typ == limiterEventNone {
 427                         // There's no in-flight event, so just push that up.
 428                         return
 429                 }
 430                 duration = old.duration(now)
 431                 if duration == 0 {
 432                         // We might have a stale now value, or this crossed the
 433                         // 2^(64-limiterEventBits) boundary in the clock readings.
 434                         // Just ignore it.
 435                         return limiterEventNone, 0
 436                 }
 437                 new := makeLimiterEventStamp(typ, now)
 438                 if e.stamp.CompareAndSwap(uint64(old), uint64(new)) {
 439                         break
 440                 }
 441         }
 442         return
 443 }
 444
 445 // stop stops the active limiter event. Throws if the
 446 //
 447 // The caller must be non-preemptible across the event. See start as to why.
 448 func (e *limiterEvent) stop(typ limiterEventType, now int64) {
 449         var stamp limiterEventStamp
 450         for {
 451                 stamp = limiterEventStamp(e.stamp.Load())
 452                 if stamp.typ() != typ {
 453                         print("runtime: want=", typ, " got=", stamp.typ(), "\n")
 454                         throw("limiterEvent.stop: found wrong event in p's limiter event slot")
 455                 }
 456                 if e.stamp.CompareAndSwap(uint64(stamp), uint64(limiterEventStampNone)) {
 457                         break
 458                 }
 459         }
 460         duration := stamp.duration(now)
 461         if duration == 0 {
 462                 // It's possible that we're missing time because we crossed a
 463                 // 2^(64-limiterEventBits) boundary between the start and end.
 464                 // In this case, we're dropping that information. This is OK because
 465                 // at worst it'll cause a transient hiccup that will quickly resolve
 466                 // itself as all new timestamps begin on the other side of the boundary.
 467                 // Such a hiccup should be incredibly rare.
 468                 return
 469         }
 470         // Account for the event.
 471         switch typ {
 472         case limiterEventIdleMarkWork:
 473                 gcCPULimiter.addIdleTime(duration)
 474         case limiterEventIdle:
 475                 gcCPULimiter.addIdleTime(duration)
 476                 sched.idleTime.Add(duration)
 477         case limiterEventMarkAssist:
 478                 fallthrough
 479         case limiterEventScavengeAssist:
 480                 gcCPULimiter.addAssistTime(duration)
 481         default:
 482                 throw("limiterEvent.stop: invalid limiter event type found")
 483         }
 484 }