runtime: only use CPU time from the current window in the GC CPU limiter

author Michael Anthony Knyszek <mknyszek@google.com>

Thu, 2 Jun 2022 19:06:27 +0000 (19:06 +0000)

committer Michael Knyszek <mknyszek@google.com>

Fri, 3 Jun 2022 20:16:15 +0000 (20:16 +0000)
author Michael Anthony Knyszek <mknyszek@google.com>
Thu, 2 Jun 2022 19:06:27 +0000 (19:06 +0000)
committer Michael Knyszek <mknyszek@google.com>
Fri, 3 Jun 2022 20:16:15 +0000 (20:16 +0000)
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go

index f3a29fbe03629ed97bfdaca93e461c1777da7ffd..b9f0990d8ca9d95fcb20c2b8771f5d5f339822f5 100644 (file)
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -1415,6 +1415,7 @@ func NewGCCPULimiter(now int64, gomaxprocs int32) *GCCPULimiter {
         // on a 32-bit architecture, it may get allocated unaligned
         // space.
         l := Escape(new(GCCPULimiter))
+       l.limiter.test = true
         l.limiter.resetCapacity(now, gomaxprocs)
         return l
  }
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go

index 5c044f0d874c2ed04ebd2e442a4d7ae87b1fbc04..9b25948255d96e191f36209e49f829ef9ffcd3aa 100644 (file)
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -1281,6 +1281,10 @@ func gcBgMarkWorker() {
  
                 startTime := nanotime()
                 pp.gcMarkWorkerStartTime = startTime
+               var trackLimiterEvent bool
+               if pp.gcMarkWorkerMode == gcMarkWorkerIdleMode {
+                       trackLimiterEvent = pp.limiterEvent.start(limiterEventIdleMarkWork, startTime)
+               }
  
                 decnwait := atomic.Xadd(&work.nwait, -1)
                 if decnwait == work.nproc {
@@ -1329,9 +1333,8 @@ func gcBgMarkWorker() {
                 now := nanotime()
                 duration := now - startTime
                 gcController.markWorkerStop(pp.gcMarkWorkerMode, duration)
-               if pp.gcMarkWorkerMode == gcMarkWorkerIdleMode {
-                       gcCPULimiter.addIdleMarkTime(duration)
-                       gcCPULimiter.update(now)
+               if trackLimiterEvent {
+                       pp.limiterEvent.stop(limiterEventIdleMarkWork, now)
                 }
                 if pp.gcMarkWorkerMode == gcMarkWorkerFractionalMode {
                         atomic.Xaddint64(&pp.gcFractionalMarkTime, duration)
diff --git a/src/runtime/mgclimit.go b/src/runtime/mgclimit.go

index 1ae5c3764c18a0caebd21bf6ab87c8d335259ec8..fc4fefaa2cc159593081af0d971806379a554219 100644 (file)
--- a/src/runtime/mgclimit.go
+++ b/src/runtime/mgclimit.go
@@ -76,6 +76,9 @@ type gcCPULimiterState struct {
         //
         // gomaxprocs isn't used directly so as to keep this structure unit-testable.
         nprocs int32
+
+       // test indicates whether this instance of the struct was made for testing purposes.
+       test bool
  }
  
  // limiting returns true if the CPU limiter is currently enabled, meaning the Go GC
@@ -192,6 +195,33 @@ func (l *gcCPULimiterState) updateLocked(now int64) {
                 l.idleMarkTimePool.Add(-idleMarkTime)
         }
  
+       if !l.test {
+               // Consume time from in-flight events. Make sure we're not preemptible so allp can't change.
+               //
+               // The reason we do this instead of just waiting for those events to finish and push updates
+               // is to ensure that all the time we're accounting for happened sometime between lastUpdate
+               // and now. This dramatically simplifies reasoning about the limiter because we're not at
+               // risk of extra time being accounted for in this window than actually happened in this window,
+               // leading to all sorts of weird transient behavior.
+               mp := acquirem()
+               for _, pp := range allp {
+                       typ, duration := pp.limiterEvent.consume(now)
+                       switch typ {
+                       case limiterEventIdleMarkWork:
+                               idleMarkTime += duration
+                       case limiterEventMarkAssist:
+                               fallthrough
+                       case limiterEventScavengeAssist:
+                               assistTime += duration
+                       case limiterEventNone:
+                               break
+                       default:
+                               throw("invalid limiter event type found")
+                       }
+               }
+               releasem(mp)
+       }
+
         // Compute total GC time.
         windowGCTime := assistTime
         if l.gcEnabled {
@@ -300,3 +330,144 @@ func (l *gcCPULimiterState) resetCapacity(now int64, nprocs int32) {
         }
         l.unlock()
  }
+
+// limiterEventType indicates the type of an event occuring on some P.
+//
+// These events represent the full set of events that the GC CPU limiter tracks
+// to execute its function.
+//
+// This type may use no more than limiterEventBits bits of information.
+type limiterEventType uint8
+
+const (
+       limiterEventNone           limiterEventType = iota // None of the following events.
+       limiterEventIdleMarkWork                           // Refers to an idle mark worker (see gcMarkWorkerMode).
+       limiterEventMarkAssist                             // Refers to mark assist (see gcAssistAlloc).
+       limiterEventScavengeAssist                         // Refers to a scavenge assist (see allocSpan).
+
+       limiterEventBits = 3
+)
+
+// limiterEventTypeMask is a mask for the bits in p.limiterEventStart that represent
+// the event type. The rest of the bits of that field represent a timestamp.
+const (
+       limiterEventTypeMask  = uint64((1<<limiterEventBits)-1) << (64 - limiterEventBits)
+       limiterEventStampNone = limiterEventStamp(0)
+)
+
+// limiterEventStamp is a nanotime timestamp packed with a limiterEventType.
+type limiterEventStamp uint64
+
+// makeLimiterEventStamp creates a new stamp from the event type and the current timestamp.
+func makeLimiterEventStamp(typ limiterEventType, now int64) limiterEventStamp {
+       return limiterEventStamp(uint64(typ)<<(64-limiterEventBits) | (uint64(now) &^ limiterEventTypeMask))
+}
+
+// duration computes the difference between now and the start time stored in the stamp.
+//
+// Returns 0 if the difference is negative, which may happen if now is stale or if the
+// before and after timestamps cross a 2^(64-limiterEventBits) boundary.
+func (s limiterEventStamp) duration(now int64) int64 {
+       // The top limiterEventBits bits of the timestamp are derived from the current time
+       // when computing a duration.
+       start := int64((uint64(now) & limiterEventTypeMask) | (uint64(s) &^ limiterEventTypeMask))
+       if now < start {
+               return 0
+       }
+       return now - start
+}
+
+// type extracts the event type from the stamp.
+func (s limiterEventStamp) typ() limiterEventType {
+       return limiterEventType(s >> (64 - limiterEventBits))
+}
+
+// limiterEvent represents tracking state for an event tracked by the GC CPU limiter.
+type limiterEvent struct {
+       stamp atomic.Uint64 // Stores a limiterEventStamp.
+}
+
+// start begins tracking a new limiter event of the current type. If an event
+// is already in flight, then a new event cannot begin because the current time is
+// already being attributed to that event. In this case, this function returns false.
+// Otherwise, it returns true.
+//
+// The caller must be non-preemptible until at least stop is called or this function
+// returns false. Because this is trying to measure "on-CPU" time of some event, getting
+// scheduled away during it can mean that whatever we're measuring isn't a reflection
+// of "on-CPU" time. The OS could deschedule us at any time, but we want to maintain as
+// close of an approximation as we can.
+func (e *limiterEvent) start(typ limiterEventType, now int64) bool {
+       if limiterEventStamp(e.stamp.Load()).typ() != limiterEventNone {
+               return false
+       }
+       e.stamp.Store(uint64(makeLimiterEventStamp(typ, now)))
+       return true
+}
+
+// consume acquires the partial event CPU time from any in-flight event.
+// It achieves this by storing the current time as the new event time.
+//
+// Returns the type of the in-flight event, as well as how long it's currently been
+// executing for. Returns limiterEventNone if no event is active.
+func (e *limiterEvent) consume(now int64) (typ limiterEventType, duration int64) {
+       // Read the limiter event timestamp and update it to now.
+       for {
+               old := limiterEventStamp(e.stamp.Load())
+               typ = old.typ()
+               if typ == limiterEventNone {
+                       // There's no in-flight event, so just push that up.
+                       return
+               }
+               duration = old.duration(now)
+               if duration == 0 {
+                       // We might have a stale now value, or this crossed the
+                       // 2^(64-limiterEventBits) boundary in the clock readings.
+                       // Just ignore it.
+                       return limiterEventNone, 0
+               }
+               new := makeLimiterEventStamp(typ, now)
+               if e.stamp.CompareAndSwap(uint64(old), uint64(new)) {
+                       break
+               }
+       }
+       return
+}
+
+// stop stops the active limiter event. Throws if the
+//
+// The caller must be non-preemptible across the event. See start as to why.
+func (e *limiterEvent) stop(typ limiterEventType, now int64) {
+       var stamp limiterEventStamp
+       for {
+               stamp = limiterEventStamp(e.stamp.Load())
+               if stamp.typ() != typ {
+                       print("runtime: want=", typ, " got=", stamp.typ(), "\n")
+                       throw("limiterEvent.stop: found wrong event in p's limiter event slot")
+               }
+               if e.stamp.CompareAndSwap(uint64(stamp), uint64(limiterEventStampNone)) {
+                       break
+               }
+       }
+       duration := stamp.duration(now)
+       if duration == 0 {
+               // It's possible that we're missing time because we crossed a
+               // 2^(64-limiterEventBits) boundary between the start and end.
+               // In this case, we're dropping that information. This is OK because
+               // at worst it'll cause a transient hiccup that will quickly resolve
+               // itself as all new timestamps begin on the other side of the boundary.
+               // Such a hiccup should be incredibly rare.
+               return
+       }
+       // Account for the event.
+       switch typ {
+       case limiterEventIdleMarkWork:
+               gcCPULimiter.addIdleMarkTime(duration)
+       case limiterEventMarkAssist:
+               fallthrough
+       case limiterEventScavengeAssist:
+               gcCPULimiter.addAssistTime(duration)
+       default:
+               throw("limiterEvent.stop: invalid limiter event type found")
+       }
+}
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go

index 7fc748875a9447591e8d20b01787946f31d9c7ea..74637072c5f2bfa27c8337721cee5c6fac8847ff 100644 (file)
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -545,7 +545,11 @@ func gcAssistAlloc1(gp *g, scanWork int64) {
         // Track time spent in this assist. Since we're on the
         // system stack, this is non-preemptible, so we can
         // just measure start and end time.
+       //
+       // Limiter event tracking might be disabled if we end up here
+       // while on a mark worker.
         startTime := nanotime()
+       trackLimiterEvent := gp.m.p.ptr().limiterEvent.start(limiterEventMarkAssist, startTime)
  
         decnwait := atomic.Xadd(&work.nwait, -1)
         if decnwait == work.nproc {
@@ -593,9 +597,11 @@ func gcAssistAlloc1(gp *g, scanWork int64) {
         duration := now - startTime
         _p_ := gp.m.p.ptr()
         _p_.gcAssistTime += duration
+       if trackLimiterEvent {
+               _p_.limiterEvent.stop(limiterEventMarkAssist, now)
+       }
         if _p_.gcAssistTime > gcAssistTimeSlack {
                 gcController.assistTime.Add(_p_.gcAssistTime)
-               gcCPULimiter.addAssistTime(_p_.gcAssistTime)
                 gcCPULimiter.update(now)
                 _p_.gcAssistTime = 0
         }
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go

index eb1f985f5ca2742f5358805b963292e534299ed9..b19a2ff4088c51b66e257b31554d1fc63a01bc79 100644 (file)
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -1301,17 +1301,29 @@ HaveSpan:
                         }
                 }
         }
-       if bytesToScavenge > 0 {
+       // There are a few very limited cirumstances where we won't have a P here.
+       // It's OK to simply skip scavenging in these cases. Something else will notice
+       // and pick up the tab.
+       if pp != nil && bytesToScavenge > 0 {
                 // Measure how long we spent scavenging and add that measurement to the assist
                 // time so we can track it for the GC CPU limiter.
+               //
+               // Limiter event tracking might be disabled if we end up here
+               // while on a mark worker.
                 start := nanotime()
+               track := pp.limiterEvent.start(limiterEventScavengeAssist, start)
+
+               // Scavenge, but back out if the limiter turns on.
                 h.pages.scavenge(bytesToScavenge, func() bool {
                         return gcCPULimiter.limiting()
                 })
+
+               // Finish up accounting.
                 now := nanotime()
+               if track {
+                       pp.limiterEvent.stop(limiterEventScavengeAssist, now)
+               }
                 h.pages.scav.assistTime.Add(now - start)
-               gcCPULimiter.addAssistTime(now - start)
-               gcCPULimiter.update(now)
         }
  
         // Commit and account for any scavenged memory that the span now owns.
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go

index 1e4f872726aa89bdcb43fe2a1f99db087aff4888..e1788223e7ee630adcfde72a199009768424ba34 100644 (file)
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -686,6 +686,9 @@ type p struct {
         gcAssistTime         int64 // Nanoseconds in assistAlloc
         gcFractionalMarkTime int64 // Nanoseconds in fractional mark worker (atomic)
  
+       // limiterEvent tracks events for the GC CPU limiter.
+       limiterEvent limiterEvent
+
         // gcMarkWorkerMode is the mode for the next mark worker to run in.
         // That is, this is used to communicate with the worker goroutine
         // selected for immediate execution by
author	Michael Anthony Knyszek <mknyszek@google.com>
	Thu, 2 Jun 2022 19:06:27 +0000 (19:06 +0000)
committer	Michael Knyszek <mknyszek@google.com>
	Fri, 3 Jun 2022 20:16:15 +0000 (20:16 +0000)
src/runtime/export_test.go		patch \| blob \| history
src/runtime/mgc.go		patch \| blob \| history
src/runtime/mgclimit.go		patch \| blob \| history
src/runtime/mgcmark.go		patch \| blob \| history
src/runtime/mheap.go		patch \| blob \| history
src/runtime/runtime2.go		patch \| blob \| history