import (
"internal/goarch"
+ "internal/goexperiment"
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
//
// Preemption must be disabled (because this uses a gcWork).
//
+// Returns the amount of GC work credit produced by the operation.
+// If flushBgCredit is true, then that credit is also flushed
+// to the background credit pool.
+//
// nowritebarrier is only advisory here.
//
//go:nowritebarrier
-func markroot(gcw *gcWork, i uint32) {
+func markroot(gcw *gcWork, i uint32, flushBgCredit bool) int64 {
// Note: if you add a case here, please also update heapdump.go:dumproots.
+ var workDone int64
+ var workCounter *atomic.Int64
switch {
case work.baseData <= i && i < work.baseBSS:
+ workCounter = &gcController.globalsScanWork
for _, datap := range activeModules() {
- markrootBlock(datap.data, datap.edata-datap.data, datap.gcdatamask.bytedata, gcw, int(i-work.baseData))
+ workDone += markrootBlock(datap.data, datap.edata-datap.data, datap.gcdatamask.bytedata, gcw, int(i-work.baseData))
}
case work.baseBSS <= i && i < work.baseSpans:
+ workCounter = &gcController.globalsScanWork
for _, datap := range activeModules() {
- markrootBlock(datap.bss, datap.ebss-datap.bss, datap.gcbssmask.bytedata, gcw, int(i-work.baseBSS))
+ workDone += markrootBlock(datap.bss, datap.ebss-datap.bss, datap.gcbssmask.bytedata, gcw, int(i-work.baseBSS))
}
case i == fixedRootFinalizers:
default:
// the rest is scanning goroutine stacks
+ workCounter = &gcController.stackScanWork
var gp *g
if work.baseStacks <= i && i < work.baseEnd {
// N.B. Atomic read of allglen in gcMarkRootPrepare
if gp.gcscandone {
throw("g already scanned")
}
- scanstack(gp, gcw)
+ workDone += scanstack(gp, gcw)
gp.gcscandone = true
resumeG(stopped)
}
})
}
+ if goexperiment.PacerRedesign {
+ if workCounter != nil && workDone != 0 {
+ workCounter.Add(workDone)
+ if flushBgCredit {
+ gcFlushBgCredit(workDone)
+ }
+ }
+ }
+ return workDone
}
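The shape of the new accounting is worth seeing in isolation. Below is a minimal standalone sketch (hypothetical names; sync/atomic's Int64 stands in for the runtime-internal atomics, and a plain add stands in for gcFlushBgCredit): work is accumulated locally, published once to the matching cycle-wide counter, and optionally flushed as background credit.

package main

import (
	"fmt"
	"sync/atomic"
)

var (
	globalsScanWork atomic.Int64 // stand-in for gcController.globalsScanWork
	bgScanCredit    atomic.Int64 // stand-in for the background credit pool
)

// scanRootsSketch mirrors markroot's shape: accumulate the work done,
// publish the total once to the per-cycle counter, and flush the same
// amount as background credit only when asked to.
func scanRootsSketch(blocks []int64, flushBgCredit bool) int64 {
	var workDone int64
	for _, n := range blocks {
		workDone += n // stand-in for markrootBlock
	}
	if workDone != 0 {
		globalsScanWork.Add(workDone)
		if flushBgCredit {
			bgScanCredit.Add(workDone) // stand-in for gcFlushBgCredit
		}
	}
	return workDone
}

func main() {
	fmt.Println(scanRootsSketch([]int64{4 << 10, 8 << 10}, true)) // 12288
	fmt.Println(globalsScanWork.Load(), bgScanCredit.Load())     // 12288 12288
}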
// markrootBlock scans the shard'th shard of the block of memory [b0,
// b0+n0), with the given pointer mask.
//
+// Returns the amount of work done.
+//
//go:nowritebarrier
-func markrootBlock(b0, n0 uintptr, ptrmask0 *uint8, gcw *gcWork, shard int) {
+func markrootBlock(b0, n0 uintptr, ptrmask0 *uint8, gcw *gcWork, shard int) int64 {
if rootBlockBytes%(8*goarch.PtrSize) != 0 {
// This is necessary to pick byte offsets in ptrmask0.
throw("rootBlockBytes must be a multiple of 8*ptrSize")
// These tests are written to avoid any possible overflow.
off := uintptr(shard) * rootBlockBytes
if off >= n0 {
- return
+ return 0
}
b := b0 + off
ptrmask := (*uint8)(add(unsafe.Pointer(ptrmask0), uintptr(shard)*(rootBlockBytes/(8*goarch.PtrSize))))
// Scan this shard.
scanblock(b, n, ptrmask, gcw, nil)
+ return int64(n)
}
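The shard arithmetic is easy to check with concrete numbers. A standalone sketch (the value of rootBlockBytes here is an assumption; the clamping mirrors the elided computation of n above):

package main

import "fmt"

const rootBlockBytes = 256 << 10 // assumed shard size

// shardSpan returns the offset and length of the shard'th shard of an
// n0-byte block, or (0, 0) if the shard is past the end. The returned
// length is also the scan work reported for that shard.
func shardSpan(n0 uintptr, shard int) (off, n uintptr) {
	off = uintptr(shard) * rootBlockBytes
	if off >= n0 {
		return 0, 0
	}
	n = n0 - off
	if n > rootBlockBytes {
		n = rootBlockBytes // interior shards are full-sized
	}
	return off, n
}

func main() {
	fmt.Println(shardSpan(600<<10, 0)) // 0 262144
	fmt.Println(shardSpan(600<<10, 2)) // 524288 90112 (final, partial shard)
	fmt.Println(shardSpan(600<<10, 3)) // 0 0 (past the end)
}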
// markrootFreeGStacks frees stacks of dead Gs.
// scanstack scans gp's stack, greying all pointers found on the stack.
//
+// For goexperiment.PacerRedesign:
+// Returns the amount of scan work performed, but doesn't update
+// gcController.stackScanWork or flush any credit. Any background credit produced
+// by this function should be flushed by its caller. scanstack itself can't
+// safely flush because it may result in trying to wake up a goroutine that
+// was just scanned, resulting in a self-deadlock.
+//
// scanstack will also shrink the stack if it is safe to do so. If it
// is not, it schedules a stack shrink for the next synchronous safe
// point.
//
//go:nowritebarrier
//go:systemstack
-func scanstack(gp *g, gcw *gcWork) {
+func scanstack(gp *g, gcw *gcWork) int64 {
if readgstatus(gp)&_Gscan == 0 {
print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
throw("scanstack - bad status")
print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
throw("mark - bad status")
case _Gdead:
- return
+ return 0
case _Grunning:
print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
throw("scanstack: goroutine not stopped")
throw("can't scan our own stack")
}
+ // stackSize is the amount of work we'll be reporting.
+ //
+ // We report the total stack size, which is more than we actually scan,

+ // because this number needs to line up with gcControllerState's
+ // stackScan and scannableStackSize fields.
+ //
+ // See the documentation on those fields for more information.
+ stackSize := gp.stack.hi - gp.stack.lo
+
if isShrinkStackSafe(gp) {
// Shrink the stack if not much of it is being used.
shrinkstack(gp)
if state.buf != nil || state.cbuf != nil || state.freeBuf != nil {
throw("remaining pointer buffers")
}
+ return int64(stackSize)
}
// Scan a stack frame: local variables and function arguments/results.
flushBgCredit := flags&gcDrainFlushBgCredit != 0
idle := flags&gcDrainIdle != 0
- initScanWork := gcw.scanWork
+ initScanWork := gcw.heapScanWork
// checkWork is the scan work before performing the next
// self-preempt check.
if job >= work.markrootJobs {
break
}
- markroot(gcw, job)
+ markroot(gcw, job, flushBgCredit)
if check != nil && check() {
goto done
}
// Flush background scan work credit to the global
// account if we've accumulated enough locally so
// mutator assists can draw on it.
- if gcw.scanWork >= gcCreditSlack {
- atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
+ if gcw.heapScanWork >= gcCreditSlack {
+ gcController.heapScanWork.Add(gcw.heapScanWork)
if flushBgCredit {
- gcFlushBgCredit(gcw.scanWork - initScanWork)
+ gcFlushBgCredit(gcw.heapScanWork - initScanWork)
initScanWork = 0
}
- checkWork -= gcw.scanWork
- gcw.scanWork = 0
+ checkWork -= gcw.heapScanWork
+ gcw.heapScanWork = 0
if checkWork <= 0 {
checkWork += drainCheckThreshold
done:
// Flush remaining scan work credit.
- if gcw.scanWork > 0 {
- atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
+ if gcw.heapScanWork > 0 {
+ gcController.heapScanWork.Add(gcw.heapScanWork)
if flushBgCredit {
- gcFlushBgCredit(gcw.scanWork - initScanWork)
+ gcFlushBgCredit(gcw.heapScanWork - initScanWork)
}
- gcw.scanWork = 0
+ gcw.heapScanWork = 0
}
}
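The batching protocol gcDrain uses can be reduced to a few lines. In the sketch below (hypothetical names; sync/atomic stands in for the runtime internals), local work accumulates until it crosses a slack threshold; the whole local balance is then published, but only the portion earned since the last flush is credited to the background pool, exactly like the initScanWork bookkeeping above.

package main

import (
	"fmt"
	"sync/atomic"
)

const creditSlack = 2000 // cf. gcCreditSlack

var (
	heapScanWork atomic.Int64 // cycle-wide counter
	bgCredit     atomic.Int64 // background credit pool
)

// drainSketch processes work items, flushing local credit in bounded
// batches the way gcDrain does with gcw.heapScanWork.
func drainSketch(items []int64, initLocal int64, flushBg bool) {
	local := initLocal    // pre-existing local work we must not re-credit
	initScanWork := local // cf. initScanWork in gcDrain
	for _, w := range items {
		local += w
		if local >= creditSlack {
			heapScanWork.Add(local)
			if flushBg {
				bgCredit.Add(local - initScanWork)
				initScanWork = 0 // only subtract it once
			}
			local = 0
		}
	}
	if local > 0 { // final flush, cf. the done: label above
		heapScanWork.Add(local)
		if flushBg {
			bgCredit.Add(local - initScanWork)
		}
	}
}

func main() {
	drainSketch([]int64{1500, 1500, 500}, 300, true)
	fmt.Println(heapScanWork.Load(), bgCredit.Load()) // 3800 3500
}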
// There may already be scan work on the gcw, which we don't
// want to claim was done by this call.
- workFlushed := -gcw.scanWork
+ workFlushed := -gcw.heapScanWork
gp := getg().m.curg
- for !gp.preempt && workFlushed+gcw.scanWork < scanWork {
+ for !gp.preempt && workFlushed+gcw.heapScanWork < scanWork {
// See gcDrain comment.
if work.full == 0 {
gcw.balance()
if b == 0 {
// Try to do a root job.
- //
- // TODO: Assists should get credit for this
- // work.
if work.markrootNext < work.markrootJobs {
job := atomic.Xadd(&work.markrootNext, +1) - 1
if job < work.markrootJobs {
- markroot(gcw, job)
+ work := markroot(gcw, job, false)
+ if goexperiment.PacerRedesign {
+ workFlushed += work
+ }
continue
}
}
scanobject(b, gcw)
// Flush background scan work credit.
- if gcw.scanWork >= gcCreditSlack {
- atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
- workFlushed += gcw.scanWork
- gcw.scanWork = 0
+ if gcw.heapScanWork >= gcCreditSlack {
+ gcController.heapScanWork.Add(gcw.heapScanWork)
+ workFlushed += gcw.heapScanWork
+ gcw.heapScanWork = 0
}
}
// here because this never flushes to bgScanCredit and
// gcw.dispose will flush any remaining work to scanWork.
- return workFlushed + gcw.scanWork
+ return workFlushed + gcw.heapScanWork
}
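The negative initialization of workFlushed is a small trick worth spelling out: starting the tally at minus the pre-existing local work means that work cancels out when the buffer is eventually flushed, so the function reports only work it performed itself. A minimal sketch (hypothetical names):

package main

import "fmt"

// countNewWork returns only the work performed by doWork, even though
// the local buffer already held unflushed work on entry.
func countNewWork(buffered int64, doWork func() int64) int64 {
	workFlushed := -buffered // pre-existing work cancels out on flush
	local := buffered
	local += doWork()
	// Flush: everything local is published, including the old balance...
	workFlushed += local
	// ...so workFlushed = local - buffered = newly performed work only.
	return workFlushed
}

func main() {
	fmt.Println(countNewWork(700, func() int64 { return 2500 })) // 2500
}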
// scanblock scans b as scanobject would, but using an explicit
// pointer bitmap instead of the heap bitmap.
//
// This is used to scan non-heap roots, so it does not update
-// gcw.bytesMarked or gcw.scanWork.
+// gcw.bytesMarked or gcw.heapScanWork.
//
// If stk != nil, possible stack pointers are also reported to stk.putPtr.
//go:nowritebarrier
}
}
gcw.bytesMarked += uint64(n)
- gcw.scanWork += int64(i)
+ gcw.heapScanWork += int64(i)
}
// scanConservative scans block [b, b+n) conservatively, treating any
gcw := &getg().m.p.ptr().gcw
gcw.bytesMarked += uint64(size)
- gcw.scanWork += int64(scanSize)
+ if !goexperiment.PacerRedesign {
+ // The old pacer counts newly allocated memory toward
+ // heapScanWork because heapScan is continuously updated
+ // throughout the GC cycle with newly allocated memory. However,
+ // these objects are never actually scanned, so we need
+ // to account for them in heapScanWork here, "faking" their work.
+ // Otherwise the pacer will think it's always behind, potentially
+ // by a large margin.
+ //
+ // The new pacer doesn't care about this because it ceases to update
+ // heapScan once a GC cycle starts, effectively snapshotting it.
+ gcw.heapScanWork += int64(scanSize)
+ }
}
// gcMarkTinyAllocs greys all active tiny alloc blocks.
import (
"internal/cpu"
+ "internal/goexperiment"
"runtime/internal/atomic"
"unsafe"
)
const (
// gcGoalUtilization is the goal CPU utilization for
// marking as a fraction of GOMAXPROCS.
- gcGoalUtilization = 0.30
+ gcGoalUtilization = goexperiment.PacerRedesignInt*gcBackgroundUtilization +
+ (1-goexperiment.PacerRedesignInt)*(gcBackgroundUtilization+0.05)
// gcBackgroundUtilization is the fixed CPU utilization for background
// marking. It must be <= gcGoalUtilization. The difference between
// gcGoalUtilization and gcBackgroundUtilization will be made up by
// mark assists. The scheduler will aim to use within 50% of this
// goal.
//
// Setting this to < gcGoalUtilization avoids saturating the trigger
// feedback controller when there are no assists, which allows it to
// better control CPU and heap growth. However, the larger the gap,
// the more mutator assists are expected to happen, which impact
// mutator latency.
+ //
+ // If goexperiment.PacerRedesign, the trigger feedback controller
+ // is replaced with an estimate of the mark/cons ratio that doesn't
+ // have the same saturation issues, so this is set equal to
+ // gcGoalUtilization.
gcBackgroundUtilization = 0.25
// gcCreditSlack is the amount of scan work credit that can
- // accumulate locally before updating gcController.scanWork and,
+ // accumulate locally before updating gcController.heapScanWork and,
// optionally, gcController.bgScanCredit. Lower values give a more
// accurate assist ratio and make it more likely that assists will
// successfully steal background credit. Higher values reduce memory
gcOverAssistWork = 64 << 10
// defaultHeapMinimum is the value of heapMinimum for GOGC==100.
- defaultHeapMinimum = 4 << 20
+ defaultHeapMinimum = goexperiment.PacerRedesignInt*(512<<10) +
+ (1-goexperiment.PacerRedesignInt)*(4<<20)
// scannableStackSizeSlack is the bytes of stack space allocated or freed
// that can accumulate on a P before updating gcController.stackSize.
// during mark termination for the next cycle's trigger.
//
// Protected by mheap_.lock or a STW.
+ //
+ // Used if !goexperiment.PacerRedesign.
triggerRatio float64
// trigger is the heap size that triggers marking.
// Protected by mheap_.lock or a STW.
trigger uint64
+ // consMark is the estimated per-CPU consMark ratio for the application.
+ //
+ // It represents the ratio between the application's allocation
+ // rate, as bytes allocated per CPU-time, and the GC's scan rate,
+ // as bytes scanned per CPU-time.
+ // The units of this ratio are (B / cpu-ns) / (B / cpu-ns).
+ //
+ // At a high level, this value is computed as the bytes of memory
+ // allocated (cons) per unit of scan work completed (mark) in a GC
+ // cycle, divided by the CPU time spent on each activity.
+ //
+ // Updated at the end of each GC cycle, in endCycle.
+ //
+ // For goexperiment.PacerRedesign.
+ consMark float64
+
+ // consMarkController holds the state for the mark-cons ratio
+ // estimation over time.
+ //
+ // Its purpose is to smooth out noisiness in the computation of
+ // consMark; see consMark for details.
+ //
+ // For goexperiment.PacerRedesign.
+ consMarkController piController
+
// heapGoal is the goal heapLive for when next GC ends.
// Set to ^uint64(0) if disabled.
//
// is the live heap (as counted by heapLive), but omitting
// no-scan objects and no-scan tails of objects.
//
- // Whenever this is updated, call this gcControllerState's
- // revise() method.
+ // For !goexperiment.PacerRedesign: Whenever this is updated,
+ // call this gcControllerState's revise() method. It is read
+ // and written atomically or with the world stopped.
//
- // Read and written atomically or with the world stopped.
+ // For goexperiment.PacerRedesign: This value is fixed at the
+ // start of a GC cycle, so during a GC cycle it is safe to
+ // read without atomics, and it represents the maximum scannable
+ // heap.
heapScan uint64
+ // lastHeapScan is the number of bytes of heap that were scanned
+ // last GC cycle. It is the same as heapMarked, but only
+ // includes the "scannable" parts of objects.
+ //
+ // Updated when the world is stopped.
+ lastHeapScan uint64
+
// stackScan is a snapshot of scannableStackSize taken at each GC
// STW pause and is used in pacing decisions.
//
// scannableStackSize is the amount of allocated goroutine stack space in
// use by goroutines.
//
+ // This number tracks allocated goroutine stack space rather than used
+ // goroutine stack space (i.e. what is actually scanned) because used
+ // goroutine stack space is much harder to measure cheaply. By using
+ // allocated space, we make an overestimate; this is OK, it's better
+ // to conservatively overcount than undercount.
+ //
// Read and updated atomically.
scannableStackSize uint64
// next mark termination.
heapMarked uint64
- // scanWork is the total scan work performed this cycle. This
- // is updated atomically during the cycle. Updates occur in
- // bounded batches, since it is both written and read
- // throughout the cycle. At the end of the cycle, this is how
+ // heapScanWork is the total heap scan work performed this cycle.
+ // stackScanWork is the total stack scan work performed this cycle.
+ // globalsScanWork is the total globals scan work performed this cycle.
+ //
+ // These are updated atomically during the cycle. Updates occur in
+ // bounded batches, since they are both written and read
+ // throughout the cycle. At the end of the cycle, heapScanWork is how
// much of the retained heap is scannable.
//
- // Currently this is the bytes of heap scanned. For most uses,
- // this is an opaque unit of work, but for estimation the
- // definition is important.
- scanWork int64
+ // Currently these are measured in bytes. For most uses, this is an
+ // opaque unit of work, but for estimation the definition is important.
+ //
+ // Note that stackScanWork includes all allocated space, not just the
+ // size of the stack itself, mirroring stackSize.
+ //
+ // For !goexperiment.PacerRedesign, stackScanWork and globalsScanWork
+ // are always zero.
+ heapScanWork atomic.Int64
+ stackScanWork atomic.Int64
+ globalsScanWork atomic.Int64
// bgScanCredit is the scan work credit accumulated by the
// concurrent background scan. This credit is accumulated by
func (c *gcControllerState) init(gcPercent int32) {
c.heapMinimum = defaultHeapMinimum
- // Set a reasonable initial GC trigger.
- c.triggerRatio = 7 / 8.0
+ if goexperiment.PacerRedesign {
+ c.consMarkController = piController{
+ // Tuned first via the Ziegler-Nichols process in simulation,
+ // then the integral time was manually tuned against real-world
+ // applications to deal with noisiness in the measured cons/mark
+ // ratio.
+ kp: 0.9,
+ ti: 4.0,
+
+ // An update is done once per GC cycle.
+ period: 1,
+
+ // Set a high reset time in GC cycles.
+ // This is inversely proportional to the rate at which we
+ // accumulate error from clipping. By making this very high
+ // we make the accumulation slow. In general, clipping is
+ // OK in our situation, hence the choice.
+ //
+ // Tune this if we get unintended effects from clipping for
+ // a long time.
+ tt: 1000,
+ min: -1000,
+ max: 1000,
+ }
+ } else {
+ // Set a reasonable initial GC trigger.
+ c.triggerRatio = 7 / 8.0
- // Fake a heapMarked value so it looks like a trigger at
- // heapMinimum is the appropriate growth from heapMarked.
- // This will go into computing the initial GC goal.
- c.heapMarked = uint64(float64(c.heapMinimum) / (1 + c.triggerRatio))
+ // Fake a heapMarked value so it looks like a trigger at
+ // heapMinimum is the appropriate growth from heapMarked.
+ // This will go into computing the initial GC goal.
+ c.heapMarked = uint64(float64(c.heapMinimum) / (1 + c.triggerRatio))
+ }
// This will also compute and set the GC trigger and goal.
c.setGCPercent(gcPercent)
// for a new GC cycle. The caller must hold worldsema and the world
// must be stopped.
func (c *gcControllerState) startCycle(markStartTime int64, procs int) {
- c.scanWork = 0
+ c.heapScanWork.Store(0)
+ c.stackScanWork.Store(0)
+ c.globalsScanWork.Store(0)
c.bgScanCredit = 0
c.assistTime = 0
c.dedicatedMarkTime = 0
// GOGC. Assist is proportional to this distance, so enforce a
// minimum distance, even if it means going over the GOGC goal
// by a tiny bit.
- if c.heapGoal < c.heapLive+1024*1024 {
- c.heapGoal = c.heapLive + 1024*1024
+ if goexperiment.PacerRedesign {
+ if c.heapGoal < c.heapLive+64<<10 {
+ c.heapGoal = c.heapLive + 64<<10
+ }
+ } else {
+ if c.heapGoal < c.heapLive+1<<20 {
+ c.heapGoal = c.heapLive + 1<<20
+ }
}
// Compute the background mark utilization goal. In general,
}
live := atomic.Load64(&c.heapLive)
scan := atomic.Load64(&c.heapScan)
- work := atomic.Loadint64(&c.scanWork)
+ work := c.heapScanWork.Load() + c.stackScanWork.Load() + c.globalsScanWork.Load()
// Assume we're under the soft goal. Pace GC to complete at
// heapGoal assuming the heap is in steady-state.
heapGoal := int64(atomic.Load64(&c.heapGoal))
- // Compute the expected scan work remaining.
- //
- // This is estimated based on the expected
- // steady-state scannable heap. For example, with
- // GOGC=100, only half of the scannable heap is
- // expected to be live, so that's what we target.
- //
- // (This is a float calculation to avoid overflowing on
- // 100*heapScan.)
- scanWorkExpected := int64(float64(scan) * 100 / float64(100+gcPercent))
-
- if int64(live) > heapGoal || work > scanWorkExpected {
- // We're past the soft goal, or we've already done more scan
- // work than we expected. Pace GC so that in the worst case it
- // will complete by the hard goal.
- const maxOvershoot = 1.1
- heapGoal = int64(float64(heapGoal) * maxOvershoot)
-
- // Compute the upper bound on the scan work remaining.
- scanWorkExpected = int64(scan)
+ var scanWorkExpected int64
+ if goexperiment.PacerRedesign {
+ // The expected scan work is computed as the amount of bytes scanned last
+ // GC cycle, plus our estimate of stacks and globals work for this cycle.
+ scanWorkExpected = int64(c.lastHeapScan + c.stackScan + c.globalsScan)
+
+ // maxScanWork is a worst-case estimate of the amount of scan work that
+ // needs to be performed in this GC cycle. Specifically, it represents
+ // the case where *all* scannable memory turns out to be live.
+ maxScanWork := int64(scan + c.stackScan + c.globalsScan)
+ if work > scanWorkExpected {
+ // We've already done more scan work than expected. Because our expectation
+ // is based on a steady-state scannable heap size, we assume this means our
+ // heap is growing. Compute a new heap goal that takes our existing runway
+ // computed for scanWorkExpected and extrapolates it to maxScanWork, the worst-case
+ // scan work. This keeps our assist ratio stable if the heap continues to grow.
+ //
+ // The effect of this mechanism is that assists stay flat in the face of heap
+ // growths. It's OK to use more memory this cycle to scan all the live heap,
+ // because the next GC cycle is inevitably going to use *at least* that much
+ // memory anyway.
+ heapGoal = int64(float64(heapGoal-int64(c.trigger))/float64(scanWorkExpected)*float64(maxScanWork)) + int64(c.trigger)
+ scanWorkExpected = maxScanWork
+
+ // hardGoal is a hard limit on the amount that we're willing to push back the
+ // heap goal, and that's twice the heap goal (i.e. if GOGC=100 and the heap and/or
+ // stacks and/or globals grow to twice their size, this limits the current GC cycle's
+ // growth to 4x the original live heap's size).
+ //
+ // This maintains the invariant that we use no more memory than the next GC cycle
+ // will anyway.
+ hardGoal := int64((1.0 + float64(gcPercent)/100.0) * float64(heapGoal))
+ if heapGoal > hardGoal {
+ heapGoal = hardGoal
+ }
+ }
+ if int64(live) > heapGoal {
+ // We're already past our heap goal, even the extrapolated one.
+ // Leave ourselves some extra runway, so in the worst case we
+ // finish by that point.
+ const maxOvershoot = 1.1
+ heapGoal = int64(float64(heapGoal) * maxOvershoot)
+
+ // Compute the upper bound on the scan work remaining.
+ scanWorkExpected = maxScanWork
+ }
+ } else {
+ // Compute the expected scan work remaining.
+ //
+ // This is estimated based on the expected
+ // steady-state scannable heap. For example, with
+ // GOGC=100, only half of the scannable heap is
+ // expected to be live, so that's what we target.
+ //
+ // (This is a float calculation to avoid overflowing on
+ // 100*heapScan.)
+ scanWorkExpected = int64(float64(scan) * 100 / float64(100+gcPercent))
+ if int64(live) > heapGoal || work > scanWorkExpected {
+ // We're past the soft goal, or we've already done more scan
+ // work than we expected. Pace GC so that in the worst case it
+ // will complete by the hard goal.
+ const maxOvershoot = 1.1
+ heapGoal = int64(float64(heapGoal) * maxOvershoot)
+
+ // Compute the upper bound on the scan work remaining.
+ scanWorkExpected = int64(scan)
+ }
}
// Compute the remaining scan work estimate.
c.assistBytesPerWork.Store(assistBytesPerWork)
}
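Concretely, the goal-stretching above can be exercised with assumed numbers in a standalone sketch (simplified signed types; the hard-goal clamp mirrors the code above):

package main

import "fmt"

// extrapolateGoal mirrors the goal-stretching arithmetic in revise
// under goexperiment.PacerRedesign (a sketch, not the runtime code).
func extrapolateGoal(heapGoal, trigger, scanWorkExpected, maxScanWork, gcPercent int64) int64 {
	// Keep the assist ratio stable by extending the runway
	// (heapGoal - trigger) in proportion to the worst-case work.
	goal := int64(float64(heapGoal-trigger)/float64(scanWorkExpected)*float64(maxScanWork)) + trigger
	// Never push the goal past what the next cycle would use anyway.
	hardGoal := int64((1.0 + float64(gcPercent)/100.0) * float64(heapGoal))
	if goal > hardGoal {
		goal = hardGoal
	}
	return goal
}

func main() {
	// 64 MiB goal and 48 MiB trigger: 16 MiB of runway for 32 MiB of
	// expected work. If worst-case work is 48 MiB, the runway grows 1.5x.
	fmt.Println(extrapolateGoal(64<<20, 48<<20, 32<<20, 48<<20, 100)) // 75497472 (72 MiB)
}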
-// endCycle computes the trigger ratio for the next cycle.
+// endCycle computes the trigger ratio (!goexperiment.PacerRedesign)
+// or the consMark estimate (goexperiment.PacerRedesign) for the next cycle.
+// Returns the trigger ratio if applicable, or 0 (goexperiment.PacerRedesign).
// userForced indicates whether the current GC cycle was forced
// by the application.
func (c *gcControllerState) endCycle(now int64, procs int, userForced bool) float64 {
// We'll be updating the heap goal soon.
gcController.lastHeapGoal = gcController.heapGoal
+ // Compute the duration of time for which assists were turned on.
+ assistDuration := now - c.markStartTime
+
+ // Assume background mark hit its utilization goal.
+ utilization := gcBackgroundUtilization
+ // Add assist utilization; avoid divide by zero.
+ if assistDuration > 0 {
+ utilization += float64(c.assistTime) / float64(assistDuration*int64(procs))
+ }
+
+ if goexperiment.PacerRedesign {
+ if c.heapLive <= c.trigger {
+ // Shouldn't happen, but let's be very safe about this in case the
+ // GC is somehow extremely short.
+ //
+ // In this case though, the only reasonable value for c.heapLive-c.trigger
+ // would be 0, which isn't really all that useful, i.e. the GC was so short
+ // that it didn't matter.
+ //
+ // Ignore this case and don't update anything.
+ return 0
+ }
+ idleUtilization := 0.0
+ if assistDuration > 0 {
+ idleUtilization = float64(c.idleMarkTime) / float64(assistDuration*int64(procs))
+ }
+ // Determine the cons/mark ratio.
+ //
+ // The units we want for the numerator and denominator are both B / cpu-ns.
+ // We get this by taking the bytes allocated or scanned, and divide by the amount of
+ // CPU time it took for those operations. For allocations, that CPU time is
+ //
+ // assistDuration * procs * (1 - utilization)
+ //
+ // Where utilization includes just background GC workers and assists. It does *not*
+ // include idle GC work time, because in theory the mutator is free to take that at
+ // any point.
+ //
+ // For scanning, that CPU time is
+ //
+ // assistDuration * procs * (utilization + idleUtilization)
+ //
+ // In this case, we *include* idle utilization, because that is additional CPU time
+ // the GC had available to it.
+ //
+ // In effect, idle GC time is sort of double-counted here, but it's very weird compared
+ // to other kinds of GC work because of how fluid it is: the mutator is
+ // *always* free to take it.
+ //
+ // So this calculation is really:
+ // ((heapLive-trigger) / (assistDuration * procs * (1-utilization))) /
+ // (scanWork / (assistDuration * procs * (utilization+idleUtilization)))
+ //
+ // Note that because we only care about the ratio, assistDuration and procs cancel out.
+ scanWork := c.heapScanWork.Load() + c.stackScanWork.Load() + c.globalsScanWork.Load()
+ currentConsMark := (float64(c.heapLive-c.trigger) * (utilization + idleUtilization)) /
+ (float64(scanWork) * (1 - utilization))
+
+ // Update cons/mark controller.
+ oldConsMark := c.consMark
+ c.consMark = c.consMarkController.next(c.consMark, currentConsMark)
+
+ if debug.gcpacertrace > 0 {
+ printlock()
+ print("pacer: ", int(utilization*100), "% CPU (", int(gcGoalUtilization*100), " exp.) for ")
+ print(c.heapScanWork.Load(), "+", c.stackScanWork.Load(), "+", c.globalsScanWork.Load(), " B work (", c.lastHeapScan+c.stackScan+c.globalsScan, " B exp.) ")
+ print("in ", c.trigger, " B -> ", c.heapLive, " B (∆goal ", int64(c.heapLive)-int64(c.heapGoal), ", cons/mark ", oldConsMark, ")")
+ println()
+ printunlock()
+ }
+ return 0
+ }
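With procs and assistDuration cancelled out, the ratio reduces to a single expression. A standalone worked example with assumed inputs:

package main

import "fmt"

// consMarkRatio computes the cons/mark estimate the way endCycle does
// under goexperiment.PacerRedesign (a sketch with assumed inputs).
func consMarkRatio(heapLive, trigger, scanWork uint64, utilization, idleUtilization float64) float64 {
	return (float64(heapLive-trigger) * (utilization + idleUtilization)) /
		(float64(scanWork) * (1 - utilization))
}

func main() {
	// 8 MiB allocated during the cycle, 24 MiB scanned; GC used 30% of
	// the CPU (25% background + 5% assists) plus 10% idle marking.
	r := consMarkRatio(56<<20, 48<<20, 24<<20, 0.30, 0.10)
	fmt.Printf("cons/mark = %.3f\n", r) // cons/mark = 0.190
}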
+
+ // !goexperiment.PacerRedesign below.
+
if userForced {
// Forced GC means this cycle didn't start at the
// trigger, so where it finished isn't good
// heap growth is the error.
goalGrowthRatio := c.effectiveGrowthRatio()
actualGrowthRatio := float64(c.heapLive)/float64(c.heapMarked) - 1
- assistDuration := now - c.markStartTime
-
- // Assume background mark hit its utilization goal.
- utilization := gcBackgroundUtilization
- // Add assist utilization; avoid divide by zero.
- if assistDuration > 0 {
- utilization += float64(c.assistTime) / float64(assistDuration*int64(procs))
- }
-
triggerError := goalGrowthRatio - c.triggerRatio - utilization/gcGoalUtilization*(actualGrowthRatio-c.triggerRatio)
// Finally, we adjust the trigger for next time by this error,
H_g := int64(float64(H_m_prev) * (1 + h_g))
u_a := utilization
u_g := gcGoalUtilization
- W_a := c.scanWork
+ W_a := c.heapScanWork.Load()
print("pacer: H_m_prev=", H_m_prev,
" h_t=", h_t, " H_T=", H_T,
" h_a=", h_a, " H_a=", H_a,
func (c *gcControllerState) resetLive(bytesMarked uint64) {
c.heapMarked = bytesMarked
c.heapLive = bytesMarked
- c.heapScan = uint64(c.scanWork)
+ c.heapScan = uint64(c.heapScanWork.Load())
+ c.lastHeapScan = uint64(c.heapScanWork.Load())
// heapLive was updated, so emit a trace event.
if trace.enabled {
traceHeapAlloc()
}
}
- if dHeapScan != 0 {
- atomic.Xadd64(&gcController.heapScan, dHeapScan)
+ // Only update heapScan in the new pacer redesign if we're not
+ // currently in a GC.
+ if !goexperiment.PacerRedesign || gcBlackenEnabled == 0 {
+ if dHeapScan != 0 {
+ atomic.Xadd64(&gcController.heapScan, dHeapScan)
+ }
}
if gcBlackenEnabled != 0 {
// gcController.heapLive and heapScan changed.
atomic.Xadd64(&c.globalsScan, amount)
}
-// commit sets the trigger ratio and updates everything
-// derived from it: the absolute trigger, the heap goal, mark pacing,
-// and sweep pacing.
+// commit recomputes all pacing parameters from scratch, namely
+// absolute trigger, the heap goal, mark pacing, and sweep pacing.
+//
+// If goexperiment.PacerRedesign is true, triggerRatio is ignored.
//
// This can be called any time. If GC is in the middle of a
// concurrent phase, it will adjust the pacing of that phase.
assertWorldStoppedOrLockHeld(&mheap_.lock)
}
+ if !goexperiment.PacerRedesign {
+ c.oldCommit(triggerRatio)
+ return
+ }
+
+ // Compute the next GC goal, which is when the allocated heap
+ // has grown by GOGC/100 over where it started the last cycle,
+ // plus additional runway for non-heap sources of GC work.
+ goal := ^uint64(0)
+ if c.gcPercent >= 0 {
+ goal = c.heapMarked + (c.heapMarked+atomic.Load64(&c.stackScan)+atomic.Load64(&c.globalsScan))*uint64(c.gcPercent)/100
+ }
+
+ // Don't trigger below the minimum heap size.
+ minTrigger := c.heapMinimum
+ if !isSweepDone() {
+ // Concurrent sweep happens in the heap growth
+ // from gcController.heapLive to trigger, so ensure
+ // that concurrent sweep has some heap growth
+ // in which to perform sweeping before we
+ // start the next GC cycle.
+ sweepMin := atomic.Load64(&c.heapLive) + sweepMinHeapDistance
+ if sweepMin > minTrigger {
+ minTrigger = sweepMin
+ }
+ }
+
+ // If we let the trigger go too low, then if the application
+ // is allocating very rapidly we might end up in a situation
+ // where we're allocating black during a nearly always-on GC.
+ // The result of this is a growing heap and ultimately an
+ // increase in RSS. By capping us at a point >0, we're essentially
+ // saying that we're OK using more CPU during the GC to prevent
+ // this growth in RSS.
+ //
+ // The current constant was chosen empirically: given a sufficiently
+ // fast/scalable allocator with 48 Ps that could drive the trigger ratio
+ // to <0.05, this constant causes applications to retain the same peak
+ // RSS compared to not having this allocator.
+ if triggerBound := uint64(0.7*float64(goal-c.heapMarked)) + c.heapMarked; minTrigger < triggerBound {
+ minTrigger = triggerBound
+ }
+
+ // For small heaps, set the max trigger point at 95% of the heap goal.
+ // This ensures we always have *some* headroom when the GC actually starts.
+ // For larger heaps, set the max trigger point at the goal, minus the
+ // minimum heap size.
+ // This choice follows from the fact that the minimum heap size is chosen
+ // to reflect the costs of a GC with no work to do. With a large heap but
+ // very little scan work to perform, this gives us exactly as much runway
+ // as we would need, in the worst case.
+ maxRunway := uint64(0.95 * float64(goal-c.heapMarked))
+ if largeHeapMaxRunway := goal - c.heapMinimum; goal > c.heapMinimum && maxRunway < largeHeapMaxRunway {
+ maxRunway = largeHeapMaxRunway
+ }
+ maxTrigger := maxRunway + c.heapMarked
+ if maxTrigger < minTrigger {
+ maxTrigger = minTrigger
+ }
+
+ // Compute the trigger by using our estimate of the cons/mark ratio.
+ //
+ // The idea is to take our expected scan work, and multiply it by
+ // the cons/mark ratio to determine how long it'll take to complete
+ // that scan work in terms of bytes allocated. This gives us our GC's
+ // runway.
+ //
+ // However, the cons/mark ratio is a ratio of rates per CPU-second, but
+ // here we care about the relative rates for some division of CPU
+ // resources among the mutator and the GC.
+ //
+ // To summarize, we have B / cpu-ns, and we want B / ns. We get that
+ // by multiplying by our desired division of CPU resources. We choose
+ // to express CPU resources as GOMAXPROCS*fraction. Note that because
+ // we're working with a ratio here, we can omit the number of CPU cores,
+ // because they'll appear in the numerator and denominator and cancel out.
+ // As a result, this is basically just "weighing" the cons/mark ratio by
+ // our desired division of resources.
+ //
+ // Furthermore, by setting the trigger so that CPU resources are divided
+ // this way, assuming that the cons/mark ratio is correct, we make that
+ // division a reality.
+ var trigger uint64
+ runway := uint64((c.consMark * (1 - gcGoalUtilization) / (gcGoalUtilization)) * float64(c.lastHeapScan+c.stackScan+c.globalsScan))
+ if runway > goal {
+ trigger = minTrigger
+ } else {
+ trigger = goal - runway
+ }
+ if trigger < minTrigger {
+ trigger = minTrigger
+ }
+ if trigger > maxTrigger {
+ trigger = maxTrigger
+ }
+ if trigger > goal {
+ goal = trigger
+ }
+
+ // Commit to the trigger and goal.
+ c.trigger = trigger
+ atomic.Store64(&c.heapGoal, goal)
+ if trace.enabled {
+ traceHeapGoal()
+ }
+
+ // Update mark pacing.
+ if gcphase != _GCoff {
+ c.revise()
+ }
+}
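The trigger computation, minus the bound-setting, fits in a short standalone sketch (assumed constant and inputs; the clamping mirrors the code above):

package main

import "fmt"

const goalUtilization = 0.25 // assumed, cf. gcGoalUtilization under the redesign

// triggerFromConsMark computes the trigger as commit does: expected scan
// work, weighted by the cons/mark ratio and the desired CPU split, gives
// the runway in bytes, which is subtracted from the goal and clamped.
func triggerFromConsMark(consMark float64, expectedScanWork, goal, minTrigger, maxTrigger uint64) uint64 {
	runway := uint64(consMark * (1 - goalUtilization) / goalUtilization * float64(expectedScanWork))
	var trigger uint64
	if runway > goal {
		trigger = minTrigger
	} else {
		trigger = goal - runway
	}
	if trigger < minTrigger {
		trigger = minTrigger
	}
	if trigger > maxTrigger {
		trigger = maxTrigger
	}
	return trigger
}

func main() {
	// A cons/mark of 0.19 and 40 MiB of expected scan work yields about
	// 23 MiB of runway before a 64 MiB goal.
	fmt.Println(triggerFromConsMark(0.19, 40<<20, 64<<20, 4<<20, 60<<20)) // 43201332
}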
+
+// oldCommit sets the trigger ratio and updates everything
+// derived from it: the absolute trigger, the heap goal, mark pacing,
+// and sweep pacing.
+//
+// This can be called any time. If GC is in the middle of a
+// concurrent phase, it will adjust the pacing of that phase.
+//
+// This depends on gcPercent, gcController.heapMarked, and
+// gcController.heapLive. These must be up to date.
+//
+// For !goexperiment.PacerRedesign.
+func (c *gcControllerState) oldCommit(triggerRatio float64) {
// Compute the next GC goal, which is when the allocated heap
// has grown by GOGC/100 over the heap marked by the last
// cycle.
}
return 100
}
+
+type piController struct {
+ kp float64 // Proportional constant.
+ ti float64 // Integral time constant.
+ tt float64 // Reset time in GC cycles.
+
+ // Period in GC cycles between updates.
+ period float64
+
+ min, max float64 // Output boundaries.
+
+ // PI controller state.
+
+ errIntegral float64 // Integral of the error from t=0 to now.
+}
+
+func (c *piController) next(input, setpoint float64) float64 {
+ // Compute the raw output value.
+ prop := c.kp * (setpoint - input)
+ rawOutput := prop + c.errIntegral
+
+ // Clamp rawOutput into output.
+ output := rawOutput
+ if output < c.min {
+ output = c.min
+ } else if output > c.max {
+ output = c.max
+ }
+
+ // Update the controller's state.
+ if c.ti != 0 && c.tt != 0 {
+ c.errIntegral += (c.kp*c.period/c.ti)*(setpoint-input) + (c.period/c.tt)*(output-rawOutput)
+ }
+ return output
+}
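Fed a constant measurement, the controller converges on it, with the integral term eventually carrying the whole output. This standalone demo reuses the controller verbatim, with the constants from init:

package main

import "fmt"

type piController struct {
	kp, ti, tt  float64
	period      float64
	min, max    float64
	errIntegral float64
}

func (c *piController) next(input, setpoint float64) float64 {
	prop := c.kp * (setpoint - input)
	rawOutput := prop + c.errIntegral
	output := rawOutput
	if output < c.min {
		output = c.min
	} else if output > c.max {
		output = c.max
	}
	if c.ti != 0 && c.tt != 0 {
		c.errIntegral += (c.kp*c.period/c.ti)*(setpoint-input) + (c.period/c.tt)*(output-rawOutput)
	}
	return output
}

func main() {
	c := piController{kp: 0.9, ti: 4.0, tt: 1000, period: 1, min: -1000, max: 1000}
	estimate := 0.0
	// One update per simulated GC cycle against a steady measurement.
	for i := 0; i < 100; i++ {
		estimate = c.next(estimate, 0.25)
	}
	fmt.Printf("%.3f\n", estimate) // 0.250: the output settles on the setpoint
}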
import (
"fmt"
+ "internal/goexperiment"
"math"
"math/rand"
. "runtime"
checker: func(t *testing.T, c []gcCycleResult) {
n := len(c)
if n >= 25 {
+ if goexperiment.PacerRedesign {
+ // For the pacer redesign, assert something even stronger: at this alloc/scan rate,
+ // it should be extremely close to the goal utilization.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, GCGoalUtilization, 0.005)
+ }
+
// Make sure the pacer settles into a non-degenerate state in at least 25 GC cycles.
assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.005)
assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05)
}
},
},
+ {
+ // Same as the steady-state case, but lots of stacks to scan relative to the heap size.
+ name: "SteadyBigStacks",
+ gcPercent: 100,
+ globalsBytes: 32 << 10,
+ nCores: 8,
+ allocRate: constant(132.0),
+ scanRate: constant(1024.0),
+ growthRate: constant(2.0).sum(ramp(-1.0, 12)),
+ scannableFrac: constant(1.0),
+ stackBytes: constant(2048).sum(ramp(128<<20, 8)),
+ length: 50,
+ checker: func(t *testing.T, c []gcCycleResult) {
+ // Check the same conditions as the steady-state case, except the old pacer can't
+ // really handle this well, so don't check the goal ratio for it.
+ n := len(c)
+ if n >= 25 {
+ if goexperiment.PacerRedesign {
+ // For the pacer redesign, assert something even stronger: at this alloc/scan rate,
+ // it should be extremely close to the goal utilization.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, GCGoalUtilization, 0.005)
+ assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05)
+ }
+
+ // Make sure the pacer settles into a non-degenerate state in at least 25 GC cycles.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.005)
+ }
+ },
+ },
+ {
+ // Same as the steady-state case, but lots of globals to scan relative to the heap size.
+ name: "SteadyBigGlobals",
+ gcPercent: 100,
+ globalsBytes: 128 << 20,
+ nCores: 8,
+ allocRate: constant(132.0),
+ scanRate: constant(1024.0),
+ growthRate: constant(2.0).sum(ramp(-1.0, 12)),
+ scannableFrac: constant(1.0),
+ stackBytes: constant(8192),
+ length: 50,
+ checker: func(t *testing.T, c []gcCycleResult) {
+ // Check the same conditions as the steady-state case, except the old pacer can't
+ // really handle this well, so don't check the goal ratio for it.
+ n := len(c)
+ if n >= 25 {
+ if goexperiment.PacerRedesign {
+ // For the pacer redesign, assert something even stronger: at this alloc/scan rate,
+ // it should be extremely close to the goal utilization.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, GCGoalUtilization, 0.005)
+ assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05)
+ }
+
+ // Make sure the pacer settles into a non-degenerate state in at least 25 GC cycles.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.005)
+ }
+ },
+ },
{
// This tests the GC pacer's response to a small change in allocation rate.
name: "StepAlloc",
}
},
},
+ {
+ // Tests the pacer for a high GOGC value with a large heap growth happening
+ // in the middle. The purpose of the large heap growth is to check whether GC
+ // utilization ends up sensitive to large changes in the live heap size.
+ name: "HighGOGC",
+ gcPercent: 1500,
+ globalsBytes: 32 << 10,
+ nCores: 8,
+ allocRate: random(7, 0x53).offset(165),
+ scanRate: constant(1024.0),
+ growthRate: constant(2.0).sum(ramp(-1.0, 12), random(0.01, 0x1), unit(14).delay(25)),
+ scannableFrac: constant(1.0),
+ stackBytes: constant(8192),
+ length: 50,
+ checker: func(t *testing.T, c []gcCycleResult) {
+ n := len(c)
+ if goexperiment.PacerRedesign && n > 12 {
+ if n == 26 {
+ // In the 26th cycle there's a heap growth. Overshoot is expected to maintain
+ // a stable utilization, but we should *never* overshoot more than GOGC of
+ // the next cycle.
+ assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.90, 15)
+ } else {
+ // Give a wider goal range here. With such a high GOGC value we're going to be
+ // forced to undershoot.
+ //
+ // TODO(mknyszek): Instead of placing a 0.95 limit on the trigger, make the limit
+ // based on absolute bytes, that's based somewhat in how the minimum heap size
+ // is determined.
+ assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.90, 1.05)
+ }
+
+ // Ensure utilization remains stable despite a growth in live heap size
+ // at GC #25. This test fails prior to the GC pacer redesign.
+ //
+ // Because GOGC is so large, we should also be really close to the goal utilization.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, GCGoalUtilization, GCGoalUtilization+0.03)
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.03)
+ }
+ },
+ },
{
// This test makes sure that in the face of a varying (in this case, oscillating) allocation
// rate, the pacer does a reasonably good job of staying abreast of the changes.
// 1. Utilization isn't varying _too_ much, and
// 2. The pacer is mostly keeping up with the goal.
assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05)
- assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.4)
+ if goexperiment.PacerRedesign {
+ assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.3)
+ } else {
+ // The old pacer is messier here, and needs a lot more tolerance.
+ assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.4)
+ }
}
},
},
// 1. Utilization isn't varying _too_ much, and
// 2. The pacer is mostly keeping up with the goal.
assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05)
- assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.4)
+ if goexperiment.PacerRedesign {
+ assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.3)
+ } else {
+ // The old pacer is messier here, and needs a lot more tolerance.
+ assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.4)
+ }
}
},
},
// Unlike the other tests, GC utilization here will vary more and tend higher.
// Just make sure it's not going too crazy.
assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.05)
- assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[11].gcUtilization, 0.07)
+ if goexperiment.PacerRedesign {
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[11].gcUtilization, 0.05)
+ } else {
+ // The old pacer is messier here, and needs a little more tolerance.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[11].gcUtilization, 0.07)
+ }
}
},
},