}
var Timediv = timediv
+
+type PIController struct {
+ piController
+}
+
+func NewPIController(kp, ti, tt, min, max float64) *PIController {
+ return &PIController{piController{
+ kp: kp,
+ ti: ti,
+ tt: tt,
+ min: min,
+ max: max,
+ }}
+}
+
+func (c *PIController) Next(input, setpoint, period float64) (float64, bool) {
+ return c.piController.next(input, setpoint, period)
+}
// For goexperiment.PacerRedesign.
consMarkController piController
+ _ uint32 // Padding for atomics on 32-bit platforms.
+
// heapGoal is the goal heapLive for when next GC ends.
// Set to ^uint64(0) if disabled.
//
currentConsMark := (float64(c.heapLive-c.trigger) * (utilization + idleUtilization)) /
(float64(scanWork) * (1 - utilization))
- // Update cons/mark controller.
- // Period for this is 1 GC cycle.
+ // Update cons/mark controller. The time period for this is 1 GC cycle.
+ //
+ // This use of a PI controller might seem strange. So, here's an explanation:
+ //
+ // currentConsMark represents the consMark we *should've* had to be perfectly
+ // on-target for this cycle. Given that we assume the next GC will be like this
+ // one in the steady-state, it stands to reason that we should just pick that
+ // as our next consMark. In practice, however, currentConsMark is too noisy:
+ // we're going to be wildly off-target in each GC cycle if we do that.
+ //
+ // What we do instead is make a long-term assumption: there is some steady-state
+ // consMark value, but it's obscured by noise. By constantly shooting for this
+ // noisy-but-perfect consMark value, the controller will bounce around a bit,
+ // but its average behavior, in aggregate, should be less noisy and closer to
+ // the true long-term consMark value, provided its tuned to be slightly overdamped.
+ var ok bool
oldConsMark := c.consMark
- c.consMark = c.consMarkController.next(c.consMark, currentConsMark, 1.0)
+ c.consMark, ok = c.consMarkController.next(c.consMark, currentConsMark, 1.0)
+ if !ok {
+ // The error spiraled out of control. This is incredibly unlikely seeing
+ // as this controller is essentially just a smoothing function, but it might
+ // mean that something went very wrong with how currentConsMark was calculated.
+ // Just reset consMark and keep going.
+ c.consMark = 0
+ }
if debug.gcpacertrace > 0 {
printlock()
print("pacer: ", int(utilization*100), "% CPU (", int(goal), " exp.) for ")
print(c.heapScanWork.Load(), "+", c.stackScanWork.Load(), "+", c.globalsScanWork.Load(), " B work (", c.lastHeapScan+c.stackScan+c.globalsScan, " B exp.) ")
print("in ", c.trigger, " B -> ", c.heapLive, " B (∆goal ", int64(c.heapLive)-int64(c.heapGoal), ", cons/mark ", oldConsMark, ")")
+ if !ok {
+ print("[controller reset]")
+ }
println()
printunlock()
}
// PI controller state.
errIntegral float64 // Integral of the error from t=0 to now.
+
+ // Error flags.
+ errOverflow bool // Set if errIntegral ever overflowed.
+ inputOverflow bool // Set if an operation with the input overflowed.
}
-func (c *piController) next(input, setpoint, period float64) float64 {
+// next provides a new sample to the controller.
+//
+// input is the sample, setpoint is the desired point, and period is how much
+// time (in whatever unit makes the most sense) has passed since the last sample.
+//
+// Returns a new value for the variable it's controlling, and whether the operation
+// completed successfully. One reason this might fail is if error has been growing
+// in an unbounded manner, to the point of overflow.
+//
+// In the specific case of an error overflow occurs, the errOverflow field will be
+// set and the rest of the controller's internal state will be fully reset.
+func (c *piController) next(input, setpoint, period float64) (float64, bool) {
// Compute the raw output value.
prop := c.kp * (setpoint - input)
rawOutput := prop + c.errIntegral
// Clamp rawOutput into output.
output := rawOutput
+ if isInf(output) || isNaN(output) {
+ // The input had a large enough magnitude that either it was already
+ // overflowed, or some operation with it overflowed.
+ // Set a flag and reset. That's the safest thing to do.
+ c.reset()
+ c.inputOverflow = true
+ return c.min, false
+ }
if output < c.min {
output = c.min
} else if output > c.max {
// Update the controller's state.
if c.ti != 0 && c.tt != 0 {
c.errIntegral += (c.kp*period/c.ti)*(setpoint-input) + (period/c.tt)*(output-rawOutput)
+ if isInf(c.errIntegral) || isNaN(c.errIntegral) {
+ // So much error has accumulated that we managed to overflow.
+ // The assumptions around the controller have likely broken down.
+ // Set a flag and reset. That's the safest thing to do.
+ c.reset()
+ c.errOverflow = true
+ return c.min, false
+ }
}
- return output
+ return output, true
+}
+
+// reset resets the controller state, except for controller error flags.
+func (c *piController) reset() {
+ c.errIntegral = 0
}
return v
}
}
+
+func FuzzPIController(f *testing.F) {
+ isNormal := func(x float64) bool {
+ return !math.IsInf(x, 0) && !math.IsNaN(x)
+ }
+ isPositive := func(x float64) bool {
+ return isNormal(x) && x > 0
+ }
+ // Seed with constants from controllers in the runtime.
+ // It's not critical that we keep these in sync, they're just
+ // reasonable seed inputs.
+ f.Add(0.3375, 3.2e6, 1e9, 0.001, 1000.0, 0.01)
+ f.Add(0.9, 4.0, 1000.0, -1000.0, 1000.0, 0.84)
+ f.Fuzz(func(t *testing.T, kp, ti, tt, min, max, setPoint float64) {
+ // Ignore uninteresting invalid parameters. These parameters
+ // are constant, so in practice surprising values will be documented
+ // or will be other otherwise immediately visible.
+ //
+ // We just want to make sure that given a non-Inf, non-NaN input,
+ // we always get a non-Inf, non-NaN output.
+ if !isPositive(kp) || !isPositive(ti) || !isPositive(tt) {
+ return
+ }
+ if !isNormal(min) || !isNormal(max) || min > max {
+ return
+ }
+ // Use a random source, but make it deterministic.
+ rs := rand.New(rand.NewSource(800))
+ randFloat64 := func() float64 {
+ return math.Float64frombits(rs.Uint64())
+ }
+ p := NewPIController(kp, ti, tt, min, max)
+ state := float64(0)
+ for i := 0; i < 100; i++ {
+ input := randFloat64()
+ // Ignore the "ok" parameter. We're just trying to break it.
+ // state is intentionally completely uncorrelated with the input.
+ var ok bool
+ state, ok = p.Next(input, setPoint, 1.0)
+ if !isNormal(state) {
+ t.Fatalf("got NaN or Inf result from controller: %f %v", state, ok)
+ }
+ }
+ })
+}
// Sleep/wait state of the background scavenger.
var scavenge struct {
- lock mutex
- g *g
- parked bool
- timer *timer
- sysmonWake uint32 // Set atomically.
+ lock mutex
+ g *g
+ parked bool
+ timer *timer
+ sysmonWake uint32 // Set atomically.
+ printControllerReset bool // Whether the scavenger is in cooldown.
}
// readyForScavenger signals sysmon to wake the scavenger because
max: 1000.0, // 1000:1
}
// It doesn't really matter what value we start at, but we can't be zero, because
- // that'll cause divide-by-zero issues.
- critSleepRatio := 0.001
+ // that'll cause divide-by-zero issues. Pick something conservative which we'll
+ // also use as a fallback.
+ const startingCritSleepRatio = 0.001
+ critSleepRatio := startingCritSleepRatio
+ // Duration left in nanoseconds during which we avoid using the controller and
+ // we hold critSleepRatio at a conservative value. Used if the controller's
+ // assumptions fail to hold.
+ controllerCooldown := int64(0)
for {
released := uintptr(0)
crit := float64(0)
// because of the additional overheads of using scavenged memory.
crit *= 1 + scavengeCostRatio
- // Go to sleep for our current sleepNS.
+ // Go to sleep based on how much time we spent doing work.
slept := scavengeSleep(int64(crit / critSleepRatio))
+ // Stop here if we're cooling down from the controller.
+ if controllerCooldown > 0 {
+ // crit and slept aren't exact measures of time, but it's OK to be a bit
+ // sloppy here. We're just hoping we're avoiding some transient bad behavior.
+ t := slept + int64(crit)
+ if t > controllerCooldown {
+ controllerCooldown = 0
+ } else {
+ controllerCooldown -= t
+ }
+ continue
+ }
+
// Calculate the CPU time spent.
//
// This may be slightly inaccurate with respect to GOMAXPROCS, but we're
cpuFraction := float64(crit) / ((float64(slept) + crit) * float64(gomaxprocs))
// Update the critSleepRatio, adjusting until we reach our ideal fraction.
- critSleepRatio = critSleepController.next(cpuFraction, idealFraction, float64(slept)+crit)
+ var ok bool
+ critSleepRatio, ok = critSleepController.next(cpuFraction, idealFraction, float64(slept)+crit)
+ if !ok {
+ // The core assumption of the controller, that we can get a proportional
+ // response, broke down. This may be transient, so temporarily switch to
+ // sleeping a fixed, conservative amount.
+ critSleepRatio = startingCritSleepRatio
+ controllerCooldown = 5e9 // 5 seconds.
+
+ // Signal the scav trace printer to output this.
+ lock(&scavenge.lock)
+ scavenge.printControllerReset = true
+ unlock(&scavenge.lock)
+ }
}
}
// released should be the amount of memory released since the last time this
// was called, and forced indicates whether the scavenge was forced by the
// application.
+//
+// scavenge.lock must be held.
func printScavTrace(gen uint32, released uintptr, forced bool) {
+ assertLockHeld(&scavenge.lock)
+
printlock()
print("scav ", gen, " ",
released>>10, " KiB work, ",
)
if forced {
print(" (forced)")
+ } else if scavenge.printControllerReset {
+ print(" [controller reset]")
+ scavenge.printControllerReset = false
}
println()
printunlock()