1 // Copyright 2017 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
15 // MutatorUtil is a change in mutator utilization at a particular
16 // time. Mutator utilization functions are represented as a
17 // time-ordered []MutatorUtil.
18 type MutatorUtil struct {
20 // Util is the mean mutator utilization starting at Time. This
21 // is in the range [0, 1].
25 // UtilFlags controls the behavior of MutatorUtilization.
29 // UtilSTW means utilization should account for STW events.
30 // This includes non-GC STW events, which are typically user-requested.
31 UtilSTW UtilFlags = 1 << iota
32 // UtilBackground means utilization should account for
33 // background mark workers.
35 // UtilAssist means utilization should account for mark
38 // UtilSweep means utilization should account for sweeping.
41 // UtilPerProc means each P should be given a separate
42 // utilization function. Otherwise, there is a single function
43 // and each P is given a fraction of the utilization.
47 // MutatorUtilization returns a set of mutator utilization functions
48 // for the given trace. Each function will always end with 0
49 // utilization. The bounds of each function are implicit in the first
50 // and last event; outside of these bounds each function is undefined.
52 // If the UtilPerProc flag is not given, this always returns a single
53 // utilization function. Otherwise, it returns one function per P.
54 func MutatorUtilization(events []*Event, flags UtilFlags) [][]MutatorUtil {
60 // gc > 0 indicates that GC is active on this P.
62 // series the logical series number for this P. This
63 // is necessary because Ps may be removed and then
64 // re-added, and then the new P needs a new series.
70 out := [][]MutatorUtil{}
71 assists := map[uint64]bool{}
72 block := map[uint64]*Event{}
73 bgMark := map[uint64]bool{}
75 for _, ev := range events {
78 gomaxprocs := int(ev.Args[0])
79 if len(ps) > gomaxprocs {
80 if flags&UtilPerProc != 0 {
81 // End each P's series.
82 for _, p := range ps[gomaxprocs:] {
83 out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, 0})
88 for len(ps) < gomaxprocs {
89 // Start new P's series.
91 if flags&UtilPerProc != 0 || len(out) == 0 {
93 out = append(out, []MutatorUtil{{ev.Ts, 1}})
95 ps = append(ps, perP{series: series})
98 if flags&UtilSTW != 0 {
102 if flags&UtilSTW != 0 {
105 case EvGCMarkAssistStart:
106 if flags&UtilAssist != 0 {
110 case EvGCMarkAssistDone:
111 if flags&UtilAssist != 0 {
113 delete(assists, ev.G)
116 if flags&UtilSweep != 0 {
120 if flags&UtilSweep != 0 {
124 if flags&UtilBackground != 0 && strings.HasPrefix(ev.SArgs[0], "GC ") && ev.SArgs[0] != "GC (idle)" {
125 // Background mark worker.
127 // If we're in per-proc mode, we don't
128 // count dedicated workers because
129 // they kick all of the goroutines off
130 // that P, so don't directly
131 // contribute to goroutine latency.
132 if !(flags&UtilPerProc != 0 && ev.SArgs[0] == "GC (dedicated)") {
140 // Unblocked during assist.
143 block[ev.G] = ev.Link
145 if ev != block[ev.G] {
150 // Blocked during assist.
154 // Background mark worker done.
161 if flags&UtilPerProc == 0 {
162 // Compute the current average utilization.
176 mu := MutatorUtil{ev.Ts, 1 - float64(gcPs)/float64(len(ps))}
178 // Record the utilization change. (Since
179 // len(ps) == len(out), we know len(out) > 0.)
180 out[0] = addUtil(out[0], mu)
182 // Check for per-P utilization changes.
186 if stw > 0 || p.gc > 0 {
189 out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, util})
194 // Add final 0 utilization event to any remaining series. This
195 // is important to mark the end of the trace. The exact value
196 // shouldn't matter since no window should extend beyond this,
197 // but using 0 is symmetric with the start of the trace.
198 mu := MutatorUtil{events[len(events)-1].Ts, 0}
200 out[ps[i].series] = addUtil(out[ps[i].series], mu)
205 func addUtil(util []MutatorUtil, mu MutatorUtil) []MutatorUtil {
207 if mu.Util == util[len(util)-1].Util {
211 if mu.Time == util[len(util)-1].Time {
212 // Take the lowest utilization at a time stamp.
213 if mu.Util < util[len(util)-1].Util {
214 util[len(util)-1] = mu
219 return append(util, mu)
222 // totalUtil is total utilization, measured in nanoseconds. This is a
223 // separate type primarily to distinguish it from mean utilization,
224 // which is also a float64.
225 type totalUtil float64
227 func totalUtilOf(meanUtil float64, dur int64) totalUtil {
228 return totalUtil(meanUtil * float64(dur))
231 // mean returns the mean utilization over dur.
232 func (u totalUtil) mean(dur time.Duration) float64 {
233 return float64(u) / float64(dur)
236 // An MMUCurve is the minimum mutator utilization curve across
237 // multiple window sizes.
238 type MMUCurve struct {
242 type mmuSeries struct {
244 // sums[j] is the cumulative sum of util[:j].
246 // bands summarizes util in non-overlapping bands of duration
249 // bandDur is the duration of each band.
253 type mmuBand struct {
254 // minUtil is the minimum instantaneous mutator utilization in
257 // cumUtil is the cumulative total mutator utilization between
258 // time 0 and the left edge of this band.
261 // integrator is the integrator for the left edge of this
263 integrator integrator
266 // NewMMUCurve returns an MMU curve for the given mutator utilization
268 func NewMMUCurve(utils [][]MutatorUtil) *MMUCurve {
269 series := make([]mmuSeries, len(utils))
270 for i, util := range utils {
271 series[i] = newMMUSeries(util)
273 return &MMUCurve{series}
276 // bandsPerSeries is the number of bands to divide each series into.
277 // This is only changed by tests.
278 var bandsPerSeries = 1000
280 func newMMUSeries(util []MutatorUtil) mmuSeries {
281 // Compute cumulative sum.
282 sums := make([]totalUtil, len(util))
285 for j, u := range util {
286 sum += totalUtilOf(prev.Util, u.Time-prev.Time)
291 // Divide the utilization curve up into equal size
292 // non-overlapping "bands" and compute a summary for each of
295 // Compute the duration of each band.
296 numBands := bandsPerSeries
297 if numBands > len(util) {
298 // There's no point in having lots of bands if there
299 // aren't many events.
302 dur := util[len(util)-1].Time - util[0].Time
303 bandDur := (dur + int64(numBands) - 1) / int64(numBands)
307 // Compute the bands. There are numBands+1 bands in order to
308 // record the final cumulative sum.
309 bands := make([]mmuBand, numBands+1)
310 s := mmuSeries{util, sums, bands, bandDur}
311 leftSum := integrator{&s, 0}
312 for i := range bands {
313 startTime, endTime := s.bandTime(i)
314 cumUtil := leftSum.advance(startTime)
315 predIdx := leftSum.pos
317 for i := predIdx; i < len(util) && util[i].Time < endTime; i++ {
318 minUtil = math.Min(minUtil, util[i].Util)
320 bands[i] = mmuBand{minUtil, cumUtil, leftSum}
326 func (s *mmuSeries) bandTime(i int) (start, end int64) {
327 start = int64(i)*s.bandDur + s.util[0].Time
328 end = start + s.bandDur
332 type bandUtil struct {
333 // Utilization series index
337 // Lower bound of mutator utilization for all windows
338 // with a left edge in this band.
342 type bandUtilHeap []bandUtil
344 func (h bandUtilHeap) Len() int {
348 func (h bandUtilHeap) Less(i, j int) bool {
349 return h[i].utilBound < h[j].utilBound
352 func (h bandUtilHeap) Swap(i, j int) {
353 h[i], h[j] = h[j], h[i]
356 func (h *bandUtilHeap) Push(x any) {
357 *h = append(*h, x.(bandUtil))
360 func (h *bandUtilHeap) Pop() any {
362 *h = (*h)[:len(*h)-1]
366 // UtilWindow is a specific window at Time.
367 type UtilWindow struct {
369 // MutatorUtil is the mean mutator utilization in this window.
373 type utilHeap []UtilWindow
375 func (h utilHeap) Len() int {
379 func (h utilHeap) Less(i, j int) bool {
380 if h[i].MutatorUtil != h[j].MutatorUtil {
381 return h[i].MutatorUtil > h[j].MutatorUtil
383 return h[i].Time > h[j].Time
386 func (h utilHeap) Swap(i, j int) {
387 h[i], h[j] = h[j], h[i]
390 func (h *utilHeap) Push(x any) {
391 *h = append(*h, x.(UtilWindow))
394 func (h *utilHeap) Pop() any {
396 *h = (*h)[:len(*h)-1]
400 // An accumulator takes a windowed mutator utilization function and
401 // tracks various statistics for that function.
402 type accumulator struct {
405 // bound is the mutator utilization bound where adding any
406 // mutator utilization above this bound cannot affect the
407 // accumulated statistics.
410 // Worst N window tracking
414 // Mutator utilization distribution tracking
416 // preciseMass is the distribution mass that must be precise
417 // before accumulation is stopped.
419 // lastTime and lastMU are the previous point added to the
420 // windowed mutator utilization function.
425 // resetTime declares a discontinuity in the windowed mutator
426 // utilization function by resetting the current time.
427 func (acc *accumulator) resetTime() {
428 // This only matters for distribution collection, since that's
429 // the only thing that depends on the progression of the
430 // windowed mutator utilization function.
431 acc.lastTime = math.MaxInt64
434 // addMU adds a point to the windowed mutator utilization function at
435 // (time, mu). This must be called for monotonically increasing values
438 // It returns true if further calls to addMU would be pointless.
439 func (acc *accumulator) addMU(time int64, mu float64, window time.Duration) bool {
446 // If the minimum has reached zero, it can't go any
447 // lower, so we can stop early.
451 // Consider adding this window to the n worst.
452 if len(acc.wHeap) < acc.nWorst || mu < acc.wHeap[0].MutatorUtil {
453 // This window is lower than the K'th worst window.
455 // Check if there's any overlapping window
456 // already in the heap and keep whichever is
458 for i, ui := range acc.wHeap {
459 if time+int64(window) > ui.Time && ui.Time+int64(window) > time {
460 if ui.MutatorUtil <= mu {
461 // Keep the first window.
464 // Replace it with this window.
465 heap.Remove(&acc.wHeap, i)
471 heap.Push(&acc.wHeap, UtilWindow{time, mu})
472 if len(acc.wHeap) > acc.nWorst {
478 if len(acc.wHeap) < acc.nWorst {
479 // We don't have N windows yet, so keep accumulating.
482 // Anything above the least worst window has no effect.
483 acc.bound = math.Max(acc.bound, acc.wHeap[0].MutatorUtil)
487 if acc.lastTime != math.MaxInt64 {
488 // Update distribution.
489 acc.mud.add(acc.lastMU, mu, float64(time-acc.lastTime))
491 acc.lastTime, acc.lastMU = time, mu
492 if _, mudBound, ok := acc.mud.approxInvCumulativeSum(); ok {
493 acc.bound = math.Max(acc.bound, mudBound)
495 // We haven't accumulated enough total precise
496 // mass yet to even reach our goal, so keep
500 // It's not worth checking percentiles every time, so
501 // just keep accumulating this band.
505 // If we've found enough 0 utilizations, we can stop immediately.
506 return len(acc.wHeap) == acc.nWorst && acc.wHeap[0].MutatorUtil == 0
509 // MMU returns the minimum mutator utilization for the given time
510 // window. This is the minimum utilization for all windows of this
511 // duration across the execution. The returned value is in the range
513 func (c *MMUCurve) MMU(window time.Duration) (mmu float64) {
514 acc := accumulator{mmu: 1.0, bound: 1.0}
519 // Examples returns n specific examples of the lowest mutator
520 // utilization for the given window size. The returned windows will be
521 // disjoint (otherwise there would be a huge number of
522 // mostly-overlapping windows at the single lowest point). There are
523 // no guarantees on which set of disjoint windows this returns.
524 func (c *MMUCurve) Examples(window time.Duration, n int) (worst []UtilWindow) {
525 acc := accumulator{mmu: 1.0, bound: 1.0, nWorst: n}
527 sort.Sort(sort.Reverse(acc.wHeap))
528 return ([]UtilWindow)(acc.wHeap)
531 // MUD returns mutator utilization distribution quantiles for the
532 // given window size.
534 // The mutator utilization distribution is the distribution of mean
535 // mutator utilization across all windows of the given window size in
538 // The minimum mutator utilization is the minimum (0th percentile) of
539 // this distribution. (However, if only the minimum is desired, it's
540 // more efficient to use the MMU method.)
541 func (c *MMUCurve) MUD(window time.Duration, quantiles []float64) []float64 {
542 if len(quantiles) == 0 {
546 // Each unrefined band contributes a known total mass to the
547 // distribution (bandDur except at the end), but in an unknown
548 // way. However, we know that all the mass it contributes must
549 // be at or above its worst-case mean mutator utilization.
551 // Hence, we refine bands until the highest desired
552 // distribution quantile is less than the next worst-case mean
553 // mutator utilization. At this point, all further
554 // contributions to the distribution must be beyond the
555 // desired quantile and hence cannot affect it.
557 // First, find the highest desired distribution quantile.
559 for _, q := range quantiles {
564 // The distribution's mass is in units of time (it's not
565 // normalized because this would make it more annoying to
566 // account for future contributions of unrefined bands). The
567 // total final mass will be the duration of the trace itself
568 // minus the window size. Using this, we can compute the mass
569 // corresponding to quantile maxQ.
571 for _, s := range c.series {
572 duration1 := s.util[len(s.util)-1].Time - s.util[0].Time
573 if duration1 >= int64(window) {
574 duration += duration1 - int64(window)
577 qMass := float64(duration) * maxQ
579 // Accumulate the MUD until we have precise information for
580 // everything to the left of qMass.
581 acc := accumulator{mmu: 1.0, bound: 1.0, preciseMass: qMass, mud: new(mud)}
582 acc.mud.setTrackMass(qMass)
585 // Evaluate the quantiles on the accumulated MUD.
586 out := make([]float64, len(quantiles))
588 mu, _ := acc.mud.invCumulativeSum(float64(duration) * quantiles[i])
590 // There are a few legitimate ways this can
593 // 1. If the window is the full trace
594 // duration, then the windowed MU function is
595 // only defined at a single point, so the MU
596 // distribution is not well-defined.
598 // 2. If there are no events, then the MU
599 // distribution has no mass.
601 // Either way, all of the quantiles will have
602 // converged toward the MMU at this point.
610 func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) {
616 var bandU bandUtilHeap
617 windows := make([]time.Duration, len(c.series))
618 for i, s := range c.series {
620 if max := time.Duration(s.util[len(s.util)-1].Time - s.util[0].Time); window > max {
624 bandU1 := bandUtilHeap(s.mkBandUtil(i, windows[i]))
628 bandU = append(bandU, bandU1...)
632 // Process bands from lowest utilization bound to highest.
635 // Refine each band into a precise window and MMU until
636 // refining the next lowest band can no longer affect the MMU
638 for len(bandU) > 0 && bandU[0].utilBound < acc.bound {
640 c.series[i].bandMMU(bandU[0].i, windows[i], acc)
645 func (c *mmuSeries) mkBandUtil(series int, window time.Duration) []bandUtil {
646 // For each band, compute the worst-possible total mutator
647 // utilization for all windows that start in that band.
649 // minBands is the minimum number of bands a window can span
650 // and maxBands is the maximum number of bands a window can
651 // span in any alignment.
652 minBands := int((int64(window) + c.bandDur - 1) / c.bandDur)
653 maxBands := int((int64(window) + 2*(c.bandDur-1)) / c.bandDur)
654 if window > 1 && maxBands < 2 {
655 panic("maxBands < 2")
657 tailDur := int64(window) % c.bandDur
658 nUtil := len(c.bands) - maxBands + 1
662 bandU := make([]bandUtil, nUtil)
663 for i := range bandU {
664 // To compute the worst-case MU, we assume the minimum
665 // for any bands that are only partially overlapped by
666 // some window and the mean for any bands that are
667 // completely covered by all windows.
670 // Find the lowest and second lowest of the partial
672 l := c.bands[i].minUtil
673 r1 := c.bands[i+minBands-1].minUtil
674 r2 := c.bands[i+maxBands-1].minUtil
675 minBand := math.Min(l, math.Min(r1, r2))
676 // Assume the worst window maximally overlaps the
677 // worst minimum and then the rest overlaps the second
680 util += totalUtilOf(minBand, int64(window))
682 util += totalUtilOf(minBand, c.bandDur)
686 midBand = math.Min(r1, r2)
688 midBand = math.Min(l, r2)
690 midBand = math.Min(l, r1)
692 util += totalUtilOf(midBand, tailDur)
695 // Add the total mean MU of bands that are completely
696 // overlapped by all windows.
698 util += c.bands[i+minBands-1].cumUtil - c.bands[i+1].cumUtil
701 bandU[i] = bandUtil{series, i, util.mean(window)}
707 // bandMMU computes the precise minimum mutator utilization for
708 // windows with a left edge in band bandIdx.
709 func (c *mmuSeries) bandMMU(bandIdx int, window time.Duration, acc *accumulator) {
712 // We think of the mutator utilization over time as the
713 // box-filtered utilization function, which we call the
714 // "windowed mutator utilization function". The resulting
715 // function is continuous and piecewise linear (unless
716 // window==0, which we handle elsewhere), where the boundaries
717 // between segments occur when either edge of the window
718 // encounters a change in the instantaneous mutator
719 // utilization function. Hence, the minimum of this function
720 // will always occur when one of the edges of the window
721 // aligns with a utilization change, so these are the only
722 // points we need to consider.
724 // We compute the mutator utilization function incrementally
725 // by tracking the integral from t=0 to the left edge of the
726 // window and to the right edge of the window.
727 left := c.bands[bandIdx].integrator
729 time, endTime := c.bandTime(bandIdx)
730 if utilEnd := util[len(util)-1].Time - int64(window); utilEnd < endTime {
735 // Advance edges to time and time+window.
736 mu := (right.advance(time+int64(window)) - left.advance(time)).mean(window)
737 if acc.addMU(time, mu, window) {
744 // The maximum slope of the windowed mutator
745 // utilization function is 1/window, so we can always
746 // advance the time by at least (mu - mmu) * window
747 // without dropping below mmu.
748 minTime := time + int64((mu-acc.bound)*float64(window))
750 // Advance the window to the next time where either
751 // the left or right edge of the window encounters a
752 // change in the utilization curve.
753 if t1, t2 := left.next(time), right.next(time+int64(window))-int64(window); t1 < t2 {
762 // For MMUs we could stop here, but for MUDs
763 // it's important that we span the entire
770 // An integrator tracks a position in a utilization function and
772 type integrator struct {
774 // pos is the index in u.util of the current time's non-strict
779 // advance returns the integral of the utilization function from 0 to
780 // time. advance must be called on monotonically increasing values of
782 func (in *integrator) advance(time int64) totalUtil {
783 util, pos := in.u.util, in.pos
784 // Advance pos until pos+1 is time's strict successor (making
785 // pos time's non-strict predecessor).
787 // Very often, this will be nearby, so we optimize that case,
788 // but it may be arbitrarily far away, so we handled that
791 if pos+maxSeq < len(util) && util[pos+maxSeq].Time > time {
792 // Nearby. Use a linear scan.
793 for pos+1 < len(util) && util[pos+1].Time <= time {
797 // Far. Binary search for time's strict successor.
798 l, r := pos, len(util)
800 h := int(uint(l+r) >> 1)
801 if util[h].Time <= time {
807 pos = l - 1 // Non-strict predecessor.
810 var partial totalUtil
811 if time != util[pos].Time {
812 partial = totalUtilOf(util[pos].Util, time-util[pos].Time)
814 return in.u.sums[pos] + partial
817 // next returns the smallest time t' > time of a change in the
818 // utilization function.
819 func (in *integrator) next(time int64) int64 {
820 for _, u := range in.u.util[in.pos:] {