src/internal/trace/gc.go

   1 // Copyright 2017 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package trace
   6
   7 import (
   8         "container/heap"
   9         tracev2 "internal/trace/v2"
  10         "io"
  11         "math"
  12         "sort"
  13         "strings"
  14         "time"
  15 )
  16
  17 // MutatorUtil is a change in mutator utilization at a particular
  18 // time. Mutator utilization functions are represented as a
  19 // time-ordered []MutatorUtil.
  20 type MutatorUtil struct {
  21         Time int64
  22         // Util is the mean mutator utilization starting at Time. This
  23         // is in the range [0, 1].
  24         Util float64
  25 }
  26
  27 // UtilFlags controls the behavior of MutatorUtilization.
  28 type UtilFlags int
  29
  30 const (
  31         // UtilSTW means utilization should account for STW events.
  32         // This includes non-GC STW events, which are typically user-requested.
  33         UtilSTW UtilFlags = 1 << iota
  34         // UtilBackground means utilization should account for
  35         // background mark workers.
  36         UtilBackground
  37         // UtilAssist means utilization should account for mark
  38         // assists.
  39         UtilAssist
  40         // UtilSweep means utilization should account for sweeping.
  41         UtilSweep
  42
  43         // UtilPerProc means each P should be given a separate
  44         // utilization function. Otherwise, there is a single function
  45         // and each P is given a fraction of the utilization.
  46         UtilPerProc
  47 )
  48
  49 // MutatorUtilization returns a set of mutator utilization functions
  50 // for the given trace. Each function will always end with 0
  51 // utilization. The bounds of each function are implicit in the first
  52 // and last event; outside of these bounds each function is undefined.
  53 //
  54 // If the UtilPerProc flag is not given, this always returns a single
  55 // utilization function. Otherwise, it returns one function per P.
  56 func MutatorUtilization(events []*Event, flags UtilFlags) [][]MutatorUtil {
  57         if len(events) == 0 {
  58                 return nil
  59         }
  60
  61         type perP struct {
  62                 // gc > 0 indicates that GC is active on this P.
  63                 gc int
  64                 // series the logical series number for this P. This
  65                 // is necessary because Ps may be removed and then
  66                 // re-added, and then the new P needs a new series.
  67                 series int
  68         }
  69         ps := []perP{}
  70         stw := 0
  71
  72         out := [][]MutatorUtil{}
  73         assists := map[uint64]bool{}
  74         block := map[uint64]*Event{}
  75         bgMark := map[uint64]bool{}
  76
  77         for _, ev := range events {
  78                 switch ev.Type {
  79                 case EvGomaxprocs:
  80                         gomaxprocs := int(ev.Args[0])
  81                         if len(ps) > gomaxprocs {
  82                                 if flags&UtilPerProc != 0 {
  83                                         // End each P's series.
  84                                         for _, p := range ps[gomaxprocs:] {
  85                                                 out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, 0})
  86                                         }
  87                                 }
  88                                 ps = ps[:gomaxprocs]
  89                         }
  90                         for len(ps) < gomaxprocs {
  91                                 // Start new P's series.
  92                                 series := 0
  93                                 if flags&UtilPerProc != 0 || len(out) == 0 {
  94                                         series = len(out)
  95                                         out = append(out, []MutatorUtil{{ev.Ts, 1}})
  96                                 }
  97                                 ps = append(ps, perP{series: series})
  98                         }
  99                 case EvSTWStart:
 100                         if flags&UtilSTW != 0 {
 101                                 stw++
 102                         }
 103                 case EvSTWDone:
 104                         if flags&UtilSTW != 0 {
 105                                 stw--
 106                         }
 107                 case EvGCMarkAssistStart:
 108                         if flags&UtilAssist != 0 {
 109                                 ps[ev.P].gc++
 110                                 assists[ev.G] = true
 111                         }
 112                 case EvGCMarkAssistDone:
 113                         if flags&UtilAssist != 0 {
 114                                 ps[ev.P].gc--
 115                                 delete(assists, ev.G)
 116                         }
 117                 case EvGCSweepStart:
 118                         if flags&UtilSweep != 0 {
 119                                 ps[ev.P].gc++
 120                         }
 121                 case EvGCSweepDone:
 122                         if flags&UtilSweep != 0 {
 123                                 ps[ev.P].gc--
 124                         }
 125                 case EvGoStartLabel:
 126                         if flags&UtilBackground != 0 && strings.HasPrefix(ev.SArgs[0], "GC ") && ev.SArgs[0] != "GC (idle)" {
 127                                 // Background mark worker.
 128                                 //
 129                                 // If we're in per-proc mode, we don't
 130                                 // count dedicated workers because
 131                                 // they kick all of the goroutines off
 132                                 // that P, so don't directly
 133                                 // contribute to goroutine latency.
 134                                 if !(flags&UtilPerProc != 0 && ev.SArgs[0] == "GC (dedicated)") {
 135                                         bgMark[ev.G] = true
 136                                         ps[ev.P].gc++
 137                                 }
 138                         }
 139                         fallthrough
 140                 case EvGoStart:
 141                         if assists[ev.G] {
 142                                 // Unblocked during assist.
 143                                 ps[ev.P].gc++
 144                         }
 145                         block[ev.G] = ev.Link
 146                 default:
 147                         if ev != block[ev.G] {
 148                                 continue
 149                         }
 150
 151                         if assists[ev.G] {
 152                                 // Blocked during assist.
 153                                 ps[ev.P].gc--
 154                         }
 155                         if bgMark[ev.G] {
 156                                 // Background mark worker done.
 157                                 ps[ev.P].gc--
 158                                 delete(bgMark, ev.G)
 159                         }
 160                         delete(block, ev.G)
 161                 }
 162
 163                 if flags&UtilPerProc == 0 {
 164                         // Compute the current average utilization.
 165                         if len(ps) == 0 {
 166                                 continue
 167                         }
 168                         gcPs := 0
 169                         if stw > 0 {
 170                                 gcPs = len(ps)
 171                         } else {
 172                                 for i := range ps {
 173                                         if ps[i].gc > 0 {
 174                                                 gcPs++
 175                                         }
 176                                 }
 177                         }
 178                         mu := MutatorUtil{ev.Ts, 1 - float64(gcPs)/float64(len(ps))}
 179
 180                         // Record the utilization change. (Since
 181                         // len(ps) == len(out), we know len(out) > 0.)
 182                         out[0] = addUtil(out[0], mu)
 183                 } else {
 184                         // Check for per-P utilization changes.
 185                         for i := range ps {
 186                                 p := &ps[i]
 187                                 util := 1.0
 188                                 if stw > 0 || p.gc > 0 {
 189                                         util = 0.0
 190                                 }
 191                                 out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, util})
 192                         }
 193                 }
 194         }
 195
 196         // Add final 0 utilization event to any remaining series. This
 197         // is important to mark the end of the trace. The exact value
 198         // shouldn't matter since no window should extend beyond this,
 199         // but using 0 is symmetric with the start of the trace.
 200         mu := MutatorUtil{events[len(events)-1].Ts, 0}
 201         for i := range ps {
 202                 out[ps[i].series] = addUtil(out[ps[i].series], mu)
 203         }
 204         return out
 205 }
 206
 207 // MutatorUtilizationV2 returns a set of mutator utilization functions
 208 // for the given v2 trace, passed as an io.Reader. Each function will
 209 // always end with 0 utilization. The bounds of each function are implicit
 210 // in the first and last event; outside of these bounds each function is
 211 // undefined.
 212 //
 213 // If the UtilPerProc flag is not given, this always returns a single
 214 // utilization function. Otherwise, it returns one function per P.
 215 func MutatorUtilizationV2(trace io.Reader, flags UtilFlags) ([][]MutatorUtil, error) {
 216         // Create a reader.
 217         r, err := tracev2.NewReader(trace)
 218         if err != nil {
 219                 return nil, err
 220         }
 221
 222         // Set up a bunch of analysis state.
 223         type perP struct {
 224                 // gc > 0 indicates that GC is active on this P.
 225                 gc int
 226                 // series the logical series number for this P. This
 227                 // is necessary because Ps may be removed and then
 228                 // re-added, and then the new P needs a new series.
 229                 series int
 230         }
 231         type procsCount struct {
 232                 // time at which procs changed.
 233                 time int64
 234                 // n is the number of procs at that point.
 235                 n int
 236         }
 237         out := [][]MutatorUtil{}
 238         stw := 0
 239         ps := []perP{}
 240         inGC := make(map[tracev2.GoID]bool)
 241         states := make(map[tracev2.GoID]tracev2.GoState)
 242         bgMark := make(map[tracev2.GoID]bool)
 243         procs := []procsCount{}
 244         seenSync := false
 245
 246         // Helpers.
 247         handleSTW := func(r tracev2.Range) bool {
 248                 return flags&UtilSTW != 0 && isGCSTW(r)
 249         }
 250         handleMarkAssist := func(r tracev2.Range) bool {
 251                 return flags&UtilAssist != 0 && isGCMarkAssist(r)
 252         }
 253         handleSweep := func(r tracev2.Range) bool {
 254                 return flags&UtilSweep != 0 && isGCSweep(r)
 255         }
 256
 257         // Iterate through the trace, tracking mutator utilization.
 258         var lastEv tracev2.Event
 259         for {
 260                 // Read a single event.
 261                 ev, err := r.ReadEvent()
 262                 if err == io.EOF {
 263                         break
 264                 }
 265                 if err != nil {
 266                         return nil, err
 267                 }
 268                 lastEv = ev
 269
 270                 // Process the event.
 271                 switch ev.Kind() {
 272                 case tracev2.EventSync:
 273                         seenSync = true
 274                 case tracev2.EventMetric:
 275                         m := ev.Metric()
 276                         if m.Name != "/sched/gomaxprocs:threads" {
 277                                 break
 278                         }
 279                         gomaxprocs := int(m.Value.Uint64())
 280                         if len(ps) > gomaxprocs {
 281                                 if flags&UtilPerProc != 0 {
 282                                         // End each P's series.
 283                                         for _, p := range ps[gomaxprocs:] {
 284                                                 out[p.series] = addUtil(out[p.series], MutatorUtil{int64(ev.Time()), 0})
 285                                         }
 286                                 }
 287                                 ps = ps[:gomaxprocs]
 288                         }
 289                         for len(ps) < gomaxprocs {
 290                                 // Start new P's series.
 291                                 series := 0
 292                                 if flags&UtilPerProc != 0 || len(out) == 0 {
 293                                         series = len(out)
 294                                         out = append(out, []MutatorUtil{{int64(ev.Time()), 1}})
 295                                 }
 296                                 ps = append(ps, perP{series: series})
 297                         }
 298                         if len(procs) == 0 || gomaxprocs != procs[len(procs)-1].n {
 299                                 procs = append(procs, procsCount{time: int64(ev.Time()), n: gomaxprocs})
 300                         }
 301                 }
 302                 if len(ps) == 0 {
 303                         // We can't start doing any analysis until we see what GOMAXPROCS is.
 304                         // It will show up very early in the trace, but we need to be robust to
 305                         // something else being emitted beforehand.
 306                         continue
 307                 }
 308
 309                 switch ev.Kind() {
 310                 case tracev2.EventRangeActive:
 311                         if seenSync {
 312                                 // If we've seen a sync, then we can be sure we're not finding out about
 313                                 // something late; we have complete information after that point, and these
 314                                 // active events will just be redundant.
 315                                 break
 316                         }
 317                         // This range is active back to the start of the trace. We're failing to account
 318                         // for this since we just found out about it now. Fix up the mutator utilization.
 319                         //
 320                         // N.B. A trace can't start during a STW, so we don't handle it here.
 321                         r := ev.Range()
 322                         switch {
 323                         case handleMarkAssist(r):
 324                                 if !states[ev.Goroutine()].Executing() {
 325                                         // If the goroutine isn't executing, then the fact that it was in mark
 326                                         // assist doesn't actually count.
 327                                         break
 328                                 }
 329                                 // This G has been in a mark assist *and running on its P* since the start
 330                                 // of the trace.
 331                                 fallthrough
 332                         case handleSweep(r):
 333                                 // This P has been in sweep (or mark assist, from above) in the start of the trace.
 334                                 //
 335                                 // We don't need to do anything if UtilPerProc is set. If we get an event like
 336                                 // this for a running P, it must show up the first time a P is mentioned. Therefore,
 337                                 // this P won't actually have any MutatorUtils on its list yet.
 338                                 //
 339                                 // However, if UtilPerProc isn't set, then we probably have data from other procs
 340                                 // and from previous events. We need to fix that up.
 341                                 if flags&UtilPerProc != 0 {
 342                                         break
 343                                 }
 344                                 // Subtract out 1/gomaxprocs mutator utilization for all time periods
 345                                 // from the beginning of the trace until now.
 346                                 mi, pi := 0, 0
 347                                 for mi < len(out[0]) {
 348                                         if pi < len(procs)-1 && procs[pi+1].time < out[0][mi].Time {
 349                                                 pi++
 350                                                 continue
 351                                         }
 352                                         out[0][mi].Util -= float64(1) / float64(procs[pi].n)
 353                                         if out[0][mi].Util < 0 {
 354                                                 out[0][mi].Util = 0
 355                                         }
 356                                         mi++
 357                                 }
 358                         }
 359                         // After accounting for the portion we missed, this just acts like the
 360                         // beginning of a new range.
 361                         fallthrough
 362                 case tracev2.EventRangeBegin:
 363                         r := ev.Range()
 364                         if handleSTW(r) {
 365                                 stw++
 366                         } else if handleSweep(r) {
 367                                 ps[ev.Proc()].gc++
 368                         } else if handleMarkAssist(r) {
 369                                 ps[ev.Proc()].gc++
 370                                 if g := r.Scope.Goroutine(); g != tracev2.NoGoroutine {
 371                                         inGC[g] = true
 372                                 }
 373                         }
 374                 case tracev2.EventRangeEnd:
 375                         r := ev.Range()
 376                         if handleSTW(r) {
 377                                 stw--
 378                         } else if handleSweep(r) {
 379                                 ps[ev.Proc()].gc--
 380                         } else if handleMarkAssist(r) {
 381                                 ps[ev.Proc()].gc--
 382                                 if g := r.Scope.Goroutine(); g != tracev2.NoGoroutine {
 383                                         delete(inGC, g)
 384                                 }
 385                         }
 386                 case tracev2.EventStateTransition:
 387                         st := ev.StateTransition()
 388                         if st.Resource.Kind != tracev2.ResourceGoroutine {
 389                                 break
 390                         }
 391                         old, new := st.Goroutine()
 392                         g := st.Resource.Goroutine()
 393                         if inGC[g] || bgMark[g] {
 394                                 if !old.Executing() && new.Executing() {
 395                                         // Started running while doing GC things.
 396                                         ps[ev.Proc()].gc++
 397                                 } else if old.Executing() && !new.Executing() {
 398                                         // Stopped running while doing GC things.
 399                                         ps[ev.Proc()].gc--
 400                                 }
 401                         }
 402                         states[g] = new
 403                 case tracev2.EventLabel:
 404                         l := ev.Label()
 405                         if flags&UtilBackground != 0 && strings.HasPrefix(l.Label, "GC ") && l.Label != "GC (idle)" {
 406                                 // Background mark worker.
 407                                 //
 408                                 // If we're in per-proc mode, we don't
 409                                 // count dedicated workers because
 410                                 // they kick all of the goroutines off
 411                                 // that P, so don't directly
 412                                 // contribute to goroutine latency.
 413                                 if !(flags&UtilPerProc != 0 && l.Label == "GC (dedicated)") {
 414                                         bgMark[ev.Goroutine()] = true
 415                                         ps[ev.Proc()].gc++
 416                                 }
 417                         }
 418                 }
 419
 420                 if flags&UtilPerProc == 0 {
 421                         // Compute the current average utilization.
 422                         if len(ps) == 0 {
 423                                 continue
 424                         }
 425                         gcPs := 0
 426                         if stw > 0 {
 427                                 gcPs = len(ps)
 428                         } else {
 429                                 for i := range ps {
 430                                         if ps[i].gc > 0 {
 431                                                 gcPs++
 432                                         }
 433                                 }
 434                         }
 435                         mu := MutatorUtil{int64(ev.Time()), 1 - float64(gcPs)/float64(len(ps))}
 436
 437                         // Record the utilization change. (Since
 438                         // len(ps) == len(out), we know len(out) > 0.)
 439                         out[0] = addUtil(out[0], mu)
 440                 } else {
 441                         // Check for per-P utilization changes.
 442                         for i := range ps {
 443                                 p := &ps[i]
 444                                 util := 1.0
 445                                 if stw > 0 || p.gc > 0 {
 446                                         util = 0.0
 447                                 }
 448                                 out[p.series] = addUtil(out[p.series], MutatorUtil{int64(ev.Time()), util})
 449                         }
 450                 }
 451         }
 452
 453         // No events in the stream.
 454         if lastEv.Kind() == tracev2.EventBad {
 455                 return nil, nil
 456         }
 457
 458         // Add final 0 utilization event to any remaining series. This
 459         // is important to mark the end of the trace. The exact value
 460         // shouldn't matter since no window should extend beyond this,
 461         // but using 0 is symmetric with the start of the trace.
 462         mu := MutatorUtil{int64(lastEv.Time()), 0}
 463         for i := range ps {
 464                 out[ps[i].series] = addUtil(out[ps[i].series], mu)
 465         }
 466         return out, nil
 467 }
 468
 469 func addUtil(util []MutatorUtil, mu MutatorUtil) []MutatorUtil {
 470         if len(util) > 0 {
 471                 if mu.Util == util[len(util)-1].Util {
 472                         // No change.
 473                         return util
 474                 }
 475                 if mu.Time == util[len(util)-1].Time {
 476                         // Take the lowest utilization at a time stamp.
 477                         if mu.Util < util[len(util)-1].Util {
 478                                 util[len(util)-1] = mu
 479                         }
 480                         return util
 481                 }
 482         }
 483         return append(util, mu)
 484 }
 485
 486 // totalUtil is total utilization, measured in nanoseconds. This is a
 487 // separate type primarily to distinguish it from mean utilization,
 488 // which is also a float64.
 489 type totalUtil float64
 490
 491 func totalUtilOf(meanUtil float64, dur int64) totalUtil {
 492         return totalUtil(meanUtil * float64(dur))
 493 }
 494
 495 // mean returns the mean utilization over dur.
 496 func (u totalUtil) mean(dur time.Duration) float64 {
 497         return float64(u) / float64(dur)
 498 }
 499
 500 // An MMUCurve is the minimum mutator utilization curve across
 501 // multiple window sizes.
 502 type MMUCurve struct {
 503         series []mmuSeries
 504 }
 505
 506 type mmuSeries struct {
 507         util []MutatorUtil
 508         // sums[j] is the cumulative sum of util[:j].
 509         sums []totalUtil
 510         // bands summarizes util in non-overlapping bands of duration
 511         // bandDur.
 512         bands []mmuBand
 513         // bandDur is the duration of each band.
 514         bandDur int64
 515 }
 516
 517 type mmuBand struct {
 518         // minUtil is the minimum instantaneous mutator utilization in
 519         // this band.
 520         minUtil float64
 521         // cumUtil is the cumulative total mutator utilization between
 522         // time 0 and the left edge of this band.
 523         cumUtil totalUtil
 524
 525         // integrator is the integrator for the left edge of this
 526         // band.
 527         integrator integrator
 528 }
 529
 530 // NewMMUCurve returns an MMU curve for the given mutator utilization
 531 // function.
 532 func NewMMUCurve(utils [][]MutatorUtil) *MMUCurve {
 533         series := make([]mmuSeries, len(utils))
 534         for i, util := range utils {
 535                 series[i] = newMMUSeries(util)
 536         }
 537         return &MMUCurve{series}
 538 }
 539
 540 // bandsPerSeries is the number of bands to divide each series into.
 541 // This is only changed by tests.
 542 var bandsPerSeries = 1000
 543
 544 func newMMUSeries(util []MutatorUtil) mmuSeries {
 545         // Compute cumulative sum.
 546         sums := make([]totalUtil, len(util))
 547         var prev MutatorUtil
 548         var sum totalUtil
 549         for j, u := range util {
 550                 sum += totalUtilOf(prev.Util, u.Time-prev.Time)
 551                 sums[j] = sum
 552                 prev = u
 553         }
 554
 555         // Divide the utilization curve up into equal size
 556         // non-overlapping "bands" and compute a summary for each of
 557         // these bands.
 558         //
 559         // Compute the duration of each band.
 560         numBands := bandsPerSeries
 561         if numBands > len(util) {
 562                 // There's no point in having lots of bands if there
 563                 // aren't many events.
 564                 numBands = len(util)
 565         }
 566         dur := util[len(util)-1].Time - util[0].Time
 567         bandDur := (dur + int64(numBands) - 1) / int64(numBands)
 568         if bandDur < 1 {
 569                 bandDur = 1
 570         }
 571         // Compute the bands. There are numBands+1 bands in order to
 572         // record the final cumulative sum.
 573         bands := make([]mmuBand, numBands+1)
 574         s := mmuSeries{util, sums, bands, bandDur}
 575         leftSum := integrator{&s, 0}
 576         for i := range bands {
 577                 startTime, endTime := s.bandTime(i)
 578                 cumUtil := leftSum.advance(startTime)
 579                 predIdx := leftSum.pos
 580                 minUtil := 1.0
 581                 for i := predIdx; i < len(util) && util[i].Time < endTime; i++ {
 582                         minUtil = math.Min(minUtil, util[i].Util)
 583                 }
 584                 bands[i] = mmuBand{minUtil, cumUtil, leftSum}
 585         }
 586
 587         return s
 588 }
 589
 590 func (s *mmuSeries) bandTime(i int) (start, end int64) {
 591         start = int64(i)*s.bandDur + s.util[0].Time
 592         end = start + s.bandDur
 593         return
 594 }
 595
 596 type bandUtil struct {
 597         // Utilization series index
 598         series int
 599         // Band index
 600         i int
 601         // Lower bound of mutator utilization for all windows
 602         // with a left edge in this band.
 603         utilBound float64
 604 }
 605
 606 type bandUtilHeap []bandUtil
 607
 608 func (h bandUtilHeap) Len() int {
 609         return len(h)
 610 }
 611
 612 func (h bandUtilHeap) Less(i, j int) bool {
 613         return h[i].utilBound < h[j].utilBound
 614 }
 615
 616 func (h bandUtilHeap) Swap(i, j int) {
 617         h[i], h[j] = h[j], h[i]
 618 }
 619
 620 func (h *bandUtilHeap) Push(x any) {
 621         *h = append(*h, x.(bandUtil))
 622 }
 623
 624 func (h *bandUtilHeap) Pop() any {
 625         x := (*h)[len(*h)-1]
 626         *h = (*h)[:len(*h)-1]
 627         return x
 628 }
 629
 630 // UtilWindow is a specific window at Time.
 631 type UtilWindow struct {
 632         Time int64
 633         // MutatorUtil is the mean mutator utilization in this window.
 634         MutatorUtil float64
 635 }
 636
 637 type utilHeap []UtilWindow
 638
 639 func (h utilHeap) Len() int {
 640         return len(h)
 641 }
 642
 643 func (h utilHeap) Less(i, j int) bool {
 644         if h[i].MutatorUtil != h[j].MutatorUtil {
 645                 return h[i].MutatorUtil > h[j].MutatorUtil
 646         }
 647         return h[i].Time > h[j].Time
 648 }
 649
 650 func (h utilHeap) Swap(i, j int) {
 651         h[i], h[j] = h[j], h[i]
 652 }
 653
 654 func (h *utilHeap) Push(x any) {
 655         *h = append(*h, x.(UtilWindow))
 656 }
 657
 658 func (h *utilHeap) Pop() any {
 659         x := (*h)[len(*h)-1]
 660         *h = (*h)[:len(*h)-1]
 661         return x
 662 }
 663
 664 // An accumulator takes a windowed mutator utilization function and
 665 // tracks various statistics for that function.
 666 type accumulator struct {
 667         mmu float64
 668
 669         // bound is the mutator utilization bound where adding any
 670         // mutator utilization above this bound cannot affect the
 671         // accumulated statistics.
 672         bound float64
 673
 674         // Worst N window tracking
 675         nWorst int
 676         wHeap  utilHeap
 677
 678         // Mutator utilization distribution tracking
 679         mud *mud
 680         // preciseMass is the distribution mass that must be precise
 681         // before accumulation is stopped.
 682         preciseMass float64
 683         // lastTime and lastMU are the previous point added to the
 684         // windowed mutator utilization function.
 685         lastTime int64
 686         lastMU   float64
 687 }
 688
 689 // resetTime declares a discontinuity in the windowed mutator
 690 // utilization function by resetting the current time.
 691 func (acc *accumulator) resetTime() {
 692         // This only matters for distribution collection, since that's
 693         // the only thing that depends on the progression of the
 694         // windowed mutator utilization function.
 695         acc.lastTime = math.MaxInt64
 696 }
 697
 698 // addMU adds a point to the windowed mutator utilization function at
 699 // (time, mu). This must be called for monotonically increasing values
 700 // of time.
 701 //
 702 // It returns true if further calls to addMU would be pointless.
 703 func (acc *accumulator) addMU(time int64, mu float64, window time.Duration) bool {
 704         if mu < acc.mmu {
 705                 acc.mmu = mu
 706         }
 707         acc.bound = acc.mmu
 708
 709         if acc.nWorst == 0 {
 710                 // If the minimum has reached zero, it can't go any
 711                 // lower, so we can stop early.
 712                 return mu == 0
 713         }
 714
 715         // Consider adding this window to the n worst.
 716         if len(acc.wHeap) < acc.nWorst || mu < acc.wHeap[0].MutatorUtil {
 717                 // This window is lower than the K'th worst window.
 718                 //
 719                 // Check if there's any overlapping window
 720                 // already in the heap and keep whichever is
 721                 // worse.
 722                 for i, ui := range acc.wHeap {
 723                         if time+int64(window) > ui.Time && ui.Time+int64(window) > time {
 724                                 if ui.MutatorUtil <= mu {
 725                                         // Keep the first window.
 726                                         goto keep
 727                                 } else {
 728                                         // Replace it with this window.
 729                                         heap.Remove(&acc.wHeap, i)
 730                                         break
 731                                 }
 732                         }
 733                 }
 734
 735                 heap.Push(&acc.wHeap, UtilWindow{time, mu})
 736                 if len(acc.wHeap) > acc.nWorst {
 737                         heap.Pop(&acc.wHeap)
 738                 }
 739         keep:
 740         }
 741
 742         if len(acc.wHeap) < acc.nWorst {
 743                 // We don't have N windows yet, so keep accumulating.
 744                 acc.bound = 1.0
 745         } else {
 746                 // Anything above the least worst window has no effect.
 747                 acc.bound = math.Max(acc.bound, acc.wHeap[0].MutatorUtil)
 748         }
 749
 750         if acc.mud != nil {
 751                 if acc.lastTime != math.MaxInt64 {
 752                         // Update distribution.
 753                         acc.mud.add(acc.lastMU, mu, float64(time-acc.lastTime))
 754                 }
 755                 acc.lastTime, acc.lastMU = time, mu
 756                 if _, mudBound, ok := acc.mud.approxInvCumulativeSum(); ok {
 757                         acc.bound = math.Max(acc.bound, mudBound)
 758                 } else {
 759                         // We haven't accumulated enough total precise
 760                         // mass yet to even reach our goal, so keep
 761                         // accumulating.
 762                         acc.bound = 1
 763                 }
 764                 // It's not worth checking percentiles every time, so
 765                 // just keep accumulating this band.
 766                 return false
 767         }
 768
 769         // If we've found enough 0 utilizations, we can stop immediately.
 770         return len(acc.wHeap) == acc.nWorst && acc.wHeap[0].MutatorUtil == 0
 771 }
 772
 773 // MMU returns the minimum mutator utilization for the given time
 774 // window. This is the minimum utilization for all windows of this
 775 // duration across the execution. The returned value is in the range
 776 // [0, 1].
 777 func (c *MMUCurve) MMU(window time.Duration) (mmu float64) {
 778         acc := accumulator{mmu: 1.0, bound: 1.0}
 779         c.mmu(window, &acc)
 780         return acc.mmu
 781 }
 782
 783 // Examples returns n specific examples of the lowest mutator
 784 // utilization for the given window size. The returned windows will be
 785 // disjoint (otherwise there would be a huge number of
 786 // mostly-overlapping windows at the single lowest point). There are
 787 // no guarantees on which set of disjoint windows this returns.
 788 func (c *MMUCurve) Examples(window time.Duration, n int) (worst []UtilWindow) {
 789         acc := accumulator{mmu: 1.0, bound: 1.0, nWorst: n}
 790         c.mmu(window, &acc)
 791         sort.Sort(sort.Reverse(acc.wHeap))
 792         return ([]UtilWindow)(acc.wHeap)
 793 }
 794
 795 // MUD returns mutator utilization distribution quantiles for the
 796 // given window size.
 797 //
 798 // The mutator utilization distribution is the distribution of mean
 799 // mutator utilization across all windows of the given window size in
 800 // the trace.
 801 //
 802 // The minimum mutator utilization is the minimum (0th percentile) of
 803 // this distribution. (However, if only the minimum is desired, it's
 804 // more efficient to use the MMU method.)
 805 func (c *MMUCurve) MUD(window time.Duration, quantiles []float64) []float64 {
 806         if len(quantiles) == 0 {
 807                 return []float64{}
 808         }
 809
 810         // Each unrefined band contributes a known total mass to the
 811         // distribution (bandDur except at the end), but in an unknown
 812         // way. However, we know that all the mass it contributes must
 813         // be at or above its worst-case mean mutator utilization.
 814         //
 815         // Hence, we refine bands until the highest desired
 816         // distribution quantile is less than the next worst-case mean
 817         // mutator utilization. At this point, all further
 818         // contributions to the distribution must be beyond the
 819         // desired quantile and hence cannot affect it.
 820         //
 821         // First, find the highest desired distribution quantile.
 822         maxQ := quantiles[0]
 823         for _, q := range quantiles {
 824                 if q > maxQ {
 825                         maxQ = q
 826                 }
 827         }
 828         // The distribution's mass is in units of time (it's not
 829         // normalized because this would make it more annoying to
 830         // account for future contributions of unrefined bands). The
 831         // total final mass will be the duration of the trace itself
 832         // minus the window size. Using this, we can compute the mass
 833         // corresponding to quantile maxQ.
 834         var duration int64
 835         for _, s := range c.series {
 836                 duration1 := s.util[len(s.util)-1].Time - s.util[0].Time
 837                 if duration1 >= int64(window) {
 838                         duration += duration1 - int64(window)
 839                 }
 840         }
 841         qMass := float64(duration) * maxQ
 842
 843         // Accumulate the MUD until we have precise information for
 844         // everything to the left of qMass.
 845         acc := accumulator{mmu: 1.0, bound: 1.0, preciseMass: qMass, mud: new(mud)}
 846         acc.mud.setTrackMass(qMass)
 847         c.mmu(window, &acc)
 848
 849         // Evaluate the quantiles on the accumulated MUD.
 850         out := make([]float64, len(quantiles))
 851         for i := range out {
 852                 mu, _ := acc.mud.invCumulativeSum(float64(duration) * quantiles[i])
 853                 if math.IsNaN(mu) {
 854                         // There are a few legitimate ways this can
 855                         // happen:
 856                         //
 857                         // 1. If the window is the full trace
 858                         // duration, then the windowed MU function is
 859                         // only defined at a single point, so the MU
 860                         // distribution is not well-defined.
 861                         //
 862                         // 2. If there are no events, then the MU
 863                         // distribution has no mass.
 864                         //
 865                         // Either way, all of the quantiles will have
 866                         // converged toward the MMU at this point.
 867                         mu = acc.mmu
 868                 }
 869                 out[i] = mu
 870         }
 871         return out
 872 }
 873
 874 func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) {
 875         if window <= 0 {
 876                 acc.mmu = 0
 877                 return
 878         }
 879
 880         var bandU bandUtilHeap
 881         windows := make([]time.Duration, len(c.series))
 882         for i, s := range c.series {
 883                 windows[i] = window
 884                 if max := time.Duration(s.util[len(s.util)-1].Time - s.util[0].Time); window > max {
 885                         windows[i] = max
 886                 }
 887
 888                 bandU1 := bandUtilHeap(s.mkBandUtil(i, windows[i]))
 889                 if bandU == nil {
 890                         bandU = bandU1
 891                 } else {
 892                         bandU = append(bandU, bandU1...)
 893                 }
 894         }
 895
 896         // Process bands from lowest utilization bound to highest.
 897         heap.Init(&bandU)
 898
 899         // Refine each band into a precise window and MMU until
 900         // refining the next lowest band can no longer affect the MMU
 901         // or windows.
 902         for len(bandU) > 0 && bandU[0].utilBound < acc.bound {
 903                 i := bandU[0].series
 904                 c.series[i].bandMMU(bandU[0].i, windows[i], acc)
 905                 heap.Pop(&bandU)
 906         }
 907 }
 908
 909 func (c *mmuSeries) mkBandUtil(series int, window time.Duration) []bandUtil {
 910         // For each band, compute the worst-possible total mutator
 911         // utilization for all windows that start in that band.
 912
 913         // minBands is the minimum number of bands a window can span
 914         // and maxBands is the maximum number of bands a window can
 915         // span in any alignment.
 916         minBands := int((int64(window) + c.bandDur - 1) / c.bandDur)
 917         maxBands := int((int64(window) + 2*(c.bandDur-1)) / c.bandDur)
 918         if window > 1 && maxBands < 2 {
 919                 panic("maxBands < 2")
 920         }
 921         tailDur := int64(window) % c.bandDur
 922         nUtil := len(c.bands) - maxBands + 1
 923         if nUtil < 0 {
 924                 nUtil = 0
 925         }
 926         bandU := make([]bandUtil, nUtil)
 927         for i := range bandU {
 928                 // To compute the worst-case MU, we assume the minimum
 929                 // for any bands that are only partially overlapped by
 930                 // some window and the mean for any bands that are
 931                 // completely covered by all windows.
 932                 var util totalUtil
 933
 934                 // Find the lowest and second lowest of the partial
 935                 // bands.
 936                 l := c.bands[i].minUtil
 937                 r1 := c.bands[i+minBands-1].minUtil
 938                 r2 := c.bands[i+maxBands-1].minUtil
 939                 minBand := math.Min(l, math.Min(r1, r2))
 940                 // Assume the worst window maximally overlaps the
 941                 // worst minimum and then the rest overlaps the second
 942                 // worst minimum.
 943                 if minBands == 1 {
 944                         util += totalUtilOf(minBand, int64(window))
 945                 } else {
 946                         util += totalUtilOf(minBand, c.bandDur)
 947                         midBand := 0.0
 948                         switch {
 949                         case minBand == l:
 950                                 midBand = math.Min(r1, r2)
 951                         case minBand == r1:
 952                                 midBand = math.Min(l, r2)
 953                         case minBand == r2:
 954                                 midBand = math.Min(l, r1)
 955                         }
 956                         util += totalUtilOf(midBand, tailDur)
 957                 }
 958
 959                 // Add the total mean MU of bands that are completely
 960                 // overlapped by all windows.
 961                 if minBands > 2 {
 962                         util += c.bands[i+minBands-1].cumUtil - c.bands[i+1].cumUtil
 963                 }
 964
 965                 bandU[i] = bandUtil{series, i, util.mean(window)}
 966         }
 967
 968         return bandU
 969 }
 970
 971 // bandMMU computes the precise minimum mutator utilization for
 972 // windows with a left edge in band bandIdx.
 973 func (c *mmuSeries) bandMMU(bandIdx int, window time.Duration, acc *accumulator) {
 974         util := c.util
 975
 976         // We think of the mutator utilization over time as the
 977         // box-filtered utilization function, which we call the
 978         // "windowed mutator utilization function". The resulting
 979         // function is continuous and piecewise linear (unless
 980         // window==0, which we handle elsewhere), where the boundaries
 981         // between segments occur when either edge of the window
 982         // encounters a change in the instantaneous mutator
 983         // utilization function. Hence, the minimum of this function
 984         // will always occur when one of the edges of the window
 985         // aligns with a utilization change, so these are the only
 986         // points we need to consider.
 987         //
 988         // We compute the mutator utilization function incrementally
 989         // by tracking the integral from t=0 to the left edge of the
 990         // window and to the right edge of the window.
 991         left := c.bands[bandIdx].integrator
 992         right := left
 993         time, endTime := c.bandTime(bandIdx)
 994         if utilEnd := util[len(util)-1].Time - int64(window); utilEnd < endTime {
 995                 endTime = utilEnd
 996         }
 997         acc.resetTime()
 998         for {
 999                 // Advance edges to time and time+window.
1000                 mu := (right.advance(time+int64(window)) - left.advance(time)).mean(window)
1001                 if acc.addMU(time, mu, window) {
1002                         break
1003                 }
1004                 if time == endTime {
1005                         break
1006                 }
1007
1008                 // The maximum slope of the windowed mutator
1009                 // utilization function is 1/window, so we can always
1010                 // advance the time by at least (mu - mmu) * window
1011                 // without dropping below mmu.
1012                 minTime := time + int64((mu-acc.bound)*float64(window))
1013
1014                 // Advance the window to the next time where either
1015                 // the left or right edge of the window encounters a
1016                 // change in the utilization curve.
1017                 if t1, t2 := left.next(time), right.next(time+int64(window))-int64(window); t1 < t2 {
1018                         time = t1
1019                 } else {
1020                         time = t2
1021                 }
1022                 if time < minTime {
1023                         time = minTime
1024                 }
1025                 if time >= endTime {
1026                         // For MMUs we could stop here, but for MUDs
1027                         // it's important that we span the entire
1028                         // band.
1029                         time = endTime
1030                 }
1031         }
1032 }
1033
1034 // An integrator tracks a position in a utilization function and
1035 // integrates it.
1036 type integrator struct {
1037         u *mmuSeries
1038         // pos is the index in u.util of the current time's non-strict
1039         // predecessor.
1040         pos int
1041 }
1042
1043 // advance returns the integral of the utilization function from 0 to
1044 // time. advance must be called on monotonically increasing values of
1045 // times.
1046 func (in *integrator) advance(time int64) totalUtil {
1047         util, pos := in.u.util, in.pos
1048         // Advance pos until pos+1 is time's strict successor (making
1049         // pos time's non-strict predecessor).
1050         //
1051         // Very often, this will be nearby, so we optimize that case,
1052         // but it may be arbitrarily far away, so we handled that
1053         // efficiently, too.
1054         const maxSeq = 8
1055         if pos+maxSeq < len(util) && util[pos+maxSeq].Time > time {
1056                 // Nearby. Use a linear scan.
1057                 for pos+1 < len(util) && util[pos+1].Time <= time {
1058                         pos++
1059                 }
1060         } else {
1061                 // Far. Binary search for time's strict successor.
1062                 l, r := pos, len(util)
1063                 for l < r {
1064                         h := int(uint(l+r) >> 1)
1065                         if util[h].Time <= time {
1066                                 l = h + 1
1067                         } else {
1068                                 r = h
1069                         }
1070                 }
1071                 pos = l - 1 // Non-strict predecessor.
1072         }
1073         in.pos = pos
1074         var partial totalUtil
1075         if time != util[pos].Time {
1076                 partial = totalUtilOf(util[pos].Util, time-util[pos].Time)
1077         }
1078         return in.u.sums[pos] + partial
1079 }
1080
1081 // next returns the smallest time t' > time of a change in the
1082 // utilization function.
1083 func (in *integrator) next(time int64) int64 {
1084         for _, u := range in.u.util[in.pos:] {
1085                 if u.Time > time {
1086                         return u.Time
1087                 }
1088         }
1089         return 1<<63 - 1
1090 }
1091
1092 func isGCSTW(r tracev2.Range) bool {
1093         return strings.HasPrefix(r.Name, "stop-the-world") && strings.Contains(r.Name, "GC")
1094 }
1095
1096 func isGCMarkAssist(r tracev2.Range) bool {
1097         return r.Name == "GC mark assist"
1098 }
1099
1100 func isGCSweep(r tracev2.Range) bool {
1101         return r.Name == "GC incremental sweep"
1102 }