1 // Copyright 2023 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 tracev2 "internal/trace/v2"
14 // GoroutineSummary contains statistics and execution details of a single goroutine.
16 type GoroutineSummary struct {
18 Name string // A non-unique human-friendly identifier for the goroutine.
19 PC uint64 // The start PC of the goroutine.
20 CreationTime tracev2.Time // Timestamp of the first appearance in the trace.
21 StartTime tracev2.Time // Timestamp of the first time it started running. 0 if the goroutine never ran.
22 EndTime tracev2.Time // Timestamp of when the goroutine exited. 0 if the goroutine never exited.
24 // List of regions in the goroutine, sorted based on the start time.
25 Regions []*UserRegionSummary
27 // Statistics of execution time during the goroutine execution.
30 // goroutineSummary is state used just for computing this structure.
31 // It's dropped before being returned to the caller.
33 // More specifically, if it's nil, it indicates that this summary has
34 // already been finalized.
38 // UserRegionSummary represents a region and goroutine execution stats
39 // while the region was active. (For v2 traces.)
40 type UserRegionSummary struct {
44 // Region start event. Normally EventRegionBegin event or nil,
45 // but can be a state transition event from NotExist or Undetermined
46 // if the region is a synthetic region representing task inheritance
47 // from the parent goroutine.
50 // Region end event. Normally EventRegionEnd event or nil,
51 // but can be a state transition event to NotExist if the goroutine
52 // terminated without explicitly ending the region.
58 // GoroutineExecStats contains statistics about a goroutine's execution
59 // during a period of time.
60 type GoroutineExecStats struct {
61 ExecTime time.Duration
62 SchedWaitTime time.Duration
63 BlockTimeByReason map[string]time.Duration
64 SyscallTime time.Duration
65 SyscallBlockTime time.Duration
66 RangeTime map[string]time.Duration
67 TotalTime time.Duration
70 // sub returns the stats v-s.
71 func (s GoroutineExecStats) sub(v GoroutineExecStats) (r GoroutineExecStats) {
73 r.ExecTime -= v.ExecTime
74 r.SchedWaitTime -= v.SchedWaitTime
75 for reason := range s.BlockTimeByReason {
76 r.BlockTimeByReason[reason] -= v.BlockTimeByReason[reason]
78 r.SyscallTime -= v.SyscallTime
79 r.SyscallBlockTime -= v.SyscallBlockTime
80 r.TotalTime -= v.TotalTime
81 for name := range s.RangeTime {
82 r.RangeTime[name] -= v.RangeTime[name]
87 func (s GoroutineExecStats) clone() (r GoroutineExecStats) {
89 r.BlockTimeByReason = make(map[string]time.Duration)
90 for reason, dt := range s.BlockTimeByReason {
91 r.BlockTimeByReason[reason] = dt
93 r.RangeTime = make(map[string]time.Duration)
94 for name, dt := range s.RangeTime {
95 r.RangeTime[name] = dt
100 // snapshotStat returns the snapshot of the goroutine execution statistics.
101 // This is called as we process the ordered trace event stream. lastTs is used
102 // to process pending statistics if this is called before any goroutine end event.
103 func (g *GoroutineSummary) snapshotStat(lastTs tracev2.Time) (ret GoroutineExecStats) {
104 ret = g.GoroutineExecStats.clone()
106 if g.goroutineSummary == nil {
107 return ret // Already finalized; no pending state.
110 // Set the total time if necessary.
111 if g.TotalTime == 0 {
112 ret.TotalTime = lastTs.Sub(g.CreationTime)
115 // Add in time since lastTs.
116 if g.lastStartTime != 0 {
117 ret.ExecTime += lastTs.Sub(g.lastStartTime)
119 if g.lastRunnableTime != 0 {
120 ret.SchedWaitTime += lastTs.Sub(g.lastRunnableTime)
122 if g.lastBlockTime != 0 {
123 ret.BlockTimeByReason[g.lastBlockReason] += lastTs.Sub(g.lastBlockTime)
125 if g.lastSyscallTime != 0 {
126 ret.SyscallTime += lastTs.Sub(g.lastSyscallTime)
128 if g.lastSyscallBlockTime != 0 {
129 ret.SchedWaitTime += lastTs.Sub(g.lastSyscallBlockTime)
131 for name, ts := range g.lastRangeTime {
132 ret.RangeTime[name] += lastTs.Sub(ts)
137 // finalize is called when processing a goroutine end event or at
138 // the end of trace processing. This finalizes the execution stat
139 // and any active regions in the goroutine, in which case trigger is nil.
140 func (g *GoroutineSummary) finalize(lastTs tracev2.Time, trigger *tracev2.Event) {
142 g.EndTime = trigger.Time()
144 finalStat := g.snapshotStat(lastTs)
146 g.GoroutineExecStats = finalStat
148 // System goroutines are never part of regions, even though they
149 // "inherit" a task due to creation (EvGoCreate) from within a region.
150 // This may happen e.g. if the first GC is triggered within a region,
151 // starting the GC worker goroutines.
152 if !IsSystemGoroutine(g.Name) {
153 for _, s := range g.activeRegions {
155 s.GoroutineExecStats = finalStat.sub(s.GoroutineExecStats)
156 g.Regions = append(g.Regions, s)
159 *(g.goroutineSummary) = goroutineSummary{}
162 // goroutineSummary is a private part of GoroutineSummary that is required only during analysis.
163 type goroutineSummary struct {
164 lastStartTime tracev2.Time
165 lastRunnableTime tracev2.Time
166 lastBlockTime tracev2.Time
167 lastBlockReason string
168 lastSyscallTime tracev2.Time
169 lastSyscallBlockTime tracev2.Time
170 lastRangeTime map[string]tracev2.Time
171 activeRegions []*UserRegionSummary // stack of active regions
174 // SummarizeGoroutines generates statistics for all goroutines in the trace.
175 func SummarizeGoroutines(trace io.Reader) (map[tracev2.GoID]*GoroutineSummary, error) {
176 // Create the analysis state.
177 b := goroutineStatsBuilder{
178 gs: make(map[tracev2.GoID]*GoroutineSummary),
179 syscallingP: make(map[tracev2.ProcID]tracev2.GoID),
180 syscallingG: make(map[tracev2.GoID]tracev2.ProcID),
181 rangesP: make(map[rangeP]tracev2.GoID),
184 // Process the trace.
185 r, err := tracev2.NewReader(trace)
190 ev, err := r.ReadEvent()
199 return b.finalize(), nil
202 // goroutineStatsBuilder constructs per-goroutine time statistics for v2 traces.
203 type goroutineStatsBuilder struct {
204 // gs contains the map of goroutine summaries we're building up to return to the caller.
205 gs map[tracev2.GoID]*GoroutineSummary
207 // syscallingP and syscallingG represent a binding between a P and G in a syscall.
208 // Used to correctly identify and clean up after syscalls (blocking or otherwise).
209 syscallingP map[tracev2.ProcID]tracev2.GoID
210 syscallingG map[tracev2.GoID]tracev2.ProcID
212 // rangesP is used for optimistic tracking of P-based ranges for goroutines.
214 // It's a best-effort mapping of an active range on a P to the goroutine we think
215 // is associated with it.
216 rangesP map[rangeP]tracev2.GoID
218 lastTs tracev2.Time // timestamp of the last event processed.
219 syncTs tracev2.Time // timestamp of the last sync event processed (or the first timestamp in the trace).
227 // event feeds a single event into the stats builder.
228 func (b *goroutineStatsBuilder) event(ev tracev2.Event) {
235 // Record sync time for the RangeActive events.
236 case tracev2.EventSync:
239 // Handle state transitions.
240 case tracev2.EventStateTransition:
241 st := ev.StateTransition()
242 switch st.Resource.Kind {
243 // Handle goroutine transitions, which are the meat of this computation.
244 case tracev2.ResourceGoroutine:
245 id := st.Resource.Goroutine()
246 old, new := st.Goroutine()
248 // Skip these events; they're not telling us anything new.
252 // Handle transition out.
255 case tracev2.GoUndetermined, tracev2.GoNotExist:
256 g = &GoroutineSummary{ID: id, goroutineSummary: &goroutineSummary{}}
257 // If we're coming out of GoUndetermined, then the creation time is the
258 // time of the last sync.
259 if old == tracev2.GoUndetermined {
260 g.CreationTime = b.syncTs
262 g.CreationTime = ev.Time()
264 // The goroutine is being created, or it's being named for the first time.
265 g.lastRangeTime = make(map[string]tracev2.Time)
266 g.BlockTimeByReason = make(map[string]time.Duration)
267 g.RangeTime = make(map[string]time.Duration)
269 // When a goroutine is newly created, inherit the task
270 // of the active region. For ease handling of this
271 // case, we create a fake region description with the
272 // task id. This isn't strictly necessary as this
273 // goroutine may not be associated with the task, but
274 // it can be convenient to see all children created
277 // N.B. ev.Goroutine() will always be NoGoroutine for the
278 // Undetermined case, so this is will simply not fire.
279 if creatorG := b.gs[ev.Goroutine()]; creatorG != nil && len(creatorG.activeRegions) > 0 {
280 regions := creatorG.activeRegions
281 s := regions[len(regions)-1]
282 if s.TaskID != tracev2.NoTask {
283 g.activeRegions = []*UserRegionSummary{{TaskID: s.TaskID, Start: &ev}}
287 case tracev2.GoRunning:
288 // Record execution time as we transition out of running
289 g.ExecTime += ev.Time().Sub(g.lastStartTime)
291 case tracev2.GoWaiting:
292 // Record block time as we transition out of waiting.
293 if g.lastBlockTime != 0 {
294 g.BlockTimeByReason[g.lastBlockReason] += ev.Time().Sub(g.lastBlockTime)
297 case tracev2.GoRunnable:
298 // Record sched latency time as we transition out of runnable.
299 if g.lastRunnableTime != 0 {
300 g.SchedWaitTime += ev.Time().Sub(g.lastRunnableTime)
301 g.lastRunnableTime = 0
303 case tracev2.GoSyscall:
304 // Record syscall execution time and syscall block time as we transition out of syscall.
305 if g.lastSyscallTime != 0 {
306 if g.lastSyscallBlockTime != 0 {
307 g.SyscallBlockTime += ev.Time().Sub(g.lastSyscallBlockTime)
308 g.SyscallTime += g.lastSyscallBlockTime.Sub(g.lastSyscallTime)
310 g.SyscallTime += ev.Time().Sub(g.lastSyscallTime)
312 g.lastSyscallTime = 0
313 g.lastSyscallBlockTime = 0
315 // Clear the syscall map.
316 delete(b.syscallingP, b.syscallingG[id])
317 delete(b.syscallingG, id)
321 // The goroutine hasn't been identified yet. Take any stack we
322 // can get and identify it by the bottom-most frame of that stack.
325 if stk != tracev2.NoStack {
326 var frame tracev2.StackFrame
328 stk.Frames(func(f tracev2.StackFrame) bool {
340 // Handle transition in.
342 case tracev2.GoRunning:
343 // We started running. Record it.
344 g.lastStartTime = ev.Time()
345 if g.StartTime == 0 {
346 g.StartTime = ev.Time()
348 case tracev2.GoRunnable:
349 g.lastRunnableTime = ev.Time()
350 case tracev2.GoWaiting:
351 if st.Reason != "forever" {
352 g.lastBlockTime = ev.Time()
353 g.lastBlockReason = st.Reason
356 // "Forever" is like goroutine death.
358 case tracev2.GoNotExist:
359 g.finalize(ev.Time(), &ev)
360 case tracev2.GoSyscall:
361 b.syscallingP[ev.Proc()] = id
362 b.syscallingG[id] = ev.Proc()
363 g.lastSyscallTime = ev.Time()
366 // Handle procs to detect syscall blocking, which si identifiable as a
367 // proc going idle while the goroutine it was attached to is in a syscall.
368 case tracev2.ResourceProc:
369 id := st.Resource.Proc()
370 old, new := st.Proc()
371 if old != new && new == tracev2.ProcIdle {
372 if goid, ok := b.syscallingP[id]; ok {
374 g.lastSyscallBlockTime = ev.Time()
375 delete(b.syscallingP, id)
380 // Handle ranges of all kinds.
381 case tracev2.EventRangeBegin, tracev2.EventRangeActive:
383 var g *GoroutineSummary
384 switch r.Scope.Kind {
385 case tracev2.ResourceGoroutine:
386 // Simple goroutine range. We attribute the entire range regardless of
387 // goroutine stats. Lots of situations are still identifiable, e.g. a
388 // goroutine blocked often in mark assist will have both high mark assist
389 // and high block times. Those interested in a deeper view can look at the
391 g = b.gs[r.Scope.Goroutine()]
392 case tracev2.ResourceProc:
393 // N.B. These ranges are not actually bound to the goroutine, they're
394 // bound to the P. But if we happen to be on the P the whole time, let's
395 // try to attribute it to the goroutine. (e.g. GC sweeps are here.)
396 g = b.gs[ev.Goroutine()]
398 b.rangesP[rangeP{id: r.Scope.Proc(), name: r.Name}] = ev.Goroutine()
404 if ev.Kind() == tracev2.EventRangeActive {
405 if ts := g.lastRangeTime[r.Name]; ts != 0 {
406 g.RangeTime[r.Name] += b.syncTs.Sub(ts)
408 g.lastRangeTime[r.Name] = b.syncTs
410 g.lastRangeTime[r.Name] = ev.Time()
412 case tracev2.EventRangeEnd:
414 var g *GoroutineSummary
415 switch r.Scope.Kind {
416 case tracev2.ResourceGoroutine:
417 g = b.gs[r.Scope.Goroutine()]
418 case tracev2.ResourceProc:
419 rp := rangeP{id: r.Scope.Proc(), name: r.Name}
420 if goid, ok := b.rangesP[rp]; ok {
421 if goid == ev.Goroutine() {
422 // As the comment in the RangeBegin case states, this is only OK
423 // if we finish on the same goroutine we started on.
426 delete(b.rangesP, rp)
432 ts := g.lastRangeTime[r.Name]
436 g.RangeTime[r.Name] += ev.Time().Sub(ts)
437 delete(g.lastRangeTime, r.Name)
439 // Handle user-defined regions.
440 case tracev2.EventRegionBegin:
441 g := b.gs[ev.Goroutine()]
443 g.activeRegions = append(g.activeRegions, &UserRegionSummary{
447 GoroutineExecStats: g.snapshotStat(ev.Time()),
449 case tracev2.EventRegionEnd:
450 g := b.gs[ev.Goroutine()]
452 var sd *UserRegionSummary
453 if regionStk := g.activeRegions; len(regionStk) > 0 {
454 // Pop the top region from the stack since that's what must have ended.
457 regionStk = regionStk[:n-1]
458 g.activeRegions = regionStk
460 // This is an "end" without a start. Just fabricate the region now.
461 sd = &UserRegionSummary{Name: r.Type, TaskID: r.Task}
463 sd.GoroutineExecStats = g.snapshotStat(ev.Time()).sub(sd.GoroutineExecStats)
465 g.Regions = append(g.Regions, sd)
469 // finalize indicates to the builder that we're done processing the trace.
470 // It cleans up any remaining state and returns the full summary.
471 func (b *goroutineStatsBuilder) finalize() map[tracev2.GoID]*GoroutineSummary {
472 for _, g := range b.gs {
473 g.finalize(b.lastTs, nil)
475 // Sort based on region start time.
476 sort.Slice(g.Regions, func(i, j int) bool {
477 x := g.Regions[i].Start
478 y := g.Regions[j].Start
485 return x.Time() < y.Time()
487 g.goroutineSummary = nil
492 // RelatedGoroutinesV2 finds a set of goroutines related to goroutine goid for v2 traces.
493 // The association is based on whether they have synchronized with each other in the Go
494 // scheduler (one has unblocked another).
495 func RelatedGoroutinesV2(trace io.Reader, goid tracev2.GoID) (map[tracev2.GoID]struct{}, error) {
496 r, err := tracev2.NewReader(trace)
500 // Process all the events, looking for transitions of goroutines
501 // out of GoWaiting. If there was an active goroutine when this
502 // happened, then we know that active goroutine unblocked another.
503 // Scribble all these down so we can process them.
504 type unblockEdge struct {
505 operator tracev2.GoID
508 var unblockEdges []unblockEdge
510 ev, err := r.ReadEvent()
517 if ev.Goroutine() == tracev2.NoGoroutine {
520 if ev.Kind() != tracev2.EventStateTransition {
523 st := ev.StateTransition()
524 if st.Resource.Kind != tracev2.ResourceGoroutine {
527 id := st.Resource.Goroutine()
528 old, new := st.Goroutine()
529 if old == new || old != tracev2.GoWaiting {
532 unblockEdges = append(unblockEdges, unblockEdge{
533 operator: ev.Goroutine(),
537 // Compute the transitive closure of depth 2 of goroutines that have unblocked each other
538 // (starting from goid).
539 gmap := make(map[tracev2.GoID]struct{})
540 gmap[goid] = struct{}{}
541 for i := 0; i < 2; i++ {
543 gmap1 := make(map[tracev2.GoID]struct{})
544 for g := range gmap {
545 gmap1[g] = struct{}{}
547 for _, edge := range unblockEdges {
548 if _, ok := gmap[edge.operand]; ok {
549 gmap1[edge.operator] = struct{}{}