For #63960.
Change-Id: I1efe35435e32623aba894a915114e394570ebc56
Reviewed-on: https://go-review.googlesource.com/c/go/+/540259
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
--- /dev/null
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+ tracev2 "internal/trace/v2"
+ "io"
+ "sort"
+ "time"
+)
+
+// GoroutineSummary contains statistics and execution details of a single goroutine.
+// (For v2 traces.)
+type GoroutineSummary struct {
+ ID tracev2.GoID
+ Name string // A non-unique human-friendly identifier for the goroutine.
+ PC uint64 // The start PC of the goroutine.
+ CreationTime tracev2.Time // Timestamp of the first appearance in the trace.
+ StartTime tracev2.Time // Timestamp of the first time it started running. 0 if the goroutine never ran.
+ EndTime tracev2.Time // Timestamp of when the goroutine exited. 0 if the goroutine never exited.
+
+ // List of regions in the goroutine, sorted based on the start time.
+ Regions []*UserRegionSummary
+
+ // Statistics of execution time during the goroutine execution.
+ GoroutineExecStats
+
+ // goroutineSummary is state used just for computing this structure.
+ // It's dropped before being returned to the caller.
+ //
+ // More specifically, if it's nil, it indicates that this summary has
+ // already been finalized.
+ *goroutineSummary
+}
+
+// UserRegionSummary represents a region and goroutine execution stats
+// while the region was active. (For v2 traces.)
+type UserRegionSummary struct {
+ TaskID tracev2.TaskID
+ Name string
+
+ // Region start event. Normally EventRegionBegin event or nil,
+ // but can be a state transition event from NotExist or Undetermined
+ // if the region is a synthetic region representing task inheritance
+ // from the parent goroutine.
+ Start *tracev2.Event
+
+ // Region end event. Normally EventRegionEnd event or nil,
+ // but can be a state transition event to NotExist if the goroutine
+ // terminated without explicitly ending the region.
+ End *tracev2.Event
+
+ GoroutineExecStats
+}
+
+// GoroutineExecStats contains statistics about a goroutine's execution
+// during a period of time.
+type GoroutineExecStats struct {
+ ExecTime time.Duration
+ SchedWaitTime time.Duration
+ BlockTimeByReason map[string]time.Duration
+ SyscallTime time.Duration
+ SyscallBlockTime time.Duration
+ RangeTime map[string]time.Duration
+ TotalTime time.Duration
+}
+
+// sub returns the stats v-s.
+func (s GoroutineExecStats) sub(v GoroutineExecStats) (r GoroutineExecStats) {
+ r = s.clone()
+ r.ExecTime -= v.ExecTime
+ r.SchedWaitTime -= v.SchedWaitTime
+ for reason := range s.BlockTimeByReason {
+ r.BlockTimeByReason[reason] -= v.BlockTimeByReason[reason]
+ }
+ r.SyscallTime -= v.SyscallTime
+ r.SyscallBlockTime -= v.SyscallBlockTime
+ r.TotalTime -= v.TotalTime
+ for name := range s.RangeTime {
+ r.RangeTime[name] -= v.RangeTime[name]
+ }
+ return r
+}
+
+func (s GoroutineExecStats) clone() (r GoroutineExecStats) {
+ r = s
+ r.BlockTimeByReason = make(map[string]time.Duration)
+ for reason, dt := range s.BlockTimeByReason {
+ r.BlockTimeByReason[reason] = dt
+ }
+ r.RangeTime = make(map[string]time.Duration)
+ for name, dt := range s.RangeTime {
+ r.RangeTime[name] = dt
+ }
+ return r
+}
+
+// snapshotStat returns the snapshot of the goroutine execution statistics.
+// This is called as we process the ordered trace event stream. lastTs is used
+// to process pending statistics if this is called before any goroutine end event.
+func (g *GoroutineSummary) snapshotStat(lastTs tracev2.Time) (ret GoroutineExecStats) {
+ ret = g.GoroutineExecStats.clone()
+
+ if g.goroutineSummary == nil {
+ return ret // Already finalized; no pending state.
+ }
+
+ // Set the total time if necessary.
+ if g.TotalTime == 0 {
+ ret.TotalTime = lastTs.Sub(g.CreationTime)
+ }
+
+ // Add in time since lastTs.
+ if g.lastStartTime != 0 {
+ ret.ExecTime += lastTs.Sub(g.lastStartTime)
+ }
+ if g.lastRunnableTime != 0 {
+ ret.SchedWaitTime += lastTs.Sub(g.lastRunnableTime)
+ }
+ if g.lastBlockTime != 0 {
+ ret.BlockTimeByReason[g.lastBlockReason] += lastTs.Sub(g.lastBlockTime)
+ }
+ if g.lastSyscallTime != 0 {
+ ret.SyscallTime += lastTs.Sub(g.lastSyscallTime)
+ }
+ if g.lastSyscallBlockTime != 0 {
+ ret.SchedWaitTime += lastTs.Sub(g.lastSyscallBlockTime)
+ }
+ for name, ts := range g.lastRangeTime {
+ ret.RangeTime[name] += lastTs.Sub(ts)
+ }
+ return ret
+}
+
+// finalize is called when processing a goroutine end event or at
+// the end of trace processing. This finalizes the execution stat
+// and any active regions in the goroutine, in which case trigger is nil.
+func (g *GoroutineSummary) finalize(lastTs tracev2.Time, trigger *tracev2.Event) {
+ if trigger != nil {
+ g.EndTime = trigger.Time()
+ }
+ finalStat := g.snapshotStat(lastTs)
+
+ g.GoroutineExecStats = finalStat
+
+ // System goroutines are never part of regions, even though they
+ // "inherit" a task due to creation (EvGoCreate) from within a region.
+ // This may happen e.g. if the first GC is triggered within a region,
+ // starting the GC worker goroutines.
+ if !IsSystemGoroutine(g.Name) {
+ for _, s := range g.activeRegions {
+ s.End = trigger
+ s.GoroutineExecStats = finalStat.sub(s.GoroutineExecStats)
+ g.Regions = append(g.Regions, s)
+ }
+ }
+ *(g.goroutineSummary) = goroutineSummary{}
+}
+
+// goroutineSummary is a private part of GoroutineSummary that is required only during analysis.
+type goroutineSummary struct {
+ lastStartTime tracev2.Time
+ lastRunnableTime tracev2.Time
+ lastBlockTime tracev2.Time
+ lastBlockReason string
+ lastSyscallTime tracev2.Time
+ lastSyscallBlockTime tracev2.Time
+ lastRangeTime map[string]tracev2.Time
+ activeRegions []*UserRegionSummary // stack of active regions
+}
+
+// SummarizeGoroutines generates statistics for all goroutines in the trace.
+func SummarizeGoroutines(trace io.Reader) (map[tracev2.GoID]*GoroutineSummary, error) {
+ // Create the analysis state.
+ b := goroutineStatsBuilder{
+ gs: make(map[tracev2.GoID]*GoroutineSummary),
+ syscallingP: make(map[tracev2.ProcID]tracev2.GoID),
+ syscallingG: make(map[tracev2.GoID]tracev2.ProcID),
+ rangesP: make(map[rangeP]tracev2.GoID),
+ }
+
+ // Process the trace.
+ r, err := tracev2.NewReader(trace)
+ if err != nil {
+ return nil, err
+ }
+ for {
+ ev, err := r.ReadEvent()
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ return nil, err
+ }
+ b.event(ev)
+ }
+ return b.finalize(), nil
+}
+
+// goroutineStatsBuilder constructs per-goroutine time statistics for v2 traces.
+type goroutineStatsBuilder struct {
+ // gs contains the map of goroutine summaries we're building up to return to the caller.
+ gs map[tracev2.GoID]*GoroutineSummary
+
+ // syscallingP and syscallingG represent a binding between a P and G in a syscall.
+ // Used to correctly identify and clean up after syscalls (blocking or otherwise).
+ syscallingP map[tracev2.ProcID]tracev2.GoID
+ syscallingG map[tracev2.GoID]tracev2.ProcID
+
+ // rangesP is used for optimistic tracking of P-based ranges for goroutines.
+ //
+ // It's a best-effort mapping of an active range on a P to the goroutine we think
+ // is associated with it.
+ rangesP map[rangeP]tracev2.GoID
+
+ lastTs tracev2.Time // timestamp of the last event processed.
+ syncTs tracev2.Time // timestamp of the last sync event processed (or the first timestamp in the trace).
+}
+
+type rangeP struct {
+ id tracev2.ProcID
+ name string
+}
+
+// event feeds a single event into the stats builder.
+func (b *goroutineStatsBuilder) event(ev tracev2.Event) {
+ if b.syncTs == 0 {
+ b.syncTs = ev.Time()
+ }
+ b.lastTs = ev.Time()
+
+ switch ev.Kind() {
+ // Record sync time for the RangeActive events.
+ case tracev2.EventSync:
+ b.syncTs = ev.Time()
+
+ // Handle state transitions.
+ case tracev2.EventStateTransition:
+ st := ev.StateTransition()
+ switch st.Resource.Kind {
+ // Handle goroutine transitions, which are the meat of this computation.
+ case tracev2.ResourceGoroutine:
+ id := st.Resource.Goroutine()
+ old, new := st.Goroutine()
+ if old == new {
+ // Skip these events; they're not telling us anything new.
+ break
+ }
+
+ // Handle transition out.
+ g := b.gs[id]
+ switch old {
+ case tracev2.GoUndetermined, tracev2.GoNotExist:
+ g = &GoroutineSummary{ID: id, goroutineSummary: &goroutineSummary{}}
+ // If we're coming out of GoUndetermined, then the creation time is the
+ // time of the last sync.
+ if old == tracev2.GoUndetermined {
+ g.CreationTime = b.syncTs
+ } else {
+ g.CreationTime = ev.Time()
+ }
+ // The goroutine is being created, or it's being named for the first time.
+ g.lastRangeTime = make(map[string]tracev2.Time)
+ g.BlockTimeByReason = make(map[string]time.Duration)
+ g.RangeTime = make(map[string]time.Duration)
+
+ // When a goroutine is newly created, inherit the task
+ // of the active region. For ease handling of this
+ // case, we create a fake region description with the
+ // task id. This isn't strictly necessary as this
+ // goroutine may not be associated with the task, but
+ // it can be convenient to see all children created
+ // during a region.
+ //
+ // N.B. ev.Goroutine() will always be NoGoroutine for the
+ // Undetermined case, so this is will simply not fire.
+ if creatorG := b.gs[ev.Goroutine()]; creatorG != nil && len(creatorG.activeRegions) > 0 {
+ regions := creatorG.activeRegions
+ s := regions[len(regions)-1]
+ if s.TaskID != tracev2.NoTask {
+ g.activeRegions = []*UserRegionSummary{{TaskID: s.TaskID, Start: &ev}}
+ }
+ }
+ b.gs[g.ID] = g
+ case tracev2.GoRunning:
+ // Record execution time as we transition out of running
+ g.ExecTime += ev.Time().Sub(g.lastStartTime)
+ g.lastStartTime = 0
+ case tracev2.GoWaiting:
+ // Record block time as we transition out of waiting.
+ if g.lastBlockTime != 0 {
+ g.BlockTimeByReason[g.lastBlockReason] += ev.Time().Sub(g.lastBlockTime)
+ g.lastBlockTime = 0
+ }
+ case tracev2.GoRunnable:
+ // Record sched latency time as we transition out of runnable.
+ if g.lastRunnableTime != 0 {
+ g.SchedWaitTime += ev.Time().Sub(g.lastRunnableTime)
+ g.lastRunnableTime = 0
+ }
+ case tracev2.GoSyscall:
+ // Record syscall execution time and syscall block time as we transition out of syscall.
+ if g.lastSyscallTime != 0 {
+ if g.lastSyscallBlockTime != 0 {
+ g.SyscallBlockTime += ev.Time().Sub(g.lastSyscallBlockTime)
+ g.SyscallTime += g.lastSyscallBlockTime.Sub(g.lastSyscallTime)
+ } else {
+ g.SyscallTime += ev.Time().Sub(g.lastSyscallTime)
+ }
+ g.lastSyscallTime = 0
+ g.lastSyscallBlockTime = 0
+
+ // Clear the syscall map.
+ delete(b.syscallingP, b.syscallingG[id])
+ delete(b.syscallingG, id)
+ }
+ }
+
+ // The goroutine hasn't been identified yet. Take any stack we
+ // can get and identify it by the bottom-most frame of that stack.
+ if g.PC == 0 {
+ stk := ev.Stack()
+ if stk != tracev2.NoStack {
+ var frame tracev2.StackFrame
+ var ok bool
+ stk.Frames(func(f tracev2.StackFrame) bool {
+ frame = f
+ ok = true
+ return false
+ })
+ if ok {
+ g.PC = frame.PC
+ g.Name = frame.Func
+ }
+ }
+ }
+
+ // Handle transition in.
+ switch new {
+ case tracev2.GoRunning:
+ // We started running. Record it.
+ g.lastStartTime = ev.Time()
+ if g.StartTime == 0 {
+ g.StartTime = ev.Time()
+ }
+ case tracev2.GoRunnable:
+ g.lastRunnableTime = ev.Time()
+ case tracev2.GoWaiting:
+ if st.Reason != "forever" {
+ g.lastBlockTime = ev.Time()
+ g.lastBlockReason = st.Reason
+ break
+ }
+ // "Forever" is like goroutine death.
+ fallthrough
+ case tracev2.GoNotExist:
+ g.finalize(ev.Time(), &ev)
+ case tracev2.GoSyscall:
+ b.syscallingP[ev.Proc()] = id
+ b.syscallingG[id] = ev.Proc()
+ g.lastSyscallTime = ev.Time()
+ }
+
+ // Handle procs to detect syscall blocking, which si identifiable as a
+ // proc going idle while the goroutine it was attached to is in a syscall.
+ case tracev2.ResourceProc:
+ id := st.Resource.Proc()
+ old, new := st.Proc()
+ if old != new && new == tracev2.ProcIdle {
+ if goid, ok := b.syscallingP[id]; ok {
+ g := b.gs[goid]
+ g.lastSyscallBlockTime = ev.Time()
+ delete(b.syscallingP, id)
+ }
+ }
+ }
+
+ // Handle ranges of all kinds.
+ case tracev2.EventRangeBegin, tracev2.EventRangeActive:
+ r := ev.Range()
+ var g *GoroutineSummary
+ switch r.Scope.Kind {
+ case tracev2.ResourceGoroutine:
+ // Simple goroutine range. We attribute the entire range regardless of
+ // goroutine stats. Lots of situations are still identifiable, e.g. a
+ // goroutine blocked often in mark assist will have both high mark assist
+ // and high block times. Those interested in a deeper view can look at the
+ // trace viewer.
+ g = b.gs[r.Scope.Goroutine()]
+ case tracev2.ResourceProc:
+ // N.B. These ranges are not actually bound to the goroutine, they're
+ // bound to the P. But if we happen to be on the P the whole time, let's
+ // try to attribute it to the goroutine. (e.g. GC sweeps are here.)
+ g = b.gs[ev.Goroutine()]
+ if g != nil {
+ b.rangesP[rangeP{id: r.Scope.Proc(), name: r.Name}] = ev.Goroutine()
+ }
+ }
+ if g == nil {
+ break
+ }
+ if ev.Kind() == tracev2.EventRangeActive {
+ if ts := g.lastRangeTime[r.Name]; ts != 0 {
+ g.RangeTime[r.Name] += b.syncTs.Sub(ts)
+ }
+ g.lastRangeTime[r.Name] = b.syncTs
+ } else {
+ g.lastRangeTime[r.Name] = ev.Time()
+ }
+ case tracev2.EventRangeEnd:
+ r := ev.Range()
+ var g *GoroutineSummary
+ switch r.Scope.Kind {
+ case tracev2.ResourceGoroutine:
+ g = b.gs[r.Scope.Goroutine()]
+ case tracev2.ResourceProc:
+ rp := rangeP{id: r.Scope.Proc(), name: r.Name}
+ if goid, ok := b.rangesP[rp]; ok {
+ if goid == ev.Goroutine() {
+ // As the comment in the RangeBegin case states, this is only OK
+ // if we finish on the same goroutine we started on.
+ g = b.gs[goid]
+ }
+ delete(b.rangesP, rp)
+ }
+ }
+ if g == nil {
+ break
+ }
+ ts := g.lastRangeTime[r.Name]
+ if ts == 0 {
+ break
+ }
+ g.RangeTime[r.Name] += ev.Time().Sub(ts)
+ delete(g.lastRangeTime, r.Name)
+
+ // Handle user-defined regions.
+ case tracev2.EventRegionBegin:
+ g := b.gs[ev.Goroutine()]
+ r := ev.Region()
+ g.activeRegions = append(g.activeRegions, &UserRegionSummary{
+ Name: r.Type,
+ TaskID: r.Task,
+ Start: &ev,
+ GoroutineExecStats: g.snapshotStat(ev.Time()),
+ })
+ case tracev2.EventRegionEnd:
+ g := b.gs[ev.Goroutine()]
+ r := ev.Region()
+ var sd *UserRegionSummary
+ if regionStk := g.activeRegions; len(regionStk) > 0 {
+ // Pop the top region from the stack since that's what must have ended.
+ n := len(regionStk)
+ sd = regionStk[n-1]
+ regionStk = regionStk[:n-1]
+ g.activeRegions = regionStk
+ } else {
+ // This is an "end" without a start. Just fabricate the region now.
+ sd = &UserRegionSummary{Name: r.Type, TaskID: r.Task}
+ }
+ sd.GoroutineExecStats = g.snapshotStat(ev.Time()).sub(sd.GoroutineExecStats)
+ sd.End = &ev
+ g.Regions = append(g.Regions, sd)
+ }
+}
+
+// finalize indicates to the builder that we're done processing the trace.
+// It cleans up any remaining state and returns the full summary.
+func (b *goroutineStatsBuilder) finalize() map[tracev2.GoID]*GoroutineSummary {
+ for _, g := range b.gs {
+ g.finalize(b.lastTs, nil)
+
+ // Sort based on region start time.
+ sort.Slice(g.Regions, func(i, j int) bool {
+ x := g.Regions[i].Start
+ y := g.Regions[j].Start
+ if x == nil {
+ return true
+ }
+ if y == nil {
+ return false
+ }
+ return x.Time() < y.Time()
+ })
+ g.goroutineSummary = nil
+ }
+ return b.gs
+}
+
+// RelatedGoroutinesV2 finds a set of goroutines related to goroutine goid for v2 traces.
+// The association is based on whether they have synchronized with each other in the Go
+// scheduler (one has unblocked another).
+func RelatedGoroutinesV2(trace io.Reader, goid tracev2.GoID) (map[tracev2.GoID]struct{}, error) {
+ r, err := tracev2.NewReader(trace)
+ if err != nil {
+ return nil, err
+ }
+ // Process all the events, looking for transitions of goroutines
+ // out of GoWaiting. If there was an active goroutine when this
+ // happened, then we know that active goroutine unblocked another.
+ // Scribble all these down so we can process them.
+ type unblockEdge struct {
+ operator tracev2.GoID
+ operand tracev2.GoID
+ }
+ var unblockEdges []unblockEdge
+ for {
+ ev, err := r.ReadEvent()
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ return nil, err
+ }
+ if ev.Goroutine() == tracev2.NoGoroutine {
+ continue
+ }
+ if ev.Kind() != tracev2.EventStateTransition {
+ continue
+ }
+ st := ev.StateTransition()
+ if st.Resource.Kind != tracev2.ResourceGoroutine {
+ continue
+ }
+ id := st.Resource.Goroutine()
+ old, new := st.Goroutine()
+ if old == new || old != tracev2.GoWaiting {
+ continue
+ }
+ unblockEdges = append(unblockEdges, unblockEdge{
+ operator: ev.Goroutine(),
+ operand: id,
+ })
+ }
+ // Compute the transitive closure of depth 2 of goroutines that have unblocked each other
+ // (starting from goid).
+ gmap := make(map[tracev2.GoID]struct{})
+ gmap[goid] = struct{}{}
+ for i := 0; i < 2; i++ {
+ // Copy the map.
+ gmap1 := make(map[tracev2.GoID]struct{})
+ for g := range gmap {
+ gmap1[g] = struct{}{}
+ }
+ for _, edge := range unblockEdges {
+ if _, ok := gmap[edge.operand]; ok {
+ gmap1[edge.operator] = struct{}{}
+ }
+ }
+ gmap = gmap1
+ }
+ return gmap, nil
+}
--- /dev/null
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+ tracev2 "internal/trace/v2"
+ "internal/trace/v2/testtrace"
+ "testing"
+)
+
+func TestSummarizeGoroutinesTrace(t *testing.T) {
+ summaries := summarizeTraceTest(t, "v2/testdata/tests/go122-gc-stress.test")
+ var (
+ hasSchedWaitTime bool
+ hasSyncBlockTime bool
+ hasGCMarkAssistTime bool
+ )
+ for _, summary := range summaries {
+ basicSummaryChecks(t, summary)
+ hasSchedWaitTime = hasSchedWaitTime || summary.SchedWaitTime > 0
+ if dt, ok := summary.BlockTimeByReason["sync"]; ok && dt > 0 {
+ hasSyncBlockTime = true
+ }
+ if dt, ok := summary.RangeTime["GC mark assist"]; ok && dt > 0 {
+ hasGCMarkAssistTime = true
+ }
+ }
+ if !hasSchedWaitTime {
+ t.Error("missing sched wait time")
+ }
+ if !hasSyncBlockTime {
+ t.Error("missing sync block time")
+ }
+ if !hasGCMarkAssistTime {
+ t.Error("missing GC mark assist time")
+ }
+}
+
+func TestSummarizeGoroutinesRegionsTrace(t *testing.T) {
+ summaries := summarizeTraceTest(t, "v2/testdata/tests/go122-annotations.test")
+ type region struct {
+ startKind tracev2.EventKind
+ endKind tracev2.EventKind
+ }
+ wantRegions := map[string]region{
+ // N.B. "pre-existing region" never even makes it into the trace.
+ //
+ // TODO(mknyszek): Add test case for end-without-a-start, which can happen at
+ // a generation split only.
+ "": {tracev2.EventStateTransition, tracev2.EventStateTransition}, // Task inheritance marker.
+ "task0 region": {tracev2.EventRegionBegin, tracev2.EventBad},
+ "region0": {tracev2.EventRegionBegin, tracev2.EventRegionEnd},
+ "region1": {tracev2.EventRegionBegin, tracev2.EventRegionEnd},
+ "unended region": {tracev2.EventRegionBegin, tracev2.EventStateTransition},
+ "post-existing region": {tracev2.EventRegionBegin, tracev2.EventBad},
+ }
+ for _, summary := range summaries {
+ basicSummaryChecks(t, summary)
+ for _, region := range summary.Regions {
+ want, ok := wantRegions[region.Name]
+ if !ok {
+ continue
+ }
+ checkRegionEvents(t, want.startKind, want.endKind, summary.ID, region)
+ delete(wantRegions, region.Name)
+ }
+ }
+ if len(wantRegions) != 0 {
+ t.Errorf("failed to find regions: %#v", wantRegions)
+ }
+}
+
+func basicSummaryChecks(t *testing.T, summary *GoroutineSummary) {
+ if summary.ID == tracev2.NoGoroutine {
+ t.Error("summary found for no goroutine")
+ return
+ }
+ if (summary.StartTime != 0 && summary.CreationTime > summary.StartTime) ||
+ (summary.StartTime != 0 && summary.EndTime != 0 && summary.StartTime > summary.EndTime) {
+ t.Errorf("bad summary creation/start/end times for G %d: creation=%d start=%d end=%d", summary.ID, summary.CreationTime, summary.StartTime, summary.EndTime)
+ }
+ if (summary.PC != 0 && summary.Name == "") || (summary.PC == 0 && summary.Name != "") {
+ t.Errorf("bad name and/or PC for G %d: pc=0x%x name=%q", summary.ID, summary.PC, summary.Name)
+ }
+ basicGoroutineExecStatsChecks(t, &summary.GoroutineExecStats)
+ for _, region := range summary.Regions {
+ basicGoroutineExecStatsChecks(t, ®ion.GoroutineExecStats)
+ }
+}
+
+func summarizeTraceTest(t *testing.T, testPath string) map[tracev2.GoID]*GoroutineSummary {
+ r, _, err := testtrace.ParseFile(testPath)
+ if err != nil {
+ t.Fatalf("malformed test %s: bad trace file: %v", testPath, err)
+ }
+ summaries, err := SummarizeGoroutines(r)
+ if err != nil {
+ t.Fatalf("failed to process trace %s: %v", testPath, err)
+ }
+ return summaries
+}
+
+func checkRegionEvents(t *testing.T, wantStart, wantEnd tracev2.EventKind, goid tracev2.GoID, region *UserRegionSummary) {
+ switch wantStart {
+ case tracev2.EventBad:
+ if region.Start != nil {
+ t.Errorf("expected nil region start event, got\n%s", region.Start.String())
+ }
+ case tracev2.EventStateTransition, tracev2.EventRegionBegin:
+ if region.Start == nil {
+ t.Error("expected non-nil region start event, got nil")
+ }
+ kind := region.Start.Kind()
+ if kind != wantStart {
+ t.Errorf("wanted region start event %s, got %s", wantStart, kind)
+ }
+ if kind == tracev2.EventRegionBegin {
+ if region.Start.Region().Type != region.Name {
+ t.Errorf("region name mismatch: event has %s, summary has %s", region.Start.Region().Type, region.Name)
+ }
+ } else {
+ st := region.Start.StateTransition()
+ if st.Resource.Kind != tracev2.ResourceGoroutine {
+ t.Errorf("found region start event for the wrong resource: %s", st.Resource)
+ }
+ if st.Resource.Goroutine() != goid {
+ t.Errorf("found region start event for the wrong resource: wanted goroutine %d, got %s", goid, st.Resource)
+ }
+ if old, _ := st.Goroutine(); old != tracev2.GoNotExist && old != tracev2.GoUndetermined {
+ t.Errorf("expected transition from GoNotExist or GoUndetermined, got transition from %s instead", old)
+ }
+ }
+ default:
+ t.Errorf("unexpected want start event type: %s", wantStart)
+ }
+
+ switch wantEnd {
+ case tracev2.EventBad:
+ if region.End != nil {
+ t.Errorf("expected nil region end event, got\n%s", region.End.String())
+ }
+ case tracev2.EventStateTransition, tracev2.EventRegionEnd:
+ if region.End == nil {
+ t.Error("expected non-nil region end event, got nil")
+ }
+ kind := region.End.Kind()
+ if kind != wantEnd {
+ t.Errorf("wanted region end event %s, got %s", wantEnd, kind)
+ }
+ if kind == tracev2.EventRegionEnd {
+ if region.End.Region().Type != region.Name {
+ t.Errorf("region name mismatch: event has %s, summary has %s", region.End.Region().Type, region.Name)
+ }
+ } else {
+ st := region.End.StateTransition()
+ if st.Resource.Kind != tracev2.ResourceGoroutine {
+ t.Errorf("found region end event for the wrong resource: %s", st.Resource)
+ }
+ if st.Resource.Goroutine() != goid {
+ t.Errorf("found region end event for the wrong resource: wanted goroutine %d, got %s", goid, st.Resource)
+ }
+ if _, new := st.Goroutine(); new != tracev2.GoNotExist {
+ t.Errorf("expected transition to GoNotExist, got transition to %s instead", new)
+ }
+ }
+ default:
+ t.Errorf("unexpected want end event type: %s", wantEnd)
+ }
+}
+
+func basicGoroutineExecStatsChecks(t *testing.T, stats *GoroutineExecStats) {
+ if stats.ExecTime < 0 {
+ t.Error("found negative ExecTime")
+ }
+ if stats.SchedWaitTime < 0 {
+ t.Error("found negative SchedWaitTime")
+ }
+ if stats.SyscallTime < 0 {
+ t.Error("found negative SyscallTime")
+ }
+ if stats.SyscallBlockTime < 0 {
+ t.Error("found negative SyscallBlockTime")
+ }
+ if stats.TotalTime < 0 {
+ t.Error("found negative TotalTime")
+ }
+ for reason, dt := range stats.BlockTimeByReason {
+ if dt < 0 {
+ t.Errorf("found negative BlockTimeByReason for %s", reason)
+ }
+ }
+ for name, dt := range stats.RangeTime {
+ if dt < 0 {
+ t.Errorf("found negative RangeTime for range %s", name)
+ }
+ }
+}
+
+func TestRelatedGoroutinesV2Trace(t *testing.T) {
+ testPath := "v2/testdata/tests/go122-gc-stress.test"
+ r, _, err := testtrace.ParseFile(testPath)
+ if err != nil {
+ t.Fatalf("malformed test %s: bad trace file: %v", testPath, err)
+ }
+ targetg := tracev2.GoID(86)
+ got, err := RelatedGoroutinesV2(r, targetg)
+ if err != nil {
+ t.Fatalf("failed to find related goroutines for %s: %v", testPath, err)
+ }
+ want := map[tracev2.GoID]struct{}{
+ tracev2.GoID(86): struct{}{}, // N.B. Result includes target.
+ tracev2.GoID(85): struct{}{},
+ tracev2.GoID(111): struct{}{},
+ }
+ for goid := range got {
+ if _, ok := want[goid]; ok {
+ delete(want, goid)
+ } else {
+ t.Errorf("unexpected goroutine %d found in related goroutines for %d in test %s", goid, targetg, testPath)
+ }
+ }
+ if len(want) != 0 {
+ for goid := range want {
+ t.Errorf("failed to find related goroutine %d for goroutine %d in test %s", goid, targetg, testPath)
+ }
+ }
+}
// Beginning of traced execution.
var wg sync.WaitGroup
ctx, task := trace.NewTask(bgctx, "task0") // EvUserTaskCreate("task0")
+ trace.StartRegion(ctx, "task0 region")
+
wg.Add(1)
go func() {
defer wg.Done()
defer task.End() // EvUserTaskEnd("task0")
+ trace.StartRegion(ctx, "unended region")
+
trace.WithRegion(ctx, "region0", func() {
// EvUserRegionBegin("region0", start)
trace.WithRegion(ctx, "region1", func() {
--- /dev/null
+-- expect --
+SUCCESS
+-- trace --
+Trace Go1.22
+EventBatch gen=1 m=18446744073709551615 time=23991883811065 size=5
+Frequency freq=15625000
+EventBatch gen=1 m=1331278 time=23991883808413 size=16
+ProcStart dt=297 p=2 p_seq=1
+GoStart dt=162 g=7 g_seq=1
+HeapAlloc dt=267 heapalloc_value=1908736
+EventBatch gen=1 m=1331277 time=23991883807353 size=28
+ProcStart dt=472 p=1 p_seq=1
+GoStart dt=309 g=6 g_seq=1
+HeapAlloc dt=533 heapalloc_value=1892352
+HeapAlloc dt=47 heapalloc_value=1900544
+GoBlock dt=2071 reason_string=12 stack=23
+ProcStop dt=54
+EventBatch gen=1 m=1331276 time=23991883809162 size=10
+ProcStart dt=426 p=3 p_seq=1
+HeapAlloc dt=108 heapalloc_value=1916928
+EventBatch gen=1 m=1331274 time=23991883805418 size=325
+ProcStatus dt=241 p=0 pstatus=1
+GoStatus dt=5 g=1 m=1331274 gstatus=2
+ProcsChange dt=486 procs_value=48 stack=1
+STWBegin dt=80 kind_string=21 stack=2
+HeapGoal dt=3 heapgoal_value=4194304
+ProcStatus dt=3 p=1 pstatus=2
+ProcStatus dt=2 p=2 pstatus=2
+ProcStatus dt=1 p=3 pstatus=2
+ProcStatus dt=1 p=4 pstatus=2
+ProcStatus dt=1 p=5 pstatus=2
+ProcStatus dt=1 p=6 pstatus=2
+ProcStatus dt=1 p=7 pstatus=2
+ProcStatus dt=1 p=8 pstatus=2
+ProcStatus dt=1 p=9 pstatus=2
+ProcStatus dt=1 p=10 pstatus=2
+ProcStatus dt=1 p=11 pstatus=2
+ProcStatus dt=1 p=12 pstatus=2
+ProcStatus dt=1 p=13 pstatus=2
+ProcStatus dt=1 p=14 pstatus=2
+ProcStatus dt=1 p=15 pstatus=2
+ProcStatus dt=1 p=16 pstatus=2
+ProcStatus dt=1 p=17 pstatus=2
+ProcStatus dt=1 p=18 pstatus=2
+ProcStatus dt=1 p=19 pstatus=2
+ProcStatus dt=1 p=20 pstatus=2
+ProcStatus dt=1 p=21 pstatus=2
+ProcStatus dt=1 p=22 pstatus=2
+ProcStatus dt=1 p=23 pstatus=2
+ProcStatus dt=1 p=24 pstatus=2
+ProcStatus dt=1 p=25 pstatus=2
+ProcStatus dt=1 p=26 pstatus=2
+ProcStatus dt=1 p=27 pstatus=2
+ProcStatus dt=1 p=28 pstatus=2
+ProcStatus dt=1 p=29 pstatus=2
+ProcStatus dt=1 p=30 pstatus=2
+ProcStatus dt=1 p=31 pstatus=2
+ProcStatus dt=1 p=32 pstatus=2
+ProcStatus dt=1 p=33 pstatus=2
+ProcStatus dt=1 p=34 pstatus=2
+ProcStatus dt=1 p=35 pstatus=2
+ProcStatus dt=1 p=36 pstatus=2
+ProcStatus dt=1 p=37 pstatus=2
+ProcStatus dt=1 p=38 pstatus=2
+ProcStatus dt=1 p=39 pstatus=2
+ProcStatus dt=1 p=40 pstatus=2
+ProcStatus dt=1 p=41 pstatus=2
+ProcStatus dt=1 p=42 pstatus=2
+ProcStatus dt=1 p=43 pstatus=2
+ProcStatus dt=1 p=44 pstatus=2
+ProcStatus dt=1 p=45 pstatus=2
+ProcStatus dt=1 p=46 pstatus=2
+ProcStatus dt=1 p=47 pstatus=2
+ProcsChange dt=9 procs_value=48 stack=3
+STWEnd dt=209
+GoCreate dt=376 new_g=6 new_stack=4 stack=5
+GoCreate dt=96 new_g=7 new_stack=6 stack=7
+GoCreate dt=574 new_g=8 new_stack=8 stack=9
+UserTaskBegin dt=19 task=1 parent_task=0 name_string=22 stack=10
+UserRegionBegin dt=19 task=1 name_string=23 stack=11
+HeapAlloc dt=27 heapalloc_value=1884160
+GoCreate dt=578 new_g=9 new_stack=12 stack=13
+GoBlock dt=67 reason_string=10 stack=14
+GoStart dt=13 g=9 g_seq=1
+UserRegionBegin dt=61 task=1 name_string=24 stack=15
+UserRegionBegin dt=8 task=1 name_string=25 stack=16
+UserRegionBegin dt=6 task=1 name_string=26 stack=17
+UserLog dt=780 task=1 key_string=27 value_string=28 stack=18
+UserRegionEnd dt=9 task=1 name_string=26 stack=19
+UserRegionEnd dt=504 task=1 name_string=25 stack=20
+UserTaskEnd dt=425 task=1 stack=21
+GoUnblock dt=503 g=1 g_seq=1 stack=22
+GoDestroy dt=7
+GoStart dt=13 g=1 g_seq=2
+UserRegionBegin dt=367 task=0 name_string=29 stack=24
+EventBatch gen=1 m=18446744073709551615 time=23991883811600 size=57
+GoStatus dt=358 g=2 m=18446744073709551615 gstatus=4
+GoStatus dt=3 g=3 m=18446744073709551615 gstatus=4
+GoStatus dt=2 g=4 m=18446744073709551615 gstatus=4
+GoStatus dt=1 g=5 m=18446744073709551615 gstatus=4
+EventBatch gen=1 m=18446744073709551615 time=23991883812197 size=461
+Stacks
+Stack id=8 nframes=1
+ pc=4812576 func=30 file=31 line=128
+Stack id=13 nframes=1
+ pc=4813540 func=32 file=33 line=37
+Stack id=3 nframes=4
+ pc=4421156 func=34 file=35 line=1360
+ pc=4537289 func=36 file=37 line=246
+ pc=4812325 func=38 file=31 line=125
+ pc=4813276 func=32 file=33 line=27
+Stack id=2 nframes=3
+ pc=4537037 func=36 file=37 line=229
+ pc=4812325 func=38 file=31 line=125
+ pc=4813276 func=32 file=33 line=27
+Stack id=16 nframes=1
+ pc=4813956 func=39 file=33 line=43
+Stack id=19 nframes=3
+ pc=4814128 func=40 file=33 line=45
+ pc=4811708 func=41 file=42 line=141
+ pc=4813956 func=39 file=33 line=43
+Stack id=10 nframes=1
+ pc=4813380 func=32 file=33 line=33
+Stack id=1 nframes=4
+ pc=4551915 func=43 file=44 line=248
+ pc=4537017 func=36 file=37 line=228
+ pc=4812325 func=38 file=31 line=125
+ pc=4813276 func=32 file=33 line=27
+Stack id=7 nframes=4
+ pc=4543253 func=45 file=37 line=831
+ pc=4537306 func=36 file=37 line=249
+ pc=4812325 func=38 file=31 line=125
+ pc=4813276 func=32 file=33 line=27
+Stack id=11 nframes=1
+ pc=4813412 func=32 file=33 line=34
+Stack id=4 nframes=1
+ pc=4545952 func=46 file=47 line=42
+Stack id=5 nframes=4
+ pc=4545861 func=48 file=47 line=42
+ pc=4537294 func=36 file=37 line=248
+ pc=4812325 func=38 file=31 line=125
+ pc=4813276 func=32 file=33 line=27
+Stack id=17 nframes=3
+ pc=4814128 func=40 file=33 line=45
+ pc=4811708 func=41 file=42 line=141
+ pc=4813956 func=39 file=33 line=43
+Stack id=24 nframes=1
+ pc=4813616 func=32 file=33 line=54
+Stack id=21 nframes=2
+ pc=4814276 func=49 file=42 line=80
+ pc=4813974 func=39 file=33 line=50
+Stack id=12 nframes=1
+ pc=4813728 func=39 file=33 line=37
+Stack id=14 nframes=2
+ pc=4640519 func=50 file=51 line=116
+ pc=4813550 func=32 file=33 line=51
+Stack id=9 nframes=2
+ pc=4812399 func=38 file=31 line=128
+ pc=4813276 func=32 file=33 line=27
+Stack id=22 nframes=3
+ pc=4640260 func=52 file=51 line=81
+ pc=4814374 func=53 file=51 line=87
+ pc=4813989 func=39 file=33 line=50
+Stack id=20 nframes=1
+ pc=4813956 func=39 file=33 line=43
+Stack id=6 nframes=1
+ pc=4543296 func=54 file=37 line=831
+Stack id=15 nframes=1
+ pc=4813886 func=39 file=33 line=41
+Stack id=18 nframes=5
+ pc=4814195 func=55 file=33 line=46
+ pc=4811708 func=41 file=42 line=141
+ pc=4814128 func=40 file=33 line=45
+ pc=4811708 func=41 file=42 line=141
+ pc=4813956 func=39 file=33 line=43
+Stack id=23 nframes=3
+ pc=4217457 func=56 file=57 line=442
+ pc=4543501 func=58 file=37 line=881
+ pc=4546010 func=46 file=47 line=54
+EventBatch gen=1 m=18446744073709551615 time=23991883804190 size=1620
+Strings
+String id=1
+ data="Not worker"
+String id=2
+ data="GC (dedicated)"
+String id=3
+ data="GC (fractional)"
+String id=4
+ data="GC (idle)"
+String id=5
+ data="unspecified"
+String id=6
+ data="forever"
+String id=7
+ data="network"
+String id=8
+ data="select"
+String id=9
+ data="sync.(*Cond).Wait"
+String id=10
+ data="sync"
+String id=11
+ data="chan send"
+String id=12
+ data="chan receive"
+String id=13
+ data="GC mark assist wait for work"
+String id=14
+ data="GC background sweeper wait"
+String id=15
+ data="system goroutine wait"
+String id=16
+ data="preempted"
+String id=17
+ data="wait for debug call"
+String id=18
+ data="wait until GC ends"
+String id=19
+ data="sleep"
+String id=20
+ data="runtime.GoSched"
+String id=21
+ data="start trace"
+String id=22
+ data="task0"
+String id=23
+ data="task0 region"
+String id=24
+ data="unended region"
+String id=25
+ data="region0"
+String id=26
+ data="region1"
+String id=27
+ data="key0"
+String id=28
+ data="0123456789abcdef"
+String id=29
+ data="post-existing region"
+String id=30
+ data="runtime/trace.Start.func1"
+String id=31
+ data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/trace/trace.go"
+String id=32
+ data="main.main"
+String id=33
+ data="/usr/local/google/home/mknyszek/work/go-1/src/internal/trace/v2/testdata/testprog/annotations.go"
+String id=34
+ data="runtime.startTheWorld"
+String id=35
+ data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/proc.go"
+String id=36
+ data="runtime.StartTrace"
+String id=37
+ data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/trace2.go"
+String id=38
+ data="runtime/trace.Start"
+String id=39
+ data="main.main.func1"
+String id=40
+ data="main.main.func1.1"
+String id=41
+ data="runtime/trace.WithRegion"
+String id=42
+ data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/trace/annotation.go"
+String id=43
+ data="runtime.traceLocker.Gomaxprocs"
+String id=44
+ data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/trace2runtime.go"
+String id=45
+ data="runtime.(*traceAdvancerState).start"
+String id=46
+ data="runtime.traceStartReadCPU.func1"
+String id=47
+ data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/trace2cpu.go"
+String id=48
+ data="runtime.traceStartReadCPU"
+String id=49
+ data="runtime/trace.(*Task).End"
+String id=50
+ data="sync.(*WaitGroup).Wait"
+String id=51
+ data="/usr/local/google/home/mknyszek/work/go-1/src/sync/waitgroup.go"
+String id=52
+ data="sync.(*WaitGroup).Add"
+String id=53
+ data="sync.(*WaitGroup).Done"
+String id=54
+ data="runtime.(*traceAdvancerState).start.func1"
+String id=55
+ data="main.main.func1.1.1"
+String id=56
+ data="runtime.chanrecv1"
+String id=57
+ data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/chan.go"
+String id=58
+ data="runtime.(*wakeableSleep).sleep"