From: Michael Anthony Knyszek
Date: Tue, 7 Nov 2023 18:35:37 +0000 (+0000)
Subject: internal/trace: implement goroutine analysis for v2 traces
X-Git-Tag: go1.22rc1~351
X-Git-Url: http://www.git.cypherpunks.ru/?p=gostls13.git;a=commitdiff_plain;h=3b303fa9815f0869a9ba429af537fd75d1dda85d

internal/trace: implement goroutine analysis for v2 traces

For #63960.

Change-Id: I1efe35435e32623aba894a915114e394570ebc56
Reviewed-on: https://go-review.googlesource.com/c/go/+/540259
Reviewed-by: Michael Pratt
LUCI-TryBot-Result: Go LUCI
---
diff --git a/src/internal/trace/goroutinesv2.go b/src/internal/trace/goroutinesv2.go
new file mode 100644
index 0000000000..6b13f20425
--- /dev/null
+++ b/src/internal/trace/goroutinesv2.go
@@ -0,0 +1,555 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+	tracev2 "internal/trace/v2"
+	"io"
+	"sort"
+	"time"
+)
+
+// GoroutineSummary contains statistics and execution details of a single goroutine.
+// (For v2 traces.)
+type GoroutineSummary struct {
+	ID           tracev2.GoID
+	Name         string       // A non-unique human-friendly identifier for the goroutine.
+	PC           uint64       // The start PC of the goroutine.
+	CreationTime tracev2.Time // Timestamp of the first appearance in the trace.
+	StartTime    tracev2.Time // Timestamp of the first time it started running. 0 if the goroutine never ran.
+	EndTime      tracev2.Time // Timestamp of when the goroutine exited. 0 if the goroutine never exited.
+
+	// List of regions in the goroutine, sorted based on the start time.
+	Regions []*UserRegionSummary
+
+	// Statistics of execution time during the goroutine execution.
+	GoroutineExecStats
+
+	// goroutineSummary is state used just for computing this structure.
+	// It's dropped before being returned to the caller.
+	//
+	// More specifically, if it's nil, it indicates that this summary has
+	// already been finalized.
+	*goroutineSummary
+}
+
+// UserRegionSummary represents a region and goroutine execution stats
+// while the region was active. (For v2 traces.)
+type UserRegionSummary struct {
+	TaskID tracev2.TaskID
+	Name   string
+
+	// Region start event. Normally EventRegionBegin event or nil,
+	// but can be a state transition event from NotExist or Undetermined
+	// if the region is a synthetic region representing task inheritance
+	// from the parent goroutine.
+	Start *tracev2.Event
+
+	// Region end event. Normally EventRegionEnd event or nil,
+	// but can be a state transition event to NotExist if the goroutine
+	// terminated without explicitly ending the region.
+	End *tracev2.Event
+
+	GoroutineExecStats
+}
+
+// GoroutineExecStats contains statistics about a goroutine's execution
+// during a period of time.
+type GoroutineExecStats struct {
+	ExecTime          time.Duration
+	SchedWaitTime     time.Duration
+	BlockTimeByReason map[string]time.Duration
+	SyscallTime       time.Duration
+	SyscallBlockTime  time.Duration
+	RangeTime         map[string]time.Duration
+	TotalTime         time.Duration
+}
+
+// sub returns the stats s-v.
+func (s GoroutineExecStats) sub(v GoroutineExecStats) (r GoroutineExecStats) {
+	r = s.clone()
+	r.ExecTime -= v.ExecTime
+	r.SchedWaitTime -= v.SchedWaitTime
+	for reason := range s.BlockTimeByReason {
+		r.BlockTimeByReason[reason] -= v.BlockTimeByReason[reason]
+	}
+	r.SyscallTime -= v.SyscallTime
+	r.SyscallBlockTime -= v.SyscallBlockTime
+	r.TotalTime -= v.TotalTime
+	for name := range s.RangeTime {
+		r.RangeTime[name] -= v.RangeTime[name]
+	}
+	return r
+}
+
+func (s GoroutineExecStats) clone() (r GoroutineExecStats) {
+	r = s
+	r.BlockTimeByReason = make(map[string]time.Duration)
+	for reason, dt := range s.BlockTimeByReason {
+		r.BlockTimeByReason[reason] = dt
+	}
+	r.RangeTime = make(map[string]time.Duration)
+	for name, dt := range s.RangeTime {
+		r.RangeTime[name] = dt
+	}
+	return r
+}
+
+// snapshotStat returns the snapshot of the goroutine execution statistics.
+// This is called as we process the ordered trace event stream. lastTs is used
+// to process pending statistics if this is called before any goroutine end event.
+func (g *GoroutineSummary) snapshotStat(lastTs tracev2.Time) (ret GoroutineExecStats) {
+	ret = g.GoroutineExecStats.clone()
+
+	if g.goroutineSummary == nil {
+		return ret // Already finalized; no pending state.
+	}
+
+	// Set the total time if necessary.
+	if g.TotalTime == 0 {
+		ret.TotalTime = lastTs.Sub(g.CreationTime)
+	}
+
+	// Add in time since lastTs.
+	if g.lastStartTime != 0 {
+		ret.ExecTime += lastTs.Sub(g.lastStartTime)
+	}
+	if g.lastRunnableTime != 0 {
+		ret.SchedWaitTime += lastTs.Sub(g.lastRunnableTime)
+	}
+	if g.lastBlockTime != 0 {
+		ret.BlockTimeByReason[g.lastBlockReason] += lastTs.Sub(g.lastBlockTime)
+	}
+	if g.lastSyscallTime != 0 {
+		ret.SyscallTime += lastTs.Sub(g.lastSyscallTime)
+	}
+	if g.lastSyscallBlockTime != 0 {
+		ret.SchedWaitTime += lastTs.Sub(g.lastSyscallBlockTime)
+	}
+	for name, ts := range g.lastRangeTime {
+		ret.RangeTime[name] += lastTs.Sub(ts)
+	}
+	return ret
+}
+
+// finalize is called when processing a goroutine end event, or at
+// the end of trace processing (in which case trigger is nil). It
+// finalizes the execution stats and any active regions in the goroutine.
+func (g *GoroutineSummary) finalize(lastTs tracev2.Time, trigger *tracev2.Event) {
+	if trigger != nil {
+		g.EndTime = trigger.Time()
+	}
+	finalStat := g.snapshotStat(lastTs)
+
+	g.GoroutineExecStats = finalStat
+
+	// System goroutines are never part of regions, even though they
+	// "inherit" a task due to creation (EvGoCreate) from within a region.
+	// This may happen e.g. if the first GC is triggered within a region,
+	// starting the GC worker goroutines.
+	if !IsSystemGoroutine(g.Name) {
+		for _, s := range g.activeRegions {
+			s.End = trigger
+			s.GoroutineExecStats = finalStat.sub(s.GoroutineExecStats)
+			g.Regions = append(g.Regions, s)
+		}
+	}
+	*(g.goroutineSummary) = goroutineSummary{}
+}
+
+// goroutineSummary is a private part of GoroutineSummary that is required only during analysis.
+type goroutineSummary struct {
+	lastStartTime        tracev2.Time
+	lastRunnableTime     tracev2.Time
+	lastBlockTime        tracev2.Time
+	lastBlockReason      string
+	lastSyscallTime      tracev2.Time
+	lastSyscallBlockTime tracev2.Time
+	lastRangeTime        map[string]tracev2.Time
+	activeRegions        []*UserRegionSummary // stack of active regions
+}
+
+// SummarizeGoroutines generates statistics for all goroutines in the trace.
+func SummarizeGoroutines(trace io.Reader) (map[tracev2.GoID]*GoroutineSummary, error) {
+	// Create the analysis state.
+	b := goroutineStatsBuilder{
+		gs:          make(map[tracev2.GoID]*GoroutineSummary),
+		syscallingP: make(map[tracev2.ProcID]tracev2.GoID),
+		syscallingG: make(map[tracev2.GoID]tracev2.ProcID),
+		rangesP:     make(map[rangeP]tracev2.GoID),
+	}
+
+	// Process the trace.
+	r, err := tracev2.NewReader(trace)
+	if err != nil {
+		return nil, err
+	}
+	for {
+		ev, err := r.ReadEvent()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return nil, err
+		}
+		b.event(ev)
+	}
+	return b.finalize(), nil
+}
+
+// goroutineStatsBuilder constructs per-goroutine time statistics for v2 traces.
+type goroutineStatsBuilder struct {
+	// gs contains the map of goroutine summaries we're building up to return to the caller.
+	gs map[tracev2.GoID]*GoroutineSummary
+
+	// syscallingP and syscallingG represent a binding between a P and G in a syscall.
+	// Used to correctly identify and clean up after syscalls (blocking or otherwise).
+	syscallingP map[tracev2.ProcID]tracev2.GoID
+	syscallingG map[tracev2.GoID]tracev2.ProcID
+
+	// rangesP is used for optimistic tracking of P-based ranges for goroutines.
+	//
+	// It's a best-effort mapping of an active range on a P to the goroutine we think
+	// is associated with it.
+	rangesP map[rangeP]tracev2.GoID
+
+	lastTs tracev2.Time // timestamp of the last event processed.
+	syncTs tracev2.Time // timestamp of the last sync event processed (or the first timestamp in the trace).
+}
+
+type rangeP struct {
+	id   tracev2.ProcID
+	name string
+}
+
+// event feeds a single event into the stats builder.
+func (b *goroutineStatsBuilder) event(ev tracev2.Event) {
+	if b.syncTs == 0 {
+		b.syncTs = ev.Time()
+	}
+	b.lastTs = ev.Time()
+
+	switch ev.Kind() {
+	// Record sync time for the RangeActive events.
+	case tracev2.EventSync:
+		b.syncTs = ev.Time()
+
+	// Handle state transitions.
+	case tracev2.EventStateTransition:
+		st := ev.StateTransition()
+		switch st.Resource.Kind {
+		// Handle goroutine transitions, which are the meat of this computation.
+		case tracev2.ResourceGoroutine:
+			id := st.Resource.Goroutine()
+			old, new := st.Goroutine()
+			if old == new {
+				// Skip these events; they're not telling us anything new.
+				break
+			}
+
+			// Handle transition out.
+			g := b.gs[id]
+			switch old {
+			case tracev2.GoUndetermined, tracev2.GoNotExist:
+				g = &GoroutineSummary{ID: id, goroutineSummary: &goroutineSummary{}}
+				// If we're coming out of GoUndetermined, then the creation time is the
+				// time of the last sync.
+				if old == tracev2.GoUndetermined {
+					g.CreationTime = b.syncTs
+				} else {
+					g.CreationTime = ev.Time()
+				}
+				// The goroutine is being created, or it's being named for the first time.
+				g.lastRangeTime = make(map[string]tracev2.Time)
+				g.BlockTimeByReason = make(map[string]time.Duration)
+				g.RangeTime = make(map[string]time.Duration)
+
+				// When a goroutine is newly created, inherit the task
+				// of the active region. For ease of handling this
+				// case, we create a fake region description with the
+				// task id. This isn't strictly necessary as this
+				// goroutine may not be associated with the task, but
+				// it can be convenient to see all children created
+				// during a region.
+				//
+				// N.B. ev.Goroutine() will always be NoGoroutine for the
+				// Undetermined case, so this will simply not fire.
+				if creatorG := b.gs[ev.Goroutine()]; creatorG != nil && len(creatorG.activeRegions) > 0 {
+					regions := creatorG.activeRegions
+					s := regions[len(regions)-1]
+					if s.TaskID != tracev2.NoTask {
+						g.activeRegions = []*UserRegionSummary{{TaskID: s.TaskID, Start: &ev}}
+					}
+				}
+				b.gs[g.ID] = g
+			case tracev2.GoRunning:
+				// Record execution time as we transition out of running.
+				g.ExecTime += ev.Time().Sub(g.lastStartTime)
+				g.lastStartTime = 0
+			case tracev2.GoWaiting:
+				// Record block time as we transition out of waiting.
+				if g.lastBlockTime != 0 {
+					g.BlockTimeByReason[g.lastBlockReason] += ev.Time().Sub(g.lastBlockTime)
+					g.lastBlockTime = 0
+				}
+			case tracev2.GoRunnable:
+				// Record sched latency time as we transition out of runnable.
+				if g.lastRunnableTime != 0 {
+					g.SchedWaitTime += ev.Time().Sub(g.lastRunnableTime)
+					g.lastRunnableTime = 0
+				}
+			case tracev2.GoSyscall:
+				// Record syscall execution time and syscall block time as we transition out of syscall.
+				if g.lastSyscallTime != 0 {
+					if g.lastSyscallBlockTime != 0 {
+						g.SyscallBlockTime += ev.Time().Sub(g.lastSyscallBlockTime)
+						g.SyscallTime += g.lastSyscallBlockTime.Sub(g.lastSyscallTime)
+					} else {
+						g.SyscallTime += ev.Time().Sub(g.lastSyscallTime)
+					}
+					g.lastSyscallTime = 0
+					g.lastSyscallBlockTime = 0
+
+					// Clear the syscall map.
+					delete(b.syscallingP, b.syscallingG[id])
+					delete(b.syscallingG, id)
+				}
+			}
+
+			// The goroutine hasn't been identified yet. Take any stack we
+			// can get and identify it by the bottom-most frame of that stack.
+			if g.PC == 0 {
+				stk := ev.Stack()
+				if stk != tracev2.NoStack {
+					var frame tracev2.StackFrame
+					var ok bool
+					stk.Frames(func(f tracev2.StackFrame) bool {
+						frame = f
+						ok = true
+						return false
+					})
+					if ok {
+						g.PC = frame.PC
+						g.Name = frame.Func
+					}
+				}
+			}
+
+			// Handle transition in.
+			switch new {
+			case tracev2.GoRunning:
+				// We started running. Record it.
+				g.lastStartTime = ev.Time()
+				if g.StartTime == 0 {
+					g.StartTime = ev.Time()
+				}
+			case tracev2.GoRunnable:
+				g.lastRunnableTime = ev.Time()
+			case tracev2.GoWaiting:
+				if st.Reason != "forever" {
+					g.lastBlockTime = ev.Time()
+					g.lastBlockReason = st.Reason
+					break
+				}
+				// "Forever" is like goroutine death.
+				fallthrough
+			case tracev2.GoNotExist:
+				g.finalize(ev.Time(), &ev)
+			case tracev2.GoSyscall:
+				b.syscallingP[ev.Proc()] = id
+				b.syscallingG[id] = ev.Proc()
+				g.lastSyscallTime = ev.Time()
+			}
+
+		// Handle procs to detect syscall blocking, which is identifiable as a
+		// proc going idle while the goroutine it was attached to is in a syscall.
+		case tracev2.ResourceProc:
+			id := st.Resource.Proc()
+			old, new := st.Proc()
+			if old != new && new == tracev2.ProcIdle {
+				if goid, ok := b.syscallingP[id]; ok {
+					g := b.gs[goid]
+					g.lastSyscallBlockTime = ev.Time()
+					delete(b.syscallingP, id)
+				}
+			}
+		}
+
+	// Handle ranges of all kinds.
+	case tracev2.EventRangeBegin, tracev2.EventRangeActive:
+		r := ev.Range()
+		var g *GoroutineSummary
+		switch r.Scope.Kind {
+		case tracev2.ResourceGoroutine:
+			// Simple goroutine range. We attribute the entire range regardless of
+			// goroutine stats. Lots of situations are still identifiable, e.g. a
+			// goroutine blocked often in mark assist will have both high mark assist
+			// and high block times. Those interested in a deeper view can look at the
+			// trace viewer.
+			g = b.gs[r.Scope.Goroutine()]
+		case tracev2.ResourceProc:
+			// N.B. These ranges are not actually bound to the goroutine, they're
+			// bound to the P. But if we happen to be on the P the whole time, let's
+			// try to attribute it to the goroutine. (e.g. GC sweeps are here.)
+			g = b.gs[ev.Goroutine()]
+			if g != nil {
+				b.rangesP[rangeP{id: r.Scope.Proc(), name: r.Name}] = ev.Goroutine()
+			}
+		}
+		if g == nil {
+			break
+		}
+		if ev.Kind() == tracev2.EventRangeActive {
+			if ts := g.lastRangeTime[r.Name]; ts != 0 {
+				g.RangeTime[r.Name] += b.syncTs.Sub(ts)
+			}
+			g.lastRangeTime[r.Name] = b.syncTs
+		} else {
+			g.lastRangeTime[r.Name] = ev.Time()
+		}
+	case tracev2.EventRangeEnd:
+		r := ev.Range()
+		var g *GoroutineSummary
+		switch r.Scope.Kind {
+		case tracev2.ResourceGoroutine:
+			g = b.gs[r.Scope.Goroutine()]
+		case tracev2.ResourceProc:
+			rp := rangeP{id: r.Scope.Proc(), name: r.Name}
+			if goid, ok := b.rangesP[rp]; ok {
+				if goid == ev.Goroutine() {
+					// As the comment in the RangeBegin case states, this is only OK
+					// if we finish on the same goroutine we started on.
+					g = b.gs[goid]
+				}
+				delete(b.rangesP, rp)
+			}
+		}
+		if g == nil {
+			break
+		}
+		ts := g.lastRangeTime[r.Name]
+		if ts == 0 {
+			break
+		}
+		g.RangeTime[r.Name] += ev.Time().Sub(ts)
+		delete(g.lastRangeTime, r.Name)
+
+	// Handle user-defined regions.
+	case tracev2.EventRegionBegin:
+		g := b.gs[ev.Goroutine()]
+		r := ev.Region()
+		g.activeRegions = append(g.activeRegions, &UserRegionSummary{
+			Name:               r.Type,
+			TaskID:             r.Task,
+			Start:              &ev,
+			GoroutineExecStats: g.snapshotStat(ev.Time()),
+		})
+	case tracev2.EventRegionEnd:
+		g := b.gs[ev.Goroutine()]
+		r := ev.Region()
+		var sd *UserRegionSummary
+		if regionStk := g.activeRegions; len(regionStk) > 0 {
+			// Pop the top region from the stack since that's what must have ended.
+			n := len(regionStk)
+			sd = regionStk[n-1]
+			regionStk = regionStk[:n-1]
+			g.activeRegions = regionStk
+		} else {
+			// This is an "end" without a start. Just fabricate the region now.
+			sd = &UserRegionSummary{Name: r.Type, TaskID: r.Task}
+		}
+		sd.GoroutineExecStats = g.snapshotStat(ev.Time()).sub(sd.GoroutineExecStats)
+		sd.End = &ev
+		g.Regions = append(g.Regions, sd)
+	}
+}
+
+// finalize indicates to the builder that we're done processing the trace.
+// It cleans up any remaining state and returns the full summary.
+func (b *goroutineStatsBuilder) finalize() map[tracev2.GoID]*GoroutineSummary {
+	for _, g := range b.gs {
+		g.finalize(b.lastTs, nil)
+
+		// Sort based on region start time.
+		sort.Slice(g.Regions, func(i, j int) bool {
+			x := g.Regions[i].Start
+			y := g.Regions[j].Start
+			if x == nil {
+				return true
+			}
+			if y == nil {
+				return false
+			}
+			return x.Time() < y.Time()
+		})
+		g.goroutineSummary = nil
+	}
+	return b.gs
+}
+
+// RelatedGoroutinesV2 finds a set of goroutines related to goroutine goid for v2 traces.
+// The association is based on whether they have synchronized with each other in the Go
+// scheduler (one has unblocked another).
+func RelatedGoroutinesV2(trace io.Reader, goid tracev2.GoID) (map[tracev2.GoID]struct{}, error) {
+	r, err := tracev2.NewReader(trace)
+	if err != nil {
+		return nil, err
+	}
+	// Process all the events, looking for transitions of goroutines
+	// out of GoWaiting. If there was an active goroutine when this
+	// happened, then we know that active goroutine unblocked another.
+	// Scribble all these down so we can process them.
+	type unblockEdge struct {
+		operator tracev2.GoID
+		operand  tracev2.GoID
+	}
+	var unblockEdges []unblockEdge
+	for {
+		ev, err := r.ReadEvent()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return nil, err
+		}
+		if ev.Goroutine() == tracev2.NoGoroutine {
+			continue
+		}
+		if ev.Kind() != tracev2.EventStateTransition {
+			continue
+		}
+		st := ev.StateTransition()
+		if st.Resource.Kind != tracev2.ResourceGoroutine {
+			continue
+		}
+		id := st.Resource.Goroutine()
+		old, new := st.Goroutine()
+		if old == new || old != tracev2.GoWaiting {
+			continue
+		}
+		unblockEdges = append(unblockEdges, unblockEdge{
+			operator: ev.Goroutine(),
+			operand:  id,
+		})
+	}
+	// Compute the transitive closure of depth 2 of goroutines that have unblocked each other
+	// (starting from goid).
+	gmap := make(map[tracev2.GoID]struct{})
+	gmap[goid] = struct{}{}
+	for i := 0; i < 2; i++ {
+		// Copy the map.
+		gmap1 := make(map[tracev2.GoID]struct{})
+		for g := range gmap {
+			gmap1[g] = struct{}{}
+		}
+		for _, edge := range unblockEdges {
+			if _, ok := gmap[edge.operand]; ok {
+				gmap1[edge.operator] = struct{}{}
+			}
+		}
+		gmap = gmap1
+	}
+	return gmap, nil
+}
diff --git a/src/internal/trace/goroutinesv2_test.go b/src/internal/trace/goroutinesv2_test.go
new file mode 100644
index 0000000000..5ab3726137
--- /dev/null
+++ b/src/internal/trace/goroutinesv2_test.go
@@ -0,0 +1,229 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+	tracev2 "internal/trace/v2"
+	"internal/trace/v2/testtrace"
+	"testing"
+)
+
+func TestSummarizeGoroutinesTrace(t *testing.T) {
+	summaries := summarizeTraceTest(t, "v2/testdata/tests/go122-gc-stress.test")
+	var (
+		hasSchedWaitTime    bool
+		hasSyncBlockTime    bool
+		hasGCMarkAssistTime bool
+	)
+	for _, summary := range summaries {
+		basicSummaryChecks(t, summary)
+		hasSchedWaitTime = hasSchedWaitTime || summary.SchedWaitTime > 0
+		if dt, ok := summary.BlockTimeByReason["sync"]; ok && dt > 0 {
+			hasSyncBlockTime = true
+		}
+		if dt, ok := summary.RangeTime["GC mark assist"]; ok && dt > 0 {
+			hasGCMarkAssistTime = true
+		}
+	}
+	if !hasSchedWaitTime {
+		t.Error("missing sched wait time")
+	}
+	if !hasSyncBlockTime {
+		t.Error("missing sync block time")
+	}
+	if !hasGCMarkAssistTime {
+		t.Error("missing GC mark assist time")
+	}
+}
+
+func TestSummarizeGoroutinesRegionsTrace(t *testing.T) {
+	summaries := summarizeTraceTest(t, "v2/testdata/tests/go122-annotations.test")
+	type region struct {
+		startKind tracev2.EventKind
+		endKind   tracev2.EventKind
+	}
+	wantRegions := map[string]region{
+		// N.B. "pre-existing region" never even makes it into the trace.
+		//
+		// TODO(mknyszek): Add test case for end-without-a-start, which can happen at
+		// a generation split only.
+		"": {tracev2.EventStateTransition, tracev2.EventStateTransition}, // Task inheritance marker.
+ "task0 region": {tracev2.EventRegionBegin, tracev2.EventBad}, + "region0": {tracev2.EventRegionBegin, tracev2.EventRegionEnd}, + "region1": {tracev2.EventRegionBegin, tracev2.EventRegionEnd}, + "unended region": {tracev2.EventRegionBegin, tracev2.EventStateTransition}, + "post-existing region": {tracev2.EventRegionBegin, tracev2.EventBad}, + } + for _, summary := range summaries { + basicSummaryChecks(t, summary) + for _, region := range summary.Regions { + want, ok := wantRegions[region.Name] + if !ok { + continue + } + checkRegionEvents(t, want.startKind, want.endKind, summary.ID, region) + delete(wantRegions, region.Name) + } + } + if len(wantRegions) != 0 { + t.Errorf("failed to find regions: %#v", wantRegions) + } +} + +func basicSummaryChecks(t *testing.T, summary *GoroutineSummary) { + if summary.ID == tracev2.NoGoroutine { + t.Error("summary found for no goroutine") + return + } + if (summary.StartTime != 0 && summary.CreationTime > summary.StartTime) || + (summary.StartTime != 0 && summary.EndTime != 0 && summary.StartTime > summary.EndTime) { + t.Errorf("bad summary creation/start/end times for G %d: creation=%d start=%d end=%d", summary.ID, summary.CreationTime, summary.StartTime, summary.EndTime) + } + if (summary.PC != 0 && summary.Name == "") || (summary.PC == 0 && summary.Name != "") { + t.Errorf("bad name and/or PC for G %d: pc=0x%x name=%q", summary.ID, summary.PC, summary.Name) + } + basicGoroutineExecStatsChecks(t, &summary.GoroutineExecStats) + for _, region := range summary.Regions { + basicGoroutineExecStatsChecks(t, ®ion.GoroutineExecStats) + } +} + +func summarizeTraceTest(t *testing.T, testPath string) map[tracev2.GoID]*GoroutineSummary { + r, _, err := testtrace.ParseFile(testPath) + if err != nil { + t.Fatalf("malformed test %s: bad trace file: %v", testPath, err) + } + summaries, err := SummarizeGoroutines(r) + if err != nil { + t.Fatalf("failed to process trace %s: %v", testPath, err) + } + return summaries +} + +func checkRegionEvents(t *testing.T, wantStart, wantEnd tracev2.EventKind, goid tracev2.GoID, region *UserRegionSummary) { + switch wantStart { + case tracev2.EventBad: + if region.Start != nil { + t.Errorf("expected nil region start event, got\n%s", region.Start.String()) + } + case tracev2.EventStateTransition, tracev2.EventRegionBegin: + if region.Start == nil { + t.Error("expected non-nil region start event, got nil") + } + kind := region.Start.Kind() + if kind != wantStart { + t.Errorf("wanted region start event %s, got %s", wantStart, kind) + } + if kind == tracev2.EventRegionBegin { + if region.Start.Region().Type != region.Name { + t.Errorf("region name mismatch: event has %s, summary has %s", region.Start.Region().Type, region.Name) + } + } else { + st := region.Start.StateTransition() + if st.Resource.Kind != tracev2.ResourceGoroutine { + t.Errorf("found region start event for the wrong resource: %s", st.Resource) + } + if st.Resource.Goroutine() != goid { + t.Errorf("found region start event for the wrong resource: wanted goroutine %d, got %s", goid, st.Resource) + } + if old, _ := st.Goroutine(); old != tracev2.GoNotExist && old != tracev2.GoUndetermined { + t.Errorf("expected transition from GoNotExist or GoUndetermined, got transition from %s instead", old) + } + } + default: + t.Errorf("unexpected want start event type: %s", wantStart) + } + + switch wantEnd { + case tracev2.EventBad: + if region.End != nil { + t.Errorf("expected nil region end event, got\n%s", region.End.String()) + } + case tracev2.EventStateTransition, 
+		if region.End == nil {
+			t.Error("expected non-nil region end event, got nil")
+		}
+		kind := region.End.Kind()
+		if kind != wantEnd {
+			t.Errorf("wanted region end event %s, got %s", wantEnd, kind)
+		}
+		if kind == tracev2.EventRegionEnd {
+			if region.End.Region().Type != region.Name {
+				t.Errorf("region name mismatch: event has %s, summary has %s", region.End.Region().Type, region.Name)
+			}
+		} else {
+			st := region.End.StateTransition()
+			if st.Resource.Kind != tracev2.ResourceGoroutine {
+				t.Errorf("found region end event for the wrong resource: %s", st.Resource)
+			}
+			if st.Resource.Goroutine() != goid {
+				t.Errorf("found region end event for the wrong resource: wanted goroutine %d, got %s", goid, st.Resource)
+			}
+			if _, new := st.Goroutine(); new != tracev2.GoNotExist {
+				t.Errorf("expected transition to GoNotExist, got transition to %s instead", new)
+			}
+		}
+	default:
+		t.Errorf("unexpected want end event type: %s", wantEnd)
+	}
+}
+
+func basicGoroutineExecStatsChecks(t *testing.T, stats *GoroutineExecStats) {
+	if stats.ExecTime < 0 {
+		t.Error("found negative ExecTime")
+	}
+	if stats.SchedWaitTime < 0 {
+		t.Error("found negative SchedWaitTime")
+	}
+	if stats.SyscallTime < 0 {
+		t.Error("found negative SyscallTime")
+	}
+	if stats.SyscallBlockTime < 0 {
+		t.Error("found negative SyscallBlockTime")
+	}
+	if stats.TotalTime < 0 {
+		t.Error("found negative TotalTime")
+	}
+	for reason, dt := range stats.BlockTimeByReason {
+		if dt < 0 {
+			t.Errorf("found negative BlockTimeByReason for %s", reason)
+		}
+	}
+	for name, dt := range stats.RangeTime {
+		if dt < 0 {
+			t.Errorf("found negative RangeTime for range %s", name)
+		}
+	}
+}
+
+func TestRelatedGoroutinesV2Trace(t *testing.T) {
+	testPath := "v2/testdata/tests/go122-gc-stress.test"
+	r, _, err := testtrace.ParseFile(testPath)
+	if err != nil {
+		t.Fatalf("malformed test %s: bad trace file: %v", testPath, err)
+	}
+	targetg := tracev2.GoID(86)
+	got, err := RelatedGoroutinesV2(r, targetg)
+	if err != nil {
+		t.Fatalf("failed to find related goroutines for %s: %v", testPath, err)
+	}
+	want := map[tracev2.GoID]struct{}{
+		tracev2.GoID(86):  struct{}{}, // N.B. Result includes target.
+		tracev2.GoID(85):  struct{}{},
+		tracev2.GoID(111): struct{}{},
+	}
+	for goid := range got {
+		if _, ok := want[goid]; ok {
+			delete(want, goid)
+		} else {
+			t.Errorf("unexpected goroutine %d found in related goroutines for %d in test %s", goid, targetg, testPath)
+		}
+	}
+	if len(want) != 0 {
+		for goid := range want {
+			t.Errorf("failed to find related goroutine %d for goroutine %d in test %s", goid, targetg, testPath)
+		}
+	}
+}
diff --git a/src/internal/trace/v2/testdata/testprog/annotations.go b/src/internal/trace/v2/testdata/testprog/annotations.go
index 33180d1204..2507bc4d38 100644
--- a/src/internal/trace/v2/testdata/testprog/annotations.go
+++ b/src/internal/trace/v2/testdata/testprog/annotations.go
@@ -31,11 +31,15 @@ func main() {
 	// Beginning of traced execution.
var wg sync.WaitGroup ctx, task := trace.NewTask(bgctx, "task0") // EvUserTaskCreate("task0") + trace.StartRegion(ctx, "task0 region") + wg.Add(1) go func() { defer wg.Done() defer task.End() // EvUserTaskEnd("task0") + trace.StartRegion(ctx, "unended region") + trace.WithRegion(ctx, "region0", func() { // EvUserRegionBegin("region0", start) trace.WithRegion(ctx, "region1", func() { diff --git a/src/internal/trace/v2/testdata/tests/go122-annotations.test b/src/internal/trace/v2/testdata/tests/go122-annotations.test new file mode 100644 index 0000000000..ab4b92352d --- /dev/null +++ b/src/internal/trace/v2/testdata/tests/go122-annotations.test @@ -0,0 +1,297 @@ +-- expect -- +SUCCESS +-- trace -- +Trace Go1.22 +EventBatch gen=1 m=18446744073709551615 time=23991883811065 size=5 +Frequency freq=15625000 +EventBatch gen=1 m=1331278 time=23991883808413 size=16 +ProcStart dt=297 p=2 p_seq=1 +GoStart dt=162 g=7 g_seq=1 +HeapAlloc dt=267 heapalloc_value=1908736 +EventBatch gen=1 m=1331277 time=23991883807353 size=28 +ProcStart dt=472 p=1 p_seq=1 +GoStart dt=309 g=6 g_seq=1 +HeapAlloc dt=533 heapalloc_value=1892352 +HeapAlloc dt=47 heapalloc_value=1900544 +GoBlock dt=2071 reason_string=12 stack=23 +ProcStop dt=54 +EventBatch gen=1 m=1331276 time=23991883809162 size=10 +ProcStart dt=426 p=3 p_seq=1 +HeapAlloc dt=108 heapalloc_value=1916928 +EventBatch gen=1 m=1331274 time=23991883805418 size=325 +ProcStatus dt=241 p=0 pstatus=1 +GoStatus dt=5 g=1 m=1331274 gstatus=2 +ProcsChange dt=486 procs_value=48 stack=1 +STWBegin dt=80 kind_string=21 stack=2 +HeapGoal dt=3 heapgoal_value=4194304 +ProcStatus dt=3 p=1 pstatus=2 +ProcStatus dt=2 p=2 pstatus=2 +ProcStatus dt=1 p=3 pstatus=2 +ProcStatus dt=1 p=4 pstatus=2 +ProcStatus dt=1 p=5 pstatus=2 +ProcStatus dt=1 p=6 pstatus=2 +ProcStatus dt=1 p=7 pstatus=2 +ProcStatus dt=1 p=8 pstatus=2 +ProcStatus dt=1 p=9 pstatus=2 +ProcStatus dt=1 p=10 pstatus=2 +ProcStatus dt=1 p=11 pstatus=2 +ProcStatus dt=1 p=12 pstatus=2 +ProcStatus dt=1 p=13 pstatus=2 +ProcStatus dt=1 p=14 pstatus=2 +ProcStatus dt=1 p=15 pstatus=2 +ProcStatus dt=1 p=16 pstatus=2 +ProcStatus dt=1 p=17 pstatus=2 +ProcStatus dt=1 p=18 pstatus=2 +ProcStatus dt=1 p=19 pstatus=2 +ProcStatus dt=1 p=20 pstatus=2 +ProcStatus dt=1 p=21 pstatus=2 +ProcStatus dt=1 p=22 pstatus=2 +ProcStatus dt=1 p=23 pstatus=2 +ProcStatus dt=1 p=24 pstatus=2 +ProcStatus dt=1 p=25 pstatus=2 +ProcStatus dt=1 p=26 pstatus=2 +ProcStatus dt=1 p=27 pstatus=2 +ProcStatus dt=1 p=28 pstatus=2 +ProcStatus dt=1 p=29 pstatus=2 +ProcStatus dt=1 p=30 pstatus=2 +ProcStatus dt=1 p=31 pstatus=2 +ProcStatus dt=1 p=32 pstatus=2 +ProcStatus dt=1 p=33 pstatus=2 +ProcStatus dt=1 p=34 pstatus=2 +ProcStatus dt=1 p=35 pstatus=2 +ProcStatus dt=1 p=36 pstatus=2 +ProcStatus dt=1 p=37 pstatus=2 +ProcStatus dt=1 p=38 pstatus=2 +ProcStatus dt=1 p=39 pstatus=2 +ProcStatus dt=1 p=40 pstatus=2 +ProcStatus dt=1 p=41 pstatus=2 +ProcStatus dt=1 p=42 pstatus=2 +ProcStatus dt=1 p=43 pstatus=2 +ProcStatus dt=1 p=44 pstatus=2 +ProcStatus dt=1 p=45 pstatus=2 +ProcStatus dt=1 p=46 pstatus=2 +ProcStatus dt=1 p=47 pstatus=2 +ProcsChange dt=9 procs_value=48 stack=3 +STWEnd dt=209 +GoCreate dt=376 new_g=6 new_stack=4 stack=5 +GoCreate dt=96 new_g=7 new_stack=6 stack=7 +GoCreate dt=574 new_g=8 new_stack=8 stack=9 +UserTaskBegin dt=19 task=1 parent_task=0 name_string=22 stack=10 +UserRegionBegin dt=19 task=1 name_string=23 stack=11 +HeapAlloc dt=27 heapalloc_value=1884160 +GoCreate dt=578 new_g=9 new_stack=12 stack=13 +GoBlock dt=67 reason_string=10 stack=14 +GoStart dt=13 g=9 
g_seq=1 +UserRegionBegin dt=61 task=1 name_string=24 stack=15 +UserRegionBegin dt=8 task=1 name_string=25 stack=16 +UserRegionBegin dt=6 task=1 name_string=26 stack=17 +UserLog dt=780 task=1 key_string=27 value_string=28 stack=18 +UserRegionEnd dt=9 task=1 name_string=26 stack=19 +UserRegionEnd dt=504 task=1 name_string=25 stack=20 +UserTaskEnd dt=425 task=1 stack=21 +GoUnblock dt=503 g=1 g_seq=1 stack=22 +GoDestroy dt=7 +GoStart dt=13 g=1 g_seq=2 +UserRegionBegin dt=367 task=0 name_string=29 stack=24 +EventBatch gen=1 m=18446744073709551615 time=23991883811600 size=57 +GoStatus dt=358 g=2 m=18446744073709551615 gstatus=4 +GoStatus dt=3 g=3 m=18446744073709551615 gstatus=4 +GoStatus dt=2 g=4 m=18446744073709551615 gstatus=4 +GoStatus dt=1 g=5 m=18446744073709551615 gstatus=4 +EventBatch gen=1 m=18446744073709551615 time=23991883812197 size=461 +Stacks +Stack id=8 nframes=1 + pc=4812576 func=30 file=31 line=128 +Stack id=13 nframes=1 + pc=4813540 func=32 file=33 line=37 +Stack id=3 nframes=4 + pc=4421156 func=34 file=35 line=1360 + pc=4537289 func=36 file=37 line=246 + pc=4812325 func=38 file=31 line=125 + pc=4813276 func=32 file=33 line=27 +Stack id=2 nframes=3 + pc=4537037 func=36 file=37 line=229 + pc=4812325 func=38 file=31 line=125 + pc=4813276 func=32 file=33 line=27 +Stack id=16 nframes=1 + pc=4813956 func=39 file=33 line=43 +Stack id=19 nframes=3 + pc=4814128 func=40 file=33 line=45 + pc=4811708 func=41 file=42 line=141 + pc=4813956 func=39 file=33 line=43 +Stack id=10 nframes=1 + pc=4813380 func=32 file=33 line=33 +Stack id=1 nframes=4 + pc=4551915 func=43 file=44 line=248 + pc=4537017 func=36 file=37 line=228 + pc=4812325 func=38 file=31 line=125 + pc=4813276 func=32 file=33 line=27 +Stack id=7 nframes=4 + pc=4543253 func=45 file=37 line=831 + pc=4537306 func=36 file=37 line=249 + pc=4812325 func=38 file=31 line=125 + pc=4813276 func=32 file=33 line=27 +Stack id=11 nframes=1 + pc=4813412 func=32 file=33 line=34 +Stack id=4 nframes=1 + pc=4545952 func=46 file=47 line=42 +Stack id=5 nframes=4 + pc=4545861 func=48 file=47 line=42 + pc=4537294 func=36 file=37 line=248 + pc=4812325 func=38 file=31 line=125 + pc=4813276 func=32 file=33 line=27 +Stack id=17 nframes=3 + pc=4814128 func=40 file=33 line=45 + pc=4811708 func=41 file=42 line=141 + pc=4813956 func=39 file=33 line=43 +Stack id=24 nframes=1 + pc=4813616 func=32 file=33 line=54 +Stack id=21 nframes=2 + pc=4814276 func=49 file=42 line=80 + pc=4813974 func=39 file=33 line=50 +Stack id=12 nframes=1 + pc=4813728 func=39 file=33 line=37 +Stack id=14 nframes=2 + pc=4640519 func=50 file=51 line=116 + pc=4813550 func=32 file=33 line=51 +Stack id=9 nframes=2 + pc=4812399 func=38 file=31 line=128 + pc=4813276 func=32 file=33 line=27 +Stack id=22 nframes=3 + pc=4640260 func=52 file=51 line=81 + pc=4814374 func=53 file=51 line=87 + pc=4813989 func=39 file=33 line=50 +Stack id=20 nframes=1 + pc=4813956 func=39 file=33 line=43 +Stack id=6 nframes=1 + pc=4543296 func=54 file=37 line=831 +Stack id=15 nframes=1 + pc=4813886 func=39 file=33 line=41 +Stack id=18 nframes=5 + pc=4814195 func=55 file=33 line=46 + pc=4811708 func=41 file=42 line=141 + pc=4814128 func=40 file=33 line=45 + pc=4811708 func=41 file=42 line=141 + pc=4813956 func=39 file=33 line=43 +Stack id=23 nframes=3 + pc=4217457 func=56 file=57 line=442 + pc=4543501 func=58 file=37 line=881 + pc=4546010 func=46 file=47 line=54 +EventBatch gen=1 m=18446744073709551615 time=23991883804190 size=1620 +Strings +String id=1 + data="Not worker" +String id=2 + data="GC (dedicated)" +String id=3 
+ data="GC (fractional)" +String id=4 + data="GC (idle)" +String id=5 + data="unspecified" +String id=6 + data="forever" +String id=7 + data="network" +String id=8 + data="select" +String id=9 + data="sync.(*Cond).Wait" +String id=10 + data="sync" +String id=11 + data="chan send" +String id=12 + data="chan receive" +String id=13 + data="GC mark assist wait for work" +String id=14 + data="GC background sweeper wait" +String id=15 + data="system goroutine wait" +String id=16 + data="preempted" +String id=17 + data="wait for debug call" +String id=18 + data="wait until GC ends" +String id=19 + data="sleep" +String id=20 + data="runtime.GoSched" +String id=21 + data="start trace" +String id=22 + data="task0" +String id=23 + data="task0 region" +String id=24 + data="unended region" +String id=25 + data="region0" +String id=26 + data="region1" +String id=27 + data="key0" +String id=28 + data="0123456789abcdef" +String id=29 + data="post-existing region" +String id=30 + data="runtime/trace.Start.func1" +String id=31 + data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/trace/trace.go" +String id=32 + data="main.main" +String id=33 + data="/usr/local/google/home/mknyszek/work/go-1/src/internal/trace/v2/testdata/testprog/annotations.go" +String id=34 + data="runtime.startTheWorld" +String id=35 + data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/proc.go" +String id=36 + data="runtime.StartTrace" +String id=37 + data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/trace2.go" +String id=38 + data="runtime/trace.Start" +String id=39 + data="main.main.func1" +String id=40 + data="main.main.func1.1" +String id=41 + data="runtime/trace.WithRegion" +String id=42 + data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/trace/annotation.go" +String id=43 + data="runtime.traceLocker.Gomaxprocs" +String id=44 + data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/trace2runtime.go" +String id=45 + data="runtime.(*traceAdvancerState).start" +String id=46 + data="runtime.traceStartReadCPU.func1" +String id=47 + data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/trace2cpu.go" +String id=48 + data="runtime.traceStartReadCPU" +String id=49 + data="runtime/trace.(*Task).End" +String id=50 + data="sync.(*WaitGroup).Wait" +String id=51 + data="/usr/local/google/home/mknyszek/work/go-1/src/sync/waitgroup.go" +String id=52 + data="sync.(*WaitGroup).Add" +String id=53 + data="sync.(*WaitGroup).Done" +String id=54 + data="runtime.(*traceAdvancerState).start.func1" +String id=55 + data="main.main.func1.1.1" +String id=56 + data="runtime.chanrecv1" +String id=57 + data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/chan.go" +String id=58 + data="runtime.(*wakeableSleep).sleep"