]> Cypherpunks.ru repositories - gostls13.git/commitdiff
internal/trace: implement goroutine analysis for v2 traces
authorMichael Anthony Knyszek <mknyszek@google.com>
Tue, 7 Nov 2023 18:35:37 +0000 (18:35 +0000)
committerMichael Knyszek <mknyszek@google.com>
Fri, 10 Nov 2023 15:51:33 +0000 (15:51 +0000)
For #63960.

Change-Id: I1efe35435e32623aba894a915114e394570ebc56
Reviewed-on: https://go-review.googlesource.com/c/go/+/540259
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

src/internal/trace/goroutinesv2.go [new file with mode: 0644]
src/internal/trace/goroutinesv2_test.go [new file with mode: 0644]
src/internal/trace/v2/testdata/testprog/annotations.go
src/internal/trace/v2/testdata/tests/go122-annotations.test [new file with mode: 0644]

diff --git a/src/internal/trace/goroutinesv2.go b/src/internal/trace/goroutinesv2.go
new file mode 100644 (file)
index 0000000..6b13f20
--- /dev/null
@@ -0,0 +1,555 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+       tracev2 "internal/trace/v2"
+       "io"
+       "sort"
+       "time"
+)
+
+// GoroutineSummary contains statistics and execution details of a single goroutine.
+// (For v2 traces.)
+type GoroutineSummary struct {
+       ID           tracev2.GoID
+       Name         string       // A non-unique human-friendly identifier for the goroutine.
+       PC           uint64       // The start PC of the goroutine.
+       CreationTime tracev2.Time // Timestamp of the first appearance in the trace.
+       StartTime    tracev2.Time // Timestamp of the first time it started running. 0 if the goroutine never ran.
+       EndTime      tracev2.Time // Timestamp of when the goroutine exited. 0 if the goroutine never exited.
+
+       // List of regions in the goroutine, sorted based on the start time.
+       Regions []*UserRegionSummary
+
+       // Statistics of execution time during the goroutine execution.
+       GoroutineExecStats
+
+       // goroutineSummary is state used just for computing this structure.
+       // It's dropped before being returned to the caller.
+       //
+       // More specifically, if it's nil, it indicates that this summary has
+       // already been finalized.
+       *goroutineSummary
+}
+
+// UserRegionSummary represents a region and goroutine execution stats
+// while the region was active. (For v2 traces.)
+type UserRegionSummary struct {
+       TaskID tracev2.TaskID
+       Name   string
+
+       // Region start event. Normally EventRegionBegin event or nil,
+       // but can be a state transition event from NotExist or Undetermined
+       // if the region is a synthetic region representing task inheritance
+       // from the parent goroutine.
+       Start *tracev2.Event
+
+       // Region end event. Normally EventRegionEnd event or nil,
+       // but can be a state transition event to NotExist if the goroutine
+       // terminated without explicitly ending the region.
+       End *tracev2.Event
+
+       GoroutineExecStats
+}
+
+// GoroutineExecStats contains statistics about a goroutine's execution
+// during a period of time.
+type GoroutineExecStats struct {
+       ExecTime          time.Duration
+       SchedWaitTime     time.Duration
+       BlockTimeByReason map[string]time.Duration
+       SyscallTime       time.Duration
+       SyscallBlockTime  time.Duration
+       RangeTime         map[string]time.Duration
+       TotalTime         time.Duration
+}
+
+// sub returns the stats v-s.
+func (s GoroutineExecStats) sub(v GoroutineExecStats) (r GoroutineExecStats) {
+       r = s.clone()
+       r.ExecTime -= v.ExecTime
+       r.SchedWaitTime -= v.SchedWaitTime
+       for reason := range s.BlockTimeByReason {
+               r.BlockTimeByReason[reason] -= v.BlockTimeByReason[reason]
+       }
+       r.SyscallTime -= v.SyscallTime
+       r.SyscallBlockTime -= v.SyscallBlockTime
+       r.TotalTime -= v.TotalTime
+       for name := range s.RangeTime {
+               r.RangeTime[name] -= v.RangeTime[name]
+       }
+       return r
+}
+
+func (s GoroutineExecStats) clone() (r GoroutineExecStats) {
+       r = s
+       r.BlockTimeByReason = make(map[string]time.Duration)
+       for reason, dt := range s.BlockTimeByReason {
+               r.BlockTimeByReason[reason] = dt
+       }
+       r.RangeTime = make(map[string]time.Duration)
+       for name, dt := range s.RangeTime {
+               r.RangeTime[name] = dt
+       }
+       return r
+}
+
+// snapshotStat returns the snapshot of the goroutine execution statistics.
+// This is called as we process the ordered trace event stream. lastTs is used
+// to process pending statistics if this is called before any goroutine end event.
+func (g *GoroutineSummary) snapshotStat(lastTs tracev2.Time) (ret GoroutineExecStats) {
+       ret = g.GoroutineExecStats.clone()
+
+       if g.goroutineSummary == nil {
+               return ret // Already finalized; no pending state.
+       }
+
+       // Set the total time if necessary.
+       if g.TotalTime == 0 {
+               ret.TotalTime = lastTs.Sub(g.CreationTime)
+       }
+
+       // Add in time since lastTs.
+       if g.lastStartTime != 0 {
+               ret.ExecTime += lastTs.Sub(g.lastStartTime)
+       }
+       if g.lastRunnableTime != 0 {
+               ret.SchedWaitTime += lastTs.Sub(g.lastRunnableTime)
+       }
+       if g.lastBlockTime != 0 {
+               ret.BlockTimeByReason[g.lastBlockReason] += lastTs.Sub(g.lastBlockTime)
+       }
+       if g.lastSyscallTime != 0 {
+               ret.SyscallTime += lastTs.Sub(g.lastSyscallTime)
+       }
+       if g.lastSyscallBlockTime != 0 {
+               ret.SchedWaitTime += lastTs.Sub(g.lastSyscallBlockTime)
+       }
+       for name, ts := range g.lastRangeTime {
+               ret.RangeTime[name] += lastTs.Sub(ts)
+       }
+       return ret
+}
+
+// finalize is called when processing a goroutine end event or at
+// the end of trace processing. This finalizes the execution stat
+// and any active regions in the goroutine, in which case trigger is nil.
+func (g *GoroutineSummary) finalize(lastTs tracev2.Time, trigger *tracev2.Event) {
+       if trigger != nil {
+               g.EndTime = trigger.Time()
+       }
+       finalStat := g.snapshotStat(lastTs)
+
+       g.GoroutineExecStats = finalStat
+
+       // System goroutines are never part of regions, even though they
+       // "inherit" a task due to creation (EvGoCreate) from within a region.
+       // This may happen e.g. if the first GC is triggered within a region,
+       // starting the GC worker goroutines.
+       if !IsSystemGoroutine(g.Name) {
+               for _, s := range g.activeRegions {
+                       s.End = trigger
+                       s.GoroutineExecStats = finalStat.sub(s.GoroutineExecStats)
+                       g.Regions = append(g.Regions, s)
+               }
+       }
+       *(g.goroutineSummary) = goroutineSummary{}
+}
+
+// goroutineSummary is a private part of GoroutineSummary that is required only during analysis.
+type goroutineSummary struct {
+       lastStartTime        tracev2.Time
+       lastRunnableTime     tracev2.Time
+       lastBlockTime        tracev2.Time
+       lastBlockReason      string
+       lastSyscallTime      tracev2.Time
+       lastSyscallBlockTime tracev2.Time
+       lastRangeTime        map[string]tracev2.Time
+       activeRegions        []*UserRegionSummary // stack of active regions
+}
+
+// SummarizeGoroutines generates statistics for all goroutines in the trace.
+func SummarizeGoroutines(trace io.Reader) (map[tracev2.GoID]*GoroutineSummary, error) {
+       // Create the analysis state.
+       b := goroutineStatsBuilder{
+               gs:          make(map[tracev2.GoID]*GoroutineSummary),
+               syscallingP: make(map[tracev2.ProcID]tracev2.GoID),
+               syscallingG: make(map[tracev2.GoID]tracev2.ProcID),
+               rangesP:     make(map[rangeP]tracev2.GoID),
+       }
+
+       // Process the trace.
+       r, err := tracev2.NewReader(trace)
+       if err != nil {
+               return nil, err
+       }
+       for {
+               ev, err := r.ReadEvent()
+               if err == io.EOF {
+                       break
+               }
+               if err != nil {
+                       return nil, err
+               }
+               b.event(ev)
+       }
+       return b.finalize(), nil
+}
+
+// goroutineStatsBuilder constructs per-goroutine time statistics for v2 traces.
+type goroutineStatsBuilder struct {
+       // gs contains the map of goroutine summaries we're building up to return to the caller.
+       gs map[tracev2.GoID]*GoroutineSummary
+
+       // syscallingP and syscallingG represent a binding between a P and G in a syscall.
+       // Used to correctly identify and clean up after syscalls (blocking or otherwise).
+       syscallingP map[tracev2.ProcID]tracev2.GoID
+       syscallingG map[tracev2.GoID]tracev2.ProcID
+
+       // rangesP is used for optimistic tracking of P-based ranges for goroutines.
+       //
+       // It's a best-effort mapping of an active range on a P to the goroutine we think
+       // is associated with it.
+       rangesP map[rangeP]tracev2.GoID
+
+       lastTs tracev2.Time // timestamp of the last event processed.
+       syncTs tracev2.Time // timestamp of the last sync event processed (or the first timestamp in the trace).
+}
+
+type rangeP struct {
+       id   tracev2.ProcID
+       name string
+}
+
+// event feeds a single event into the stats builder.
+func (b *goroutineStatsBuilder) event(ev tracev2.Event) {
+       if b.syncTs == 0 {
+               b.syncTs = ev.Time()
+       }
+       b.lastTs = ev.Time()
+
+       switch ev.Kind() {
+       // Record sync time for the RangeActive events.
+       case tracev2.EventSync:
+               b.syncTs = ev.Time()
+
+       // Handle state transitions.
+       case tracev2.EventStateTransition:
+               st := ev.StateTransition()
+               switch st.Resource.Kind {
+               // Handle goroutine transitions, which are the meat of this computation.
+               case tracev2.ResourceGoroutine:
+                       id := st.Resource.Goroutine()
+                       old, new := st.Goroutine()
+                       if old == new {
+                               // Skip these events; they're not telling us anything new.
+                               break
+                       }
+
+                       // Handle transition out.
+                       g := b.gs[id]
+                       switch old {
+                       case tracev2.GoUndetermined, tracev2.GoNotExist:
+                               g = &GoroutineSummary{ID: id, goroutineSummary: &goroutineSummary{}}
+                               // If we're coming out of GoUndetermined, then the creation time is the
+                               // time of the last sync.
+                               if old == tracev2.GoUndetermined {
+                                       g.CreationTime = b.syncTs
+                               } else {
+                                       g.CreationTime = ev.Time()
+                               }
+                               // The goroutine is being created, or it's being named for the first time.
+                               g.lastRangeTime = make(map[string]tracev2.Time)
+                               g.BlockTimeByReason = make(map[string]time.Duration)
+                               g.RangeTime = make(map[string]time.Duration)
+
+                               // When a goroutine is newly created, inherit the task
+                               // of the active region. For ease handling of this
+                               // case, we create a fake region description with the
+                               // task id. This isn't strictly necessary as this
+                               // goroutine may not be associated with the task, but
+                               // it can be convenient to see all children created
+                               // during a region.
+                               //
+                               // N.B. ev.Goroutine() will always be NoGoroutine for the
+                               // Undetermined case, so this is will simply not fire.
+                               if creatorG := b.gs[ev.Goroutine()]; creatorG != nil && len(creatorG.activeRegions) > 0 {
+                                       regions := creatorG.activeRegions
+                                       s := regions[len(regions)-1]
+                                       if s.TaskID != tracev2.NoTask {
+                                               g.activeRegions = []*UserRegionSummary{{TaskID: s.TaskID, Start: &ev}}
+                                       }
+                               }
+                               b.gs[g.ID] = g
+                       case tracev2.GoRunning:
+                               // Record execution time as we transition out of running
+                               g.ExecTime += ev.Time().Sub(g.lastStartTime)
+                               g.lastStartTime = 0
+                       case tracev2.GoWaiting:
+                               // Record block time as we transition out of waiting.
+                               if g.lastBlockTime != 0 {
+                                       g.BlockTimeByReason[g.lastBlockReason] += ev.Time().Sub(g.lastBlockTime)
+                                       g.lastBlockTime = 0
+                               }
+                       case tracev2.GoRunnable:
+                               // Record sched latency time as we transition out of runnable.
+                               if g.lastRunnableTime != 0 {
+                                       g.SchedWaitTime += ev.Time().Sub(g.lastRunnableTime)
+                                       g.lastRunnableTime = 0
+                               }
+                       case tracev2.GoSyscall:
+                               // Record syscall execution time and syscall block time as we transition out of syscall.
+                               if g.lastSyscallTime != 0 {
+                                       if g.lastSyscallBlockTime != 0 {
+                                               g.SyscallBlockTime += ev.Time().Sub(g.lastSyscallBlockTime)
+                                               g.SyscallTime += g.lastSyscallBlockTime.Sub(g.lastSyscallTime)
+                                       } else {
+                                               g.SyscallTime += ev.Time().Sub(g.lastSyscallTime)
+                                       }
+                                       g.lastSyscallTime = 0
+                                       g.lastSyscallBlockTime = 0
+
+                                       // Clear the syscall map.
+                                       delete(b.syscallingP, b.syscallingG[id])
+                                       delete(b.syscallingG, id)
+                               }
+                       }
+
+                       // The goroutine hasn't been identified yet. Take any stack we
+                       // can get and identify it by the bottom-most frame of that stack.
+                       if g.PC == 0 {
+                               stk := ev.Stack()
+                               if stk != tracev2.NoStack {
+                                       var frame tracev2.StackFrame
+                                       var ok bool
+                                       stk.Frames(func(f tracev2.StackFrame) bool {
+                                               frame = f
+                                               ok = true
+                                               return false
+                                       })
+                                       if ok {
+                                               g.PC = frame.PC
+                                               g.Name = frame.Func
+                                       }
+                               }
+                       }
+
+                       // Handle transition in.
+                       switch new {
+                       case tracev2.GoRunning:
+                               // We started running. Record it.
+                               g.lastStartTime = ev.Time()
+                               if g.StartTime == 0 {
+                                       g.StartTime = ev.Time()
+                               }
+                       case tracev2.GoRunnable:
+                               g.lastRunnableTime = ev.Time()
+                       case tracev2.GoWaiting:
+                               if st.Reason != "forever" {
+                                       g.lastBlockTime = ev.Time()
+                                       g.lastBlockReason = st.Reason
+                                       break
+                               }
+                               // "Forever" is like goroutine death.
+                               fallthrough
+                       case tracev2.GoNotExist:
+                               g.finalize(ev.Time(), &ev)
+                       case tracev2.GoSyscall:
+                               b.syscallingP[ev.Proc()] = id
+                               b.syscallingG[id] = ev.Proc()
+                               g.lastSyscallTime = ev.Time()
+                       }
+
+               // Handle procs to detect syscall blocking, which si identifiable as a
+               // proc going idle while the goroutine it was attached to is in a syscall.
+               case tracev2.ResourceProc:
+                       id := st.Resource.Proc()
+                       old, new := st.Proc()
+                       if old != new && new == tracev2.ProcIdle {
+                               if goid, ok := b.syscallingP[id]; ok {
+                                       g := b.gs[goid]
+                                       g.lastSyscallBlockTime = ev.Time()
+                                       delete(b.syscallingP, id)
+                               }
+                       }
+               }
+
+       // Handle ranges of all kinds.
+       case tracev2.EventRangeBegin, tracev2.EventRangeActive:
+               r := ev.Range()
+               var g *GoroutineSummary
+               switch r.Scope.Kind {
+               case tracev2.ResourceGoroutine:
+                       // Simple goroutine range. We attribute the entire range regardless of
+                       // goroutine stats. Lots of situations are still identifiable, e.g. a
+                       // goroutine blocked often in mark assist will have both high mark assist
+                       // and high block times. Those interested in a deeper view can look at the
+                       // trace viewer.
+                       g = b.gs[r.Scope.Goroutine()]
+               case tracev2.ResourceProc:
+                       // N.B. These ranges are not actually bound to the goroutine, they're
+                       // bound to the P. But if we happen to be on the P the whole time, let's
+                       // try to attribute it to the goroutine. (e.g. GC sweeps are here.)
+                       g = b.gs[ev.Goroutine()]
+                       if g != nil {
+                               b.rangesP[rangeP{id: r.Scope.Proc(), name: r.Name}] = ev.Goroutine()
+                       }
+               }
+               if g == nil {
+                       break
+               }
+               if ev.Kind() == tracev2.EventRangeActive {
+                       if ts := g.lastRangeTime[r.Name]; ts != 0 {
+                               g.RangeTime[r.Name] += b.syncTs.Sub(ts)
+                       }
+                       g.lastRangeTime[r.Name] = b.syncTs
+               } else {
+                       g.lastRangeTime[r.Name] = ev.Time()
+               }
+       case tracev2.EventRangeEnd:
+               r := ev.Range()
+               var g *GoroutineSummary
+               switch r.Scope.Kind {
+               case tracev2.ResourceGoroutine:
+                       g = b.gs[r.Scope.Goroutine()]
+               case tracev2.ResourceProc:
+                       rp := rangeP{id: r.Scope.Proc(), name: r.Name}
+                       if goid, ok := b.rangesP[rp]; ok {
+                               if goid == ev.Goroutine() {
+                                       // As the comment in the RangeBegin case states, this is only OK
+                                       // if we finish on the same goroutine we started on.
+                                       g = b.gs[goid]
+                               }
+                               delete(b.rangesP, rp)
+                       }
+               }
+               if g == nil {
+                       break
+               }
+               ts := g.lastRangeTime[r.Name]
+               if ts == 0 {
+                       break
+               }
+               g.RangeTime[r.Name] += ev.Time().Sub(ts)
+               delete(g.lastRangeTime, r.Name)
+
+       // Handle user-defined regions.
+       case tracev2.EventRegionBegin:
+               g := b.gs[ev.Goroutine()]
+               r := ev.Region()
+               g.activeRegions = append(g.activeRegions, &UserRegionSummary{
+                       Name:               r.Type,
+                       TaskID:             r.Task,
+                       Start:              &ev,
+                       GoroutineExecStats: g.snapshotStat(ev.Time()),
+               })
+       case tracev2.EventRegionEnd:
+               g := b.gs[ev.Goroutine()]
+               r := ev.Region()
+               var sd *UserRegionSummary
+               if regionStk := g.activeRegions; len(regionStk) > 0 {
+                       // Pop the top region from the stack since that's what must have ended.
+                       n := len(regionStk)
+                       sd = regionStk[n-1]
+                       regionStk = regionStk[:n-1]
+                       g.activeRegions = regionStk
+               } else {
+                       // This is an "end" without a start. Just fabricate the region now.
+                       sd = &UserRegionSummary{Name: r.Type, TaskID: r.Task}
+               }
+               sd.GoroutineExecStats = g.snapshotStat(ev.Time()).sub(sd.GoroutineExecStats)
+               sd.End = &ev
+               g.Regions = append(g.Regions, sd)
+       }
+}
+
+// finalize indicates to the builder that we're done processing the trace.
+// It cleans up any remaining state and returns the full summary.
+func (b *goroutineStatsBuilder) finalize() map[tracev2.GoID]*GoroutineSummary {
+       for _, g := range b.gs {
+               g.finalize(b.lastTs, nil)
+
+               // Sort based on region start time.
+               sort.Slice(g.Regions, func(i, j int) bool {
+                       x := g.Regions[i].Start
+                       y := g.Regions[j].Start
+                       if x == nil {
+                               return true
+                       }
+                       if y == nil {
+                               return false
+                       }
+                       return x.Time() < y.Time()
+               })
+               g.goroutineSummary = nil
+       }
+       return b.gs
+}
+
+// RelatedGoroutinesV2 finds a set of goroutines related to goroutine goid for v2 traces.
+// The association is based on whether they have synchronized with each other in the Go
+// scheduler (one has unblocked another).
+func RelatedGoroutinesV2(trace io.Reader, goid tracev2.GoID) (map[tracev2.GoID]struct{}, error) {
+       r, err := tracev2.NewReader(trace)
+       if err != nil {
+               return nil, err
+       }
+       // Process all the events, looking for transitions of goroutines
+       // out of GoWaiting. If there was an active goroutine when this
+       // happened, then we know that active goroutine unblocked another.
+       // Scribble all these down so we can process them.
+       type unblockEdge struct {
+               operator tracev2.GoID
+               operand  tracev2.GoID
+       }
+       var unblockEdges []unblockEdge
+       for {
+               ev, err := r.ReadEvent()
+               if err == io.EOF {
+                       break
+               }
+               if err != nil {
+                       return nil, err
+               }
+               if ev.Goroutine() == tracev2.NoGoroutine {
+                       continue
+               }
+               if ev.Kind() != tracev2.EventStateTransition {
+                       continue
+               }
+               st := ev.StateTransition()
+               if st.Resource.Kind != tracev2.ResourceGoroutine {
+                       continue
+               }
+               id := st.Resource.Goroutine()
+               old, new := st.Goroutine()
+               if old == new || old != tracev2.GoWaiting {
+                       continue
+               }
+               unblockEdges = append(unblockEdges, unblockEdge{
+                       operator: ev.Goroutine(),
+                       operand:  id,
+               })
+       }
+       // Compute the transitive closure of depth 2 of goroutines that have unblocked each other
+       // (starting from goid).
+       gmap := make(map[tracev2.GoID]struct{})
+       gmap[goid] = struct{}{}
+       for i := 0; i < 2; i++ {
+               // Copy the map.
+               gmap1 := make(map[tracev2.GoID]struct{})
+               for g := range gmap {
+                       gmap1[g] = struct{}{}
+               }
+               for _, edge := range unblockEdges {
+                       if _, ok := gmap[edge.operand]; ok {
+                               gmap1[edge.operator] = struct{}{}
+                       }
+               }
+               gmap = gmap1
+       }
+       return gmap, nil
+}
diff --git a/src/internal/trace/goroutinesv2_test.go b/src/internal/trace/goroutinesv2_test.go
new file mode 100644 (file)
index 0000000..5ab3726
--- /dev/null
@@ -0,0 +1,229 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+       tracev2 "internal/trace/v2"
+       "internal/trace/v2/testtrace"
+       "testing"
+)
+
+func TestSummarizeGoroutinesTrace(t *testing.T) {
+       summaries := summarizeTraceTest(t, "v2/testdata/tests/go122-gc-stress.test")
+       var (
+               hasSchedWaitTime    bool
+               hasSyncBlockTime    bool
+               hasGCMarkAssistTime bool
+       )
+       for _, summary := range summaries {
+               basicSummaryChecks(t, summary)
+               hasSchedWaitTime = hasSchedWaitTime || summary.SchedWaitTime > 0
+               if dt, ok := summary.BlockTimeByReason["sync"]; ok && dt > 0 {
+                       hasSyncBlockTime = true
+               }
+               if dt, ok := summary.RangeTime["GC mark assist"]; ok && dt > 0 {
+                       hasGCMarkAssistTime = true
+               }
+       }
+       if !hasSchedWaitTime {
+               t.Error("missing sched wait time")
+       }
+       if !hasSyncBlockTime {
+               t.Error("missing sync block time")
+       }
+       if !hasGCMarkAssistTime {
+               t.Error("missing GC mark assist time")
+       }
+}
+
+func TestSummarizeGoroutinesRegionsTrace(t *testing.T) {
+       summaries := summarizeTraceTest(t, "v2/testdata/tests/go122-annotations.test")
+       type region struct {
+               startKind tracev2.EventKind
+               endKind   tracev2.EventKind
+       }
+       wantRegions := map[string]region{
+               // N.B. "pre-existing region" never even makes it into the trace.
+               //
+               // TODO(mknyszek): Add test case for end-without-a-start, which can happen at
+               // a generation split only.
+               "":                     {tracev2.EventStateTransition, tracev2.EventStateTransition}, // Task inheritance marker.
+               "task0 region":         {tracev2.EventRegionBegin, tracev2.EventBad},
+               "region0":              {tracev2.EventRegionBegin, tracev2.EventRegionEnd},
+               "region1":              {tracev2.EventRegionBegin, tracev2.EventRegionEnd},
+               "unended region":       {tracev2.EventRegionBegin, tracev2.EventStateTransition},
+               "post-existing region": {tracev2.EventRegionBegin, tracev2.EventBad},
+       }
+       for _, summary := range summaries {
+               basicSummaryChecks(t, summary)
+               for _, region := range summary.Regions {
+                       want, ok := wantRegions[region.Name]
+                       if !ok {
+                               continue
+                       }
+                       checkRegionEvents(t, want.startKind, want.endKind, summary.ID, region)
+                       delete(wantRegions, region.Name)
+               }
+       }
+       if len(wantRegions) != 0 {
+               t.Errorf("failed to find regions: %#v", wantRegions)
+       }
+}
+
+func basicSummaryChecks(t *testing.T, summary *GoroutineSummary) {
+       if summary.ID == tracev2.NoGoroutine {
+               t.Error("summary found for no goroutine")
+               return
+       }
+       if (summary.StartTime != 0 && summary.CreationTime > summary.StartTime) ||
+               (summary.StartTime != 0 && summary.EndTime != 0 && summary.StartTime > summary.EndTime) {
+               t.Errorf("bad summary creation/start/end times for G %d: creation=%d start=%d end=%d", summary.ID, summary.CreationTime, summary.StartTime, summary.EndTime)
+       }
+       if (summary.PC != 0 && summary.Name == "") || (summary.PC == 0 && summary.Name != "") {
+               t.Errorf("bad name and/or PC for G %d: pc=0x%x name=%q", summary.ID, summary.PC, summary.Name)
+       }
+       basicGoroutineExecStatsChecks(t, &summary.GoroutineExecStats)
+       for _, region := range summary.Regions {
+               basicGoroutineExecStatsChecks(t, &region.GoroutineExecStats)
+       }
+}
+
+func summarizeTraceTest(t *testing.T, testPath string) map[tracev2.GoID]*GoroutineSummary {
+       r, _, err := testtrace.ParseFile(testPath)
+       if err != nil {
+               t.Fatalf("malformed test %s: bad trace file: %v", testPath, err)
+       }
+       summaries, err := SummarizeGoroutines(r)
+       if err != nil {
+               t.Fatalf("failed to process trace %s: %v", testPath, err)
+       }
+       return summaries
+}
+
+func checkRegionEvents(t *testing.T, wantStart, wantEnd tracev2.EventKind, goid tracev2.GoID, region *UserRegionSummary) {
+       switch wantStart {
+       case tracev2.EventBad:
+               if region.Start != nil {
+                       t.Errorf("expected nil region start event, got\n%s", region.Start.String())
+               }
+       case tracev2.EventStateTransition, tracev2.EventRegionBegin:
+               if region.Start == nil {
+                       t.Error("expected non-nil region start event, got nil")
+               }
+               kind := region.Start.Kind()
+               if kind != wantStart {
+                       t.Errorf("wanted region start event %s, got %s", wantStart, kind)
+               }
+               if kind == tracev2.EventRegionBegin {
+                       if region.Start.Region().Type != region.Name {
+                               t.Errorf("region name mismatch: event has %s, summary has %s", region.Start.Region().Type, region.Name)
+                       }
+               } else {
+                       st := region.Start.StateTransition()
+                       if st.Resource.Kind != tracev2.ResourceGoroutine {
+                               t.Errorf("found region start event for the wrong resource: %s", st.Resource)
+                       }
+                       if st.Resource.Goroutine() != goid {
+                               t.Errorf("found region start event for the wrong resource: wanted goroutine %d, got %s", goid, st.Resource)
+                       }
+                       if old, _ := st.Goroutine(); old != tracev2.GoNotExist && old != tracev2.GoUndetermined {
+                               t.Errorf("expected transition from GoNotExist or GoUndetermined, got transition from %s instead", old)
+                       }
+               }
+       default:
+               t.Errorf("unexpected want start event type: %s", wantStart)
+       }
+
+       switch wantEnd {
+       case tracev2.EventBad:
+               if region.End != nil {
+                       t.Errorf("expected nil region end event, got\n%s", region.End.String())
+               }
+       case tracev2.EventStateTransition, tracev2.EventRegionEnd:
+               if region.End == nil {
+                       t.Error("expected non-nil region end event, got nil")
+               }
+               kind := region.End.Kind()
+               if kind != wantEnd {
+                       t.Errorf("wanted region end event %s, got %s", wantEnd, kind)
+               }
+               if kind == tracev2.EventRegionEnd {
+                       if region.End.Region().Type != region.Name {
+                               t.Errorf("region name mismatch: event has %s, summary has %s", region.End.Region().Type, region.Name)
+                       }
+               } else {
+                       st := region.End.StateTransition()
+                       if st.Resource.Kind != tracev2.ResourceGoroutine {
+                               t.Errorf("found region end event for the wrong resource: %s", st.Resource)
+                       }
+                       if st.Resource.Goroutine() != goid {
+                               t.Errorf("found region end event for the wrong resource: wanted goroutine %d, got %s", goid, st.Resource)
+                       }
+                       if _, new := st.Goroutine(); new != tracev2.GoNotExist {
+                               t.Errorf("expected transition to GoNotExist, got transition to %s instead", new)
+                       }
+               }
+       default:
+               t.Errorf("unexpected want end event type: %s", wantEnd)
+       }
+}
+
+func basicGoroutineExecStatsChecks(t *testing.T, stats *GoroutineExecStats) {
+       if stats.ExecTime < 0 {
+               t.Error("found negative ExecTime")
+       }
+       if stats.SchedWaitTime < 0 {
+               t.Error("found negative SchedWaitTime")
+       }
+       if stats.SyscallTime < 0 {
+               t.Error("found negative SyscallTime")
+       }
+       if stats.SyscallBlockTime < 0 {
+               t.Error("found negative SyscallBlockTime")
+       }
+       if stats.TotalTime < 0 {
+               t.Error("found negative TotalTime")
+       }
+       for reason, dt := range stats.BlockTimeByReason {
+               if dt < 0 {
+                       t.Errorf("found negative BlockTimeByReason for %s", reason)
+               }
+       }
+       for name, dt := range stats.RangeTime {
+               if dt < 0 {
+                       t.Errorf("found negative RangeTime for range %s", name)
+               }
+       }
+}
+
+func TestRelatedGoroutinesV2Trace(t *testing.T) {
+       testPath := "v2/testdata/tests/go122-gc-stress.test"
+       r, _, err := testtrace.ParseFile(testPath)
+       if err != nil {
+               t.Fatalf("malformed test %s: bad trace file: %v", testPath, err)
+       }
+       targetg := tracev2.GoID(86)
+       got, err := RelatedGoroutinesV2(r, targetg)
+       if err != nil {
+               t.Fatalf("failed to find related goroutines for %s: %v", testPath, err)
+       }
+       want := map[tracev2.GoID]struct{}{
+               tracev2.GoID(86):  struct{}{}, // N.B. Result includes target.
+               tracev2.GoID(85):  struct{}{},
+               tracev2.GoID(111): struct{}{},
+       }
+       for goid := range got {
+               if _, ok := want[goid]; ok {
+                       delete(want, goid)
+               } else {
+                       t.Errorf("unexpected goroutine %d found in related goroutines for %d in test %s", goid, targetg, testPath)
+               }
+       }
+       if len(want) != 0 {
+               for goid := range want {
+                       t.Errorf("failed to find related goroutine %d for goroutine %d in test %s", goid, targetg, testPath)
+               }
+       }
+}
index 33180d1204b93e59c1f053dbbdee63243f0ff3b6..2507bc4d3890d1f7a1cbb8c99106b3ecce8595f7 100644 (file)
@@ -31,11 +31,15 @@ func main() {
        // Beginning of traced execution.
        var wg sync.WaitGroup
        ctx, task := trace.NewTask(bgctx, "task0") // EvUserTaskCreate("task0")
+       trace.StartRegion(ctx, "task0 region")
+
        wg.Add(1)
        go func() {
                defer wg.Done()
                defer task.End() // EvUserTaskEnd("task0")
 
+               trace.StartRegion(ctx, "unended region")
+
                trace.WithRegion(ctx, "region0", func() {
                        // EvUserRegionBegin("region0", start)
                        trace.WithRegion(ctx, "region1", func() {
diff --git a/src/internal/trace/v2/testdata/tests/go122-annotations.test b/src/internal/trace/v2/testdata/tests/go122-annotations.test
new file mode 100644 (file)
index 0000000..ab4b923
--- /dev/null
@@ -0,0 +1,297 @@
+-- expect --
+SUCCESS
+-- trace --
+Trace Go1.22
+EventBatch gen=1 m=18446744073709551615 time=23991883811065 size=5
+Frequency freq=15625000
+EventBatch gen=1 m=1331278 time=23991883808413 size=16
+ProcStart dt=297 p=2 p_seq=1
+GoStart dt=162 g=7 g_seq=1
+HeapAlloc dt=267 heapalloc_value=1908736
+EventBatch gen=1 m=1331277 time=23991883807353 size=28
+ProcStart dt=472 p=1 p_seq=1
+GoStart dt=309 g=6 g_seq=1
+HeapAlloc dt=533 heapalloc_value=1892352
+HeapAlloc dt=47 heapalloc_value=1900544
+GoBlock dt=2071 reason_string=12 stack=23
+ProcStop dt=54
+EventBatch gen=1 m=1331276 time=23991883809162 size=10
+ProcStart dt=426 p=3 p_seq=1
+HeapAlloc dt=108 heapalloc_value=1916928
+EventBatch gen=1 m=1331274 time=23991883805418 size=325
+ProcStatus dt=241 p=0 pstatus=1
+GoStatus dt=5 g=1 m=1331274 gstatus=2
+ProcsChange dt=486 procs_value=48 stack=1
+STWBegin dt=80 kind_string=21 stack=2
+HeapGoal dt=3 heapgoal_value=4194304
+ProcStatus dt=3 p=1 pstatus=2
+ProcStatus dt=2 p=2 pstatus=2
+ProcStatus dt=1 p=3 pstatus=2
+ProcStatus dt=1 p=4 pstatus=2
+ProcStatus dt=1 p=5 pstatus=2
+ProcStatus dt=1 p=6 pstatus=2
+ProcStatus dt=1 p=7 pstatus=2
+ProcStatus dt=1 p=8 pstatus=2
+ProcStatus dt=1 p=9 pstatus=2
+ProcStatus dt=1 p=10 pstatus=2
+ProcStatus dt=1 p=11 pstatus=2
+ProcStatus dt=1 p=12 pstatus=2
+ProcStatus dt=1 p=13 pstatus=2
+ProcStatus dt=1 p=14 pstatus=2
+ProcStatus dt=1 p=15 pstatus=2
+ProcStatus dt=1 p=16 pstatus=2
+ProcStatus dt=1 p=17 pstatus=2
+ProcStatus dt=1 p=18 pstatus=2
+ProcStatus dt=1 p=19 pstatus=2
+ProcStatus dt=1 p=20 pstatus=2
+ProcStatus dt=1 p=21 pstatus=2
+ProcStatus dt=1 p=22 pstatus=2
+ProcStatus dt=1 p=23 pstatus=2
+ProcStatus dt=1 p=24 pstatus=2
+ProcStatus dt=1 p=25 pstatus=2
+ProcStatus dt=1 p=26 pstatus=2
+ProcStatus dt=1 p=27 pstatus=2
+ProcStatus dt=1 p=28 pstatus=2
+ProcStatus dt=1 p=29 pstatus=2
+ProcStatus dt=1 p=30 pstatus=2
+ProcStatus dt=1 p=31 pstatus=2
+ProcStatus dt=1 p=32 pstatus=2
+ProcStatus dt=1 p=33 pstatus=2
+ProcStatus dt=1 p=34 pstatus=2
+ProcStatus dt=1 p=35 pstatus=2
+ProcStatus dt=1 p=36 pstatus=2
+ProcStatus dt=1 p=37 pstatus=2
+ProcStatus dt=1 p=38 pstatus=2
+ProcStatus dt=1 p=39 pstatus=2
+ProcStatus dt=1 p=40 pstatus=2
+ProcStatus dt=1 p=41 pstatus=2
+ProcStatus dt=1 p=42 pstatus=2
+ProcStatus dt=1 p=43 pstatus=2
+ProcStatus dt=1 p=44 pstatus=2
+ProcStatus dt=1 p=45 pstatus=2
+ProcStatus dt=1 p=46 pstatus=2
+ProcStatus dt=1 p=47 pstatus=2
+ProcsChange dt=9 procs_value=48 stack=3
+STWEnd dt=209
+GoCreate dt=376 new_g=6 new_stack=4 stack=5
+GoCreate dt=96 new_g=7 new_stack=6 stack=7
+GoCreate dt=574 new_g=8 new_stack=8 stack=9
+UserTaskBegin dt=19 task=1 parent_task=0 name_string=22 stack=10
+UserRegionBegin dt=19 task=1 name_string=23 stack=11
+HeapAlloc dt=27 heapalloc_value=1884160
+GoCreate dt=578 new_g=9 new_stack=12 stack=13
+GoBlock dt=67 reason_string=10 stack=14
+GoStart dt=13 g=9 g_seq=1
+UserRegionBegin dt=61 task=1 name_string=24 stack=15
+UserRegionBegin dt=8 task=1 name_string=25 stack=16
+UserRegionBegin dt=6 task=1 name_string=26 stack=17
+UserLog dt=780 task=1 key_string=27 value_string=28 stack=18
+UserRegionEnd dt=9 task=1 name_string=26 stack=19
+UserRegionEnd dt=504 task=1 name_string=25 stack=20
+UserTaskEnd dt=425 task=1 stack=21
+GoUnblock dt=503 g=1 g_seq=1 stack=22
+GoDestroy dt=7
+GoStart dt=13 g=1 g_seq=2
+UserRegionBegin dt=367 task=0 name_string=29 stack=24
+EventBatch gen=1 m=18446744073709551615 time=23991883811600 size=57
+GoStatus dt=358 g=2 m=18446744073709551615 gstatus=4
+GoStatus dt=3 g=3 m=18446744073709551615 gstatus=4
+GoStatus dt=2 g=4 m=18446744073709551615 gstatus=4
+GoStatus dt=1 g=5 m=18446744073709551615 gstatus=4
+EventBatch gen=1 m=18446744073709551615 time=23991883812197 size=461
+Stacks
+Stack id=8 nframes=1
+       pc=4812576 func=30 file=31 line=128
+Stack id=13 nframes=1
+       pc=4813540 func=32 file=33 line=37
+Stack id=3 nframes=4
+       pc=4421156 func=34 file=35 line=1360
+       pc=4537289 func=36 file=37 line=246
+       pc=4812325 func=38 file=31 line=125
+       pc=4813276 func=32 file=33 line=27
+Stack id=2 nframes=3
+       pc=4537037 func=36 file=37 line=229
+       pc=4812325 func=38 file=31 line=125
+       pc=4813276 func=32 file=33 line=27
+Stack id=16 nframes=1
+       pc=4813956 func=39 file=33 line=43
+Stack id=19 nframes=3
+       pc=4814128 func=40 file=33 line=45
+       pc=4811708 func=41 file=42 line=141
+       pc=4813956 func=39 file=33 line=43
+Stack id=10 nframes=1
+       pc=4813380 func=32 file=33 line=33
+Stack id=1 nframes=4
+       pc=4551915 func=43 file=44 line=248
+       pc=4537017 func=36 file=37 line=228
+       pc=4812325 func=38 file=31 line=125
+       pc=4813276 func=32 file=33 line=27
+Stack id=7 nframes=4
+       pc=4543253 func=45 file=37 line=831
+       pc=4537306 func=36 file=37 line=249
+       pc=4812325 func=38 file=31 line=125
+       pc=4813276 func=32 file=33 line=27
+Stack id=11 nframes=1
+       pc=4813412 func=32 file=33 line=34
+Stack id=4 nframes=1
+       pc=4545952 func=46 file=47 line=42
+Stack id=5 nframes=4
+       pc=4545861 func=48 file=47 line=42
+       pc=4537294 func=36 file=37 line=248
+       pc=4812325 func=38 file=31 line=125
+       pc=4813276 func=32 file=33 line=27
+Stack id=17 nframes=3
+       pc=4814128 func=40 file=33 line=45
+       pc=4811708 func=41 file=42 line=141
+       pc=4813956 func=39 file=33 line=43
+Stack id=24 nframes=1
+       pc=4813616 func=32 file=33 line=54
+Stack id=21 nframes=2
+       pc=4814276 func=49 file=42 line=80
+       pc=4813974 func=39 file=33 line=50
+Stack id=12 nframes=1
+       pc=4813728 func=39 file=33 line=37
+Stack id=14 nframes=2
+       pc=4640519 func=50 file=51 line=116
+       pc=4813550 func=32 file=33 line=51
+Stack id=9 nframes=2
+       pc=4812399 func=38 file=31 line=128
+       pc=4813276 func=32 file=33 line=27
+Stack id=22 nframes=3
+       pc=4640260 func=52 file=51 line=81
+       pc=4814374 func=53 file=51 line=87
+       pc=4813989 func=39 file=33 line=50
+Stack id=20 nframes=1
+       pc=4813956 func=39 file=33 line=43
+Stack id=6 nframes=1
+       pc=4543296 func=54 file=37 line=831
+Stack id=15 nframes=1
+       pc=4813886 func=39 file=33 line=41
+Stack id=18 nframes=5
+       pc=4814195 func=55 file=33 line=46
+       pc=4811708 func=41 file=42 line=141
+       pc=4814128 func=40 file=33 line=45
+       pc=4811708 func=41 file=42 line=141
+       pc=4813956 func=39 file=33 line=43
+Stack id=23 nframes=3
+       pc=4217457 func=56 file=57 line=442
+       pc=4543501 func=58 file=37 line=881
+       pc=4546010 func=46 file=47 line=54
+EventBatch gen=1 m=18446744073709551615 time=23991883804190 size=1620
+Strings
+String id=1
+       data="Not worker"
+String id=2
+       data="GC (dedicated)"
+String id=3
+       data="GC (fractional)"
+String id=4
+       data="GC (idle)"
+String id=5
+       data="unspecified"
+String id=6
+       data="forever"
+String id=7
+       data="network"
+String id=8
+       data="select"
+String id=9
+       data="sync.(*Cond).Wait"
+String id=10
+       data="sync"
+String id=11
+       data="chan send"
+String id=12
+       data="chan receive"
+String id=13
+       data="GC mark assist wait for work"
+String id=14
+       data="GC background sweeper wait"
+String id=15
+       data="system goroutine wait"
+String id=16
+       data="preempted"
+String id=17
+       data="wait for debug call"
+String id=18
+       data="wait until GC ends"
+String id=19
+       data="sleep"
+String id=20
+       data="runtime.GoSched"
+String id=21
+       data="start trace"
+String id=22
+       data="task0"
+String id=23
+       data="task0 region"
+String id=24
+       data="unended region"
+String id=25
+       data="region0"
+String id=26
+       data="region1"
+String id=27
+       data="key0"
+String id=28
+       data="0123456789abcdef"
+String id=29
+       data="post-existing region"
+String id=30
+       data="runtime/trace.Start.func1"
+String id=31
+       data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/trace/trace.go"
+String id=32
+       data="main.main"
+String id=33
+       data="/usr/local/google/home/mknyszek/work/go-1/src/internal/trace/v2/testdata/testprog/annotations.go"
+String id=34
+       data="runtime.startTheWorld"
+String id=35
+       data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/proc.go"
+String id=36
+       data="runtime.StartTrace"
+String id=37
+       data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/trace2.go"
+String id=38
+       data="runtime/trace.Start"
+String id=39
+       data="main.main.func1"
+String id=40
+       data="main.main.func1.1"
+String id=41
+       data="runtime/trace.WithRegion"
+String id=42
+       data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/trace/annotation.go"
+String id=43
+       data="runtime.traceLocker.Gomaxprocs"
+String id=44
+       data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/trace2runtime.go"
+String id=45
+       data="runtime.(*traceAdvancerState).start"
+String id=46
+       data="runtime.traceStartReadCPU.func1"
+String id=47
+       data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/trace2cpu.go"
+String id=48
+       data="runtime.traceStartReadCPU"
+String id=49
+       data="runtime/trace.(*Task).End"
+String id=50
+       data="sync.(*WaitGroup).Wait"
+String id=51
+       data="/usr/local/google/home/mknyszek/work/go-1/src/sync/waitgroup.go"
+String id=52
+       data="sync.(*WaitGroup).Add"
+String id=53
+       data="sync.(*WaitGroup).Done"
+String id=54
+       data="runtime.(*traceAdvancerState).start.func1"
+String id=55
+       data="main.main.func1.1.1"
+String id=56
+       data="runtime.chanrecv1"
+String id=57
+       data="/usr/local/google/home/mknyszek/work/go-1/src/runtime/chan.go"
+String id=58
+       data="runtime.(*wakeableSleep).sleep"