runtime: add execution tracer v2 behind GOEXPERIMENT=exectracer2
author Michael Anthony Knyszek <mknyszek@google.com>
Mon, 8 May 2023 22:29:52 +0000 (22:29 +0000)
committer Michael Knyszek <mknyszek@google.com>
Fri, 10 Nov 2023 15:49:59 +0000 (15:49 +0000)
This change mostly implements the design described in #60773 and
includes a new scalable parser for the new trace format, available in
internal/trace/v2. I'll leave this commit message short because this is
clearly an enormous CL with a lot of detail.

This change does not yet hook the new tracer up to cmd/trace. A
follow-up CL will handle that.

For #60773.

Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest,gotip-linux-amd64-longtest-race
Change-Id: I5d2aca2cc07580ed3c76a9813ac48ec96b157de0
Reviewed-on: https://go-review.googlesource.com/c/go/+/494187
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

100 files changed:
src/cmd/compile/internal/test/inl_test.go
src/cmd/trace/annotations_test.go
src/cmd/trace/trace_unix_test.go
src/go/build/deps_test.go
src/internal/goexperiment/exp_exectracer2_off.go [new file with mode: 0644]
src/internal/goexperiment/exp_exectracer2_on.go [new file with mode: 0644]
src/internal/goexperiment/flags.go
src/internal/trace/v2/base.go [new file with mode: 0644]
src/internal/trace/v2/batch.go [new file with mode: 0644]
src/internal/trace/v2/batchcursor.go [new file with mode: 0644]
src/internal/trace/v2/batchcursor_test.go [new file with mode: 0644]
src/internal/trace/v2/event.go [new file with mode: 0644]
src/internal/trace/v2/event/event.go [new file with mode: 0644]
src/internal/trace/v2/event/go122/event.go [new file with mode: 0644]
src/internal/trace/v2/event/requirements.go [new file with mode: 0644]
src/internal/trace/v2/event_test.go [new file with mode: 0644]
src/internal/trace/v2/generation.go [new file with mode: 0644]
src/internal/trace/v2/internal/testgen/go122/trace.go [new file with mode: 0644]
src/internal/trace/v2/mkexp.bash [new file with mode: 0755]
src/internal/trace/v2/order.go [new file with mode: 0644]
src/internal/trace/v2/raw/doc.go [new file with mode: 0644]
src/internal/trace/v2/raw/event.go [new file with mode: 0644]
src/internal/trace/v2/raw/reader.go [new file with mode: 0644]
src/internal/trace/v2/raw/textreader.go [new file with mode: 0644]
src/internal/trace/v2/raw/textwriter.go [new file with mode: 0644]
src/internal/trace/v2/raw/writer.go [new file with mode: 0644]
src/internal/trace/v2/reader.go [new file with mode: 0644]
src/internal/trace/v2/reader_test.go [new file with mode: 0644]
src/internal/trace/v2/resources.go [new file with mode: 0644]
src/internal/trace/v2/testdata/README.md [new file with mode: 0644]
src/internal/trace/v2/testdata/cmd/gotraceraw/main.go [new file with mode: 0644]
src/internal/trace/v2/testdata/cmd/gotracevalidate/main.go [new file with mode: 0644]
src/internal/trace/v2/testdata/generate.go [new file with mode: 0644]
src/internal/trace/v2/testdata/generators/go122-confuse-seq-across-generations.go [new file with mode: 0644]
src/internal/trace/v2/testdata/generators/go122-go-create-without-running-g.go [new file with mode: 0644]
src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-gen-boundary-bare-m.go [new file with mode: 0644]
src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-gen-boundary-reacquire-new-proc-bare-m.go [new file with mode: 0644]
src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-gen-boundary-reacquire-new-proc.go [new file with mode: 0644]
src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-gen-boundary.go [new file with mode: 0644]
src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-reacquire-new-proc-bare-m.go [new file with mode: 0644]
src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-reacquire-new-proc.go [new file with mode: 0644]
src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-simple-bare-m.go [new file with mode: 0644]
src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-simple.go [new file with mode: 0644]
src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-sitting-in-syscall.go [new file with mode: 0644]
src/internal/trace/v2/testdata/generators/go122-task-across-generations.go [new file with mode: 0644]
src/internal/trace/v2/testdata/mktests.go [new file with mode: 0644]
src/internal/trace/v2/testdata/testprog/annotations-stress.go [new file with mode: 0644]
src/internal/trace/v2/testdata/testprog/annotations.go [new file with mode: 0644]
src/internal/trace/v2/testdata/testprog/cgo-callback.go [new file with mode: 0644]
src/internal/trace/v2/testdata/testprog/cpu-profile.go [new file with mode: 0644]
src/internal/trace/v2/testdata/testprog/futile-wakeup.go [new file with mode: 0644]
src/internal/trace/v2/testdata/testprog/gc-stress.go [new file with mode: 0644]
src/internal/trace/v2/testdata/testprog/gomaxprocs.go [new file with mode: 0644]
src/internal/trace/v2/testdata/testprog/many-start-stop.go [new file with mode: 0644]
src/internal/trace/v2/testdata/testprog/stacks.go [new file with mode: 0644]
src/internal/trace/v2/testdata/testprog/stress-start-stop.go [new file with mode: 0644]
src/internal/trace/v2/testdata/testprog/stress.go [new file with mode: 0644]
src/internal/trace/v2/testdata/tests/go122-confuse-seq-across-generations.test [new file with mode: 0644]
src/internal/trace/v2/testdata/tests/go122-go-create-without-running-g.test [new file with mode: 0644]
src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-gen-boundary-bare-m.test [new file with mode: 0644]
src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-gen-boundary-reacquire-new-proc-bare-m.test [new file with mode: 0644]
src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-gen-boundary-reacquire-new-proc.test [new file with mode: 0644]
src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-gen-boundary.test [new file with mode: 0644]
src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-reacquire-new-proc-bare-m.test [new file with mode: 0644]
src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-reacquire-new-proc.test [new file with mode: 0644]
src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-simple-bare-m.test [new file with mode: 0644]
src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-simple.test [new file with mode: 0644]
src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-sitting-in-syscall.test [new file with mode: 0644]
src/internal/trace/v2/testdata/tests/go122-task-across-generations.test [new file with mode: 0644]
src/internal/trace/v2/testtrace/expectation.go [new file with mode: 0644]
src/internal/trace/v2/testtrace/format.go [new file with mode: 0644]
src/internal/trace/v2/testtrace/validation.go [new file with mode: 0644]
src/internal/trace/v2/trace_test.go [new file with mode: 0644]
src/internal/trace/v2/value.go [new file with mode: 0644]
src/internal/trace/v2/version/version.go [new file with mode: 0644]
src/runtime/crash_cgo_test.go
src/runtime/extern.go
src/runtime/lockrank_on.go
src/runtime/mgc.go
src/runtime/mgcmark.go
src/runtime/proc.go
src/runtime/runtime1.go
src/runtime/runtime2.go
src/runtime/sizeof_test.go
src/runtime/trace.go
src/runtime/trace/annotation_test.go
src/runtime/trace/trace_stack_test.go
src/runtime/trace/trace_test.go
src/runtime/trace2.go [new file with mode: 0644]
src/runtime/trace2buf.go [new file with mode: 0644]
src/runtime/trace2cpu.go [new file with mode: 0644]
src/runtime/trace2event.go [new file with mode: 0644]
src/runtime/trace2map.go [new file with mode: 0644]
src/runtime/trace2region.go [new file with mode: 0644]
src/runtime/trace2runtime.go [new file with mode: 0644]
src/runtime/trace2stack.go [new file with mode: 0644]
src/runtime/trace2status.go [new file with mode: 0644]
src/runtime/trace2string.go [new file with mode: 0644]
src/runtime/trace2time.go [new file with mode: 0644]
src/runtime/trace_cgo_test.go

diff --git a/src/cmd/compile/internal/test/inl_test.go b/src/cmd/compile/internal/test/inl_test.go
index ea7f317ef52aac34cfe4dcc79061f797fdd6545d..6d10f6c54c572dafc2b15c45f873ace192e1e10c 100644
@@ -93,6 +93,10 @@ func TestIntendedInlining(t *testing.T) {
                        "(*puintptr).set",
                        "(*wbBuf).get1",
                        "(*wbBuf).get2",
+
+                       // Trace-related ones.
+                       "traceLocker.ok",
+                       "traceEnabled",
                },
                "runtime/internal/sys": {},
                "runtime/internal/math": {
@@ -249,6 +253,10 @@ func TestIntendedInlining(t *testing.T) {
                want["runtime/internal/sys"] = append(want["runtime/internal/sys"], "TrailingZeros32")
                want["runtime/internal/sys"] = append(want["runtime/internal/sys"], "Bswap32")
        }
+       if runtime.GOARCH == "amd64" || runtime.GOARCH == "arm64" || runtime.GOARCH == "loong64" || runtime.GOARCH == "mips" || runtime.GOARCH == "mips64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "riscv64" || runtime.GOARCH == "s390x" {
+               // runtime/internal/atomic.Loaduintptr is only intrinsified on these platforms.
+               want["runtime"] = append(want["runtime"], "traceAcquire")
+       }
        if bits.UintSize == 64 {
                // mix is only defined on 64-bit architectures
                want["runtime"] = append(want["runtime"], "mix")
diff --git a/src/cmd/trace/annotations_test.go b/src/cmd/trace/annotations_test.go
index ca14d3160b0c16b48384965a84e30bf6a511bfb0..36d3ec9d6dc15801f67cec4bdae1dc28f0d25a58 100644
@@ -11,6 +11,7 @@ import (
        "context"
        "flag"
        "fmt"
+       "internal/goexperiment"
        traceparser "internal/trace"
        "os"
        "reflect"
@@ -330,6 +331,9 @@ func TestAnalyzeAnnotationGC(t *testing.T) {
 // If savetraces flag is set, the captured trace will be saved in the named file.
 func traceProgram(t *testing.T, f func(), name string) error {
        t.Helper()
+       if goexperiment.ExecTracer2 {
+               t.Skip("skipping because test programs are covered elsewhere for the new tracer")
+       }
        buf := new(bytes.Buffer)
        if err := trace.Start(buf); err != nil {
                return err
diff --git a/src/cmd/trace/trace_unix_test.go b/src/cmd/trace/trace_unix_test.go
index f35061ec63030506be56d1de0afe9b0df3d91611..87ad86fce8b0de824f1cf297048e3c2adc3a97e4 100644
@@ -9,6 +9,7 @@ package main
 import (
        "bytes"
        "cmd/internal/traceviewer"
+       "internal/goexperiment"
        traceparser "internal/trace"
        "io"
        "runtime"
@@ -23,6 +24,9 @@ import (
 // that preexisted when the tracing started were not counted
 // as threads in syscall. See golang.org/issues/22574.
 func TestGoroutineInSyscall(t *testing.T) {
+       if goexperiment.ExecTracer2 {
+               t.Skip("skipping because this test is obsolete and incompatible with the new tracer")
+       }
        // Start one goroutine blocked in syscall.
        //
        // TODO: syscall.Pipe used to cause the goroutine to
diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go
index a733441d5b63869a00da711974845eaffc0fb6d1..9291c95ac17f71d5c46c196f68ea5b6c5bd62157 100644
@@ -605,12 +605,35 @@ var depsRules = `
        syscall
        < os/exec/internal/fdtest;
 
+       FMT
+       < internal/diff, internal/txtar;
+
        FMT, container/heap, math/rand
        < internal/trace;
 
+       # v2 execution trace parser.
        FMT
-       < internal/diff, internal/txtar;
+       < internal/trace/v2/event;
+
+       internal/trace/v2/event
+       < internal/trace/v2/event/go122;
+
+       FMT, io, internal/trace/v2/event/go122
+       < internal/trace/v2/version;
+
+       FMT, encoding/binary, internal/trace/v2/version
+       < internal/trace/v2/raw;
+
+       FMT, encoding/binary, internal/trace/v2/version
+       < internal/trace/v2;
+
+       regexp, internal/trace/v2, internal/trace/v2/raw, internal/txtar
+       < internal/trace/v2/testtrace;
+
+       regexp, internal/txtar, internal/trace/v2, internal/trace/v2/raw
+       < internal/trace/v2/internal/testgen/go122;
 
+       # Coverage.
        FMT, crypto/md5, encoding/binary, regexp, sort, text/tabwriter, unsafe,
        internal/coverage, internal/coverage/uleb128
        < internal/coverage/cmerge,
diff --git a/src/internal/goexperiment/exp_exectracer2_off.go b/src/internal/goexperiment/exp_exectracer2_off.go
new file mode 100644
index 0000000..2f9c826
--- /dev/null
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build !goexperiment.exectracer2
+// +build !goexperiment.exectracer2
+
+package goexperiment
+
+const ExecTracer2 = false
+const ExecTracer2Int = 0
diff --git a/src/internal/goexperiment/exp_exectracer2_on.go b/src/internal/goexperiment/exp_exectracer2_on.go
new file mode 100644
index 0000000..f94a292
--- /dev/null
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build goexperiment.exectracer2
+// +build goexperiment.exectracer2
+
+package goexperiment
+
+const ExecTracer2 = true
+const ExecTracer2Int = 1
diff --git a/src/internal/goexperiment/flags.go b/src/internal/goexperiment/flags.go
index 12b788234383323fa67beee709c1292d56f37107..dacc4c3b1357320459ba18d72d0fddf8c483ca28 100644
@@ -123,4 +123,8 @@ type Flags struct {
        // AllocHeaders enables a different, more efficient way for the GC to
        // manage heap metadata.
        AllocHeaders bool
+
+       // ExecTracer2 controls whether to use the new execution trace
+       // implementation.
+       ExecTracer2 bool
 }
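The experiment is selected at build time with GOEXPERIMENT=exectracer2 (for example, GOEXPERIMENT=exectracer2 go test runtime/trace), which picks exactly one of the two generated files above and thereby fixes goexperiment.ExecTracer2 at compile time. A minimal sketch of gating on the constant; note that internal/goexperiment is only importable from within the Go source tree:

    package main

    import (
        "fmt"

        "internal/goexperiment" // only importable inside the Go tree
    )

    func main() {
        if goexperiment.ExecTracer2 {
            fmt.Println("execution tracer v2 enabled")
        } else {
            fmt.Println("using the original tracer")
        }
    }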
diff --git a/src/internal/trace/v2/base.go b/src/internal/trace/v2/base.go
new file mode 100644
index 0000000..e7cee29
--- /dev/null
@@ -0,0 +1,256 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file contains data types that all implementations of the trace format
+// parser need to provide to the rest of the package.
+
+package trace
+
+import (
+       "fmt"
+       "strings"
+
+       "internal/trace/v2/event"
+       "internal/trace/v2/event/go122"
+       "internal/trace/v2/version"
+)
+
+// maxArgs is the maximum number of arguments for "plain" events,
+// i.e. anything that could reasonably be represented as a Base.
+const maxArgs = 5
+
+// baseEvent is the basic unprocessed event. This serves as a common
+// fundamental data structure across all trace format versions.
+type baseEvent struct {
+       typ  event.Type
+       time Time
+       args [maxArgs - 1]uint64
+}
+
+// extra returns a slice representing extra available space in args
+// that the parser can use to pass data up into Event.
+func (e *baseEvent) extra(v version.Version) []uint64 {
+       switch v {
+       case version.Go122:
+               return e.args[len(go122.Specs()[e.typ].Args)-1:]
+       }
+       panic(fmt.Sprintf("unsupported version: go 1.%d", v))
+}
+
+// evTable contains the per-generation data necessary to
+// interpret an individual event.
+type evTable struct {
+       freq    frequency
+       strings dataTable[stringID, string]
+       stacks  dataTable[stackID, stack]
+
+       // extraStrings are strings that get generated during
+       // parsing but haven't come directly from the trace, so
+       // they don't appear in strings.
+       extraStrings   []string
+       extraStringIDs map[string]extraStringID
+       nextExtra      extraStringID
+}
+
+// addExtraString adds an extra string to the evTable and returns
+// a unique ID for the string in the table.
+func (t *evTable) addExtraString(s string) extraStringID {
+       if s == "" {
+               return 0
+       }
+       if t.extraStringIDs == nil {
+               t.extraStringIDs = make(map[string]extraStringID)
+       }
+       if id, ok := t.extraStringIDs[s]; ok {
+               return id
+       }
+       t.nextExtra++
+       id := t.nextExtra
+       t.extraStrings = append(t.extraStrings, s)
+       t.extraStringIDs[s] = id
+       return id
+}
+
+// getExtraString returns the extra string for the provided ID.
+// The ID must have been produced by addExtraString for this evTable.
+func (t *evTable) getExtraString(id extraStringID) string {
+       if id == 0 {
+               return ""
+       }
+       return t.extraStrings[id-1]
+}
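
For illustration, a short in-package sketch of the extra-string round trip (IDs are one-based; ID 0 always maps to the empty string), assuming it runs inside package trace:

    var t evTable
    id := t.addExtraString("unique reason") // first insert returns ID 1
    _ = t.addExtraString("unique reason")   // deduplicated: returns the same ID
    s := t.getExtraString(id)               // "unique reason"
    _ = t.getExtraString(0)                 // "" by convention
    _ = s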
+
+// dataTable is a mapping from EIs to Es.
+type dataTable[EI ~uint64, E any] struct {
+       present []uint8
+       dense   []E
+       sparse  map[EI]E
+}
+
+// insert tries to add a mapping from id to data.
+//
+// Returns an error if a mapping for id already exists, regardless
+// of whether or not data is the same in content. This should be used
+// for validation during parsing.
+func (d *dataTable[EI, E]) insert(id EI, data E) error {
+       if d.sparse == nil {
+               d.sparse = make(map[EI]E)
+       }
+       if existing, ok := d.get(id); ok {
+               return fmt.Errorf("multiple %Ts with the same ID: id=%d, new=%v, existing=%v", data, id, data, existing)
+       }
+       d.sparse[id] = data
+       return nil
+}
+
+// compactify attempts to compact sparse into dense.
+//
+// This is intended to be called only once after insertions are done.
+func (d *dataTable[EI, E]) compactify() {
+       if d.sparse == nil || len(d.dense) != 0 {
+               // Already compactified.
+               return
+       }
+       // Find the range of IDs.
+       maxID := EI(0)
+       minID := ^EI(0)
+       for id := range d.sparse {
+               if id > maxID {
+                       maxID = id
+               }
+               if id < minID {
+                       minID = id
+               }
+       }
+       // We're willing to waste at most 2x memory.
+       if int(maxID-minID) > 2*len(d.sparse) {
+               return
+       }
+       if int(minID) > len(d.sparse) {
+               return
+       }
+       size := int(maxID) + 1
+       d.present = make([]uint8, (size+7)/8)
+       d.dense = make([]E, size)
+       for id, data := range d.sparse {
+               d.dense[id] = data
+               d.present[id/8] |= uint8(1) << (id % 8)
+       }
+       d.sparse = nil
+}
+
+// get returns the E for id or false if it doesn't
+// exist. This should be used for validation during parsing.
+func (d *dataTable[EI, E]) get(id EI) (E, bool) {
+       if id == 0 {
+               return *new(E), true
+       }
+       if int(id) < len(d.dense) {
+               if d.present[id/8]&(uint8(1)<<(id%8)) != 0 {
+                       return d.dense[id], true
+               }
+       } else if d.sparse != nil {
+               if data, ok := d.sparse[id]; ok {
+                       return data, true
+               }
+       }
+       return *new(E), false
+}
+
+// forEach iterates over all ID/value pairs in the data table.
+func (d *dataTable[EI, E]) forEach(yield func(EI, E) bool) bool {
+       for id, value := range d.dense {
+               if d.present[id/8]&(uint8(1)<<(id%8)) == 0 {
+                       continue
+               }
+               if !yield(EI(id), value) {
+                       return false
+               }
+       }
+       if d.sparse == nil {
+               return true
+       }
+       for id, value := range d.sparse {
+               if !yield(id, value) {
+                       return false
+               }
+       }
+       return true
+}
+
+// mustGet returns the E for id or panics if it fails.
+//
+// This should only be used if id has already been validated.
+func (d *dataTable[EI, E]) mustGet(id EI) E {
+       data, ok := d.get(id)
+       if !ok {
+               panic(fmt.Sprintf("expected id %d in %T table", id, data))
+       }
+       return data
+}
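
For illustration, the lifecycle these methods imply is: insert with validation while parsing a generation, compactify once when done, then query. An in-package sketch, assuming it runs inside package trace:

    var strs dataTable[stringID, string]
    if err := strs.insert(stringID(1), "main.main"); err != nil {
        // A duplicate ID indicates a malformed trace.
    }
    strs.compactify() // one-shot: moves the sparse map into the dense slice + present bitmap
    if s, ok := strs.get(stringID(1)); ok {
        _ = s // "main.main"
    }
    _ = strs.mustGet(stringID(1)) // panics only if the ID was never inserted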
+
+// frequency is nanoseconds per timestamp unit.
+type frequency float64
+
+// mul multiplies an unprocessed timestamp to produce a time in nanoseconds.
+func (f frequency) mul(t timestamp) Time {
+       return Time(float64(t) * float64(f))
+}
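
For example, assuming a trace clock that ticks once every 64 nanoseconds:

    f := frequency(64.0)        // 64 ns per timestamp unit
    t := f.mul(timestamp(1000)) // Time(64000), i.e. 64000 ns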
+
+// stringID is an index into the string table for a generation.
+type stringID uint64
+
+// extraStringID is an index into the extra string table for a generation.
+type extraStringID uint64
+
+// stackID is an index into the stack table for a generation.
+type stackID uint64
+
+// cpuSample represents a CPU profiling sample captured by the trace.
+type cpuSample struct {
+       schedCtx
+       time  Time
+       stack stackID
+}
+
+// asEvent produces a complete Event from a cpuSample. It needs
+// the evTable from the generation that created it.
+//
+// We don't just store it as an Event in generation to minimize
+// the amount of pointer data floating around.
+func (s cpuSample) asEvent(table *evTable) Event {
+       // TODO(mknyszek): This is go122-specific, but shouldn't be.
+       // Generalize this in the future.
+       e := Event{
+               table: table,
+               ctx:   s.schedCtx,
+               base: baseEvent{
+                       typ:  go122.EvCPUSample,
+                       time: s.time,
+               },
+       }
+       e.base.args[0] = uint64(s.stack)
+       return e
+}
+
+// stack represents a goroutine stack sample.
+type stack struct {
+       frames []frame
+}
+
+func (s stack) String() string {
+       var sb strings.Builder
+       for _, frame := range s.frames {
+               fmt.Fprintf(&sb, "\t%#v\n", frame)
+       }
+       return sb.String()
+}
+
+// frame represents a single stack frame.
+type frame struct {
+       pc     uint64
+       funcID stringID
+       fileID stringID
+       line   uint64
+}
diff --git a/src/internal/trace/v2/batch.go b/src/internal/trace/v2/batch.go
new file mode 100644
index 0000000..899eb0f
--- /dev/null
@@ -0,0 +1,97 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+       "bufio"
+       "bytes"
+       "encoding/binary"
+       "fmt"
+       "io"
+
+       "internal/trace/v2/event"
+       "internal/trace/v2/event/go122"
+)
+
+// timestamp is an unprocessed timestamp.
+type timestamp uint64
+
+// batch represents a batch of trace events.
+// It is unparsed except for its header.
+type batch struct {
+       m    ThreadID
+       time timestamp
+       data []byte
+}
+
+func (b *batch) isStringsBatch() bool {
+       return len(b.data) > 0 && event.Type(b.data[0]) == go122.EvStrings
+}
+
+func (b *batch) isStacksBatch() bool {
+       return len(b.data) > 0 && event.Type(b.data[0]) == go122.EvStacks
+}
+
+func (b *batch) isCPUSamplesBatch() bool {
+       return len(b.data) > 0 && event.Type(b.data[0]) == go122.EvCPUSamples
+}
+
+func (b *batch) isFreqBatch() bool {
+       return len(b.data) > 0 && event.Type(b.data[0]) == go122.EvFrequency
+}
+
+// readBatch reads the next full batch from r.
+func readBatch(r *bufio.Reader) (batch, uint64, error) {
+       // Read batch header byte.
+       b, err := r.ReadByte()
+       if err != nil {
+               return batch{}, 0, err
+       }
+       if typ := event.Type(b); typ != go122.EvEventBatch {
+               return batch{}, 0, fmt.Errorf("expected batch event (%s), got %s", go122.EventString(go122.EvEventBatch), go122.EventString(typ))
+       }
+
+       // Read the batch header: gen (generation), thread (M) ID, base timestamp
+       // for the batch.
+       gen, err := binary.ReadUvarint(r)
+       if err != nil {
+               return batch{}, gen, fmt.Errorf("error reading batch gen: %w", err)
+       }
+       m, err := binary.ReadUvarint(r)
+       if err != nil {
+               return batch{}, gen, fmt.Errorf("error reading batch M ID: %w", err)
+       }
+       ts, err := binary.ReadUvarint(r)
+       if err != nil {
+               return batch{}, gen, fmt.Errorf("error reading batch timestamp: %w", err)
+       }
+
+       // Read in the size of the batch to follow.
+       size, err := binary.ReadUvarint(r)
+       if err != nil {
+               return batch{}, gen, fmt.Errorf("error reading batch size: %w", err)
+       }
+       if size > go122.MaxBatchSize {
+               return batch{}, gen, fmt.Errorf("invalid batch size %d, maximum is %d", size, go122.MaxBatchSize)
+       }
+
+       // Copy out the batch for later processing.
+       var data bytes.Buffer
+       data.Grow(int(size))
+       n, err := io.CopyN(&data, r, int64(size))
+       if n != int64(size) {
+               return batch{}, gen, fmt.Errorf("failed to read full batch: read %d but wanted %d", n, size)
+       }
+       if err != nil {
+               return batch{}, gen, fmt.Errorf("copying batch data: %w", err)
+       }
+
+       // Return the batch.
+       return batch{
+               m:    ThreadID(m),
+               time: timestamp(ts),
+               data: data.Bytes(),
+       }, gen, nil
+}
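
The framing readBatch expects is one event-type byte (go122.EvEventBatch) followed by uvarints for the generation, the M ID, the base timestamp, and the payload length, then the payload bytes themselves. A self-contained sketch of that wire format using only the standard library; the type-byte value is a stand-in, not the real go122 constant:

    package main

    import (
        "bufio"
        "bytes"
        "encoding/binary"
        "fmt"
        "io"
    )

    const evEventBatch = 0x01 // stand-in for go122.EvEventBatch

    func main() {
        payload := []byte{0xde, 0xad, 0xbe, 0xef}

        // Encode a batch header the way a writer would.
        buf := []byte{evEventBatch}
        buf = binary.AppendUvarint(buf, 3)                    // generation
        buf = binary.AppendUvarint(buf, 42)                   // thread (M) ID
        buf = binary.AppendUvarint(buf, 123456)               // base timestamp
        buf = binary.AppendUvarint(buf, uint64(len(payload))) // batch size
        buf = append(buf, payload...)

        // Decode it back, mirroring readBatch.
        r := bufio.NewReader(bytes.NewReader(buf))
        typ, _ := r.ReadByte()
        gen, _ := binary.ReadUvarint(r)
        m, _ := binary.ReadUvarint(r)
        ts, _ := binary.ReadUvarint(r)
        size, _ := binary.ReadUvarint(r)
        data := make([]byte, size)
        io.ReadFull(r, data)
        fmt.Println(typ, gen, m, ts, size, data) // 1 3 42 123456 4 [222 173 190 239]
    }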
diff --git a/src/internal/trace/v2/batchcursor.go b/src/internal/trace/v2/batchcursor.go
new file mode 100644
index 0000000..fe62750
--- /dev/null
@@ -0,0 +1,168 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+       "cmp"
+       "encoding/binary"
+       "fmt"
+
+       "internal/trace/v2/event"
+       "internal/trace/v2/event/go122"
+)
+
+type batchCursor struct {
+       m       ThreadID
+       lastTs  Time
+       idx     int       // next index into []batch
+       dataOff int       // next index into batch.data
+       ev      baseEvent // last read event
+}
+
+func (b *batchCursor) nextEvent(batches []batch, freq frequency) (ok bool, err error) {
+       // Batches should generally always have at least one event,
+       // but let's be defensive about that and accept empty batches.
+       for b.idx < len(batches) && len(batches[b.idx].data) == b.dataOff {
+               b.idx++
+               b.dataOff = 0
+               b.lastTs = 0
+       }
+       // Have we reached the end of the batches?
+       if b.idx == len(batches) {
+               return false, nil
+       }
+       // Initialize lastTs if it hasn't been yet.
+       if b.lastTs == 0 {
+               b.lastTs = freq.mul(batches[b.idx].time)
+       }
+       // Read an event out.
+       n, tsdiff, err := readTimedBaseEvent(batches[b.idx].data[b.dataOff:], &b.ev)
+       if err != nil {
+               return false, err
+       }
+       // Complete the timestamp from the cursor's last timestamp.
+       b.ev.time = freq.mul(tsdiff) + b.lastTs
+
+       // Move the cursor's timestamp forward.
+       b.lastTs = b.ev.time
+
+       // Move the cursor forward.
+       b.dataOff += n
+       return true, nil
+}
+
+func (b *batchCursor) compare(a *batchCursor) int {
+       return cmp.Compare(b.ev.time, a.ev.time)
+}
+
+// readTimedBaseEvent reads out the raw event data from b
+// into e. It does not try to interpret the arguments
+// but it does validate that the event is a regular
+// event with a timestamp (vs. a structural event).
+//
+// It requires that the event it's reading be timed, which must
+// be the case for every event in a plain EventBatch.
+func readTimedBaseEvent(b []byte, e *baseEvent) (int, timestamp, error) {
+       // Get the event type.
+       typ := event.Type(b[0])
+       specs := go122.Specs()
+       if int(typ) >= len(specs) {
+               return 0, 0, fmt.Errorf("found invalid event type: %v", typ)
+       }
+       e.typ = typ
+
+       // Get spec.
+       spec := &specs[typ]
+       if len(spec.Args) == 0 || !spec.IsTimedEvent {
+               return 0, 0, fmt.Errorf("found event without a timestamp: type=%v", typ)
+       }
+       n := 1
+
+       // Read timestamp diff.
+       ts, nb := binary.Uvarint(b[n:])
+       n += nb
+
+       // Read the rest of the arguments.
+       for i := 0; i < len(spec.Args)-1; i++ {
+               arg, nb := binary.Uvarint(b[n:])
+               e.args[i] = arg
+               n += nb
+       }
+       return n, timestamp(ts), nil
+}
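
Each event in a batch carries a uvarint timestamp delta rather than an absolute time, so the cursor recovers absolute times by accumulating deltas onto its last timestamp. A standalone sketch of the accumulation, assuming a 1 ns-per-unit clock for simplicity:

    package main

    import "fmt"

    func main() {
        last := int64(1000)        // batch base timestamp, already in ns
        diffs := []int64{5, 3, 12} // per-event deltas, read as uvarints
        for _, d := range diffs {
            last += d // the real parser applies freq.mul(d) here
            fmt.Println("event at", last) // 1005, 1008, 1020
        }
    }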
+
+func heapInsert(heap []*batchCursor, bc *batchCursor) []*batchCursor {
+       // Add the cursor to the end of the heap.
+       heap = append(heap, bc)
+
+       // Sift the new entry up to the right place.
+       heapSiftUp(heap, len(heap)-1)
+       return heap
+}
+
+func heapUpdate(heap []*batchCursor, i int) {
+       // Try to sift up.
+       if heapSiftUp(heap, i) != i {
+               return
+       }
+       // Try to sift down, if sifting up failed.
+       heapSiftDown(heap, i)
+}
+
+func heapRemove(heap []*batchCursor, i int) []*batchCursor {
+       // Sift index i up to the root, ignoring actual values.
+       for i > 0 {
+               heap[(i-1)/2], heap[i] = heap[i], heap[(i-1)/2]
+               i = (i - 1) / 2
+       }
+       // Swap the root with the last element, then remove it.
+       heap[0], heap[len(heap)-1] = heap[len(heap)-1], heap[0]
+       heap = heap[:len(heap)-1]
+       // Sift the root down.
+       heapSiftDown(heap, 0)
+       return heap
+}
+
+func heapSiftUp(heap []*batchCursor, i int) int {
+       for i > 0 && heap[(i-1)/2].ev.time > heap[i].ev.time {
+               heap[(i-1)/2], heap[i] = heap[i], heap[(i-1)/2]
+               i = (i - 1) / 2
+       }
+       return i
+}
+
+func heapSiftDown(heap []*batchCursor, i int) int {
+       for {
+               m := min3(heap, i, 2*i+1, 2*i+2)
+               if m == i {
+                       // Heap invariant already applies.
+                       break
+               }
+               heap[i], heap[m] = heap[m], heap[i]
+               i = m
+       }
+       return i
+}
+
+func min3(b []*batchCursor, i0, i1, i2 int) int {
+       minIdx := i0
+       minT := maxTime
+       if i0 < len(b) {
+               minT = b[i0].ev.time
+       }
+       if i1 < len(b) {
+               if t := b[i1].ev.time; t < minT {
+                       minT = t
+                       minIdx = i1
+               }
+       }
+       if i2 < len(b) {
+               if t := b[i2].ev.time; t < minT {
+                       minT = t
+                       minIdx = i2
+               }
+       }
+       return minIdx
+}
diff --git a/src/internal/trace/v2/batchcursor_test.go b/src/internal/trace/v2/batchcursor_test.go
new file mode 100644
index 0000000..69731e5
--- /dev/null
@@ -0,0 +1,126 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+       "fmt"
+       "strings"
+       "testing"
+
+       "slices"
+)
+
+func TestHeap(t *testing.T) {
+       var heap []*batchCursor
+
+       // Insert a bunch of values into the heap.
+       checkHeap(t, heap)
+       heap = heapInsert(heap, makeBatchCursor(5))
+       checkHeap(t, heap)
+       for i := int64(-20); i < 20; i++ {
+               heap = heapInsert(heap, makeBatchCursor(i))
+               checkHeap(t, heap)
+       }
+
+       // Update an element in the middle to be the new minimum.
+       for i := range heap {
+               if heap[i].ev.time == 5 {
+                       heap[i].ev.time = -21
+                       heapUpdate(heap, i)
+                       break
+               }
+       }
+       checkHeap(t, heap)
+       if heap[0].ev.time != -21 {
+               t.Fatalf("heap update failed, expected %d as heap min: %s", -21, heapDebugString(heap))
+       }
+
+       // Update the minimum element to be smaller. There should be no change.
+       heap[0].ev.time = -22
+       heapUpdate(heap, 0)
+       checkHeap(t, heap)
+       if heap[0].ev.time != -22 {
+               t.Fatalf("heap update failed, expected %d as heap min: %s", -22, heapDebugString(heap))
+       }
+
+       // Update the last element to be larger. There should be no change.
+       heap[len(heap)-1].ev.time = 21
+       heapUpdate(heap, len(heap)-1)
+       checkHeap(t, heap)
+       if heap[len(heap)-1].ev.time != 21 {
+               t.Fatalf("heap update failed, expected %d as heap min: %s", 21, heapDebugString(heap))
+       }
+
+       // Update the last element to be smaller.
+       heap[len(heap)-1].ev.time = 7
+       heapUpdate(heap, len(heap)-1)
+       checkHeap(t, heap)
+       if heap[len(heap)-1].ev.time == 21 {
+               t.Fatalf("heap update failed, unexpected %d as heap min: %s", 21, heapDebugString(heap))
+       }
+
+       // Remove an element in the middle.
+       for i := range heap {
+               if heap[i].ev.time == 5 {
+                       heap = heapRemove(heap, i)
+                       break
+               }
+       }
+       checkHeap(t, heap)
+       for i := range heap {
+               if heap[i].ev.time == 5 {
+                       t.Fatalf("failed to remove heap elem with time %d: %s", 5, heapDebugString(heap))
+               }
+       }
+
+       // Remove tail.
+       heap = heapRemove(heap, len(heap)-1)
+       checkHeap(t, heap)
+
+       // Remove from the head, and make sure the result is sorted.
+       l := len(heap)
+       var removed []*batchCursor
+       for i := 0; i < l; i++ {
+               removed = append(removed, heap[0])
+               heap = heapRemove(heap, 0)
+               checkHeap(t, heap)
+       }
+       if !slices.IsSortedFunc(removed, (*batchCursor).compare) {
+               t.Fatalf("heap elements not removed in sorted order, got: %s", heapDebugString(removed))
+       }
+}
+
+func makeBatchCursor(v int64) *batchCursor {
+       return &batchCursor{ev: baseEvent{time: Time(v)}}
+}
+
+func heapDebugString(heap []*batchCursor) string {
+       var sb strings.Builder
+       fmt.Fprintf(&sb, "[")
+       for i := range heap {
+               if i != 0 {
+                       fmt.Fprintf(&sb, ", ")
+               }
+               fmt.Fprintf(&sb, "%d", heap[i].ev.time)
+       }
+       fmt.Fprintf(&sb, "]")
+       return sb.String()
+}
+
+func checkHeap(t *testing.T, heap []*batchCursor) {
+       t.Helper()
+
+       for i := range heap {
+               if i == 0 {
+                       continue
+               }
+               if heap[(i-1)/2].compare(heap[i]) > 0 {
+                       t.Errorf("heap invariant not maintained between index %d and parent %d: %s", i, (i-1)/2, heapDebugString(heap))
+               }
+       }
+       if t.Failed() {
+               t.FailNow()
+       }
+}
diff --git a/src/internal/trace/v2/event.go b/src/internal/trace/v2/event.go
new file mode 100644
index 0000000..7ec4698
--- /dev/null
@@ -0,0 +1,768 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+       "fmt"
+       "math"
+       "strings"
+       "time"
+
+       "internal/trace/v2/event"
+       "internal/trace/v2/event/go122"
+       "internal/trace/v2/version"
+)
+
+// EventKind indicates the kind of event this is.
+//
+// Use this information to obtain a more specific event that
+// allows access to more detailed information.
+type EventKind uint16
+
+const (
+       EventBad EventKind = iota
+
+       // EventSync is an event that indicates a global synchronization
+       // point in the trace. At the point of a sync event, the
+       // trace reader can be certain that all resources (e.g. threads,
+       // goroutines) that have existed until that point have been enumerated.
+       EventSync
+
+       // EventMetric is an event that represents the value of a metric at
+       // a particular point in time.
+       EventMetric
+
+       // EventLabel attaches a label to a resource.
+       EventLabel
+
+       // EventStackSample represents an execution sample, indicating what a
+       // thread/proc/goroutine was doing at a particular point in time via
+       // its backtrace.
+       //
+       // Note: Samples should be considered a close approximation of
+       // what a thread/proc/goroutine was executing at a given point in time.
+       // These events may slightly contradict the situation StateTransitions
+       // describe, so they should only be treated as a best-effort annotation.
+       EventStackSample
+
+       // EventRangeBegin and EventRangeEnd are a pair of generic events representing
+       // a special range of time. Ranges are named and scoped to some resource
+       // (identified via ResourceKind). A range that has begun but has not ended
+       // is considered active.
+       //
+       // EventRangeBegin and EventRangeEnd will share the same name, and an End will always
+       // follow a Begin on the same instance of the resource. The associated
+       // resource ID can be obtained from the Event. ResourceNone indicates the
+       // range is globally scoped. That is, any goroutine/proc/thread can start or
+       // stop, but only one such range may be active at any given time.
+       //
+       // EventRangeActive is like EventRangeBegin, but indicates that the range was
+       // already active. In this case, the resource referenced may not be in the current
+       // context.
+       EventRangeBegin
+       EventRangeActive
+       EventRangeEnd
+
+       // EventTaskBegin and EventTaskEnd are a pair of events representing a runtime/trace.Task.
+       EventTaskBegin
+       EventTaskEnd
+
+       // EventRegionBegin and EventRegionEnd are a pair of events representing a runtime/trace.Region.
+       EventRegionBegin
+       EventRegionEnd
+
+       // EventLog represents a runtime/trace.Log call.
+       EventLog
+
+       // EventStateTransition represents a transition in state for some resource.
+       EventStateTransition
+)
+
+// String returns a string form of the EventKind.
+func (e EventKind) String() string {
+       if int(e) >= len(eventKindStrings) {
+               return eventKindStrings[0]
+       }
+       return eventKindStrings[e]
+}
+
+var eventKindStrings = [...]string{
+       EventBad:             "Bad",
+       EventSync:            "Sync",
+       EventMetric:          "Metric",
+       EventLabel:           "Label",
+       EventStackSample:     "StackSample",
+       EventRangeBegin:      "RangeBegin",
+       EventRangeActive:     "RangeActive",
+       EventRangeEnd:        "RangeEnd",
+       EventTaskBegin:       "TaskBegin",
+       EventTaskEnd:         "TaskEnd",
+       EventRegionBegin:     "RegionBegin",
+       EventRegionEnd:       "RegionEnd",
+       EventLog:             "Log",
+       EventStateTransition: "StateTransition",
+}
+
+const maxTime = Time(math.MaxInt64)
+
+// Time is a timestamp in nanoseconds.
+//
+// It corresponds to the monotonic clock on the platform that the
+// trace was taken, and so is possible to correlate with timestamps
+// for other traces taken on the same machine using the same clock
+// (i.e. no reboots in between).
+//
+// The actual absolute value of the timestamp is only meaningful in
+// relation to other timestamps from the same clock.
+//
+// BUG: Timestamps coming from traces on Windows platforms are
+// only comparable with timestamps from the same trace. Timestamps
+// across traces cannot be compared, because the system clock is
+// not used as of Go 1.22.
+//
+// BUG: Traces produced by Go versions 1.21 and earlier cannot be
+// compared with timestamps from other traces taken on the same
+// machine. This is because the system clock was not used at all
+// to collect those timestamps.
+type Time int64
+
+// Sub subtracts t0 from t, returning the duration in nanoseconds.
+func (t Time) Sub(t0 Time) time.Duration {
+       return time.Duration(int64(t) - int64(t0))
+}
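
For example, given two events ev1 and ev2 from the same trace (hypothetical variables), the elapsed time between them is:

    elapsed := ev2.Time().Sub(ev1.Time()) // time.Duration; only meaningful within one clock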
+
+// Metric provides details about a Metric event.
+type Metric struct {
+       // Name is the name of the sampled metric.
+       //
+       // Names follow the same convention as metric names in the
+       // runtime/metrics package, meaning they include the unit.
+       // Names that match with the runtime/metrics package represent
+       // the same quantity. Note that this corresponds to the
+       // runtime/metrics package for the Go version this trace was
+       // collected for.
+       Name string
+
+       // Value is the sampled value of the metric.
+       //
+       // The Value's Kind is tied to the name of the metric, and so is
+       // guaranteed to be the same for metric samples for the same metric.
+       Value Value
+}
+
+// Label provides details about a Label event.
+type Label struct {
+       // Label is the label applied to some resource.
+       Label string
+
+       // Resource is the resource to which this label should be applied.
+       Resource ResourceID
+}
+
+// Range provides details about a Range event.
+type Range struct {
+       // Name is a human-readable name for the range.
+       //
+       // This name can be used to identify the end of the range for the resource
+       // it's scoped to, because only one of each type of range may be active on
+       // a particular resource. The relevant resource should be obtained from the
+       // Event that produced these details. The corresponding RangeEnd will have
+       // an identical name.
+       Name string
+
+       // Scope is the resource that the range is scoped to.
+       //
+       // For example, a ResourceGoroutine scope means that the same goroutine
+       // must have a start and end for the range, and that goroutine can only
+       // have one range of a particular name active at any given time. The
+       // ID that this range is scoped to may be obtained via Event.Goroutine.
+       //
+       // The ResourceNone scope means that the range is globally scoped. As a
+       // result, any goroutine/proc/thread may start or end the range, and only
+       // one such named range may be active globally at any given time.
+       //
+       // For RangeBegin and RangeEnd events, this will always reference some
+       // resource ID in the current execution context. For RangeActive events,
+       // this may reference a resource not in the current context. Prefer Scope
+       // over the current execution context.
+       Scope ResourceID
+}
+
+// RangeAttribute provides an attribute of a completed Range.
+type RangeAttribute struct {
+       // Name is the human-readable name for the range.
+       Name string
+
+       // Value is the value of the attribute.
+       Value Value
+}
+
+// TaskID is the internal ID of a task used to disambiguate tasks (even if they
+// are of the same type).
+type TaskID uint64
+
+// NoTask indicates the lack of a task.
+const NoTask = TaskID(0)
+
+// Task provides details about a Task event.
+type Task struct {
+       // ID is a unique identifier for the task.
+       //
+       // This can be used to associate the beginning of a task with its end.
+       ID TaskID
+
+       // Parent is the ID of the parent task.
+       Parent TaskID
+
+       // Type is the taskType that was passed to runtime/trace.NewTask.
+       //
+       // May be "" if a task's TaskBegin event isn't present in the trace.
+       Type string
+}
+
+// Region provides details about a Region event.
+type Region struct {
+       // Task is the ID of the task this region is associated with.
+       Task TaskID
+
+       // Type is the regionType that was passed to runtime/trace.StartRegion or runtime/trace.WithRegion.
+       Type string
+}
+
+// Log provides details about a Log event.
+type Log struct {
+       // Task is the ID of the task this log message is associated with.
+       Task TaskID
+
+       // Category is the category that was passed to runtime/trace.Log or runtime/trace.Logf.
+       Category string
+
+       // Message is the message that was passed to runtime/trace.Log or runtime/trace.Logf.
+       Message string
+}
+
+// Stack represents a stack. It's really a handle to a stack and it's trivially comparable.
+//
+// If two Stacks are equal then their Frames are guaranteed to be identical. If they are not
+// equal, however, their Frames may still be equal.
+type Stack struct {
+       table *evTable
+       id    stackID
+}
+
+// Frames is an iterator over the frames in a Stack.
+func (s Stack) Frames(yield func(f StackFrame) bool) bool {
+       if s.id == 0 {
+               return true
+       }
+       stk := s.table.stacks.mustGet(s.id)
+       for _, f := range stk.frames {
+               sf := StackFrame{
+                       PC:   f.pc,
+                       Func: s.table.strings.mustGet(f.funcID),
+                       File: s.table.strings.mustGet(f.fileID),
+                       Line: f.line,
+               }
+               if !yield(sf) {
+                       return false
+               }
+       }
+       return true
+}
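
Frames is a push-style iterator: return false from yield to stop early. A sketch of printing an event's stack from outside the package, assuming ev is an Event produced by this parser:

    ev.Stack().Frames(func(f trace.StackFrame) bool {
        fmt.Printf("%s\n\t%s:%d @ 0x%x\n", f.Func, f.File, f.Line, f.PC)
        return true // continue with the next frame
    })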
+
+// NoStack is a sentinel value that can be compared against any Stack value, indicating
+// a lack of a stack trace.
+var NoStack = Stack{}
+
+// StackFrame represents a single frame of a stack.
+type StackFrame struct {
+       // PC is the program counter of the function call if this
+       // is not a leaf frame. If it's a leaf frame, it's the point
+       // at which the stack trace was taken.
+       PC uint64
+
+       // Func is the name of the function this frame maps to.
+       Func string
+
+       // File is the file which contains the source code of Func.
+       File string
+
+       // Line is the line number within File which maps to PC.
+       Line uint64
+}
+
+// Event represents a single event in the trace.
+type Event struct {
+       table *evTable
+       ctx   schedCtx
+       base  baseEvent
+}
+
+// Kind returns the kind of event that this is.
+func (e Event) Kind() EventKind {
+       return go122Type2Kind[e.base.typ]
+}
+
+// Time returns the timestamp of the event.
+func (e Event) Time() Time {
+       return e.base.time
+}
+
+// Goroutine returns the ID of the goroutine that was executing when
+// this event happened. It describes part of the execution context
+// for this event.
+//
+// Note that for goroutine state transitions this always refers to the
+// state before the transition. For example, if a goroutine is just
+// starting to run on this thread and/or proc, then this will return
+// NoGoroutine. In this case, the goroutine starting to run can be
+// found at Event.StateTransition().Resource.
+func (e Event) Goroutine() GoID {
+       return e.ctx.G
+}
+
+// Proc returns the ID of the proc this event pertains to.
+//
+// Note that for proc state transitions this always refers to the
+// state before the transition. For example, if a proc is just
+// starting to run on this thread, then this will return NoProc.
+func (e Event) Proc() ProcID {
+       return e.ctx.P
+}
+
+// Thread returns the ID of the thread this event pertains to.
+//
+// Note that for thread state transitions this always refers to the
+// state before the transition. For example, if a thread is just
+// starting to run, then this will return NoThread.
+//
+// Note: tracking thread state is not currently supported, so this
+// will always return a valid thread ID. However, thread state transitions
+// may be tracked in the future, and callers must be robust to this
+// possibility.
+func (e Event) Thread() ThreadID {
+       return e.ctx.M
+}
+
+// Stack returns a handle to a stack associated with the event.
+//
+// This represents a stack trace at the current moment in time for
+// the current execution context.
+func (e Event) Stack() Stack {
+       if e.base.typ == evSync {
+               return NoStack
+       }
+       if e.base.typ == go122.EvCPUSample {
+               return Stack{table: e.table, id: stackID(e.base.args[0])}
+       }
+       spec := go122.Specs()[e.base.typ]
+       if len(spec.StackIDs) == 0 {
+               return NoStack
+       }
+       // The stack for the main execution context is always the
+       // first stack listed in StackIDs. Subtract one from this
+       // because we've peeled away the timestamp argument.
+       id := stackID(e.base.args[spec.StackIDs[0]-1])
+       if id == 0 {
+               return NoStack
+       }
+       return Stack{table: e.table, id: id}
+}
+
+// Metric returns details about a Metric event.
+//
+// Panics if Kind != EventMetric.
+func (e Event) Metric() Metric {
+       if e.Kind() != EventMetric {
+               panic("Metric called on non-Metric event")
+       }
+       var m Metric
+       switch e.base.typ {
+       case go122.EvProcsChange:
+               m.Name = "/sched/gomaxprocs:threads"
+               m.Value = Value{kind: ValueUint64, scalar: e.base.args[0]}
+       case go122.EvHeapAlloc:
+               m.Name = "/memory/classes/heap/objects:bytes"
+               m.Value = Value{kind: ValueUint64, scalar: e.base.args[0]}
+       case go122.EvHeapGoal:
+               m.Name = "/gc/heap/goal:bytes"
+               m.Value = Value{kind: ValueUint64, scalar: e.base.args[0]}
+       default:
+               panic(fmt.Sprintf("internal error: unexpected event type for Metric kind: %s", go122.EventString(e.base.typ)))
+       }
+       return m
+}
+
+// Label returns details about a Label event.
+//
+// Panics if Kind != EventLabel.
+func (e Event) Label() Label {
+       if e.Kind() != EventLabel {
+               panic("Label called on non-Label event")
+       }
+       if e.base.typ != go122.EvGoLabel {
+               panic(fmt.Sprintf("internal error: unexpected event type for Label kind: %s", go122.EventString(e.base.typ)))
+       }
+       return Label{
+               Label:    e.table.strings.mustGet(stringID(e.base.args[0])),
+               Resource: ResourceID{Kind: ResourceGoroutine, id: int64(e.ctx.G)},
+       }
+}
+
+// Range returns details about an EventRangeBegin, EventRangeActive, or EventRangeEnd event.
+//
+// Panics if Kind != EventRangeBegin, Kind != EventRangeActive, and Kind != EventRangeEnd.
+func (e Event) Range() Range {
+       if kind := e.Kind(); kind != EventRangeBegin && kind != EventRangeActive && kind != EventRangeEnd {
+               panic("Range called on non-Range event")
+       }
+       var r Range
+       switch e.base.typ {
+       case go122.EvSTWBegin, go122.EvSTWEnd:
+               // N.B. ordering.advance smuggles in the STW reason as e.base.args[0]
+               // for go122.EvSTWEnd (it's already there for Begin).
+               r.Name = "stop-the-world (" + e.table.strings.mustGet(stringID(e.base.args[0])) + ")"
+               r.Scope = ResourceID{Kind: ResourceGoroutine, id: int64(e.Goroutine())}
+       case go122.EvGCBegin, go122.EvGCActive, go122.EvGCEnd:
+               r.Name = "GC concurrent mark phase"
+               r.Scope = ResourceID{Kind: ResourceNone}
+       case go122.EvGCSweepBegin, go122.EvGCSweepActive, go122.EvGCSweepEnd:
+               r.Name = "GC incremental sweep"
+               r.Scope = ResourceID{Kind: ResourceProc}
+               if e.base.typ == go122.EvGCSweepActive {
+                       r.Scope.id = int64(e.base.args[0])
+               } else {
+                       r.Scope.id = int64(e.Proc())
+               }
+       case go122.EvGCMarkAssistBegin, go122.EvGCMarkAssistActive, go122.EvGCMarkAssistEnd:
+               r.Name = "GC mark assist"
+               r.Scope = ResourceID{Kind: ResourceGoroutine}
+               if e.base.typ == go122.EvGCMarkAssistActive {
+                       r.Scope.id = int64(e.base.args[0])
+               } else {
+                       r.Scope.id = int64(e.Goroutine())
+               }
+       default:
+               panic(fmt.Sprintf("internal error: unexpected event type for Range kind: %s", go122.EventString(e.base.typ)))
+       }
+       return r
+}
+
+// RangeAttributes returns attributes for a completed range.
+//
+// Panics if Kind != EventRangeEnd.
+func (e Event) RangeAttributes() []RangeAttribute {
+       if e.Kind() != EventRangeEnd {
+               panic("Range called on non-Range event")
+       }
+       if e.base.typ != go122.EvGCSweepEnd {
+               return nil
+       }
+       return []RangeAttribute{
+               {
+                       Name:  "bytes swept",
+                       Value: Value{kind: ValueUint64, scalar: e.base.args[0]},
+               },
+               {
+                       Name:  "bytes reclaimed",
+                       Value: Value{kind: ValueUint64, scalar: e.base.args[1]},
+               },
+       }
+}
+
+// Task returns details about a TaskBegin or TaskEnd event.
+//
+// Panics if Kind != EventTaskBegin and Kind != EventTaskEnd.
+func (e Event) Task() Task {
+       if kind := e.Kind(); kind != EventTaskBegin && kind != EventTaskEnd {
+               panic("Task called on non-Task event")
+       }
+       parentID := NoTask
+       var typ string
+       switch e.base.typ {
+       case go122.EvUserTaskBegin:
+               parentID = TaskID(e.base.args[1])
+               typ = e.table.strings.mustGet(stringID(e.base.args[2]))
+       case go122.EvUserTaskEnd:
+               parentID = TaskID(e.base.extra(version.Go122)[0])
+               typ = e.table.getExtraString(extraStringID(e.base.extra(version.Go122)[1]))
+       default:
+               panic(fmt.Sprintf("internal error: unexpected event type for Task kind: %s", go122.EventString(e.base.typ)))
+       }
+       return Task{
+               ID:     TaskID(e.base.args[0]),
+               Parent: parentID,
+               Type:   typ,
+       }
+}
+
+// Region returns details about a RegionBegin or RegionEnd event.
+//
+// Panics if Kind != EventRegionBegin and Kind != EventRegionEnd.
+func (e Event) Region() Region {
+       if kind := e.Kind(); kind != EventRegionBegin && kind != EventRegionEnd {
+               panic("Region called on non-Region event")
+       }
+       if e.base.typ != go122.EvUserRegionBegin && e.base.typ != go122.EvUserRegionEnd {
+               panic(fmt.Sprintf("internal error: unexpected event type for Region kind: %s", go122.EventString(e.base.typ)))
+       }
+       return Region{
+               Task: TaskID(e.base.args[0]),
+               Type: e.table.strings.mustGet(stringID(e.base.args[1])),
+       }
+}
+
+// Log returns details about a Log event.
+//
+// Panics if Kind != EventLog.
+func (e Event) Log() Log {
+       if e.Kind() != EventLog {
+               panic("Log called on non-Log event")
+       }
+       if e.base.typ != go122.EvUserLog {
+               panic(fmt.Sprintf("internal error: unexpected event type for Log kind: %s", go122.EventString(e.base.typ)))
+       }
+       return Log{
+               Task:     TaskID(e.base.args[0]),
+               Category: e.table.strings.mustGet(stringID(e.base.args[1])),
+               Message:  e.table.strings.mustGet(stringID(e.base.args[2])),
+       }
+}
+
+// StateTransition returns details about a StateTransition event.
+//
+// Panics if Kind != EventStateTransition.
+func (e Event) StateTransition() StateTransition {
+       if e.Kind() != EventStateTransition {
+               panic("StateTransition called on non-StateTransition event")
+       }
+       var s StateTransition
+       switch e.base.typ {
+       case go122.EvProcStart:
+               s = procStateTransition(ProcID(e.base.args[0]), ProcIdle, ProcRunning)
+       case go122.EvProcStop:
+               s = procStateTransition(e.ctx.P, ProcRunning, ProcIdle)
+       case go122.EvProcSteal:
+               // N.B. ordering.advance populates e.base.extra.
+               beforeState := ProcRunning
+               if go122.ProcStatus(e.base.extra(version.Go122)[0]) == go122.ProcSyscallAbandoned {
+                       // We've lost information because this ProcSteal advanced on a
+                       // SyscallAbandoned state. Treat the P as idle because ProcStatus
+                       // treats SyscallAbandoned as Idle. Otherwise we'll have an invalid
+                       // transition.
+                       beforeState = ProcIdle
+               }
+               s = procStateTransition(ProcID(e.base.args[0]), beforeState, ProcIdle)
+       case go122.EvProcStatus:
+               // N.B. ordering.advance populates e.base.extra.
+               s = procStateTransition(ProcID(e.base.args[0]), ProcState(e.base.extra(version.Go122)[0]), go122ProcStatus2ProcState[e.base.args[1]])
+       case go122.EvGoCreate:
+               s = goStateTransition(GoID(e.base.args[0]), GoNotExist, GoRunnable)
+               s.Stack = Stack{table: e.table, id: stackID(e.base.args[1])}
+       case go122.EvGoCreateSyscall:
+               s = goStateTransition(GoID(e.base.args[0]), GoNotExist, GoSyscall)
+       case go122.EvGoStart:
+               s = goStateTransition(GoID(e.base.args[0]), GoRunnable, GoRunning)
+       case go122.EvGoDestroy:
+               s = goStateTransition(e.ctx.G, GoRunning, GoNotExist)
+       case go122.EvGoDestroySyscall:
+               s = goStateTransition(e.ctx.G, GoSyscall, GoNotExist)
+       case go122.EvGoStop:
+               s = goStateTransition(e.ctx.G, GoRunning, GoRunnable)
+               s.Reason = e.table.strings.mustGet(stringID(e.base.args[0]))
+       case go122.EvGoBlock:
+               s = goStateTransition(e.ctx.G, GoRunning, GoWaiting)
+               s.Reason = e.table.strings.mustGet(stringID(e.base.args[0]))
+       case go122.EvGoUnblock:
+               s = goStateTransition(GoID(e.base.args[0]), GoWaiting, GoRunnable)
+       case go122.EvGoSyscallBegin:
+               s = goStateTransition(e.ctx.G, GoRunning, GoSyscall)
+       case go122.EvGoSyscallEnd:
+               s = goStateTransition(e.ctx.G, GoSyscall, GoRunning)
+       case go122.EvGoSyscallEndBlocked:
+               s = goStateTransition(e.ctx.G, GoSyscall, GoRunnable)
+       case go122.EvGoStatus:
+               // N.B. ordering.advance populates e.base.extra.
+               s = goStateTransition(GoID(e.base.args[0]), GoState(e.base.extra(version.Go122)[0]), go122GoStatus2GoState[e.base.args[2]])
+       default:
+               panic(fmt.Sprintf("internal error: unexpected event type for StateTransition kind: %s", go122.EventString(e.base.typ)))
+       }
+       return s
+}
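+
+// As an illustrative sketch (not part of this change), a consumer
+// might use StateTransition to track goroutine states:
+//
+//	states := make(map[GoID]GoState)
+//	if ev.Kind() == EventStateTransition {
+//		st := ev.StateTransition()
+//		if st.Resource.Kind == ResourceGoroutine {
+//			_, to := st.Goroutine()
+//			states[st.Resource.Goroutine()] = to
+//		}
+//	}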
+
+const evSync = ^event.Type(0)
+
+var go122Type2Kind = [...]EventKind{
+       go122.EvCPUSample:           EventStackSample,
+       go122.EvProcsChange:         EventMetric,
+       go122.EvProcStart:           EventStateTransition,
+       go122.EvProcStop:            EventStateTransition,
+       go122.EvProcSteal:           EventStateTransition,
+       go122.EvProcStatus:          EventStateTransition,
+       go122.EvGoCreate:            EventStateTransition,
+       go122.EvGoCreateSyscall:     EventStateTransition,
+       go122.EvGoStart:             EventStateTransition,
+       go122.EvGoDestroy:           EventStateTransition,
+       go122.EvGoDestroySyscall:    EventStateTransition,
+       go122.EvGoStop:              EventStateTransition,
+       go122.EvGoBlock:             EventStateTransition,
+       go122.EvGoUnblock:           EventStateTransition,
+       go122.EvGoSyscallBegin:      EventStateTransition,
+       go122.EvGoSyscallEnd:        EventStateTransition,
+       go122.EvGoSyscallEndBlocked: EventStateTransition,
+       go122.EvGoStatus:            EventStateTransition,
+       go122.EvSTWBegin:            EventRangeBegin,
+       go122.EvSTWEnd:              EventRangeEnd,
+       go122.EvGCActive:            EventRangeActive,
+       go122.EvGCBegin:             EventRangeBegin,
+       go122.EvGCEnd:               EventRangeEnd,
+       go122.EvGCSweepActive:       EventRangeActive,
+       go122.EvGCSweepBegin:        EventRangeBegin,
+       go122.EvGCSweepEnd:          EventRangeEnd,
+       go122.EvGCMarkAssistActive:  EventRangeActive,
+       go122.EvGCMarkAssistBegin:   EventRangeBegin,
+       go122.EvGCMarkAssistEnd:     EventRangeEnd,
+       go122.EvHeapAlloc:           EventMetric,
+       go122.EvHeapGoal:            EventMetric,
+       go122.EvGoLabel:             EventLabel,
+       go122.EvUserTaskBegin:       EventTaskBegin,
+       go122.EvUserTaskEnd:         EventTaskEnd,
+       go122.EvUserRegionBegin:     EventRegionBegin,
+       go122.EvUserRegionEnd:       EventRegionEnd,
+       go122.EvUserLog:             EventLog,
+       evSync:                      EventSync,
+}
+
+var go122GoStatus2GoState = [...]GoState{
+       go122.GoRunnable: GoRunnable,
+       go122.GoRunning:  GoRunning,
+       go122.GoWaiting:  GoWaiting,
+       go122.GoSyscall:  GoSyscall,
+}
+
+var go122ProcStatus2ProcState = [...]ProcState{
+       go122.ProcRunning:          ProcRunning,
+       go122.ProcIdle:             ProcIdle,
+       go122.ProcSyscall:          ProcRunning,
+       go122.ProcSyscallAbandoned: ProcIdle,
+}
+
+// String returns the event as a human-readable string.
+//
+// The format of the string is intended for debugging and is subject to change.
+func (e Event) String() string {
+       var sb strings.Builder
+       fmt.Fprintf(&sb, "M=%d P=%d G=%d", e.Thread(), e.Proc(), e.Goroutine())
+       fmt.Fprintf(&sb, " %s Time=%d", e.Kind(), e.Time())
+       // Kind-specific fields.
+       switch kind := e.Kind(); kind {
+       case EventMetric:
+               m := e.Metric()
+               fmt.Fprintf(&sb, " Name=%q Value=%s", m.Name, valueAsString(m.Value))
+       case EventLabel:
+               l := e.Label()
+               fmt.Fprintf(&sb, " Label=%q Resource=%s", l.Label, l.Resource)
+       case EventRangeBegin, EventRangeActive, EventRangeEnd:
+               r := e.Range()
+               fmt.Fprintf(&sb, " Name=%q Scope=%s", r.Name, r.Scope)
+               if kind == EventRangeEnd {
+                       fmt.Fprintf(&sb, " Attributes=[")
+                       for i, attr := range e.RangeAttributes() {
+                               if i != 0 {
+                                       fmt.Fprintf(&sb, " ")
+                               }
+                               fmt.Fprintf(&sb, "%q=%s", attr.Name, valueAsString(attr.Value))
+                       }
+                       fmt.Fprintf(&sb, "]")
+               }
+       case EventTaskBegin, EventTaskEnd:
+               t := e.Task()
+               fmt.Fprintf(&sb, " ID=%d Parent=%d Type=%q", t.ID, t.Parent, t.Type)
+       case EventRegionBegin, EventRegionEnd:
+               r := e.Region()
+               fmt.Fprintf(&sb, " Task=%d Type=%q", r.Task, r.Type)
+       case EventLog:
+               l := e.Log()
+               fmt.Fprintf(&sb, " Task=%d Category=%q Message=%q", l.Task, l.Category, l.Message)
+       case EventStateTransition:
+               s := e.StateTransition()
+               fmt.Fprintf(&sb, " Resource=%s Reason=%q", s.Resource, s.Reason)
+               switch s.Resource.Kind {
+               case ResourceGoroutine:
+                       id := s.Resource.Goroutine()
+                       old, new := s.Goroutine()
+                       fmt.Fprintf(&sb, " GoID=%d %s->%s", id, old, new)
+               case ResourceProc:
+                       id := s.Resource.Proc()
+                       old, new := s.Proc()
+                       fmt.Fprintf(&sb, " ProcID=%d %s->%s", id, old, new)
+               }
+               if s.Stack != NoStack {
+                       fmt.Fprintln(&sb)
+                       fmt.Fprintln(&sb, "TransitionStack=")
+                       s.Stack.Frames(func(f StackFrame) bool {
+                               fmt.Fprintf(&sb, "\t%s @ 0x%x\n", f.Func, f.PC)
+                               fmt.Fprintf(&sb, "\t\t%s:%d\n", f.File, f.Line)
+                               return true
+                       })
+               }
+       }
+       if stk := e.Stack(); stk != NoStack {
+               fmt.Fprintln(&sb)
+               fmt.Fprintln(&sb, "Stack=")
+               stk.Frames(func(f StackFrame) bool {
+                       fmt.Fprintf(&sb, "\t%s @ 0x%x\n", f.Func, f.PC)
+                       fmt.Fprintf(&sb, "\t\t%s:%d\n", f.File, f.Line)
+                       return true
+               })
+       }
+       return sb.String()
+}
+
+// validateTableIDs checks to make sure lookups in e.table
+// will work.
+func (e Event) validateTableIDs() error {
+       if e.base.typ == evSync {
+               return nil
+       }
+       spec := go122.Specs()[e.base.typ]
+
+       // Check stacks.
+       for _, i := range spec.StackIDs {
+               id := stackID(e.base.args[i-1])
+               _, ok := e.table.stacks.get(id)
+               if !ok {
+                       return fmt.Errorf("found invalid stack ID %d for event %s", id, spec.Name)
+               }
+       }
+       // N.B. Strings referenced by stack frames are validated
+       // early on, when the stacks themselves are first read in.
+
+       // Check strings.
+       for _, i := range spec.StringIDs {
+               id := stringID(e.base.args[i-1])
+               _, ok := e.table.strings.get(id)
+               if !ok {
+                       return fmt.Errorf("found invalid string ID %d for event %s", id, spec.Name)
+               }
+       }
+       return nil
+}
+
+func syncEvent(table *evTable, ts Time) Event {
+       return Event{
+               table: table,
+               ctx: schedCtx{
+                       G: NoGoroutine,
+                       P: NoProc,
+                       M: NoThread,
+               },
+               base: baseEvent{
+                       typ:  evSync,
+                       time: ts,
+               },
+       }
+}
diff --git a/src/internal/trace/v2/event/event.go b/src/internal/trace/v2/event/event.go
new file mode 100644 (file)
index 0000000..111dde6
--- /dev/null
@@ -0,0 +1,89 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package event
+
+// Type is the common in-memory representation of the low-level event type.
+type Type uint8
+
+// Spec is a specification for a trace event. It contains sufficient information
+// to perform basic parsing of any trace event for any version of Go.
+type Spec struct {
+       // Name is the human-readable name of the trace event.
+       Name string
+
+       // Args contains the names of each trace event's argument.
+       // Its length determines the number of arguments an event has.
+       //
+       // Argument names follow a certain structure and this structure
+       // is relied on by the testing framework to type-check arguments.
+       // The structure is:
+       //
+       //     (?P<name>[A-Za-z]+_)?(?P<type>[A-Za-z]+)
+       //
+       // In sum, it's an optional name followed by a type. If the name
+       // is present, it is separated from the type with an underscore.
+       // The valid argument types and the Go types they map to are listed
+       // in the ArgTypes variable.
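+       //
+       // For example, "g" denotes a trace.GoID argument, while
+       // "reason_string" denotes a string ID argument named "reason".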
+       Args []string
+
+       // StartEv indicates the event type of the corresponding "start"
+       // event, if this event is an "end," for a pair of events that
+       // represent a time range.
+       StartEv Type
+
+       // IsTimedEvent indicates whether this is an event that both
+       // appears in the main event stream and is surfaced to the
+       // trace reader.
+       //
+       // Events that are not "timed" are considered "structural"
+       // since they either need significant reinterpretation or
+       // otherwise aren't actually surfaced by the trace reader.
+       IsTimedEvent bool
+
+       // HasData is true if the event has a trailer consisting of a
+       // varint length followed by unencoded bytes of some data.
+       HasData bool
+
+       // StringIDs indicates which of the arguments are string IDs.
+       StringIDs []int
+
+       // StackIDs indicates which of the arguments are stack IDs.
+       //
+       // The list is not sorted. The first index always refers to
+       // the main stack for the current execution context of the event.
+       StackIDs []int
+
+       // IsStack indicates that the event represents a complete
+       // stack trace. Specifically, it means that after the arguments
+       // there's a varint length, followed by 4*length varints. Each
+       // group of 4 represents the PC, file ID, func ID, and line number
+       // in that order.
+       IsStack bool
+}
+
+// ArgTypes is a list of valid argument types for use in Args.
+//
+// See the documentation of Args for more details.
+var ArgTypes = [...]string{
+       "seq",     // sequence number
+       "pstatus", // P status
+       "gstatus", // G status
+       "g",       // trace.GoID
+       "m",       // trace.ThreadID
+       "p",       // trace.ProcID
+       "string",  // string ID
+       "stack",   // stack ID
+       "value",   // uint64
+       "task",    // trace.TaskID
+}
+
+// Names is a helper that produces a mapping of event names to event types.
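+// For instance (illustrative):
+//
+//	nameToType := Names(specs)
+//	goStart := nameToType["GoStart"]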
+func Names(specs []Spec) map[string]Type {
+       nameToType := make(map[string]Type)
+       for i, spec := range specs {
+               nameToType[spec.Name] = Type(byte(i))
+       }
+       return nameToType
+}
diff --git a/src/internal/trace/v2/event/go122/event.go b/src/internal/trace/v2/event/go122/event.go
new file mode 100644 (file)
index 0000000..8a106c8
--- /dev/null
@@ -0,0 +1,388 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package go122
+
+import (
+       "fmt"
+       "internal/trace/v2/event"
+)
+
+const (
+       EvNone event.Type = iota // unused
+
+       // Structural events.
+       EvEventBatch // start of per-M batch of events [generation, M ID, timestamp, batch length]
+       EvStacks     // start of a section of the stack table [...EvStack]
+       EvStack      // stack table entry [ID, ...{PC, func string ID, file string ID, line #}]
+       EvStrings    // start of a section of the string dictionary [...EvString]
+       EvString     // string dictionary entry [ID, length, string]
+       EvCPUSamples // start of a section of CPU samples [...EvCPUSample]
+       EvCPUSample  // CPU profiling sample [timestamp, M ID, P ID, goroutine ID, stack ID]
+       EvFrequency  // timestamp units per sec [freq]
+
+       // Procs.
+       EvProcsChange // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack ID]
+       EvProcStart   // start of P [timestamp, P ID, P seq]
+       EvProcStop    // stop of P [timestamp]
+       EvProcSteal   // P was stolen [timestamp, P ID, P seq, M ID]
+       EvProcStatus  // P status at the start of a generation [timestamp, P ID, status]
+
+       // Goroutines.
+       EvGoCreate            // goroutine creation [timestamp, new goroutine ID, new stack ID, stack ID]
+       EvGoCreateSyscall     // goroutine appears in syscall (cgo callback) [timestamp, new goroutine ID]
+       EvGoStart             // goroutine starts running [timestamp, goroutine ID, goroutine seq]
+       EvGoDestroy           // goroutine ends [timestamp]
+       EvGoDestroySyscall    // goroutine ends in syscall (cgo callback) [timestamp]
+       EvGoStop              // goroutine yields its time, but is runnable [timestamp, reason, stack ID]
+       EvGoBlock             // goroutine blocks [timestamp, reason, stack ID]
+       EvGoUnblock           // goroutine is unblocked [timestamp, goroutine ID, goroutine seq, stack ID]
+       EvGoSyscallBegin      // syscall enter [timestamp, stack ID]
+       EvGoSyscallEnd        // syscall exit [timestamp]
+       EvGoSyscallEndBlocked // syscall exit and it blocked at some point [timestamp]
+       EvGoStatus            // goroutine status at the start of a generation [timestamp, goroutine ID, status]
+
+       // STW.
+       EvSTWBegin // STW start [timestamp, kind]
+       EvSTWEnd   // STW done [timestamp]
+
+       // GC events.
+       EvGCActive           // GC active [timestamp, seq]
+       EvGCBegin            // GC start [timestamp, seq, stack ID]
+       EvGCEnd              // GC done [timestamp, seq]
+       EvGCSweepActive      // GC sweep active [timestamp, P ID]
+       EvGCSweepBegin       // GC sweep start [timestamp, stack ID]
+       EvGCSweepEnd         // GC sweep done [timestamp, swept bytes, reclaimed bytes]
+       EvGCMarkAssistActive // GC mark assist active [timestamp, goroutine ID]
+       EvGCMarkAssistBegin  // GC mark assist start [timestamp, stack ID]
+       EvGCMarkAssistEnd    // GC mark assist done [timestamp]
+       EvHeapAlloc          // gcController.heapLive change [timestamp, heap alloc in bytes]
+       EvHeapGoal           // gcController.heapGoal() change [timestamp, heap goal in bytes]
+
+       // Annotations.
+       EvGoLabel         // apply string label to current running goroutine [timestamp, label string ID]
+       EvUserTaskBegin   // trace.NewTask [timestamp, internal task ID, internal parent task ID, name string ID, stack ID]
+       EvUserTaskEnd     // end of a task [timestamp, internal task ID, stack ID]
+       EvUserRegionBegin // trace.{Start,With}Region [timestamp, internal task ID, name string ID, stack ID]
+       EvUserRegionEnd   // trace.{End,With}Region [timestamp, internal task ID, name string ID, stack ID]
+       EvUserLog         // trace.Log [timestamp, internal task ID, key string ID, value string ID, stack ID]
+)
+
+// EventString returns the name of a Go 1.22 event.
+func EventString(typ event.Type) string {
+       if int(typ) < len(specs) {
+               return specs[typ].Name
+       }
+       return fmt.Sprintf("Invalid(%d)", typ)
+}
+
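+// Specs returns the set of Go 1.22 event specifications, indexed by event type.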
+func Specs() []event.Spec {
+       return specs[:]
+}
+
+var specs = [...]event.Spec{
+       // "Structural" Events.
+       EvEventBatch: event.Spec{
+               Name: "EventBatch",
+               Args: []string{"gen", "m", "time", "size"},
+       },
+       EvStacks: event.Spec{
+               Name: "Stacks",
+       },
+       EvStack: event.Spec{
+               Name:    "Stack",
+               Args:    []string{"id", "nframes"},
+               IsStack: true,
+       },
+       EvStrings: event.Spec{
+               Name: "Strings",
+       },
+       EvString: event.Spec{
+               Name:    "String",
+               Args:    []string{"id"},
+               HasData: true,
+       },
+       EvCPUSamples: event.Spec{
+               Name: "CPUSamples",
+       },
+       EvCPUSample: event.Spec{
+               Name: "CPUSample",
+               Args: []string{"time", "m", "p", "g", "stack"},
+               // N.B. There's clearly a timestamp here, but these Events
+               // are special in that they don't appear in the regular
+               // M streams.
+       },
+       EvFrequency: event.Spec{
+               Name: "Frequency",
+               Args: []string{"freq"},
+       },
+
+       // "Timed" Events.
+       EvProcsChange: event.Spec{
+               Name:         "ProcsChange",
+               Args:         []string{"dt", "procs_value", "stack"},
+               IsTimedEvent: true,
+               StackIDs:     []int{2},
+       },
+       EvProcStart: event.Spec{
+               Name:         "ProcStart",
+               Args:         []string{"dt", "p", "p_seq"},
+               IsTimedEvent: true,
+       },
+       EvProcStop: event.Spec{
+               Name:         "ProcStop",
+               Args:         []string{"dt"},
+               IsTimedEvent: true,
+       },
+       EvProcSteal: event.Spec{
+               Name:         "ProcSteal",
+               Args:         []string{"dt", "p", "p_seq", "m"},
+               IsTimedEvent: true,
+       },
+       EvProcStatus: event.Spec{
+               Name:         "ProcStatus",
+               Args:         []string{"dt", "p", "pstatus"},
+               IsTimedEvent: true,
+       },
+       EvGoCreate: event.Spec{
+               Name:         "GoCreate",
+               Args:         []string{"dt", "new_g", "new_stack", "stack"},
+               IsTimedEvent: true,
+               StackIDs:     []int{3, 2},
+       },
+       EvGoCreateSyscall: event.Spec{
+               Name:         "GoCreateSyscall",
+               Args:         []string{"dt", "new_g"},
+               IsTimedEvent: true,
+       },
+       EvGoStart: event.Spec{
+               Name:         "GoStart",
+               Args:         []string{"dt", "g", "g_seq"},
+               IsTimedEvent: true,
+       },
+       EvGoDestroy: event.Spec{
+               Name:         "GoDestroy",
+               Args:         []string{"dt"},
+               IsTimedEvent: true,
+       },
+       EvGoDestroySyscall: event.Spec{
+               Name:         "GoDestroySyscall",
+               Args:         []string{"dt"},
+               IsTimedEvent: true,
+       },
+       EvGoStop: event.Spec{
+               Name:         "GoStop",
+               Args:         []string{"dt", "reason_string", "stack"},
+               IsTimedEvent: true,
+               StackIDs:     []int{2},
+               StringIDs:    []int{1},
+       },
+       EvGoBlock: event.Spec{
+               Name:         "GoBlock",
+               Args:         []string{"dt", "reason_string", "stack"},
+               IsTimedEvent: true,
+               StackIDs:     []int{2},
+               StringIDs:    []int{1},
+       },
+       EvGoUnblock: event.Spec{
+               Name:         "GoUnblock",
+               Args:         []string{"dt", "g", "g_seq", "stack"},
+               IsTimedEvent: true,
+               StackIDs:     []int{3},
+       },
+       EvGoSyscallBegin: event.Spec{
+               Name:         "GoSyscallBegin",
+               Args:         []string{"dt", "stack"},
+               IsTimedEvent: true,
+               StackIDs:     []int{1},
+       },
+       EvGoSyscallEnd: event.Spec{
+               Name:         "GoSyscallEnd",
+               Args:         []string{"dt"},
+               StartEv:      EvGoSyscallBegin,
+               IsTimedEvent: true,
+       },
+       EvGoSyscallEndBlocked: event.Spec{
+               Name:         "GoSyscallEndBlocked",
+               Args:         []string{"dt"},
+               StartEv:      EvGoSyscallBegin,
+               IsTimedEvent: true,
+       },
+       EvGoStatus: event.Spec{
+               Name:         "GoStatus",
+               Args:         []string{"dt", "g", "m", "gstatus"},
+               IsTimedEvent: true,
+       },
+       EvSTWBegin: event.Spec{
+               Name:         "STWBegin",
+               Args:         []string{"dt", "kind_string", "stack"},
+               IsTimedEvent: true,
+               StackIDs:     []int{2},
+               StringIDs:    []int{1},
+       },
+       EvSTWEnd: event.Spec{
+               Name:         "STWEnd",
+               Args:         []string{"dt"},
+               StartEv:      EvSTWBegin,
+               IsTimedEvent: true,
+       },
+       EvGCActive: event.Spec{
+               Name:         "GCActive",
+               Args:         []string{"dt", "gc_seq"},
+               IsTimedEvent: true,
+               StartEv:      EvGCBegin,
+       },
+       EvGCBegin: event.Spec{
+               Name:         "GCBegin",
+               Args:         []string{"dt", "gc_seq", "stack"},
+               IsTimedEvent: true,
+               StackIDs:     []int{2},
+       },
+       EvGCEnd: event.Spec{
+               Name:         "GCEnd",
+               Args:         []string{"dt", "gc_seq"},
+               StartEv:      EvGCBegin,
+               IsTimedEvent: true,
+       },
+       EvGCSweepActive: event.Spec{
+               Name:         "GCSweepActive",
+               Args:         []string{"dt", "p"},
+               StartEv:      EvGCSweepBegin,
+               IsTimedEvent: true,
+       },
+       EvGCSweepBegin: event.Spec{
+               Name:         "GCSweepBegin",
+               Args:         []string{"dt", "stack"},
+               IsTimedEvent: true,
+               StackIDs:     []int{1},
+       },
+       EvGCSweepEnd: event.Spec{
+               Name:         "GCSweepEnd",
+               Args:         []string{"dt", "swept_value", "reclaimed_value"},
+               StartEv:      EvGCSweepBegin,
+               IsTimedEvent: true,
+       },
+       EvGCMarkAssistActive: event.Spec{
+               Name:         "GCMarkAssistActive",
+               Args:         []string{"dt", "g"},
+               StartEv:      EvGCMarkAssistBegin,
+               IsTimedEvent: true,
+       },
+       EvGCMarkAssistBegin: event.Spec{
+               Name:         "GCMarkAssistBegin",
+               Args:         []string{"dt", "stack"},
+               IsTimedEvent: true,
+               StackIDs:     []int{1},
+       },
+       EvGCMarkAssistEnd: event.Spec{
+               Name:         "GCMarkAssistEnd",
+               Args:         []string{"dt"},
+               StartEv:      EvGCMarkAssistBegin,
+               IsTimedEvent: true,
+       },
+       EvHeapAlloc: event.Spec{
+               Name:         "HeapAlloc",
+               Args:         []string{"dt", "heapalloc_value"},
+               IsTimedEvent: true,
+       },
+       EvHeapGoal: event.Spec{
+               Name:         "HeapGoal",
+               Args:         []string{"dt", "heapgoal_value"},
+               IsTimedEvent: true,
+       },
+       EvGoLabel: event.Spec{
+               Name:         "GoLabel",
+               Args:         []string{"dt", "label_string"},
+               IsTimedEvent: true,
+               StringIDs:    []int{1},
+       },
+       EvUserTaskBegin: event.Spec{
+               Name:         "UserTaskBegin",
+               Args:         []string{"dt", "task", "parent_task", "name_string", "stack"},
+               IsTimedEvent: true,
+               StackIDs:     []int{4},
+               StringIDs:    []int{3},
+       },
+       EvUserTaskEnd: event.Spec{
+               Name:         "UserTaskEnd",
+               Args:         []string{"dt", "task", "stack"},
+               IsTimedEvent: true,
+               StackIDs:     []int{2},
+       },
+       EvUserRegionBegin: event.Spec{
+               Name:         "UserRegionBegin",
+               Args:         []string{"dt", "task", "name_string", "stack"},
+               IsTimedEvent: true,
+               StackIDs:     []int{3},
+               StringIDs:    []int{2},
+       },
+       EvUserRegionEnd: event.Spec{
+               Name:         "UserRegionEnd",
+               Args:         []string{"dt", "task", "name_string", "stack"},
+               StartEv:      EvUserRegionBegin,
+               IsTimedEvent: true,
+               StackIDs:     []int{3},
+               StringIDs:    []int{2},
+       },
+       EvUserLog: event.Spec{
+               Name:         "UserLog",
+               Args:         []string{"dt", "task", "key_string", "value_string", "stack"},
+               IsTimedEvent: true,
+               StackIDs:     []int{4},
+               StringIDs:    []int{2, 3},
+       },
+}
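+
+// As a reading aid (illustrative), the GoStop spec above declares the
+// arguments "dt", "reason_string", and "stack"; StringIDs: []int{1}
+// marks the reason argument as a string ID and StackIDs: []int{2}
+// marks the stack argument, matching the indices used when validating
+// table lookups.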
+
+type GoStatus uint8
+
+const (
+       GoBad GoStatus = iota
+       GoRunnable
+       GoRunning
+       GoSyscall
+       GoWaiting
+)
+
+func (s GoStatus) String() string {
+       switch s {
+       case GoRunnable:
+               return "Runnable"
+       case GoRunning:
+               return "Running"
+       case GoSyscall:
+               return "Syscall"
+       case GoWaiting:
+               return "Waiting"
+       }
+       return "Bad"
+}
+
+type ProcStatus uint8
+
+const (
+       ProcBad ProcStatus = iota
+       ProcRunning
+       ProcIdle
+       ProcSyscall
+       ProcSyscallAbandoned
+)
+
+func (s ProcStatus) String() string {
+       switch s {
+       case ProcRunning:
+               return "Running"
+       case ProcIdle:
+               return "Idle"
+       case ProcSyscall:
+               return "Syscall"
+       }
+       return "Bad"
+}
+
+const (
+       // Various format-specific constants.
+       MaxBatchSize      = 64 << 10
+       MaxFramesPerStack = 128
+       MaxStringSize     = 1 << 10
+)
diff --git a/src/internal/trace/v2/event/requirements.go b/src/internal/trace/v2/event/requirements.go
new file mode 100644 (file)
index 0000000..c5adf2e
--- /dev/null
@@ -0,0 +1,26 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package event
+
+// SchedReqs is a set of constraints on what the scheduling
+// context must look like.
+type SchedReqs struct {
+       Thread    Constraint
+       Proc      Constraint
+       Goroutine Constraint
+}
+
+// Constraint represents a presence requirement.
+type Constraint uint8
+
+const (
+       MustNotHave Constraint = iota
+       MayHave
+       MustHave
+)
+
+// UserGoReqs is a common requirement among events that are running
+// or are close to running user code.
+var UserGoReqs = SchedReqs{Thread: MustHave, Proc: MustHave, Goroutine: MustHave}
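+
+// For example (illustrative, not part of this change), an event that
+// always has an M but may lack a P or G could declare:
+//
+//	var mOnlyReqs = SchedReqs{Thread: MustHave, Proc: MayHave, Goroutine: MayHave}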
diff --git a/src/internal/trace/v2/event_test.go b/src/internal/trace/v2/event_test.go
new file mode 100644 (file)
index 0000000..c81a451
--- /dev/null
@@ -0,0 +1,43 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import "testing"
+
+func TestPanicEvent(t *testing.T) {
+       // Use a sync event for this because it doesn't have any extra metadata.
+       ev := syncEvent(nil, 0)
+
+       mustPanic(t, func() {
+               _ = ev.Range()
+       })
+       mustPanic(t, func() {
+               _ = ev.Metric()
+       })
+       mustPanic(t, func() {
+               _ = ev.Log()
+       })
+       mustPanic(t, func() {
+               _ = ev.Task()
+       })
+       mustPanic(t, func() {
+               _ = ev.Region()
+       })
+       mustPanic(t, func() {
+               _ = ev.Label()
+       })
+       mustPanic(t, func() {
+               _ = ev.RangeAttributes()
+       })
+}
+
+func mustPanic(t *testing.T, f func()) {
+       defer func() {
+               if r := recover(); r == nil {
+                       t.Fatal("failed to panic")
+               }
+       }()
+       f()
+}
diff --git a/src/internal/trace/v2/generation.go b/src/internal/trace/v2/generation.go
new file mode 100644 (file)
index 0000000..b430515
--- /dev/null
@@ -0,0 +1,403 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+       "bufio"
+       "bytes"
+       "cmp"
+       "encoding/binary"
+       "fmt"
+       "io"
+       "slices"
+       "strings"
+
+       "internal/trace/v2/event"
+       "internal/trace/v2/event/go122"
+)
+
+// generation contains all the trace data for a single
+// trace generation. It is purely data: it does not
+// track any parse state nor does it contain a cursor
+// into the generation.
+type generation struct {
+       gen        uint64
+       batches    map[ThreadID][]batch
+       cpuSamples []cpuSample
+       *evTable
+}
+
+// spilledBatch represents a batch that was read out for the next generation,
+// while reading the previous one. It's passed on when parsing the next
+// generation.
+type spilledBatch struct {
+       gen uint64
+       *batch
+}
+
+// readGeneration buffers and decodes the structural elements of a trace generation
+// out of r. spill is the first batch of the new generation (already buffered and
+// parsed from reading the last generation). Returns the generation and the first
+// batch read of the next generation, if any.
+func readGeneration(r *bufio.Reader, spill *spilledBatch) (*generation, *spilledBatch, error) {
+       g := &generation{
+               evTable: new(evTable),
+               batches: make(map[ThreadID][]batch),
+       }
+       // Process the spilled batch.
+       if spill != nil {
+               g.gen = spill.gen
+               if err := processBatch(g, *spill.batch); err != nil {
+                       return nil, nil, err
+               }
+               spill = nil
+       }
+       // Read batches one at a time until we either hit EOF or
+       // the next generation.
+       for {
+               b, gen, err := readBatch(r)
+               if err == io.EOF {
+                       break
+               }
+               if err != nil {
+                       return nil, nil, err
+               }
+               if gen == 0 {
+                       // 0 is a sentinel used by the runtime, so we'll never see it.
+                       return nil, nil, fmt.Errorf("invalid generation number %d", gen)
+               }
+               if g.gen == 0 {
+                       // Initialize gen.
+                       g.gen = gen
+               }
+               if gen == g.gen+1 { // TODO: advance this the same way the runtime does.
+                       spill = &spilledBatch{gen: gen, batch: &b}
+                       break
+               }
+               if gen != g.gen {
+                       // N.B. Fail as fast as possible if we see this. At first it
+                       // may seem prudent to be fault-tolerant and assume we have a
+                       // complete generation, parsing and returning that first. However,
+                       // if the batches are mixed across generations then it's likely
+                       // we won't be able to parse this generation correctly at all.
+                       // Rather than return a cryptic error in that case, indicate the
+                       // problem as soon as we see it.
+                       return nil, nil, fmt.Errorf("generations out of order")
+               }
+               if err := processBatch(g, b); err != nil {
+                       return nil, nil, err
+               }
+       }
+
+       // Check some invariants.
+       if g.freq == 0 {
+               return nil, nil, fmt.Errorf("no frequency event found")
+       }
+       for _, batches := range g.batches {
+               sorted := slices.IsSortedFunc(batches, func(a, b batch) int {
+                       return cmp.Compare(a.time, b.time)
+               })
+               if !sorted {
+                       // TODO(mknyszek): Consider just sorting here.
+                       return nil, nil, fmt.Errorf("per-M streams are out-of-order")
+               }
+       }
+
+       // Compactify stacks and strings for better lookup performance later.
+       g.stacks.compactify()
+       g.strings.compactify()
+
+       // Validate stacks.
+       if err := validateStackStrings(&g.stacks, &g.strings); err != nil {
+               return nil, nil, err
+       }
+
+       // Fix up the CPU sample timestamps, now that we have freq.
+       for i := range g.cpuSamples {
+               s := &g.cpuSamples[i]
+               s.time = g.freq.mul(timestamp(s.time))
+       }
+       // Sort the CPU samples.
+       slices.SortFunc(g.cpuSamples, func(a, b cpuSample) int {
+               return cmp.Compare(a.time, b.time)
+       })
+       return g, spill, nil
+}
+
+// processBatch adds the batch to the generation.
+func processBatch(g *generation, b batch) error {
+       switch {
+       case b.isStringsBatch():
+               if err := addStrings(&g.strings, b); err != nil {
+                       return err
+               }
+       case b.isStacksBatch():
+               if err := addStacks(&g.stacks, b); err != nil {
+                       return err
+               }
+       case b.isCPUSamplesBatch():
+               samples, err := addCPUSamples(g.cpuSamples, b)
+               if err != nil {
+                       return err
+               }
+               g.cpuSamples = samples
+       case b.isFreqBatch():
+               freq, err := parseFreq(b)
+               if err != nil {
+                       return err
+               }
+               if g.freq != 0 {
+                       return fmt.Errorf("found multiple frequency events")
+               }
+               g.freq = freq
+       default:
+               g.batches[b.m] = append(g.batches[b.m], b)
+       }
+       return nil
+}
+
+// validateStackStrings makes sure all the string references in
+// the stack table are present in the string table.
+func validateStackStrings(stacks *dataTable[stackID, stack], strings *dataTable[stringID, string]) error {
+       var err error
+       stacks.forEach(func(id stackID, stk stack) bool {
+               for _, frame := range stk.frames {
+                       _, ok := strings.get(frame.funcID)
+                       if !ok {
+                               err = fmt.Errorf("found invalid func string ID %d for stack %d", frame.funcID, id)
+                               return false
+                       }
+                       _, ok = strings.get(frame.fileID)
+                       if !ok {
+                               err = fmt.Errorf("found invalid file string ID %d for stack %d", frame.fileID, id)
+                               return false
+                       }
+               }
+               return true
+       })
+       return err
+}
+
+// addStrings takes a batch whose first byte is an EvStrings event
+// (indicating that the batch contains only strings) and adds each
+// string contained therein to the provided strings map.
+func addStrings(stringTable *dataTable[stringID, string], b batch) error {
+       if !b.isStringsBatch() {
+               return fmt.Errorf("internal error: addStrings called on non-string batch")
+       }
+       r := bytes.NewReader(b.data)
+       hdr, err := r.ReadByte() // Consume the EvStrings byte.
+       if err != nil || event.Type(hdr) != go122.EvStrings {
+               return fmt.Errorf("missing strings batch header")
+       }
+
+       var sb strings.Builder
+       for r.Len() != 0 {
+               // Read the header.
+               ev, err := r.ReadByte()
+               if err != nil {
+                       return err
+               }
+               if event.Type(ev) != go122.EvString {
+                       return fmt.Errorf("expected string event, got %d", ev)
+               }
+
+               // Read the string's ID.
+               id, err := binary.ReadUvarint(r)
+               if err != nil {
+                       return err
+               }
+
+               // Read the string's length.
+               len, err := binary.ReadUvarint(r)
+               if err != nil {
+                       return err
+               }
+               if len > go122.MaxStringSize {
+                       return fmt.Errorf("invalid string size %d, maximum is %d", len, go122.MaxStringSize)
+               }
+
+               // Copy out the string.
+               n, err := io.CopyN(&sb, r, int64(len))
+               if n != int64(len) {
+                       return fmt.Errorf("failed to read full string: read %d but wanted %d", n, len)
+               }
+               if err != nil {
+                       return fmt.Errorf("copying string data: %w", err)
+               }
+
+               // Add the string to the map.
+               s := sb.String()
+               sb.Reset()
+               if err := stringTable.insert(stringID(id), s); err != nil {
+                       return err
+               }
+       }
+       return nil
+}
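+
+// For illustration (not normative), a single string-table entry for
+// "hello" with ID 3 is laid out as the EvString byte, a varint ID, a
+// varint length, and then the raw bytes:
+//
+//	[EvString] [0x03] [0x05] ['h' 'e' 'l' 'l' 'o']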
+
+// addStacks takes a batch whose first byte is an EvStacks event
+// (indicating that the batch contains only stacks) and adds each
+// stack contained therein to the provided stacks map.
+func addStacks(stackTable *dataTable[stackID, stack], b batch) error {
+       if !b.isStacksBatch() {
+               return fmt.Errorf("internal error: addStacks called on non-stacks batch")
+       }
+       r := bytes.NewReader(b.data)
+       hdr, err := r.ReadByte() // Consume the EvStacks byte.
+       if err != nil || event.Type(hdr) != go122.EvStacks {
+               return fmt.Errorf("missing stacks batch header")
+       }
+
+       for r.Len() != 0 {
+               // Read the header.
+               ev, err := r.ReadByte()
+               if err != nil {
+                       return err
+               }
+               if event.Type(ev) != go122.EvStack {
+                       return fmt.Errorf("expected stack event, got %d", ev)
+               }
+
+               // Read the stack's ID.
+               id, err := binary.ReadUvarint(r)
+               if err != nil {
+                       return err
+               }
+
+               // Read how many frames are in each stack.
+               nFrames, err := binary.ReadUvarint(r)
+               if err != nil {
+                       return err
+               }
+               if nFrames > go122.MaxFramesPerStack {
+                       return fmt.Errorf("invalid stack size %d, maximum is %d", nFrames, go122.MaxFramesPerStack)
+               }
+
+               // Each frame consists of 4 fields: pc, funcID (string), fileID (string), line.
+               frames := make([]frame, 0, nFrames)
+               for i := uint64(0); i < nFrames; i++ {
+                       // Read the frame data.
+                       pc, err := binary.ReadUvarint(r)
+                       if err != nil {
+                               return fmt.Errorf("reading frame %d's PC for stack %d: %w", i+1, id, err)
+                       }
+                       funcID, err := binary.ReadUvarint(r)
+                       if err != nil {
+                               return fmt.Errorf("reading frame %d's funcID for stack %d: %w", i+1, id, err)
+                       }
+                       fileID, err := binary.ReadUvarint(r)
+                       if err != nil {
+                               return fmt.Errorf("reading frame %d's fileID for stack %d: %w", i+1, id, err)
+                       }
+                       line, err := binary.ReadUvarint(r)
+                       if err != nil {
+                               return fmt.Errorf("reading frame %d's line for stack %d: %w", i+1, id, err)
+                       }
+                       frames = append(frames, frame{
+                               pc:     pc,
+                               funcID: stringID(funcID),
+                               fileID: stringID(fileID),
+                               line:   line,
+                       })
+               }
+
+               // Add the stack to the map.
+               if err := stackTable.insert(stackID(id), stack{frames: frames}); err != nil {
+                       return err
+               }
+       }
+       return nil
+}
+
+// addCPUSamples takes a batch whose first byte is an EvCPUSamples event
+// (indicating that the batch contains only CPU samples) and adds each
+// sample contained therein to the provided samples list.
+func addCPUSamples(samples []cpuSample, b batch) ([]cpuSample, error) {
+       if !b.isCPUSamplesBatch() {
+               return nil, fmt.Errorf("internal error: addCPUSamples called on non-CPU-samples batch")
+       }
+       r := bytes.NewReader(b.data)
+       hdr, err := r.ReadByte() // Consume the EvCPUSamples byte.
+       if err != nil || event.Type(hdr) != go122.EvCPUSamples {
+               return nil, fmt.Errorf("missing CPU samples batch header")
+       }
+
+       for r.Len() != 0 {
+               // Read the header.
+               ev, err := r.ReadByte()
+               if err != nil {
+                       return nil, err
+               }
+               if event.Type(ev) != go122.EvCPUSample {
+                       return nil, fmt.Errorf("expected CPU sample event, got %d", ev)
+               }
+
+               // Read the sample's timestamp.
+               ts, err := binary.ReadUvarint(r)
+               if err != nil {
+                       return nil, err
+               }
+
+               // Read the sample's M.
+               m, err := binary.ReadUvarint(r)
+               if err != nil {
+                       return nil, err
+               }
+               mid := ThreadID(m)
+
+               // Read the sample's P.
+               p, err := binary.ReadUvarint(r)
+               if err != nil {
+                       return nil, err
+               }
+               pid := ProcID(p)
+
+               // Read the sample's G.
+               g, err := binary.ReadUvarint(r)
+               if err != nil {
+                       return nil, err
+               }
+               goid := GoID(g)
+               if g == 0 {
+                       goid = NoGoroutine
+               }
+
+               // Read the sample's stack.
+               s, err := binary.ReadUvarint(r)
+               if err != nil {
+                       return nil, err
+               }
+
+               // Add the sample to the slice.
+               samples = append(samples, cpuSample{
+                       schedCtx: schedCtx{
+                               M: mid,
+                               P: pid,
+                               G: goid,
+                       },
+                       time:  Time(ts), // N.B. this is really a "timestamp," not a Time.
+                       stack: stackID(s),
+               })
+       }
+       return samples, nil
+}
+
+// parseFreq parses out a lone EvFrequency from a batch.
+func parseFreq(b batch) (frequency, error) {
+       if !b.isFreqBatch() {
+               return 0, fmt.Errorf("internal error: parseFreq called on non-frequency batch")
+       }
+       r := bytes.NewReader(b.data)
+       r.ReadByte() // Consume the EvFrequency byte.
+
+       // Read the frequency. It'll come out as timestamp units per second.
+       f, err := binary.ReadUvarint(r)
+       if err != nil {
+               return 0, err
+       }
+       // Convert to nanoseconds per timestamp unit.
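+       // For example (illustrative), f = 15,625,000 units per second
+       // yields exactly 64 nanoseconds per timestamp unit.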
+       return frequency(1.0 / (float64(f) / 1e9)), nil
+}
diff --git a/src/internal/trace/v2/internal/testgen/go122/trace.go b/src/internal/trace/v2/internal/testgen/go122/trace.go
new file mode 100644 (file)
index 0000000..f912564
--- /dev/null
@@ -0,0 +1,385 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testkit
+
+import (
+       "bytes"
+       "encoding/binary"
+       "fmt"
+       "os"
+       "regexp"
+       "strings"
+
+       "internal/trace/v2"
+       "internal/trace/v2/event"
+       "internal/trace/v2/event/go122"
+       "internal/trace/v2/raw"
+       "internal/trace/v2/version"
+       "internal/txtar"
+)
+
+func Main(f func(*Trace)) {
+       // Create an output file.
+       out, err := os.Create(os.Args[1])
+       if err != nil {
+               panic(err.Error())
+       }
+       defer out.Close()
+
+       // Create a new trace.
+       trace := NewTrace()
+
+       // Call the generator.
+       f(trace)
+
+       // Write out the generator's state.
+       if _, err := out.Write(trace.Generate()); err != nil {
+               panic(err.Error())
+       }
+}
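+
+// A minimal generator built on Main might look like the following
+// sketch (illustrative only; the import alias and event choice are
+// assumptions, see testdata/generators for real examples):
+//
+//	func main() {
+//		testkit.Main(func(t *testkit.Trace) {
+//			t.ExpectSuccess()
+//			g := t.Generation(1)
+//			b := g.Batch(trace.ThreadID(0), 0)
+//			b.Event("ProcStatus", trace.ProcID(0), go122.ProcRunning)
+//		})
+//	}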
+
+// Trace represents an execution trace for testing.
+//
+// It does a little bit of work to ensure that the produced trace is valid,
+// just for convenience. It mainly tracks batches and batch sizes (so they're
+// trivially correct), tracks strings and stacks, and makes sure emitted string
+// and stack batches are valid. That last part can be controlled by a few options.
+//
+// Otherwise, it performs no validation on the trace at all.
+type Trace struct {
+       // Trace data state.
+       ver    version.Version
+       names  map[string]event.Type
+       specs  []event.Spec
+       events []raw.Event
+       gens   []*Generation
+
+       // Expectation state.
+       bad      bool
+       badMatch *regexp.Regexp
+}
+
+// NewTrace creates a new trace.
+func NewTrace() *Trace {
+       ver := version.Go122
+       return &Trace{
+               names: event.Names(ver.Specs()),
+               specs: ver.Specs(),
+       }
+}
+
+// ExpectFailure writes down that the trace should be broken. The caller
+// must provide a pattern matching the expected error produced by the parser.
+func (t *Trace) ExpectFailure(pattern string) {
+       t.bad = true
+       t.badMatch = regexp.MustCompile(pattern)
+}
+
+// ExpectSuccess writes down that the trace should successfully parse.
+func (t *Trace) ExpectSuccess() {
+       t.bad = false
+}
+
+// RawEvent emits an event into the trace. typ must be one of the
+// event types in the Specs() result for the version that was passed
+// to this trace.
+func (t *Trace) RawEvent(typ event.Type, data []byte, args ...uint64) {
+       t.events = append(t.events, t.createEvent(typ, data, args...))
+}
+
+// Generation creates a new trace generation.
+//
+// This provides more structure than RawEvent, making it easier to
+// create complex traces that are mostly or completely correct.
+func (t *Trace) Generation(gen uint64) *Generation {
+       g := &Generation{
+               trace:   t,
+               gen:     gen,
+               strings: make(map[string]uint64),
+               stacks:  make(map[stack]uint64),
+       }
+       t.gens = append(t.gens, g)
+       return g
+}
+
+// Generate creates a test file for the trace.
+func (t *Trace) Generate() []byte {
+       // Trace file contents.
+       var buf bytes.Buffer
+       tw, err := raw.NewTextWriter(&buf, version.Go122)
+       if err != nil {
+               panic(err.Error())
+       }
+
+       // Write raw top-level events.
+       for _, e := range t.events {
+               tw.WriteEvent(e)
+       }
+
+       // Write generations.
+       for _, g := range t.gens {
+               g.writeEventsTo(tw)
+       }
+
+       // Expectation file contents.
+       expect := []byte("SUCCESS\n")
+       if t.bad {
+               expect = []byte(fmt.Sprintf("FAILURE %q\n", t.badMatch))
+       }
+
+       // Create the test file's contents.
+       return txtar.Format(&txtar.Archive{
+               Files: []txtar.File{
+                       {Name: "expect", Data: expect},
+                       {Name: "trace", Data: buf.Bytes()},
+               },
+       })
+}
+
+func (t *Trace) createEvent(ev event.Type, data []byte, args ...uint64) raw.Event {
+       spec := t.specs[ev]
+       if ev != go122.EvStack {
+               if arity := len(spec.Args); len(args) != arity {
+                       panic(fmt.Sprintf("expected %d args for %s, got %d", arity, spec.Name, len(args)))
+               }
+       }
+       return raw.Event{
+               Version: version.Go122,
+               Ev:      ev,
+               Args:    args,
+               Data:    data,
+       }
+}
+
+type stack struct {
+       stk [32]trace.StackFrame
+       len int
+}
+
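+// NoString and NoStack are convenience values for generators; both
+// translate to ID 0 when registered with a Generation.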
+var (
+       NoString = ""
+       NoStack  = []trace.StackFrame{}
+)
+
+// Generation represents a single generation in the trace.
+type Generation struct {
+       trace   *Trace
+       gen     uint64
+       batches []*Batch
+       strings map[string]uint64
+       stacks  map[stack]uint64
+
+       // Options applied when Trace.Generate is called.
+       ignoreStringBatchSizeLimit bool
+       ignoreStackBatchSizeLimit  bool
+}
+
+// Batch starts a new event batch in the trace data.
+//
+// This is a convenience function for generating correct batches.
+func (g *Generation) Batch(thread trace.ThreadID, time Time) *Batch {
+       b := &Batch{
+               gen:       g,
+               thread:    thread,
+               timestamp: time,
+       }
+       g.batches = append(g.batches, b)
+       return b
+}
+
+// String registers a string with the trace.
+//
+// This is a convenience function for easily adding correct
+// strings to traces.
+func (g *Generation) String(s string) uint64 {
+       if len(s) == 0 {
+               return 0
+       }
+       if id, ok := g.strings[s]; ok {
+               return id
+       }
+       id := uint64(len(g.strings) + 1)
+       g.strings[s] = id
+       return id
+}
+
+// Stack registers a stack with the trace.
+//
+// This is a convenience function for easily adding correct
+// stacks to traces.
+func (g *Generation) Stack(stk []trace.StackFrame) uint64 {
+       if len(stk) == 0 {
+               return 0
+       }
+       if len(stk) > 32 {
+               panic("stack too big for test")
+       }
+       var stkc stack
+       copy(stkc.stk[:], stk)
+       stkc.len = len(stk)
+       if id, ok := g.stacks[stkc]; ok {
+               return id
+       }
+       id := uint64(len(g.stacks) + 1)
+       g.stacks[stkc] = id
+       return id
+}
+
+// writeEventsTo emits event batches in the generation to tw.
+func (g *Generation) writeEventsTo(tw *raw.TextWriter) {
+       // Write event batches for the generation.
+       for _, b := range g.batches {
+               b.writeEventsTo(tw)
+       }
+
+       // Write frequency.
+       b := g.newStructuralBatch()
+       b.RawEvent(go122.EvFrequency, nil, 15625000)
+       b.writeEventsTo(tw)
+
+       // Write stacks.
+       b = g.newStructuralBatch()
+       b.RawEvent(go122.EvStacks, nil)
+       for stk, id := range g.stacks {
+               stk := stk.stk[:stk.len]
+               args := []uint64{id}
+               for _, f := range stk {
+                       args = append(args, f.PC, g.String(f.Func), g.String(f.File), f.Line)
+               }
+               b.RawEvent(go122.EvStack, nil, args...)
+
+               // Flush the batch if necessary.
+               if !g.ignoreStackBatchSizeLimit && b.size > go122.MaxBatchSize/2 {
+                       b.writeEventsTo(tw)
+                       b = g.newStructuralBatch()
+               }
+       }
+       b.writeEventsTo(tw)
+
+       // Write strings.
+       b = g.newStructuralBatch()
+       b.RawEvent(go122.EvStrings, nil)
+       for s, id := range g.strings {
+               b.RawEvent(go122.EvString, []byte(s), id)
+
+               // Flush the batch if necessary.
+               if !g.ignoreStringBatchSizeLimit && b.size > go122.MaxBatchSize/2 {
+                       b.writeEventsTo(tw)
+                       b = g.newStructuralBatch()
+               }
+       }
+       b.writeEventsTo(tw)
+}
+
+func (g *Generation) newStructuralBatch() *Batch {
+       return &Batch{gen: g, thread: trace.NoThread}
+}
+
+// Batch represents an event batch.
+type Batch struct {
+       gen       *Generation
+       thread    trace.ThreadID
+       timestamp Time
+       size      uint64
+       events    []raw.Event
+}
+
+// Event emits an event into a batch. name must correspond to one
+// of the names in the Specs() result for the version that was passed
+// to this trace. Callers must omit the timestamp delta.
+func (b *Batch) Event(name string, args ...any) {
+       ev, ok := b.gen.trace.names[name]
+       if !ok {
+               panic(fmt.Sprintf("invalid or unknown event %s", name))
+       }
+       var uintArgs []uint64
+       argOff := 0
+       if b.gen.trace.specs[ev].IsTimedEvent {
+               uintArgs = []uint64{1}
+               argOff = 1
+       }
+       spec := b.gen.trace.specs[ev]
+       if arity := len(spec.Args) - argOff; len(args) != arity {
+               panic(fmt.Sprintf("expected %d args for %s, got %d", arity, spec.Name, len(args)))
+       }
+       for i, arg := range args {
+               uintArgs = append(uintArgs, b.uintArgFor(arg, spec.Args[i+argOff]))
+       }
+       b.RawEvent(ev, nil, uintArgs...)
+}
+
+func (b *Batch) uintArgFor(arg any, argSpec string) uint64 {
+       components := strings.SplitN(argSpec, "_", 2)
+       typStr := components[0]
+       if len(components) == 2 {
+               typStr = components[1]
+       }
+       var u uint64
+       switch typStr {
+       case "value":
+               u = arg.(uint64)
+       case "stack":
+               u = b.gen.Stack(arg.([]trace.StackFrame))
+       case "seq":
+               u = uint64(arg.(Seq))
+       case "pstatus":
+               u = uint64(arg.(go122.ProcStatus))
+       case "gstatus":
+               u = uint64(arg.(go122.GoStatus))
+       case "g":
+               u = uint64(arg.(trace.GoID))
+       case "m":
+               u = uint64(arg.(trace.ThreadID))
+       case "p":
+               u = uint64(arg.(trace.ProcID))
+       case "string":
+               u = b.gen.String(arg.(string))
+       case "task":
+               u = uint64(arg.(trace.TaskID))
+       default:
+               panic(fmt.Sprintf("unsupported arg type %q for spec %q", typStr, argSpec))
+       }
+       return u
+}
+
+// RawEvent emits an event into a batch. typ must be one of the event
+// types in the Specs() result for the version that was passed to
+// this trace.
+func (b *Batch) RawEvent(typ event.Type, data []byte, args ...uint64) {
+       ev := b.gen.trace.createEvent(typ, data, args...)
+
+       // Compute the size of the event and add it to the batch.
+       b.size += 1 // One byte for the event header.
+       var buf [binary.MaxVarintLen64]byte
+       for _, arg := range args {
+               b.size += uint64(binary.PutUvarint(buf[:], arg))
+       }
+       if len(data) != 0 {
+               b.size += uint64(binary.PutUvarint(buf[:], uint64(len(data))))
+               b.size += uint64(len(data))
+       }
+
+       // Add the event.
+       b.events = append(b.events, ev)
+}
+
+// writeEventsTo emits events in the batch, including the batch header, to tw.
+func (b *Batch) writeEventsTo(tw *raw.TextWriter) {
+       tw.WriteEvent(raw.Event{
+               Version: version.Go122,
+               Ev:      go122.EvEventBatch,
+               Args:    []uint64{b.gen.gen, uint64(b.thread), uint64(b.timestamp), b.size},
+       })
+       for _, e := range b.events {
+               tw.WriteEvent(e)
+       }
+}
+
+// Seq represents a sequence counter.
+type Seq uint64
+
+// Time represents a low-level trace timestamp (which does not necessarily
+// correspond to nanoseconds, like trace.Time does).
+type Time uint64
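+
+// Example usage (editor's sketch; b is a *Batch for some generation of a
+// Go 1.22 test trace):
+//
+//	b.Event("ProcStatus", trace.ProcID(0), go122.ProcIdle)
+//	b.Event("GoStatus", trace.GoID(1), trace.NoThread, go122.GoRunnable)
+//	b.Event("ProcStart", trace.ProcID(0), Seq(1))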
diff --git a/src/internal/trace/v2/mkexp.bash b/src/internal/trace/v2/mkexp.bash
new file mode 100755 (executable)
index 0000000..8a73719
--- /dev/null
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+# Copyright 2023 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# This script copies this directory to golang.org/x/exp/trace.
+# Just point it at a golang.org/x/exp checkout.
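+#
+# Example (hypothetical path):
+#   ./mkexp.bash "$HOME/src/golang.org/x/exp"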
+
+set -e
+if [ ! -f mkexp.bash ]; then
+       echo 'mkexp.bash must be run from $GOROOT/src/internal/trace/v2' 1>&2
+       exit 1
+fi
+
+if [ "$#" -ne 1 ]; then
+       echo 'mkexp.bash expects one argument: a path to a golang.org/x/exp git checkout' 1>&2
+       exit 1
+fi
+
+# Copy.
+mkdir -p $1/trace
+cp -r ./* $1/trace
+
+# Cleanup.
+
+# Delete mkexp.bash.
+rm $1/trace/mkexp.bash
+
+# Move tools to cmd. The directory can't be named cmd here because dist would try to build its contents.
+mv $1/trace/tools $1/trace/cmd
+
+# Make some packages internal.
+mv $1/trace/raw $1/trace/internal/raw
+mv $1/trace/event $1/trace/internal/event
+mv $1/trace/version $1/trace/internal/version
+mv $1/trace/testtrace $1/trace/internal/testtrace
+
+# Move the debug commands out of testdata.
+mv $1/trace/testdata/cmd $1/trace/cmd
+
+# Fix up import paths.
+find $1/trace -name '*.go' | xargs -- sed -i 's/internal\/trace\/v2/golang.org\/x\/exp\/trace/'
+find $1/trace -name '*.go' | xargs -- sed -i 's/golang.org\/x\/exp\/trace\/raw/golang.org\/x\/exp\/trace\/internal\/raw/'
+find $1/trace -name '*.go' | xargs -- sed -i 's/golang.org\/x\/exp\/trace\/event/golang.org\/x\/exp\/trace\/internal\/event/'
+find $1/trace -name '*.go' | xargs -- sed -i 's/golang.org\/x\/exp\/trace\/event\/go122/golang.org\/x\/exp\/trace\/internal\/event\/go122/'
+find $1/trace -name '*.go' | xargs -- sed -i 's/golang.org\/x\/exp\/trace\/version/golang.org\/x\/exp\/trace\/internal\/version/'
+find $1/trace -name '*.go' | xargs -- sed -i 's/golang.org\/x\/exp\/trace\/testtrace/golang.org\/x\/exp\/trace\/internal\/testtrace/'
+
+# Format the files.
+find $1/trace -name '*.go' | xargs -- gofmt -w -s
diff --git a/src/internal/trace/v2/order.go b/src/internal/trace/v2/order.go
new file mode 100644 (file)
index 0000000..7288792
--- /dev/null
@@ -0,0 +1,943 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+       "fmt"
+       "strings"
+
+       "internal/trace/v2/event"
+       "internal/trace/v2/event/go122"
+       "internal/trace/v2/version"
+)
+
+// ordering emulates Go scheduler state, both for validation and
+// for putting events in the right order.
+type ordering struct {
+       gStates     map[GoID]*gState
+       pStates     map[ProcID]*pState // TODO: The keys are dense, so this can be a slice.
+       mStates     map[ThreadID]*mState
+       activeTasks map[TaskID]taskState
+       gcSeq       uint64
+       gcState     gcState
+       initialGen  uint64
+}
+
+// advance checks if it's valid to proceed with ev which came from thread m.
+//
+// Returns the schedCtx at the point of the event, whether it's OK to advance
+// with this event, and any error encountered in validation.
+//
+// It assumes the gen value passed to it is monotonically increasing across calls.
+//
+// If any error is returned, then the trace is broken and trace parsing must cease.
+// If it's not valid to advance with ev, but no error was encountered, the caller
+// should attempt to advance with other candidate events from other threads. If the
+// caller runs out of candidates, the trace is invalid.
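+//
+// A caller might drive advance like this (editor's sketch, not the actual
+// parser; "frontier" holds one hypothetical candidate event per thread):
+//
+//	progressed := true
+//	for progressed && len(frontier) > 0 {
+//		progressed = false
+//		for _, c := range frontier {
+//			ctx, ok, err := o.advance(c.ev, evt, c.m, gen)
+//			if err != nil {
+//				return err // the trace is broken; stop parsing
+//			}
+//			if ok {
+//				emit(ctx, c.ev) // hypothetical emit-and-refill step
+//				progressed = true
+//			}
+//		}
+//	}
+//	// Candidates remaining without progress means the trace is invalid.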
+func (o *ordering) advance(ev *baseEvent, evt *evTable, m ThreadID, gen uint64) (schedCtx, bool, error) {
+       if o.initialGen == 0 {
+               // Set the initial gen if necessary.
+               o.initialGen = gen
+       }
+
+       var curCtx, newCtx schedCtx
+       curCtx.M = m
+       newCtx.M = m
+
+       if m == NoThread {
+               curCtx.P = NoProc
+               curCtx.G = NoGoroutine
+               newCtx = curCtx
+       } else {
+               // Pull out or create the mState for this event.
+               ms, ok := o.mStates[m]
+               if !ok {
+                       ms = &mState{
+                               g: NoGoroutine,
+                               p: NoProc,
+                       }
+                       o.mStates[m] = ms
+               }
+               curCtx.P = ms.p
+               curCtx.G = ms.g
+               newCtx = curCtx
+               defer func() {
+                       // Update the mState for this event.
+                       ms.p = newCtx.P
+                       ms.g = newCtx.G
+               }()
+       }
+
+       switch typ := ev.typ; typ {
+       // Handle procs.
+       case go122.EvProcStatus:
+               pid := ProcID(ev.args[0])
+               status := go122.ProcStatus(ev.args[1])
+               oldState := go122ProcStatus2ProcState[status]
+               if s, ok := o.pStates[pid]; ok {
+                       if status == go122.ProcSyscallAbandoned && s.status == go122.ProcSyscall {
+                               // ProcSyscallAbandoned is a special case of ProcSyscall. It indicates a
+                               // potential loss of information, but if we're already in ProcSyscall,
+                               // we haven't lost the relevant information. Promote the status and advance.
+                               oldState = ProcRunning
+                               ev.args[1] = uint64(go122.ProcSyscall)
+                       } else if s.status != status {
+                               return curCtx, false, fmt.Errorf("inconsistent status for proc %d: old %v vs. new %v", pid, s.status, status)
+                       }
+                       s.seq = makeSeq(gen, 0) // Reset seq.
+               } else {
+                       o.pStates[pid] = &pState{id: pid, status: status, seq: makeSeq(gen, 0)}
+                       if gen == o.initialGen {
+                               oldState = ProcUndetermined
+                       } else {
+                               oldState = ProcNotExist
+                       }
+               }
+               ev.extra(version.Go122)[0] = uint64(oldState) // Smuggle in the old state for StateTransition.
+
+               // Bind the proc to the new context, if it's running.
+               if status == go122.ProcRunning || status == go122.ProcSyscall {
+                       newCtx.P = pid
+               }
+               // Set the current context to the state of the M currently running this G. Otherwise
+               // we'll emit a Running -> Running event that doesn't correspond to the right M.
+               if status == go122.ProcSyscallAbandoned && oldState != ProcUndetermined {
+                       // N.B. This is slow but it should be fairly rare.
+                       found := false
+                       for mid, ms := range o.mStates {
+                               if ms.p == pid {
+                                       curCtx.M = mid
+                                       curCtx.P = pid
+                                       curCtx.G = ms.g
+                                       found = true
+                               }
+                       }
+                       if !found {
+                               return curCtx, false, fmt.Errorf("failed to find sched context for proc %d that's about to be stolen", pid)
+                       }
+               }
+               return curCtx, true, nil
+       case go122.EvProcStart:
+               pid := ProcID(ev.args[0])
+               seq := makeSeq(gen, ev.args[1])
+
+               // Try to advance. We might fail here due to sequencing, because the P hasn't
+               // had a status emitted, or because we already have a P and we're in a syscall,
+               // and we haven't observed that it was stolen from us yet.
+               state, ok := o.pStates[pid]
+               if !ok || state.status != go122.ProcIdle || !seq.succeeds(state.seq) || curCtx.P != NoProc {
+                       // We can't make an inference as to whether this is bad. We could just be seeing
+                       // a ProcStart on a different M before the proc's state was emitted, or before we
+                       // got to the right point in the trace.
+                       //
+                       // Note that we also don't advance here if we have a P and we're in a syscall.
+                       return curCtx, false, nil
+               }
+               // We can advance this P. Check some invariants.
+               //
+               // We might have a goroutine if a goroutine is exiting a syscall.
+               reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustNotHave, Goroutine: event.MayHave}
+               if err := validateCtx(curCtx, reqs); err != nil {
+                       return curCtx, false, err
+               }
+               state.status = go122.ProcRunning
+               state.seq = seq
+               newCtx.P = pid
+               return curCtx, true, nil
+       case go122.EvProcStop:
+               // We must be able to advance this P.
+               //
+               // There are 2 ways a P can stop: ProcStop and ProcSteal. ProcStop is used when the P
+               // is stopped by the same M that started it, while ProcSteal is used when another M
+               // steals the P by stopping it from a distance.
+               //
+               // Since a P is bound to an M, and we're stopping on the same M that
+               // started it, it must always be possible to advance the current M's P
+               // from a ProcStop. This is also why ProcStop doesn't need a sequence number.
+               state, ok := o.pStates[curCtx.P]
+               if !ok {
+                       return curCtx, false, fmt.Errorf("event %s for proc (%v) that doesn't exist", go122.EventString(typ), curCtx.P)
+               }
+               if state.status != go122.ProcRunning && state.status != go122.ProcSyscall {
+                       return curCtx, false, fmt.Errorf("%s event for proc that's not %s or %s", go122.EventString(typ), go122.ProcRunning, go122.ProcSyscall)
+               }
+               reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}
+               if err := validateCtx(curCtx, reqs); err != nil {
+                       return curCtx, false, err
+               }
+               state.status = go122.ProcIdle
+               newCtx.P = NoProc
+               return curCtx, true, nil
+       case go122.EvProcSteal:
+               pid := ProcID(ev.args[0])
+               seq := makeSeq(gen, ev.args[1])
+               state, ok := o.pStates[pid]
+               if !ok || (state.status != go122.ProcSyscall && state.status != go122.ProcSyscallAbandoned) || !seq.succeeds(state.seq) {
+                       // We can't make an inference as to whether this is bad. We could just be seeing
+                       // a ProcSteal on a different M before the proc's state was emitted, or before we
+                       // got to the right point in the trace.
+                       return curCtx, false, nil
+               }
+               // We can advance this P. Check some invariants.
+               reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MayHave}
+               if err := validateCtx(curCtx, reqs); err != nil {
+                       return curCtx, false, err
+               }
+               // Smuggle in the P state that let us advance so we can surface information to the event.
+               // Specifically, we need to make sure that the event is interpreted not as a transition of
+               // ProcRunning -> ProcIdle but ProcIdle -> ProcIdle instead.
+               //
+               // ProcRunning is binding, but we may be running with a P on the current M and we can't
+               // bind another P. This P is about to go ProcIdle anyway.
+               oldStatus := state.status
+               ev.extra(version.Go122)[0] = uint64(oldStatus)
+
+               // Update the P's status and sequence number.
+               state.status = go122.ProcIdle
+               state.seq = seq
+
+               // If we've lost information then don't try to do anything with the M.
+               // It may have moved on and we can't be sure.
+               if oldStatus == go122.ProcSyscallAbandoned {
+                       return curCtx, true, nil
+               }
+
+               // Validate that the M we're stealing from is what we expect.
+               mid := ThreadID(ev.args[2]) // The M we're stealing from.
+               mState, ok := o.mStates[mid]
+               if !ok {
+                       return curCtx, false, fmt.Errorf("stole proc from non-existent thread %d", mid)
+               }
+
+               // Make sure we're actually stealing the right P.
+               if mState.p != pid {
+                       return curCtx, false, fmt.Errorf("tried to steal proc %d from thread %d, but got proc %d instead", pid, mid, mState.p)
+               }
+
+               // Tell the M it has no P so it can proceed.
+               //
+               // This is safe because we know the P was in a syscall and
+               // the other M must be trying to get out of the syscall.
+               // GoSyscallEndBlocked cannot advance until the corresponding
+               // M loses its P.
+               mState.p = NoProc
+               return curCtx, true, nil
+
+       // Handle goroutines.
+       case go122.EvGoStatus:
+               gid := GoID(ev.args[0])
+               mid := ThreadID(ev.args[1])
+               status := go122.GoStatus(ev.args[2])
+               oldState := go122GoStatus2GoState[status]
+               if s, ok := o.gStates[gid]; ok {
+                       if s.status != status {
+                               return curCtx, false, fmt.Errorf("inconsistent status for goroutine %d: old %v vs. new %v", gid, s.status, status)
+                       }
+                       s.seq = makeSeq(gen, 0) // Reset seq.
+               } else if gen == o.initialGen {
+                       // Set the state.
+                       o.gStates[gid] = &gState{id: gid, status: status, seq: makeSeq(gen, 0)}
+                       oldState = GoUndetermined
+               } else {
+                       return curCtx, false, fmt.Errorf("found goroutine status for new goroutine after the first generation: id=%v status=%v", gid, status)
+               }
+               ev.extra(version.Go122)[0] = uint64(oldState) // Smuggle in the old state for StateTransition.
+
+               switch status {
+               case go122.GoRunning:
+                       // Bind the goroutine to the new context, since it's running.
+                       newCtx.G = gid
+               case go122.GoSyscall:
+                       if mid == NoThread {
+                               return curCtx, false, fmt.Errorf("found goroutine %d in syscall without a thread", gid)
+                       }
+                       // Is the syscall on this thread? If so, bind it to the context.
+                       // Otherwise, we're talking about a G sitting in a syscall on an M.
+                       // Validate the named M.
+                       if mid == curCtx.M {
+                               newCtx.G = gid
+                               break
+                       }
+                       // Now we're talking about a thread and goroutine that have been
+                       // blocked on a syscall for the entire generation. This case must
+                       // not have a P; the runtime makes sure that all Ps are traced at
+                       // the beginning of a generation, which involves taking a P back
+                       // from every thread.
+                       ms, ok := o.mStates[mid]
+                       if ok {
+                               // This M has been seen. That means we must have seen this
+                               // goroutine go into a syscall on this thread at some point.
+                               if ms.g != gid {
+                                       // But the G on the M doesn't match. Something's wrong.
+                                       return curCtx, false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, ms.g)
+                               }
+                               // This case is just a Syscall->Syscall event, which needs to
+                               // appear as having the G currently bound to this M.
+                               curCtx.G = ms.g
+                       } else {
+                               // The M hasn't been seen yet. That means this goroutine
+                               // has just been sitting in a syscall on this M. Create
+                               // a state for it.
+                               o.mStates[mid] = &mState{g: gid, p: NoProc}
+                               // Don't set curCtx.G in this case because this event is the
+                               // binding event (and curCtx represents the "before" state).
+                       }
+                       // Update the current context to the M we're talking about.
+                       curCtx.M = mid
+               }
+               return curCtx, true, nil
+       case go122.EvGoCreate:
+               // Goroutines must be created on a running P, but may or may not be created
+               // by a running goroutine.
+               reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}
+               if err := validateCtx(curCtx, reqs); err != nil {
+                       return curCtx, false, err
+               }
+               // If we have a goroutine, it must be running.
+               if state, ok := o.gStates[curCtx.G]; ok && state.status != go122.GoRunning {
+                       return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
+               }
+               // This goroutine created another. Add a state for it.
+               newgid := GoID(ev.args[0])
+               if _, ok := o.gStates[newgid]; ok {
+                       return curCtx, false, fmt.Errorf("tried to create goroutine (%v) that already exists", newgid)
+               }
+               o.gStates[newgid] = &gState{id: newgid, status: go122.GoRunnable, seq: makeSeq(gen, 0)}
+               return curCtx, true, nil
+       case go122.EvGoDestroy, go122.EvGoStop, go122.EvGoBlock, go122.EvGoSyscallBegin:
+               // These are goroutine events that all require an active running
+               // goroutine on some thread. They must *always* be advance-able,
+               // since running goroutines are bound to their M.
+               if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
+                       return curCtx, false, err
+               }
+               state, ok := o.gStates[curCtx.G]
+               if !ok {
+                       return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
+               }
+               if state.status != go122.GoRunning {
+                       return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
+               }
+               // Handle each case slightly differently; we just group them together
+               // because they have shared preconditions.
+               switch typ {
+               case go122.EvGoDestroy:
+                       // This goroutine is exiting itself.
+                       delete(o.gStates, curCtx.G)
+                       newCtx.G = NoGoroutine
+               case go122.EvGoStop:
+                       // Goroutine stopped (yielded). It's runnable but not running on this M.
+                       state.status = go122.GoRunnable
+                       newCtx.G = NoGoroutine
+               case go122.EvGoBlock:
+                       // Goroutine blocked. It's waiting now and not running on this M.
+                       state.status = go122.GoWaiting
+                       newCtx.G = NoGoroutine
+               case go122.EvGoSyscallBegin:
+                       // Goroutine entered a syscall. It's still running on this P and M.
+                       state.status = go122.GoSyscall
+                       pState, ok := o.pStates[curCtx.P]
+                       if !ok {
+                               return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(typ))
+                       }
+                       pState.status = go122.ProcSyscall
+               }
+               return curCtx, true, nil
+       case go122.EvGoStart:
+               gid := GoID(ev.args[0])
+               seq := makeSeq(gen, ev.args[1])
+               state, ok := o.gStates[gid]
+               if !ok || state.status != go122.GoRunnable || !seq.succeeds(state.seq) {
+                       // We can't make an inference as to whether this is bad. We could just be seeing
+                       // a GoStart on a different M before the goroutine was created, before it had its
+                       // state emitted, or before we got to the right point in the trace yet.
+                       return curCtx, false, nil
+               }
+               // We can advance this goroutine. Check some invariants.
+               reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MustNotHave}
+               if err := validateCtx(curCtx, reqs); err != nil {
+                       return curCtx, false, err
+               }
+               state.status = go122.GoRunning
+               state.seq = seq
+               newCtx.G = gid
+               return curCtx, true, nil
+       case go122.EvGoUnblock:
+               // N.B. Both the goroutine ID and the sequence number refer to the
+               // goroutine to unblock, not the current goroutine.
+               gid := GoID(ev.args[0])
+               seq := makeSeq(gen, ev.args[1])
+               state, ok := o.gStates[gid]
+               if !ok || state.status != go122.GoWaiting || !seq.succeeds(state.seq) {
+                       // We can't make an inference as to whether this is bad. We could just be seeing
+                       // a GoUnblock on a different M before the goroutine was created and blocked itself,
+                       // before it had its state emitted, or before we got to the right point in the trace yet.
+                       return curCtx, false, nil
+               }
+               state.status = go122.GoRunnable
+               state.seq = seq
+               // N.B. No context to validate. Basically anything can unblock
+               // a goroutine (e.g. sysmon).
+               return curCtx, true, nil
+       case go122.EvGoSyscallEnd:
+               // This event is always advance-able because it happens on the same
+               // thread that EvGoSyscallBegin happened on, and the goroutine can't
+               // leave that thread until it's done.
+               if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
+                       return curCtx, false, err
+               }
+               state, ok := o.gStates[curCtx.G]
+               if !ok {
+                       return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
+               }
+               if state.status != go122.GoSyscall {
+                       return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
+               }
+               state.status = go122.GoRunning
+
+               // Transfer the P back to running from syscall.
+               pState, ok := o.pStates[curCtx.P]
+               if !ok {
+                       return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(typ))
+               }
+               if pState.status != go122.ProcSyscall {
+                       return curCtx, false, fmt.Errorf("expected proc %d in state %v, but got %v instead", curCtx.P, go122.ProcSyscall, pState.status)
+               }
+               pState.status = go122.ProcRunning
+               return curCtx, true, nil
+       case go122.EvGoSyscallEndBlocked:
+               // This event becomes advance-able when its P is not in a syscall state
+               // (lack of a P altogether is also acceptable for advancing).
+               // The transfer out of ProcSyscall can happen either voluntarily via
+               // ProcStop or involuntarily via ProcSteal. We may also acquire a new P
+               // before we get here (after the transfer out) but that's OK: that new
+               // P won't be in the ProcSyscall state anymore.
+               //
+               // Basically: while we have a preemptible P, don't advance, because we
+               // *know* from the event that we're going to lose it at some point during
+               // the syscall. We shouldn't advance until that happens.
+               if curCtx.P != NoProc {
+                       pState, ok := o.pStates[curCtx.P]
+                       if !ok {
+                               return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(typ))
+                       }
+                       if pState.status == go122.ProcSyscall {
+                               return curCtx, false, nil
+                       }
+               }
+               // As mentioned above, we may have a P here if we ProcStart
+               // before this event.
+               if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MustHave}); err != nil {
+                       return curCtx, false, err
+               }
+               state, ok := o.gStates[curCtx.G]
+               if !ok {
+                       return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
+               }
+               if state.status != go122.GoSyscall {
+                       return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
+               }
+               newCtx.G = NoGoroutine
+               state.status = go122.GoRunnable
+               return curCtx, true, nil
+       case go122.EvGoCreateSyscall:
+               // This event indicates that a goroutine is effectively
+               // being created out of a cgo callback. Such a goroutine
+               // is 'created' in the syscall state.
+               if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustNotHave, Goroutine: event.MustNotHave}); err != nil {
+                       return curCtx, false, err
+               }
+               // This goroutine is effectively being created. Add a state for it.
+               newgid := GoID(ev.args[0])
+               if _, ok := o.gStates[newgid]; ok {
+                       return curCtx, false, fmt.Errorf("tried to create goroutine (%v) in syscall that already exists", newgid)
+               }
+               o.gStates[newgid] = &gState{id: newgid, status: go122.GoSyscall, seq: makeSeq(gen, 0)}
+               // Goroutine is executing. Bind it to the context.
+               newCtx.G = newgid
+               return curCtx, true, nil
+       case go122.EvGoDestroySyscall:
+               // This event indicates that a goroutine created for a
+               // cgo callback is disappearing, either because the callback
+               // is ending or because the C thread that called it is being destroyed.
+               //
+               // Note: we might have a P here. The P might not be released
+               // eagerly by the runtime, and it might get stolen back later
+               // (or never again, if the program is going to exit).
+               if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MustHave}); err != nil {
+                       return curCtx, false, err
+               }
+               // Check to make sure the goroutine exists in the right state.
+               state, ok := o.gStates[curCtx.G]
+               if !ok {
+                       return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
+               }
+               if state.status != go122.GoSyscall {
+                       return curCtx, false, fmt.Errorf("%s event for goroutine that's not %v", go122.EventString(typ), GoSyscall)
+               }
+               // This goroutine is exiting itself.
+               delete(o.gStates, curCtx.G)
+               newCtx.G = NoGoroutine
+               return curCtx, true, nil
+
+       // Handle tasks. Tasks are interesting because:
+       // - There's no Begin event required to reference a task.
+       // - End for a particular task ID can appear multiple times.
+       // As a result, there's very little to validate. The only
+       // thing we have to be sure of is that a task doesn't begin
+       // while it's already in progress. Task IDs are allowed to be
+       // reused, so we don't care about a Begin after an End.
+       case go122.EvUserTaskBegin:
+               id := TaskID(ev.args[0])
+               if _, ok := o.activeTasks[id]; ok {
+                       return curCtx, false, fmt.Errorf("task ID conflict: %d", id)
+               }
+               // Get the parent ID, but don't validate it. There's no guarantee
+               // we actually have information on whether it's active.
+               parentID := TaskID(ev.args[1])
+
+               // Validate the name and record it. We'll need to pass it through to
+               // EvUserTaskEnd.
+               nameID := stringID(ev.args[2])
+               name, ok := evt.strings.get(nameID)
+               if !ok {
+                       return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, typ)
+               }
+               o.activeTasks[id] = taskState{name: name, parentID: parentID}
+               return curCtx, true, validateCtx(curCtx, event.UserGoReqs)
+       case go122.EvUserTaskEnd:
+               id := TaskID(ev.args[0])
+               if ts, ok := o.activeTasks[id]; ok {
+                       // Smuggle the task info. This may happen in a different generation,
+                       // which may not have the name in its string table. Add it to the extra
+                       // strings table so we can look it up later.
+                       ev.extra(version.Go122)[0] = uint64(ts.parentID)
+                       ev.extra(version.Go122)[1] = uint64(evt.addExtraString(ts.name))
+                       delete(o.activeTasks, id)
+               } else {
+                       // Explicitly clear the task info.
+                       ev.extra(version.Go122)[0] = uint64(NoTask)
+                       ev.extra(version.Go122)[1] = uint64(evt.addExtraString(""))
+               }
+               return curCtx, true, validateCtx(curCtx, event.UserGoReqs)
+
+       // Handle user regions.
+       case go122.EvUserRegionBegin:
+               if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
+                       return curCtx, false, err
+               }
+               tid := TaskID(ev.args[0])
+               nameID := stringID(ev.args[1])
+               name, ok := evt.strings.get(nameID)
+               if !ok {
+                       return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, typ)
+               }
+               if err := o.gStates[curCtx.G].beginRegion(userRegion{tid, name}); err != nil {
+                       return curCtx, false, err
+               }
+               return curCtx, true, nil
+       case go122.EvUserRegionEnd:
+               if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
+                       return curCtx, false, err
+               }
+               tid := TaskID(ev.args[0])
+               nameID := stringID(ev.args[1])
+               name, ok := evt.strings.get(nameID)
+               if !ok {
+                       return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, typ)
+               }
+               if err := o.gStates[curCtx.G].endRegion(userRegion{tid, name}); err != nil {
+                       return curCtx, false, err
+               }
+               return curCtx, true, nil
+
+       // Handle the GC mark phase.
+       //
+       // We have sequence numbers for both start and end because they
+       // can happen on completely different threads. We want an explicit
+       // partial order edge between start and end here, otherwise we're
+       // relying entirely on timestamps to make sure we don't advance a
+       // GCEnd for a _different_ GC cycle if timestamps are wildly broken.
+       case go122.EvGCActive:
+               seq := ev.args[0]
+               if gen == o.initialGen {
+                       if o.gcState != gcUndetermined {
+                               return curCtx, false, fmt.Errorf("GCActive in the first generation isn't first GC event")
+                       }
+                       o.gcSeq = seq
+                       o.gcState = gcRunning
+                       return curCtx, true, nil
+               }
+               if seq != o.gcSeq+1 {
+                       // This is not the right GC cycle.
+                       return curCtx, false, nil
+               }
+               if o.gcState != gcRunning {
+                       return curCtx, false, fmt.Errorf("encountered GCActive while GC was not in progress")
+               }
+               o.gcSeq = seq
+               if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
+                       return curCtx, false, err
+               }
+               return curCtx, true, nil
+       case go122.EvGCBegin:
+               seq := ev.args[0]
+               if o.gcState == gcUndetermined {
+                       o.gcSeq = seq
+                       o.gcState = gcRunning
+                       return curCtx, true, nil
+               }
+               if seq != o.gcSeq+1 {
+                       // This is not the right GC cycle.
+                       return curCtx, false, nil
+               }
+               if o.gcState == gcRunning {
+                       return curCtx, false, fmt.Errorf("encountered GCBegin while GC was already in progress")
+               }
+               o.gcSeq = seq
+               o.gcState = gcRunning
+               if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
+                       return curCtx, false, err
+               }
+               return curCtx, true, nil
+       case go122.EvGCEnd:
+               seq := ev.args[0]
+               if seq != o.gcSeq+1 {
+                       // This is not the right GC cycle.
+                       return curCtx, false, nil
+               }
+               if o.gcState == gcNotRunning {
+                       return curCtx, false, fmt.Errorf("encountered GCEnd when GC was not in progress")
+               }
+               if o.gcState == gcUndetermined {
+                       return curCtx, false, fmt.Errorf("encountered GCEnd when GC was in an undetermined state")
+               }
+               o.gcSeq = seq
+               o.gcState = gcNotRunning
+               if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
+                       return curCtx, false, err
+               }
+               return curCtx, true, nil
+
+       // Handle simple instantaneous events that require a G.
+       case go122.EvGoLabel, go122.EvProcsChange, go122.EvUserLog:
+               if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
+                       return curCtx, false, err
+               }
+               return curCtx, true, nil
+
+       // Handle allocation states, which don't require a G.
+       case go122.EvHeapAlloc, go122.EvHeapGoal:
+               if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil {
+                       return curCtx, false, err
+               }
+               return curCtx, true, nil
+
+       // Handle sweep, which is bound to a P and doesn't require a G.
+       case go122.EvGCSweepBegin:
+               if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil {
+                       return curCtx, false, err
+               }
+               if err := o.pStates[curCtx.P].beginRange(makeRangeType(typ, 0)); err != nil {
+                       return curCtx, false, err
+               }
+               return curCtx, true, nil
+       case go122.EvGCSweepActive:
+               pid := ProcID(ev.args[0])
+               // N.B. In practice Ps can't block while they're sweeping, so this can only
+               // ever reference curCtx.P. However, be lenient about this like we are with
+               // GCMarkAssistActive; there's no reason the runtime couldn't change to block
+               // in the middle of a sweep.
+               if err := o.pStates[pid].activeRange(makeRangeType(typ, 0), gen == o.initialGen); err != nil {
+                       return curCtx, false, err
+               }
+               return curCtx, true, nil
+       case go122.EvGCSweepEnd:
+               if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil {
+                       return curCtx, false, err
+               }
+               _, err := o.pStates[curCtx.P].endRange(typ)
+               if err != nil {
+                       return curCtx, false, err
+               }
+               return curCtx, true, nil
+
+       // Handle special goroutine-bound event ranges.
+       case go122.EvSTWBegin, go122.EvGCMarkAssistBegin:
+               if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
+                       return curCtx, false, err
+               }
+               desc := stringID(0)
+               if typ == go122.EvSTWBegin {
+                       desc = stringID(ev.args[0])
+               }
+               if err := o.gStates[curCtx.G].beginRange(makeRangeType(typ, desc)); err != nil {
+                       return curCtx, false, err
+               }
+               return curCtx, true, nil
+       case go122.EvGCMarkAssistActive:
+               gid := GoID(ev.args[0])
+               // N.B. Like GoStatus, this can happen at any time, because it can
+               // reference a non-running goroutine. Don't check anything about the
+               // current scheduler context.
+               if err := o.gStates[gid].activeRange(makeRangeType(typ, 0), gen == o.initialGen); err != nil {
+                       return curCtx, false, err
+               }
+               return curCtx, true, nil
+       case go122.EvSTWEnd, go122.EvGCMarkAssistEnd:
+               if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
+                       return curCtx, false, err
+               }
+               desc, err := o.gStates[curCtx.G].endRange(typ)
+               if err != nil {
+                       return curCtx, false, err
+               }
+               if typ == go122.EvSTWEnd {
+                       // Smuggle the kind into the event.
+                       // Don't use ev.extra here so we have symmetry with STWBegin.
+                       ev.args[0] = uint64(desc)
+               }
+               return curCtx, true, nil
+       }
+       return curCtx, false, fmt.Errorf("bad event type found while ordering: %v", ev.typ)
+}
+
+// schedCtx represents the scheduling resources associated with an event.
+type schedCtx struct {
+       G GoID
+       P ProcID
+       M ThreadID
+}
+
+// validateCtx ensures that ctx conforms to some reqs, returning an error if
+// it doesn't.
+func validateCtx(ctx schedCtx, reqs event.SchedReqs) error {
+       // Check thread requirements.
+       if reqs.Thread == event.MustHave && ctx.M == NoThread {
+               return fmt.Errorf("expected a thread but didn't have one")
+       } else if reqs.Thread == event.MustNotHave && ctx.M != NoThread {
+               return fmt.Errorf("expected no thread but had one")
+       }
+
+       // Check proc requirements.
+       if reqs.Proc == event.MustHave && ctx.P == NoProc {
+               return fmt.Errorf("expected a proc but didn't have one")
+       } else if reqs.Proc == event.MustNotHave && ctx.P != NoProc {
+               return fmt.Errorf("expected no proc but had one")
+       }
+
+       // Check goroutine requirements.
+       if reqs.Goroutine == event.MustHave && ctx.G == NoGoroutine {
+               return fmt.Errorf("expected a goroutine but didn't have one")
+       } else if reqs.Goroutine == event.MustNotHave && ctx.G != NoGoroutine {
+               return fmt.Errorf("expected no goroutine but had one")
+       }
+       return nil
+}
+
+// gcState is a trinary variable for the current state of the GC.
+//
+// The third state besides "enabled" and "disabled" is "undetermined."
+type gcState uint8
+
+const (
+       gcUndetermined gcState = iota
+       gcNotRunning
+       gcRunning
+)
+
+// String returns a human-readable string for the GC state.
+func (s gcState) String() string {
+       switch s {
+       case gcUndetermined:
+               return "Undetermined"
+       case gcNotRunning:
+               return "NotRunning"
+       case gcRunning:
+               return "Running"
+       }
+       return "Bad"
+}
+
+// userRegion represents a unique user region when attached to some gState.
+type userRegion struct {
+       // name must be a resolved string because the string ID for the same
+       // string may change across generations, but we care about checking
+       // the value itself.
+       taskID TaskID
+       name   string
+}
+
+// rangeType is a way to classify special ranges of time.
+//
+// These typically correspond 1:1 with "Begin" events, but
+// they may have an optional subtype that describes the range
+// in more detail.
+type rangeType struct {
+       typ  event.Type // "Begin" event.
+       desc stringID   // Optional subtype.
+}
+
+// makeRangeType constructs a new rangeType.
+func makeRangeType(typ event.Type, desc stringID) rangeType {
+       if styp := go122.Specs()[typ].StartEv; styp != go122.EvNone {
+               typ = styp
+       }
+       return rangeType{typ, desc}
+}
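+
+// Editor's note: for example, makeRangeType(go122.EvGCSweepActive, 0)
+// normalizes to the same rangeType as makeRangeType(go122.EvGCSweepBegin, 0),
+// since the Active event's spec names EvGCSweepBegin as its StartEv.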
+
+// gState is the state of a goroutine at a point in the trace.
+type gState struct {
+       id     GoID
+       status go122.GoStatus
+       seq    seqCounter
+
+       // regions are the active user regions for this goroutine.
+       regions []userRegion
+
+       // rangeState is the state of special time ranges bound to this goroutine.
+       rangeState
+}
+
+// beginRegion starts a user region on the goroutine.
+func (s *gState) beginRegion(r userRegion) error {
+       s.regions = append(s.regions, r)
+       return nil
+}
+
+// endRegion ends a user region on the goroutine.
+func (s *gState) endRegion(r userRegion) error {
+       if len(s.regions) == 0 {
+               return fmt.Errorf("misuse of region in goroutine %v: region end %v when no region is active", s.id, r)
+       }
+       if next := s.regions[len(s.regions)-1]; next != r {
+               return fmt.Errorf("misuse of region in goroutine %v: region end %v when the inner-most active region start event is %v", s.id, r, next)
+       }
+       s.regions = s.regions[:len(s.regions)-1]
+       return nil
+}
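+
+// Editor's note: regions must end in LIFO order. For example, after
+// beginRegion(a) and beginRegion(b), endRegion(b) then endRegion(a)
+// succeed, while calling endRegion(a) first is an error because b is
+// the innermost active region.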
+
+// pState is the state of a proc at a point in the trace.
+type pState struct {
+       id     ProcID
+       status go122.ProcStatus
+       seq    seqCounter
+
+       // rangeState is the state of special time ranges bound to this proc.
+       rangeState
+}
+
+// mState is the state of a thread at a point in the trace.
+type mState struct {
+       g GoID   // Goroutine bound to this M. (The goroutine's state is Executing.)
+       p ProcID // Proc bound to this M. (The proc's state is Executing.)
+}
+
+// rangeState represents the state of special time ranges.
+type rangeState struct {
+       // inFlight contains the rangeTypes of any ranges bound to a resource.
+       inFlight []rangeType
+}
+
+// beginRange begins a special range in time on the goroutine.
+//
+// Returns an error if the range is already in progress.
+func (s *rangeState) beginRange(typ rangeType) error {
+       if s.hasRange(typ) {
+               return fmt.Errorf("discovered event already in-flight for when starting event %v", go122.Specs()[typ.typ].Name)
+       }
+       s.inFlight = append(s.inFlight, typ)
+       return nil
+}
+
+// activeRange marks a special range in time on the goroutine as active in the
+// initial generation, or confirms that it is indeed active in later generations.
+func (s *rangeState) activeRange(typ rangeType, isInitialGen bool) error {
+       if isInitialGen {
+               if s.hasRange(typ) {
+                       return fmt.Errorf("found named active range already in first gen: %v", typ)
+               }
+               s.inFlight = append(s.inFlight, typ)
+       } else if !s.hasRange(typ) {
+               return fmt.Errorf("resource is missing active range: %v %v", go122.Specs()[typ.typ].Name, s.inFlight)
+       }
+       return nil
+}
+
+// hasRange returns true if a special time range on the goroutine is in progress.
+func (s *rangeState) hasRange(typ rangeType) bool {
+       for _, ftyp := range s.inFlight {
+               if ftyp == typ {
+                       return true
+               }
+       }
+       return false
+}
+
+// endRange ends a special range in time on the goroutine.
+//
+// This must line up with the start event type of the range the goroutine is currently in.
+func (s *rangeState) endRange(typ event.Type) (stringID, error) {
+       st := go122.Specs()[typ].StartEv
+       idx := -1
+       for i, r := range s.inFlight {
+               if r.typ == st {
+                       idx = i
+                       break
+               }
+       }
+       if idx < 0 {
+               return 0, fmt.Errorf("tried to end event %v, but not in-flight", go122.Specs()[st].Name)
+       }
+       // Swap remove.
+       desc := s.inFlight[idx].desc
+       s.inFlight[idx], s.inFlight[len(s.inFlight)-1] = s.inFlight[len(s.inFlight)-1], s.inFlight[idx]
+       s.inFlight = s.inFlight[:len(s.inFlight)-1]
+       return desc, nil
+}
+
+// seqCounter represents a global sequence counter for a resource.
+type seqCounter struct {
+       gen uint64 // The generation for the local sequence counter seq.
+       seq uint64 // The sequence number local to the generation.
+}
+
+// makeSeq creates a new seqCounter.
+func makeSeq(gen, seq uint64) seqCounter {
+       return seqCounter{gen: gen, seq: seq}
+}
+
+// succeeds returns true if a is the immediate successor of b.
+func (a seqCounter) succeeds(b seqCounter) bool {
+       return a.gen == b.gen && a.seq == b.seq+1
+}
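+
+// For example, makeSeq(3, 8) succeeds makeSeq(3, 7), but neither
+// makeSeq(4, 8) (different generation) nor makeSeq(3, 9) (a gap in the
+// sequence) succeeds makeSeq(3, 7).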
+
+// String returns a debug string representation of the seqCounter.
+func (c seqCounter) String() string {
+       return fmt.Sprintf("%d (gen=%d)", c.seq, c.gen)
+}
+
+func dumpOrdering(order *ordering) string {
+       var sb strings.Builder
+       for id, state := range order.gStates {
+               fmt.Fprintf(&sb, "G %d [status=%s seq=%s]\n", id, state.status, state.seq)
+       }
+       fmt.Fprintln(&sb)
+       for id, state := range order.pStates {
+               fmt.Fprintf(&sb, "P %d [status=%s seq=%s]\n", id, state.status, state.seq)
+       }
+       fmt.Fprintln(&sb)
+       for id, state := range order.mStates {
+               fmt.Fprintf(&sb, "M %d [g=%d p=%d]\n", id, state.g, state.p)
+       }
+       fmt.Fprintln(&sb)
+       fmt.Fprintf(&sb, "GC %d %s\n", order.gcSeq, order.gcState)
+       return sb.String()
+}
+
+// taskState represents an active task.
+type taskState struct {
+       // name is the type of the active task.
+       name string
+
+       // parentID is the parent ID of the active task.
+       parentID TaskID
+}
diff --git a/src/internal/trace/v2/raw/doc.go b/src/internal/trace/v2/raw/doc.go
new file mode 100644 (file)
index 0000000..5348737
--- /dev/null
@@ -0,0 +1,66 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package raw provides an interface to interpret and emit Go execution traces.
+It can interpret and emit execution traces in its wire format as well as a
+bespoke but simple text format.
+
+The readers and writers in this package perform no validation on or ordering of
+the input, and so are generally unsuitable for analysis. However, they're very
+useful for testing and debugging the tracer in the runtime and more sophisticated
+trace parsers.
+
+# Text format specification
+
+The trace text format produced and consumed by this package is a line-oriented
+format.
+
+The first line in each text trace is the header line.
+
+       Trace Go1.XX
+
+Following that is a series of event lines. Each event begins with an
+event name, followed by zero or more named unsigned integer arguments.
+Names are separated from their integer values by an '=' sign. Names can
+consist of any UTF-8 character except '='.
+
+For example:
+
+       EventName arg1=23 arg2=55 arg3=53
+
+Any amount of whitespace is allowed to separate each token. Whitespace
+is identified via unicode.IsSpace.
+
+Some events have additional data on following lines. There are two such
+special cases.
+
+The first special case consists of events with trailing byte-oriented data.
+The trailer begins on the following line from the event. That line consists
+of a single argument 'data' and a Go-quoted string representing the byte data
+within. Note: an explicit argument for the length is elided, because it's
+just the length of the unquoted string.
+
+For example:
+
+       String id=5
+               data="hello world\x00"
+
+These events are identified in their spec by the HasData flag.
+
+The second special case consists of stack events. These events are identified
+by the IsStack flag. These events also have a trailing unsigned integer argument
+describing the number of stack frame descriptors that follow. Each stack frame
+descriptor is on its own line following the event, consisting of four signed
+integer arguments: the PC, an integer describing the function name, an integer
+describing the file name, and the line number in that file that function was at
+at the time the stack trace was taken.
+
+For example:
+
+       Stack id=5 n=2
+               pc=1241251 func=3 file=6 line=124
+               pc=7534345 func=6 file=3 line=64
+*/
+package raw
diff --git a/src/internal/trace/v2/raw/event.go b/src/internal/trace/v2/raw/event.go
new file mode 100644 (file)
index 0000000..6f09f1f
--- /dev/null
@@ -0,0 +1,60 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package raw
+
+import (
+       "strconv"
+       "strings"
+
+       "internal/trace/v2/event"
+       "internal/trace/v2/version"
+)
+
+// Event is a simple representation of a trace event.
+//
+// Note that this represents much more than just timestamped events:
+// it also covers parts of the trace format's framing, left uninterpreted.
+type Event struct {
+       Version version.Version
+       Ev      event.Type
+       Args    []uint64
+       Data    []byte
+}
+
+// String returns the canonical string representation of the event.
+//
+// This format is the same format that is parsed by the TextReader
+// and emitted by the TextWriter.
+func (e *Event) String() string {
+       spec := e.Version.Specs()[e.Ev]
+
+       var s strings.Builder
+       s.WriteString(spec.Name)
+       for i := range spec.Args {
+               s.WriteString(" ")
+               s.WriteString(spec.Args[i])
+               s.WriteString("=")
+               s.WriteString(strconv.FormatUint(e.Args[i], 10))
+       }
+       if spec.IsStack {
+               frames := e.Args[len(spec.Args):]
+               for i := 0; i < len(frames); i++ {
+                       if i%4 == 0 {
+                               s.WriteString("\n\t")
+                       } else {
+                               s.WriteString(" ")
+                       }
+                       s.WriteString(frameFields[i%4])
+                       s.WriteString("=")
+                       s.WriteString(strconv.FormatUint(frames[i], 10))
+               }
+       }
+       if e.Data != nil {
+               s.WriteString("\n\tdata=")
+               s.WriteString(strconv.Quote(string(e.Data)))
+       }
+       return s.String()
+}
diff --git a/src/internal/trace/v2/raw/reader.go b/src/internal/trace/v2/raw/reader.go
new file mode 100644 (file)
index 0000000..fdcd47f
--- /dev/null
@@ -0,0 +1,110 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package raw
+
+import (
+       "bufio"
+       "encoding/binary"
+       "fmt"
+       "io"
+
+       "internal/trace/v2/event"
+       "internal/trace/v2/version"
+)
+
+// Reader parses trace bytes with only very basic validation
+// into an event stream.
+type Reader struct {
+       r     *bufio.Reader
+       v     version.Version
+       specs []event.Spec
+}
+
+// NewReader creates a new reader for the trace wire format.
+func NewReader(r io.Reader) (*Reader, error) {
+       br := bufio.NewReader(r)
+       v, err := version.ReadHeader(br)
+       if err != nil {
+               return nil, err
+       }
+       return &Reader{r: br, v: v, specs: v.Specs()}, nil
+}
+
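+// Example read loop (editor's sketch; in is an io.Reader positioned at
+// the start of a wire-format trace):
+//
+//	r, err := raw.NewReader(in)
+//	if err != nil {
+//		log.Fatal(err)
+//	}
+//	for {
+//		ev, err := r.ReadEvent()
+//		if err == io.EOF {
+//			break
+//		}
+//		if err != nil {
+//			log.Fatal(err)
+//		}
+//		fmt.Println(ev.String())
+//	}
+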
+// Version returns the version of the trace that we're reading.
+func (r *Reader) Version() version.Version {
+       return r.v
+}
+
+// ReadEvent reads and returns the next trace event in the byte stream.
+func (r *Reader) ReadEvent() (Event, error) {
+       evb, err := r.r.ReadByte()
+       if err == io.EOF {
+               return Event{}, io.EOF
+       }
+       if err != nil {
+               return Event{}, err
+       }
+       if int(evb) >= len(r.specs) || evb == 0 {
+               return Event{}, fmt.Errorf("invalid event type: %d", evb)
+       }
+       ev := event.Type(evb)
+       spec := r.specs[ev]
+       args, err := r.readArgs(len(spec.Args))
+       if err != nil {
+               return Event{}, err
+       }
+       if spec.IsStack {
+               n := int(args[1])
+               for i := 0; i < n; i++ {
+                       // Each stack frame has four args: pc, func ID, file ID, line number.
+                       frame, err := r.readArgs(4)
+                       if err != nil {
+                               return Event{}, err
+                       }
+                       args = append(args, frame...)
+               }
+       }
+       var data []byte
+       if spec.HasData {
+               data, err = r.readData()
+               if err != nil {
+                       return Event{}, err
+               }
+       }
+       return Event{
+               Version: r.v,
+               Ev:      ev,
+               Args:    args,
+               Data:    data,
+       }, nil
+}
+
+func (r *Reader) readArgs(n int) ([]uint64, error) {
+       var args []uint64
+       for i := 0; i < n; i++ {
+               val, err := binary.ReadUvarint(r.r)
+               if err != nil {
+                       return nil, err
+               }
+               args = append(args, val)
+       }
+       return args, nil
+}
+
+func (r *Reader) readData() ([]byte, error) {
+       n, err := binary.ReadUvarint(r.r)
+       if err != nil {
+               return nil, err
+       }
+       var data []byte
+       for i := 0; i < int(n); i++ {
+               b, err := r.r.ReadByte()
+               if err != nil {
+                       return nil, err
+               }
+               data = append(data, b)
+       }
+       return data, nil
+}
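Taken together, the raw reader is a simple pull-style API: construct it over a byte stream, then call ReadEvent until io.EOF. A minimal sketch of a consumer (the file name is illustrative, and internal/... packages are only importable from within the Go tree):

```go
package main

import (
	"fmt"
	"io"
	"log"
	"os"

	"internal/trace/v2/raw"
)

func main() {
	f, err := os.Open("trace.bin") // illustrative input file
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	r, err := raw.NewReader(f) // reads and checks the trace header
	if err != nil {
		log.Fatal(err)
	}
	for {
		ev, err := r.ReadEvent()
		if err == io.EOF {
			break // end of trace
		}
		if err != nil {
			log.Fatal(err)
		}
		fmt.Println(ev.String()) // text rendering of the event
	}
}
```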
diff --git a/src/internal/trace/v2/raw/textreader.go b/src/internal/trace/v2/raw/textreader.go
new file mode 100644 (file)
index 0000000..07785f3
--- /dev/null
@@ -0,0 +1,217 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package raw
+
+import (
+       "bufio"
+       "fmt"
+       "io"
+       "strconv"
+       "strings"
+       "unicode"
+
+       "internal/trace/v2/event"
+       "internal/trace/v2/version"
+)
+
+// TextReader parses a text format trace with only very basic validation
+// into an event stream.
+type TextReader struct {
+       v     version.Version
+       specs []event.Spec
+       names map[string]event.Type
+       s     *bufio.Scanner
+}
+
+// NewTextReader creates a new reader for the trace text format.
+func NewTextReader(r io.Reader) (*TextReader, error) {
+       tr := &TextReader{s: bufio.NewScanner(r)}
+       line, err := tr.nextLine()
+       if err != nil {
+               return nil, err
+       }
+       trace, line := readToken(line)
+       if trace != "Trace" {
+               return nil, fmt.Errorf("failed to parse header")
+       }
+       gover, line := readToken(line)
+       if !strings.HasPrefix(gover, "Go1.") {
+               return nil, fmt.Errorf("failed to parse header Go version")
+       }
+       rawv, err := strconv.ParseUint(gover[len("Go1."):], 10, 64)
+       if err != nil {
+               return nil, fmt.Errorf("failed to parse header Go version: %v", err)
+       }
+       v := version.Version(rawv)
+       if !v.Valid() {
+               return nil, fmt.Errorf("unknown or unsupported Go version 1.%d", v)
+       }
+       tr.v = v
+       tr.specs = v.Specs()
+       tr.names = event.Names(tr.specs)
+       for _, r := range line {
+               if !unicode.IsSpace(r) {
+                       return nil, fmt.Errorf("encountered unexpected non-space at the end of the header: %q", line)
+               }
+       }
+       return tr, nil
+}
+
+// Version returns the version of the trace that we're reading.
+func (r *TextReader) Version() version.Version {
+       return r.v
+}
+
+// ReadEvent reads and returns the next trace event in the text stream.
+func (r *TextReader) ReadEvent() (Event, error) {
+       line, err := r.nextLine()
+       if err != nil {
+               return Event{}, err
+       }
+       evStr, line := readToken(line)
+       ev, ok := r.names[evStr]
+       if !ok {
+               return Event{}, fmt.Errorf("unidentified event: %s", evStr)
+       }
+       spec := r.specs[ev]
+       args, err := readArgs(line, spec.Args)
+       if err != nil {
+               return Event{}, fmt.Errorf("reading args for %s: %v", evStr, err)
+       }
+       if spec.IsStack {
+               nFrames := int(args[1])
+               for i := 0; i < nFrames; i++ {
+                       line, err := r.nextLine()
+                       if err == io.EOF {
+                               return Event{}, fmt.Errorf("unexpected EOF while reading stack: args=%v", args)
+                       }
+                       if err != nil {
+                               return Event{}, err
+                       }
+                       frame, err := readArgs(line, frameFields)
+                       if err != nil {
+                               return Event{}, err
+                       }
+                       args = append(args, frame...)
+               }
+       }
+       var data []byte
+       if spec.HasData {
+               line, err := r.nextLine()
+               if err == io.EOF {
+                       return Event{}, fmt.Errorf("unexpected EOF while reading data for %s: args=%v", evStr, args)
+               }
+               if err != nil {
+                       return Event{}, err
+               }
+               data, err = readData(line)
+               if err != nil {
+                       return Event{}, err
+               }
+       }
+       return Event{
+               Version: r.v,
+               Ev:      ev,
+               Args:    args,
+               Data:    data,
+       }, nil
+}
+
+func (r *TextReader) nextLine() (string, error) {
+       for {
+               if !r.s.Scan() {
+                       if err := r.s.Err(); err != nil {
+                               return "", err
+                       }
+                       return "", io.EOF
+               }
+               txt := r.s.Text()
+               tok, _ := readToken(txt)
+               if tok == "" {
+                       continue // Empty line or comment.
+               }
+               return txt, nil
+       }
+}
+
+var frameFields = []string{"pc", "func", "file", "line"}
+
+func readArgs(s string, names []string) ([]uint64, error) {
+       var args []uint64
+       for _, name := range names {
+               arg, value, rest, err := readArg(s)
+               if err != nil {
+                       return nil, err
+               }
+               if arg != name {
+                       return nil, fmt.Errorf("expected argument %q, but got %q", name, arg)
+               }
+               args = append(args, value)
+               s = rest
+       }
+       for _, r := range s {
+               if !unicode.IsSpace(r) {
+                       return nil, fmt.Errorf("encountered unexpected non-space at the end of an event: %q", s)
+               }
+       }
+       return args, nil
+}
+
+func readArg(s string) (arg string, value uint64, rest string, err error) {
+       var tok string
+       tok, rest = readToken(s)
+       if len(tok) == 0 {
+               return "", 0, s, fmt.Errorf("no argument")
+       }
+       parts := strings.SplitN(tok, "=", 2)
+       if len(parts) < 2 {
+               return "", 0, s, fmt.Errorf("malformed argument: %q", tok)
+       }
+       arg = parts[0]
+       value, err = strconv.ParseUint(parts[1], 10, 64)
+       if err != nil {
+               return arg, value, s, fmt.Errorf("failed to parse argument value %q for arg %q", parts[1], parts[0])
+       }
+       return
+}
+
+func readToken(s string) (token, rest string) {
+       tkStart := -1
+       for i, r := range s {
+               if r == '#' {
+                       return "", ""
+               }
+               if !unicode.IsSpace(r) {
+                       tkStart = i
+                       break
+               }
+       }
+       if tkStart < 0 {
+               return "", ""
+       }
+       tkEnd := -1
+       for i, r := range s[tkStart:] {
+               if unicode.IsSpace(r) || r == '#' {
+                       tkEnd = i + tkStart
+                       break
+               }
+       }
+       if tkEnd < 0 {
+               return s[tkStart:], ""
+       }
+       return s[tkStart:tkEnd], s[tkEnd:]
+}
+
+func readData(line string) ([]byte, error) {
+       parts := strings.SplitN(line, "=", 2)
+       if len(parts) < 2 || strings.TrimSpace(parts[0]) != "data" {
+               return nil, fmt.Errorf("malformed data: %q", line)
+       }
+       data, err := strconv.Unquote(strings.TrimSpace(parts[1]))
+       if err != nil {
+               return nil, fmt.Errorf("failed to parse data: %q: %v", line, err)
+       }
+       return []byte(data), nil
+}
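The header handshake is easy to exercise in isolation. A sketch that feeds the reader only a header line; `Go1.22` is assumed here to be a valid version for this parser:

```go
package main

import (
	"fmt"
	"io"
	"log"
	"strings"

	"internal/trace/v2/raw"
)

func main() {
	// A text trace begins with a "Trace Go1.N" header line.
	tr, err := raw.NewTextReader(strings.NewReader("Trace Go1.22\n"))
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("version:", tr.Version()) // the parsed version

	// Nothing follows the header, so the stream is empty.
	if _, err := tr.ReadEvent(); err == io.EOF {
		fmt.Println("no events")
	}
}
```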
diff --git a/src/internal/trace/v2/raw/textwriter.go b/src/internal/trace/v2/raw/textwriter.go
new file mode 100644 (file)
index 0000000..367a80b
--- /dev/null
@@ -0,0 +1,39 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package raw
+
+import (
+       "fmt"
+       "io"
+
+       "internal/trace/v2/version"
+)
+
+// TextWriter emits the text format of a trace.
+type TextWriter struct {
+       w io.Writer
+       v version.Version
+}
+
+// NewTextWriter creates a new writer for the trace text format.
+func NewTextWriter(w io.Writer, v version.Version) (*TextWriter, error) {
+       _, err := fmt.Fprintf(w, "Trace Go1.%d\n", v)
+       if err != nil {
+               return nil, err
+       }
+       return &TextWriter{w: w, v: v}, nil
+}
+
+// WriteEvent writes a single event to the stream.
+func (w *TextWriter) WriteEvent(e Event) error {
+       // Check version.
+       if e.Version != w.v {
+               return fmt.Errorf("mismatched version between writer (go 1.%d) and event (go 1.%d)", w.v, e.Version)
+       }
+
+       // Write event.
+       _, err := fmt.Fprintln(w.w, e.String())
+       return err
+}
diff --git a/src/internal/trace/v2/raw/writer.go b/src/internal/trace/v2/raw/writer.go
new file mode 100644 (file)
index 0000000..80596eb
--- /dev/null
@@ -0,0 +1,75 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package raw
+
+import (
+       "encoding/binary"
+       "fmt"
+       "io"
+
+       "internal/trace/v2/event"
+       "internal/trace/v2/version"
+)
+
+// Writer emits the wire format of a trace.
+//
+// It may not reproduce, byte for byte, a trace produced by the
+// runtime, because the runtime may pad its LEB128 encoding in ways
+// that aren't necessary when the data is known up-front.
+type Writer struct {
+       w     io.Writer
+       buf   []byte
+       v     version.Version
+       specs []event.Spec
+}
+
+// NewWriter creates a new byte format writer.
+func NewWriter(w io.Writer, v version.Version) (*Writer, error) {
+       _, err := version.WriteHeader(w, v)
+       return &Writer{w: w, v: v, specs: v.Specs()}, err
+}
+
+// WriteEvent writes a single event to the trace wire format stream.
+func (w *Writer) WriteEvent(e Event) error {
+       // Check version.
+       if e.Version != w.v {
+               return fmt.Errorf("mismatched version between writer (go 1.%d) and event (go 1.%d)", w.v, e.Version)
+       }
+
+       // Write event header byte.
+       w.buf = append(w.buf, uint8(e.Ev))
+
+       // Write out all arguments.
+       spec := w.specs[e.Ev]
+       for _, arg := range e.Args[:len(spec.Args)] {
+               w.buf = binary.AppendUvarint(w.buf, arg)
+       }
+       if spec.IsStack {
+               frameArgs := e.Args[len(spec.Args):]
+               for i := 0; i < len(frameArgs); i++ {
+                       w.buf = binary.AppendUvarint(w.buf, frameArgs[i])
+               }
+       }
+
+       // Write out the length of the data.
+       if spec.HasData {
+               w.buf = binary.AppendUvarint(w.buf, uint64(len(e.Data)))
+       }
+
+       // Flush the buffered event header and args.
+       _, err := w.w.Write(w.buf)
+       w.buf = w.buf[:0]
+       if err != nil {
+               return err
+       }
+
+       // Write out data.
+       if spec.HasData {
+               _, err := w.w.Write(e.Data)
+               return err
+       }
+       return nil
+}
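The padding caveat in the Writer's doc comment is worth spelling out: LEB128 varints have non-minimal encodings, and binary.ReadUvarint accepts them, so a padded varint from the runtime and a minimal one from this writer decode to the same value. A small demonstration using only encoding/binary:

```go
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

func main() {
	// Minimal encoding of 300: two bytes.
	minimal := binary.AppendUvarint(nil, 300)

	// The same value with a redundant zero continuation group,
	// the kind of padding the runtime may emit.
	padded := []byte{0xac, 0x82, 0x00}

	for _, enc := range [][]byte{minimal, padded} {
		v, err := binary.ReadUvarint(bytes.NewReader(enc))
		fmt.Println(len(enc), "bytes ->", v, err) // both decode to 300
	}
}
```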
diff --git a/src/internal/trace/v2/reader.go b/src/internal/trace/v2/reader.go
new file mode 100644 (file)
index 0000000..b58cc6f
--- /dev/null
@@ -0,0 +1,190 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+       "bufio"
+       "fmt"
+       "io"
+       "slices"
+       "strings"
+
+       "internal/trace/v2/event/go122"
+       "internal/trace/v2/version"
+)
+
+// Reader reads a byte stream, validates it, and produces trace events.
+type Reader struct {
+       r           *bufio.Reader
+       lastTs      Time
+       gen         *generation
+       spill       *spilledBatch
+       frontier    []*batchCursor
+       cpuSamples  []cpuSample
+       order       ordering
+       emittedSync bool
+}
+
+// NewReader creates a new trace reader.
+func NewReader(r io.Reader) (*Reader, error) {
+       br := bufio.NewReader(r)
+       v, err := version.ReadHeader(br)
+       if err != nil {
+               return nil, err
+       }
+       if v != version.Go122 {
+               return nil, fmt.Errorf("unknown or unsupported version go 1.%d", v)
+       }
+       return &Reader{
+               r: br,
+               order: ordering{
+                       mStates:     make(map[ThreadID]*mState),
+                       pStates:     make(map[ProcID]*pState),
+                       gStates:     make(map[GoID]*gState),
+                       activeTasks: make(map[TaskID]taskState),
+               },
+               // Don't emit a sync event when we first go to emit events.
+               emittedSync: true,
+       }, nil
+}
+
+// ReadEvent reads a single event from the stream.
+//
+// If the stream has been exhausted, it returns an invalid
+// event and io.EOF.
+func (r *Reader) ReadEvent() (e Event, err error) {
+       // Go 1.22+ trace parsing algorithm.
+       //
+       // (1) Read in all the batches for the next generation from the stream.
+       //   (a) Use the size field in the header to quickly find all batches.
+       // (2) Parse out the strings, stacks, CPU samples, and timestamp conversion data.
+       // (3) Group each event batch by M, sorted by timestamp. (batchCursor contains the groups.)
+       // (4) Organize batchCursors in a min-heap, ordered by the timestamp of the next event for each M.
+       // (5) Try to advance the next event for the M at the top of the min-heap.
+       //   (a) On success, select that M.
+       //   (b) On failure, sort the min-heap and try to advance other Ms. Select the first M that advances.
+       //   (c) If there's nothing left to advance, goto (1).
+       // (6) Select the latest event for the selected M and get it ready to be returned.
+       // (7) Read the next event for the selected M and update the min-heap.
+       // (8) Return the selected event, goto (5) on the next call.
+
+       // Set us up to track the last timestamp and fix up
+       // the timestamp of any event that comes through.
+       defer func() {
+               if err != nil {
+                       return
+               }
+               if err = e.validateTableIDs(); err != nil {
+                       return
+               }
+               if e.base.time <= r.lastTs {
+                       e.base.time = r.lastTs + 1
+               }
+               r.lastTs = e.base.time
+       }()
+
+       // Check if we need to refresh the generation.
+       if len(r.frontier) == 0 && len(r.cpuSamples) == 0 {
+               if !r.emittedSync {
+                       r.emittedSync = true
+                       return syncEvent(r.gen.evTable, r.lastTs), nil
+               }
+               if r.gen != nil && r.spill == nil {
+                       // If we have a generation from the last read,
+                       // and there's nothing left in the frontier, and
+                       // there's no spilled batch, indicating that there's
+                       // no further generation, it means we're done.
+                       // Return io.EOF.
+                       return Event{}, io.EOF
+               }
+               // Read the next generation.
+               r.gen, r.spill, err = readGeneration(r.r, r.spill)
+               if err != nil {
+                       return Event{}, err
+               }
+
+               // Reset CPU samples cursor.
+               r.cpuSamples = r.gen.cpuSamples
+
+               // Reset frontier.
+               for m, batches := range r.gen.batches {
+                       bc := &batchCursor{m: m}
+                       ok, err := bc.nextEvent(batches, r.gen.freq)
+                       if err != nil {
+                               return Event{}, err
+                       }
+                       if !ok {
+                               // Turns out there aren't actually any events in these batches.
+                               continue
+                       }
+                       r.frontier = heapInsert(r.frontier, bc)
+               }
+
+               // Reset emittedSync.
+               r.emittedSync = false
+       }
+       refresh := func(i int) error {
+               bc := r.frontier[i]
+
+               // Refresh the cursor's event.
+               ok, err := bc.nextEvent(r.gen.batches[bc.m], r.gen.freq)
+               if err != nil {
+                       return err
+               }
+               if ok {
+                       // If we successfully refreshed, update the heap.
+                       heapUpdate(r.frontier, i)
+               } else {
+                       // There's nothing else to read. Delete this cursor from the frontier.
+                       r.frontier = heapRemove(r.frontier, i)
+               }
+               return nil
+       }
+       // Inject a CPU sample if it comes next.
+       if len(r.cpuSamples) != 0 {
+               if len(r.frontier) == 0 || r.cpuSamples[0].time < r.frontier[0].ev.time {
+                       e := r.cpuSamples[0].asEvent(r.gen.evTable)
+                       r.cpuSamples = r.cpuSamples[1:]
+                       return e, nil
+               }
+       }
+       // Try to advance the head of the frontier, which should have the minimum timestamp.
+       // This should be by far the most common case.
+       bc := r.frontier[0]
+       if ctx, ok, err := r.order.advance(&bc.ev, r.gen.evTable, bc.m, r.gen.gen); err != nil {
+               return Event{}, err
+       } else if ok {
+               e := Event{table: r.gen.evTable, ctx: ctx, base: bc.ev}
+               return e, refresh(0)
+       }
+       // Sort the min-heap. A sorted min-heap is still a min-heap,
+       // but now we can iterate over the rest and try to advance in
+       // order. This path should be rare.
+       slices.SortFunc(r.frontier, (*batchCursor).compare)
+       // Try to advance the rest of the frontier, in timestamp order.
+       for i := 1; i < len(r.frontier); i++ {
+               bc := r.frontier[i]
+               if ctx, ok, err := r.order.advance(&bc.ev, r.gen.evTable, bc.m, r.gen.gen); err != nil {
+                       return Event{}, err
+               } else if ok {
+                       e := Event{table: r.gen.evTable, ctx: ctx, base: bc.ev}
+                       return e, refresh(i)
+               }
+       }
+       return Event{}, fmt.Errorf("broken trace: failed to advance: frontier:\n[gen=%d]\n\n%s\n%s\n", r.gen.gen, dumpFrontier(r.frontier), dumpOrdering(&r.order))
+}
+
+func dumpFrontier(frontier []*batchCursor) string {
+       var sb strings.Builder
+       for _, bc := range frontier {
+               spec := go122.Specs()[bc.ev.typ]
+               fmt.Fprintf(&sb, "M %d [%s time=%d", bc.m, spec.Name, bc.ev.time)
+               for i, arg := range spec.Args[1:] {
+                       fmt.Fprintf(&sb, " %s=%d", arg, bc.ev.args[i])
+               }
+               fmt.Fprintf(&sb, "]\n")
+       }
+       return sb.String()
+}
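All of the generation, min-heap, and ordering machinery above stays hidden from consumers, who see the same two-call surface as the raw reader, with events arriving validated and with strictly increasing timestamps. A sketch of a consumer loop, mirroring the gotracevalidate command added later in this CL:

```go
package main

import (
	"io"
	"log"
	"os"

	"internal/trace/v2"
)

func main() {
	// NewReader checks the header; events then come out ordered,
	// validated, and with fixed-up timestamps.
	r, err := trace.NewReader(os.Stdin)
	if err != nil {
		log.Fatal(err)
	}
	for {
		ev, err := r.ReadEvent()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatal(err)
		}
		log.Println(ev.String())
	}
}
```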
diff --git a/src/internal/trace/v2/reader_test.go b/src/internal/trace/v2/reader_test.go
new file mode 100644 (file)
index 0000000..4f00002
--- /dev/null
@@ -0,0 +1,125 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace_test
+
+import (
+       "bytes"
+       "flag"
+       "fmt"
+       "io"
+       "os"
+       "path/filepath"
+       "strings"
+       "testing"
+
+       "internal/trace/v2"
+       "internal/trace/v2/raw"
+       "internal/trace/v2/testtrace"
+       "internal/trace/v2/version"
+)
+
+var (
+       logEvents  = flag.Bool("log-events", false, "whether to log high-level events; significantly slows down tests")
+       dumpTraces = flag.Bool("dump-traces", false, "dump traces even on success")
+)
+
+func TestReaderGolden(t *testing.T) {
+       matches, err := filepath.Glob("./testdata/tests/*.test")
+       if err != nil {
+               t.Fatalf("failed to glob for tests: %v", err)
+       }
+       for _, testPath := range matches {
+               testPath := testPath
+               testName, err := filepath.Rel("./testdata", testPath)
+               if err != nil {
+                       t.Fatalf("failed to relativize testdata path: %v", err)
+               }
+               t.Run(testName, func(t *testing.T) {
+                       tr, exp, err := testtrace.ParseFile(testPath)
+                       if err != nil {
+                               t.Fatalf("failed to parse test file at %s: %v", testPath, err)
+                       }
+                       testReader(t, tr, exp)
+               })
+       }
+}
+
+func testReader(t *testing.T, tr io.Reader, exp *testtrace.Expectation) {
+       r, err := trace.NewReader(tr)
+       if err != nil {
+               if err := exp.Check(err); err != nil {
+                       t.Error(err)
+               }
+               return
+       }
+       v := testtrace.NewValidator()
+       for {
+               ev, err := r.ReadEvent()
+               if err == io.EOF {
+                       break
+               }
+               if err != nil {
+                       if err := exp.Check(err); err != nil {
+                               t.Error(err)
+                       }
+                       return
+               }
+               if *logEvents {
+                       t.Log(ev.String())
+               }
+               if err := v.Event(ev); err != nil {
+                       t.Error(err)
+               }
+       }
+       if err := exp.Check(nil); err != nil {
+               t.Error(err)
+       }
+}
+
+func dumpTraceToText(t *testing.T, b []byte) string {
+       t.Helper()
+
+       br, err := raw.NewReader(bytes.NewReader(b))
+       if err != nil {
+               t.Fatalf("dumping trace: %v", err)
+       }
+       var sb strings.Builder
+       tw, err := raw.NewTextWriter(&sb, version.Go122)
+       if err != nil {
+               t.Fatalf("dumping trace: %v", err)
+       }
+       for {
+               ev, err := br.ReadEvent()
+               if err == io.EOF {
+                       break
+               }
+               if err != nil {
+                       t.Fatalf("dumping trace: %v", err)
+               }
+               if err := tw.WriteEvent(ev); err != nil {
+                       t.Fatalf("dumping trace: %v", err)
+               }
+       }
+       return sb.String()
+}
+
+func dumpTraceToFile(t *testing.T, testName string, stress bool, b []byte) string {
+       t.Helper()
+
+       desc := "default"
+       if stress {
+               desc = "stress"
+       }
+       name := fmt.Sprintf("%s.%s.trace.", testName, desc)
+       f, err := os.CreateTemp("", name)
+       if err != nil {
+               t.Fatalf("creating temp file: %v", err)
+       }
+       defer f.Close()
+       if _, err := io.Copy(f, bytes.NewReader(b)); err != nil {
+               t.Fatalf("writing trace dump to %q: %v", f.Name(), err)
+       }
+       return f.Name()
+}
diff --git a/src/internal/trace/v2/resources.go b/src/internal/trace/v2/resources.go
new file mode 100644 (file)
index 0000000..f49696f
--- /dev/null
@@ -0,0 +1,274 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import "fmt"
+
+// ThreadID is the runtime-internal M structure's ID. This is unique
+// for each OS thread.
+type ThreadID int64
+
+// NoThread indicates that the relevant events don't correspond to any
+// thread in particular.
+const NoThread = ThreadID(-1)
+
+// ProcID is the runtime-internal P structure's id field. This is unique
+// for each P.
+type ProcID int64
+
+// NoProc indicates that the relevant events don't correspond to any
+// P in particular.
+const NoProc = ProcID(-1)
+
+// GoID is the runtime-internal G structure's goid field. This is unique
+// for each goroutine.
+type GoID int64
+
+// NoGoroutine indicates that the relevant events don't correspond to any
+// goroutine in particular.
+const NoGoroutine = GoID(-1)
+
+// GoState represents the state of a goroutine.
+//
+// New GoStates may be added in the future. Users of this type must be robust
+// to that possibility.
+type GoState uint8
+
+const (
+       GoUndetermined GoState = iota // No information is known about the goroutine.
+       GoNotExist                    // Goroutine does not exist.
+       GoRunnable                    // Goroutine is runnable but not running.
+       GoRunning                     // Goroutine is running.
+       GoWaiting                     // Goroutine is waiting on something to happen.
+       GoSyscall                     // Goroutine is in a system call.
+)
+
+// Executing returns true if the state indicates that the goroutine is executing
+// and bound to its thread.
+func (s GoState) Executing() bool {
+       return s == GoRunning || s == GoSyscall
+}
+
+// String returns a human-readable representation of a GoState.
+//
+// The format of the returned string is for debugging purposes and is subject to change.
+func (s GoState) String() string {
+       switch s {
+       case GoUndetermined:
+               return "Undetermined"
+       case GoNotExist:
+               return "NotExist"
+       case GoRunnable:
+               return "Runnable"
+       case GoRunning:
+               return "Running"
+       case GoWaiting:
+               return "Waiting"
+       case GoSyscall:
+               return "Syscall"
+       }
+       return "Bad"
+}
+
+// ProcState represents the state of a proc.
+//
+// New ProcStates may be added in the future. Users of this type must be robust
+// to that possibility.
+type ProcState uint8
+
+const (
+       ProcUndetermined ProcState = iota // No information is known about the proc.
+       ProcNotExist                      // Proc does not exist.
+       ProcRunning                       // Proc is running.
+       ProcIdle                          // Proc is idle.
+)
+
+// Executing returns true if the state indicates that the proc is executing
+// and bound to its thread.
+func (s ProcState) Executing() bool {
+       return s == ProcRunning
+}
+
+// String returns a human-readable representation of a ProcState.
+//
+// The format of the returned string is for debugging purposes and is subject to change.
+func (s ProcState) String() string {
+       switch s {
+       case ProcUndetermined:
+               return "Undetermined"
+       case ProcNotExist:
+               return "NotExist"
+       case ProcRunning:
+               return "Running"
+       case ProcIdle:
+               return "Idle"
+       }
+       return "Bad"
+}
+
+// ResourceKind indicates a kind of resource that has a state machine.
+//
+// New ResourceKinds may be added in the future. Users of this type must be robust
+// to that possibility.
+type ResourceKind uint8
+
+const (
+       ResourceNone      ResourceKind = iota // No resource.
+       ResourceGoroutine                     // Goroutine.
+       ResourceProc                          // Proc.
+       ResourceThread                        // Thread.
+)
+
+// String returns a human-readable representation of a ResourceKind.
+//
+// The format of the returned string is for debugging purposes and is subject to change.
+func (r ResourceKind) String() string {
+       switch r {
+       case ResourceNone:
+               return "None"
+       case ResourceGoroutine:
+               return "Goroutine"
+       case ResourceProc:
+               return "Proc"
+       case ResourceThread:
+               return "Thread"
+       }
+       return "Bad"
+}
+
+// ResourceID represents a generic resource ID.
+type ResourceID struct {
+       // Kind is the kind of resource this ID is for.
+       Kind ResourceKind
+       id   int64
+}
+
+// MakeResourceID creates a general resource ID from a specific resource's ID.
+func MakeResourceID[T interface{ GoID | ProcID | ThreadID }](id T) ResourceID {
+       var rd ResourceID
+       var a any = id
+       switch a.(type) {
+       case GoID:
+               rd.Kind = ResourceGoroutine
+       case ProcID:
+               rd.Kind = ResourceProc
+       case ThreadID:
+               rd.Kind = ResourceThread
+       }
+       rd.id = int64(id)
+       return rd
+}
+
+// Goroutine obtains a GoID from the resource ID.
+//
+// r.Kind must be ResourceGoroutine or this function will panic.
+func (r ResourceID) Goroutine() GoID {
+       if r.Kind != ResourceGoroutine {
+               panic(fmt.Sprintf("attempted to get GoID from %s resource ID", r.Kind))
+       }
+       return GoID(r.id)
+}
+
+// Proc obtains a ProcID from the resource ID.
+//
+// r.Kind must be ResourceProc or this function will panic.
+func (r ResourceID) Proc() ProcID {
+       if r.Kind != ResourceProc {
+               panic(fmt.Sprintf("attempted to get ProcID from %s resource ID", r.Kind))
+       }
+       return ProcID(r.id)
+}
+
+// Thread obtains a ThreadID from the resource ID.
+//
+// r.Kind must be ResourceThread or this function will panic.
+func (r ResourceID) Thread() ThreadID {
+       if r.Kind != ResourceThread {
+               panic(fmt.Sprintf("attempted to get ThreadID from %s resource ID", r.Kind))
+       }
+       return ThreadID(r.id)
+}
+
+// String returns a human-readable string representation of the ResourceID.
+//
+// This representation is subject to change and is intended primarily for debugging.
+func (r ResourceID) String() string {
+       if r.Kind == ResourceNone {
+               return r.Kind.String()
+       }
+       return fmt.Sprintf("%s(%d)", r.Kind, r.id)
+}
+
+// StateTransition provides details about a StateTransition event.
+type StateTransition struct {
+       // Resource is the resource this state transition is for.
+       Resource ResourceID
+
+       // Reason is a human-readable reason for the state transition.
+       Reason string
+
+       // Stack is the stack trace of the resource making the state transition.
+       //
+       // This is distinct from the result (Event).Stack because it pertains to
+       // the transitioning resource, not any of the ones executing the event
+       // this StateTransition came from.
+       //
+       // An example of this difference is the NotExist -> Runnable transition for
+       // goroutines, which indicates goroutine creation. In this particular case,
+       // a Stack here would refer to the starting stack of the new goroutine, and
+       // an (Event).Stack would refer to the stack trace of whoever created the
+       // goroutine.
+       Stack Stack
+
+       // The actual transition data. Stored in a neutral form so that
+       // we don't need fields for every kind of resource.
+       id       int64
+       oldState uint8
+       newState uint8
+}
+
+func goStateTransition(id GoID, from, to GoState) StateTransition {
+       return StateTransition{
+               Resource: ResourceID{Kind: ResourceGoroutine, id: int64(id)},
+               oldState: uint8(from),
+               newState: uint8(to),
+       }
+}
+
+func procStateTransition(id ProcID, from, to ProcState) StateTransition {
+       return StateTransition{
+               Resource: ResourceID{Kind: ResourceProc, id: int64(id)},
+               oldState: uint8(from),
+               newState: uint8(to),
+       }
+}
+
+// Goroutine returns the state transition for a goroutine.
+//
+// Transitions to and from states that are Executing are special in that
+// they change the future execution context. In other words, future events
+// on the same thread will feature the same goroutine until it stops running.
+//
+// Panics if d.Resource.Kind is not ResourceGoroutine.
+func (d StateTransition) Goroutine() (from, to GoState) {
+       if d.Resource.Kind != ResourceGoroutine {
+               panic("Goroutine called on non-Goroutine state transition")
+       }
+       return GoState(d.oldState), GoState(d.newState)
+}
+
+// Proc returns the state transition for a proc.
+//
+// Transitions to and from states that are Executing are special in that
+// they change the future execution context. In other words, future events
+// on the same thread will feature the same proc until it stops running.
+//
+// Panics if d.Resource.Kind is not ResourceProc.
+func (d StateTransition) Proc() (from, to ProcState) {
+       if d.Resource.Kind != ResourceProc {
+               panic("Proc called on non-Proc state transition")
+       }
+       return ProcState(d.oldState), ProcState(d.newState)
+}
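As a small example of how these pieces compose, here is a sketch of a helper (hypothetical, not part of this CL) that could sit alongside this file and classify a goroutine transition using only the accessors above:

```go
package trace

// describeGoTransition is an illustrative helper that classifies a
// goroutine state transition using the GoState accessors.
func describeGoTransition(st StateTransition) string {
	from, to := st.Goroutine() // panics unless Kind is ResourceGoroutine
	switch {
	case !from.Executing() && to.Executing():
		return "scheduled in" // future events on this thread are for this goroutine
	case from.Executing() && !to.Executing():
		return "scheduled out"
	case from == GoNotExist && to == GoRunnable:
		return "created"
	default:
		return "other"
	}
}
```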
diff --git a/src/internal/trace/v2/testdata/README.md b/src/internal/trace/v2/testdata/README.md
new file mode 100644 (file)
index 0000000..0fae9ca
--- /dev/null
@@ -0,0 +1,38 @@
+# Trace test data
+
+## Trace golden tests
+
+Trace tests can be generated by running
+
+```
+go generate .
+```
+
+with the relevant toolchain in this directory.
+
+This will put the tests into a `tests` directory where the trace reader
+tests will find them.
+
+A subset of tests can be regenerated by specifying a regexp pattern for
+the names of tests to generate in the `GOTRACETEST` environment
+variable.
+Test names are defined as the name of the `.go` file that generates the
+trace, but with the `.go` extension removed.
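+
+For example, one might regenerate just the syscall-stealing tests with
+an invocation like:
+
+```
+GOTRACETEST='syscall-steal' go generate .
+```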
+
+## Trace test programs
+
+The trace test programs in the `testprog` directory generate traces to
+stdout.
+Otherwise they're just normal programs.
+
+## Trace debug commands
+
+The `cmd` directory contains helpful tools for debugging traces.
+
+* `gotraceraw` parses traces without validation.
+  It can produce a text version of the trace wire format, or convert
+  the text format back into bytes.
+* `gotracevalidate` parses traces and validates them.
+  It performs more rigorous checks than the parser does on its own,
+  which helps for debugging the parser as well.
+  In fact, it performs the exact same checks that the tests do.
diff --git a/src/internal/trace/v2/testdata/cmd/gotraceraw/main.go b/src/internal/trace/v2/testdata/cmd/gotraceraw/main.go
new file mode 100644 (file)
index 0000000..a0d595d
--- /dev/null
@@ -0,0 +1,88 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+       "flag"
+       "fmt"
+       "io"
+       "log"
+       "os"
+
+       "internal/trace/v2/raw"
+       "internal/trace/v2/version"
+)
+
+func init() {
+       flag.Usage = func() {
+               fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [mode]\n", os.Args[0])
+               fmt.Fprintf(flag.CommandLine.Output(), "\n")
+               fmt.Fprintf(flag.CommandLine.Output(), "Supported modes:")
+               fmt.Fprintf(flag.CommandLine.Output(), "\n")
+               fmt.Fprintf(flag.CommandLine.Output(), "* text2bytes - converts a text format trace to bytes\n")
+               fmt.Fprintf(flag.CommandLine.Output(), "* bytes2text - converts a byte format trace to text\n")
+               fmt.Fprintf(flag.CommandLine.Output(), "\n")
+               flag.PrintDefaults()
+       }
+       log.SetFlags(0)
+}
+
+func main() {
+       flag.Parse()
+       if narg := flag.NArg(); narg != 1 {
+               log.Fatal("expected exactly one positional argument: the mode to operate in; see -h output")
+       }
+
+       r := os.Stdin
+       w := os.Stdout
+
+       var tr traceReader
+       var tw traceWriter
+       var err error
+
+       switch flag.Arg(0) {
+       case "text2bytes":
+               tr, err = raw.NewTextReader(r)
+               if err != nil {
+                       log.Fatal(err)
+               }
+               tw, err = raw.NewWriter(w, tr.Version())
+               if err != nil {
+                       log.Fatal(err)
+               }
+       case "bytes2text":
+               tr, err = raw.NewReader(r)
+               if err != nil {
+                       log.Fatal(err)
+               }
+               tw, err = raw.NewTextWriter(w, tr.Version())
+               if err != nil {
+                       log.Fatal(err)
+               }
+       default:
+               log.Fatalf("unknown mode %q; see -h output", flag.Arg(0))
+       }
+       for {
+               ev, err := tr.ReadEvent()
+               if err == io.EOF {
+                       break
+               }
+               if err != nil {
+                       log.Fatal(err)
+               }
+               if err := tw.WriteEvent(ev); err != nil {
+                       log.Fatal(err)
+               }
+       }
+}
+
+type traceReader interface {
+       Version() version.Version
+       ReadEvent() (raw.Event, error)
+}
+
+type traceWriter interface {
+       WriteEvent(raw.Event) error
+}
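Run from within the Go tree (the internal packages aren't importable elsewhere), a typical round trip through this tool looks something like the following; the file names are illustrative:

```
go run . bytes2text < trace.bin > trace.txt
go run . text2bytes < trace.txt > trace.bin
```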
diff --git a/src/internal/trace/v2/testdata/cmd/gotracevalidate/main.go b/src/internal/trace/v2/testdata/cmd/gotracevalidate/main.go
new file mode 100644 (file)
index 0000000..944d19f
--- /dev/null
@@ -0,0 +1,53 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+       "flag"
+       "fmt"
+       "io"
+       "log"
+       "os"
+
+       "internal/trace/v2"
+       "internal/trace/v2/testtrace"
+)
+
+func init() {
+       flag.Usage = func() {
+               fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s\n", os.Args[0])
+               fmt.Fprintf(flag.CommandLine.Output(), "\n")
+               fmt.Fprintf(flag.CommandLine.Output(), "Accepts a trace at stdin and validates it.\n")
+               flag.PrintDefaults()
+       }
+       log.SetFlags(0)
+}
+
+var logEvents = flag.Bool("log-events", false, "whether to log events")
+
+func main() {
+       flag.Parse()
+
+       r, err := trace.NewReader(os.Stdin)
+       if err != nil {
+               log.Fatal(err)
+       }
+       v := testtrace.NewValidator()
+       for {
+               ev, err := r.ReadEvent()
+               if err == io.EOF {
+                       break
+               }
+               if err != nil {
+                       log.Fatal(err)
+               }
+               if *logEvents {
+                       log.Println(ev.String())
+               }
+               if err := v.Event(ev); err != nil {
+                       log.Fatal(err)
+               }
+       }
+}
diff --git a/src/internal/trace/v2/testdata/generate.go b/src/internal/trace/v2/testdata/generate.go
new file mode 100644 (file)
index 0000000..c0658b2
--- /dev/null
@@ -0,0 +1,6 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:generate go run mktests.go
+package testdata
diff --git a/src/internal/trace/v2/testdata/generators/go122-confuse-seq-across-generations.go b/src/internal/trace/v2/testdata/generators/go122-confuse-seq-across-generations.go
new file mode 100644 (file)
index 0000000..f618c41
--- /dev/null
@@ -0,0 +1,62 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Regression test for an issue found in development.
+//
+// The core of the issue is that if generation counters
+// aren't considered as part of sequence numbers, then
+// it's possible to accidentally advance without a
+// GoStatus event.
+//
+// The situation is one in which it just so happens that
+// an event on the frontier for a following generation
+// has a sequence number exactly one higher than the last
+// sequence number for e.g. a goroutine in the previous
+// generation. The parser should wait to find a GoStatus
+// event before advancing into the next generation at all.
+// It turns out this situation is pretty rare; the GoStatus
+// event almost always shows up first in practice. But it
+// can and did happen.
+
+package main
+
+import (
+       "internal/trace/v2"
+       "internal/trace/v2/event/go122"
+       testgen "internal/trace/v2/internal/testgen/go122"
+)
+
+func main() {
+       testgen.Main(gen)
+}
+
+func gen(t *testgen.Trace) {
+       g1 := t.Generation(1)
+
+       // A running goroutine blocks.
+       b10 := g1.Batch(trace.ThreadID(0), 0)
+       b10.Event("ProcStatus", trace.ProcID(0), go122.ProcRunning)
+       b10.Event("GoStatus", trace.GoID(1), trace.ThreadID(0), go122.GoRunning)
+       b10.Event("GoStop", "whatever", testgen.NoStack)
+
+       // The running goroutine gets unblocked.
+       b11 := g1.Batch(trace.ThreadID(1), 0)
+       b11.Event("ProcStatus", trace.ProcID(1), go122.ProcRunning)
+       b11.Event("GoStart", trace.GoID(1), testgen.Seq(1))
+       b11.Event("GoStop", "whatever", testgen.NoStack)
+
+       g2 := t.Generation(2)
+
+       // Start running the goroutine, but later.
+       b21 := g2.Batch(trace.ThreadID(1), 3)
+       b21.Event("ProcStatus", trace.ProcID(1), go122.ProcRunning)
+       b21.Event("GoStart", trace.GoID(1), testgen.Seq(2))
+
+       // The goroutine starts running, then stops, then starts again.
+       b20 := g2.Batch(trace.ThreadID(0), 5)
+       b20.Event("ProcStatus", trace.ProcID(0), go122.ProcRunning)
+       b20.Event("GoStatus", trace.GoID(1), trace.ThreadID(0), go122.GoRunnable)
+       b20.Event("GoStart", trace.GoID(1), testgen.Seq(1))
+       b20.Event("GoStop", "whatever", testgen.NoStack)
+}
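The rule this generator pins down is that a sequence number is only meaningful relative to its own generation. A minimal sketch of the advancement guard, with hypothetical names (the parser's real bookkeeping lives in order.go):

```go
package main

import "fmt"

// goSeq is a hypothetical version of the per-goroutine sequence
// counter, qualified by the generation it was last updated in.
type goSeq struct {
	gen uint64 // generation the counter belongs to
	seq uint64 // last sequence number observed
}

// canAdvance reports whether an event with sequence seq in generation
// gen may advance the goroutine. Without the gen check, a seq exactly
// one past the previous generation's counter would advance spuriously,
// skipping the required GoStatus event.
func (s goSeq) canAdvance(gen, seq uint64) bool {
	return gen == s.gen && seq == s.seq+1
}

func main() {
	s := goSeq{gen: 1, seq: 1}
	fmt.Println(s.canAdvance(2, 2)) // false: must wait for GoStatus in gen 2
	fmt.Println(s.canAdvance(1, 2)) // true: next event in the same generation
}
```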
diff --git a/src/internal/trace/v2/testdata/generators/go122-go-create-without-running-g.go b/src/internal/trace/v2/testdata/generators/go122-go-create-without-running-g.go
new file mode 100644 (file)
index 0000000..b693245
--- /dev/null
@@ -0,0 +1,33 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Regression test for an issue found in development.
+//
+// GoCreate events can happen on bare Ps in a variety of situations, and
+// an earlier version of the parser assumed this wasn't possible. At
+// the time of writing, one such example is goroutines created by expiring
+// timers.
+
+package main
+
+import (
+       "internal/trace/v2"
+       "internal/trace/v2/event/go122"
+       testgen "internal/trace/v2/internal/testgen/go122"
+)
+
+func main() {
+       testgen.Main(gen)
+}
+
+func gen(t *testgen.Trace) {
+       g1 := t.Generation(1)
+
+       // A goroutine gets created on a running P, then starts running.
+       b0 := g1.Batch(trace.ThreadID(0), 0)
+       b0.Event("ProcStatus", trace.ProcID(0), go122.ProcRunning)
+       b0.Event("GoCreate", trace.GoID(5), testgen.NoStack, testgen.NoStack)
+       b0.Event("GoStart", trace.GoID(5), testgen.Seq(1))
+       b0.Event("GoStop", "whatever", testgen.NoStack)
+}
diff --git a/src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-gen-boundary-bare-m.go b/src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-gen-boundary-bare-m.go
new file mode 100644 (file)
index 0000000..f4c9f6e
--- /dev/null
@@ -0,0 +1,33 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests syscall P stealing at a generation boundary.
+
+package main
+
+import (
+       "internal/trace/v2"
+       "internal/trace/v2/event/go122"
+       testgen "internal/trace/v2/internal/testgen/go122"
+)
+
+func main() {
+       testgen.Main(gen)
+}
+
+func gen(t *testgen.Trace) {
+       g := t.Generation(1)
+
+       // One goroutine is exiting a syscall. It already
+       // acquired a new P.
+       b0 := g.Batch(trace.ThreadID(0), 0)
+       b0.Event("ProcStatus", trace.ProcID(1), go122.ProcRunning)
+       b0.Event("GoStatus", trace.GoID(1), trace.ThreadID(0), go122.GoSyscall)
+       b0.Event("GoSyscallEndBlocked")
+
+       // A bare M stole the goroutine's P at the generation boundary.
+       b1 := g.Batch(trace.ThreadID(1), 0)
+       b1.Event("ProcStatus", trace.ProcID(0), go122.ProcSyscallAbandoned)
+       b1.Event("ProcSteal", trace.ProcID(0), testgen.Seq(1), trace.ThreadID(0))
+}
diff --git a/src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-gen-boundary-reacquire-new-proc-bare-m.go b/src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-gen-boundary-reacquire-new-proc-bare-m.go
new file mode 100644 (file)
index 0000000..e6023ba
--- /dev/null
@@ -0,0 +1,34 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests syscall P stealing at a generation boundary.
+
+package main
+
+import (
+       "internal/trace/v2"
+       "internal/trace/v2/event/go122"
+       testgen "internal/trace/v2/internal/testgen/go122"
+)
+
+func main() {
+       testgen.Main(gen)
+}
+
+func gen(t *testgen.Trace) {
+       g := t.Generation(1)
+
+       // One goroutine is exiting a syscall. It already
+       // acquired a new P.
+       b0 := g.Batch(trace.ThreadID(0), 0)
+       b0.Event("GoStatus", trace.GoID(1), trace.ThreadID(0), go122.GoSyscall)
+       b0.Event("ProcStatus", trace.ProcID(1), go122.ProcIdle)
+       b0.Event("ProcStart", trace.ProcID(1), testgen.Seq(1))
+       b0.Event("GoSyscallEndBlocked")
+
+       // A bare M stole the goroutine's P at the generation boundary.
+       b1 := g.Batch(trace.ThreadID(1), 0)
+       b1.Event("ProcStatus", trace.ProcID(0), go122.ProcSyscallAbandoned)
+       b1.Event("ProcSteal", trace.ProcID(0), testgen.Seq(1), trace.ThreadID(0))
+}
diff --git a/src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-gen-boundary-reacquire-new-proc.go b/src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-gen-boundary-reacquire-new-proc.go
new file mode 100644 (file)
index 0000000..2232dca
--- /dev/null
@@ -0,0 +1,36 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests syscall P stealing at a generation boundary.
+
+package main
+
+import (
+       "internal/trace/v2"
+       "internal/trace/v2/event/go122"
+       testgen "internal/trace/v2/internal/testgen/go122"
+)
+
+func main() {
+       testgen.Main(gen)
+}
+
+func gen(t *testgen.Trace) {
+       g := t.Generation(1)
+
+       // One goroutine is exiting a syscall. It already
+       // acquired a new P.
+       b0 := g.Batch(trace.ThreadID(0), 0)
+       b0.Event("GoStatus", trace.GoID(1), trace.ThreadID(0), go122.GoSyscall)
+       b0.Event("ProcStatus", trace.ProcID(1), go122.ProcIdle)
+       b0.Event("ProcStart", trace.ProcID(1), testgen.Seq(1))
+       b0.Event("GoSyscallEndBlocked")
+
+       // A running goroutine stole P0 at the generation boundary.
+       b1 := g.Batch(trace.ThreadID(1), 0)
+       b1.Event("ProcStatus", trace.ProcID(2), go122.ProcRunning)
+       b1.Event("GoStatus", trace.GoID(2), trace.ThreadID(1), go122.GoRunning)
+       b1.Event("ProcStatus", trace.ProcID(0), go122.ProcSyscallAbandoned)
+       b1.Event("ProcSteal", trace.ProcID(0), testgen.Seq(1), trace.ThreadID(0))
+}
diff --git a/src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-gen-boundary.go b/src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-gen-boundary.go
new file mode 100644 (file)
index 0000000..710827a
--- /dev/null
@@ -0,0 +1,35 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests syscall P stealing at a generation boundary.
+
+package main
+
+import (
+       "internal/trace/v2"
+       "internal/trace/v2/event/go122"
+       testgen "internal/trace/v2/internal/testgen/go122"
+)
+
+func main() {
+       testgen.Main(gen)
+}
+
+func gen(t *testgen.Trace) {
+       g := t.Generation(1)
+
+       // One goroutine is exiting a syscall. It already
+       // acquired a new P.
+       b0 := g.Batch(trace.ThreadID(0), 0)
+       b0.Event("ProcStatus", trace.ProcID(1), go122.ProcRunning)
+       b0.Event("GoStatus", trace.GoID(1), trace.ThreadID(0), go122.GoSyscall)
+       b0.Event("GoSyscallEndBlocked")
+
+       // A running goroutine stole P0 at the generation boundary.
+       b1 := g.Batch(trace.ThreadID(1), 0)
+       b1.Event("ProcStatus", trace.ProcID(2), go122.ProcRunning)
+       b1.Event("GoStatus", trace.GoID(2), trace.ThreadID(1), go122.GoRunning)
+       b1.Event("ProcStatus", trace.ProcID(0), go122.ProcSyscallAbandoned)
+       b1.Event("ProcSteal", trace.ProcID(0), testgen.Seq(1), trace.ThreadID(0))
+}
diff --git a/src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-reacquire-new-proc-bare-m.go b/src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-reacquire-new-proc-bare-m.go
new file mode 100644 (file)
index 0000000..95a549c
--- /dev/null
@@ -0,0 +1,34 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests syscall P stealing.
+
+package main
+
+import (
+       "internal/trace/v2"
+       "internal/trace/v2/event/go122"
+       testgen "internal/trace/v2/internal/testgen/go122"
+)
+
+func main() {
+       testgen.Main(gen)
+}
+
+func gen(t *testgen.Trace) {
+       g := t.Generation(1)
+
+       // One goroutine enters a syscall, grabs a P, and starts running.
+       b0 := g.Batch(trace.ThreadID(0), 0)
+       b0.Event("ProcStatus", trace.ProcID(1), go122.ProcIdle)
+       b0.Event("ProcStatus", trace.ProcID(0), go122.ProcRunning)
+       b0.Event("GoStatus", trace.GoID(1), trace.ThreadID(0), go122.GoRunning)
+       b0.Event("GoSyscallBegin", testgen.NoStack)
+       b0.Event("ProcStart", trace.ProcID(1), testgen.Seq(1))
+       b0.Event("GoSyscallEndBlocked")
+
+       // A bare M steals the goroutine's P.
+       b1 := g.Batch(trace.ThreadID(1), 0)
+       b1.Event("ProcSteal", trace.ProcID(0), testgen.Seq(1), trace.ThreadID(0))
+}
diff --git a/src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-reacquire-new-proc.go b/src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-reacquire-new-proc.go
new file mode 100644 (file)
index 0000000..774c04d
--- /dev/null
@@ -0,0 +1,36 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests syscall P stealing.
+
+package main
+
+import (
+       "internal/trace/v2"
+       "internal/trace/v2/event/go122"
+       testgen "internal/trace/v2/internal/testgen/go122"
+)
+
+func main() {
+       testgen.Main(gen)
+}
+
+func gen(t *testgen.Trace) {
+       g := t.Generation(1)
+
+       // One goroutine enters a syscall, grabs a P, and starts running.
+       b0 := g.Batch(trace.ThreadID(0), 0)
+       b0.Event("ProcStatus", trace.ProcID(1), go122.ProcIdle)
+       b0.Event("ProcStatus", trace.ProcID(0), go122.ProcRunning)
+       b0.Event("GoStatus", trace.GoID(1), trace.ThreadID(0), go122.GoRunning)
+       b0.Event("GoSyscallBegin", testgen.NoStack)
+       b0.Event("ProcStart", trace.ProcID(1), testgen.Seq(1))
+       b0.Event("GoSyscallEndBlocked")
+
+       // A running goroutine steals proc 0.
+       b1 := g.Batch(trace.ThreadID(1), 0)
+       b1.Event("ProcStatus", trace.ProcID(2), go122.ProcRunning)
+       b1.Event("GoStatus", trace.GoID(2), trace.ThreadID(1), go122.GoRunning)
+       b1.Event("ProcSteal", trace.ProcID(0), testgen.Seq(1), trace.ThreadID(0))
+}
diff --git a/src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-simple-bare-m.go b/src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-simple-bare-m.go
new file mode 100644 (file)
index 0000000..c463acb
--- /dev/null
@@ -0,0 +1,32 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests syscall P stealing.
+
+package main
+
+import (
+       "internal/trace/v2"
+       "internal/trace/v2/event/go122"
+       testgen "internal/trace/v2/internal/testgen/go122"
+)
+
+func main() {
+       testgen.Main(gen)
+}
+
+func gen(t *testgen.Trace) {
+       g := t.Generation(1)
+
+       // One goroutine enters a syscall.
+       b0 := g.Batch(trace.ThreadID(0), 0)
+       b0.Event("ProcStatus", trace.ProcID(0), go122.ProcRunning)
+       b0.Event("GoStatus", trace.GoID(1), trace.ThreadID(0), go122.GoRunning)
+       b0.Event("GoSyscallBegin", testgen.NoStack)
+       b0.Event("GoSyscallEndBlocked")
+
+       // A bare M steals the goroutine's P.
+       b1 := g.Batch(trace.ThreadID(1), 0)
+       b1.Event("ProcSteal", trace.ProcID(0), testgen.Seq(1), trace.ThreadID(0))
+}
diff --git a/src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-simple.go b/src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-simple.go
new file mode 100644 (file)
index 0000000..1b51781
--- /dev/null
@@ -0,0 +1,34 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests syscall P stealing.
+
+package main
+
+import (
+       "internal/trace/v2"
+       "internal/trace/v2/event/go122"
+       testgen "internal/trace/v2/internal/testgen/go122"
+)
+
+func main() {
+       testgen.Main(gen)
+}
+
+func gen(t *testgen.Trace) {
+       g := t.Generation(1)
+
+       // One goroutine enters a syscall.
+       b0 := g.Batch(trace.ThreadID(0), 0)
+       b0.Event("ProcStatus", trace.ProcID(0), go122.ProcRunning)
+       b0.Event("GoStatus", trace.GoID(1), trace.ThreadID(0), go122.GoRunning)
+       b0.Event("GoSyscallBegin", testgen.NoStack)
+       b0.Event("GoSyscallEndBlocked")
+
+       // A running goroutine steals proc 0.
+       b1 := g.Batch(trace.ThreadID(1), 0)
+       b1.Event("ProcStatus", trace.ProcID(2), go122.ProcRunning)
+       b1.Event("GoStatus", trace.GoID(2), trace.ThreadID(1), go122.GoRunning)
+       b1.Event("ProcSteal", trace.ProcID(0), testgen.Seq(1), trace.ThreadID(0))
+}
diff --git a/src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-sitting-in-syscall.go b/src/internal/trace/v2/testdata/generators/go122-syscall-steal-proc-sitting-in-syscall.go
new file mode 100644 (file)
index 0000000..870f8f6
--- /dev/null
@@ -0,0 +1,32 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests syscall P stealing from a goroutine and thread
+// that have been in a syscall the entire generation.
+
+package main
+
+import (
+       "internal/trace/v2"
+       "internal/trace/v2/event/go122"
+       testgen "internal/trace/v2/internal/testgen/go122"
+)
+
+func main() {
+       testgen.Main(gen)
+}
+
+func gen(t *testgen.Trace) {
+       g := t.Generation(1)
+
+       // Steal proc from a goroutine that's been blocked
+       // in a syscall the entire generation.
+       b0 := g.Batch(trace.ThreadID(0), 0)
+       b0.Event("ProcStatus", trace.ProcID(0), go122.ProcSyscallAbandoned)
+       b0.Event("ProcSteal", trace.ProcID(0), testgen.Seq(1), trace.ThreadID(1))
+
+       // Status event for a goroutine blocked in a syscall for the entire generation.
+       bz := g.Batch(trace.NoThread, 0)
+       bz.Event("GoStatus", trace.GoID(1), trace.ThreadID(1), go122.GoSyscall)
+}
diff --git a/src/internal/trace/v2/testdata/generators/go122-task-across-generations.go b/src/internal/trace/v2/testdata/generators/go122-task-across-generations.go
new file mode 100644 (file)
index 0000000..94e9933
--- /dev/null
@@ -0,0 +1,41 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Regression test for an issue found in development.
+//
+// The issue is that EvUserTaskEnd events don't carry the
+// task name with them, so the parser needs to track that
+// information. But if the parser just tracks the string ID
+// and not the string itself, that string ID may not be valid
+// for use in future generations.
+
+package main
+
+import (
+       "internal/trace/v2"
+       "internal/trace/v2/event/go122"
+       testgen "internal/trace/v2/internal/testgen/go122"
+)
+
+func main() {
+       testgen.Main(gen)
+}
+
+func gen(t *testgen.Trace) {
+       g1 := t.Generation(1)
+
+       // A running goroutine emits a task begin.
+       b1 := g1.Batch(trace.ThreadID(0), 0)
+       b1.Event("ProcStatus", trace.ProcID(0), go122.ProcRunning)
+       b1.Event("GoStatus", trace.GoID(1), trace.ThreadID(0), go122.GoRunning)
+       b1.Event("UserTaskBegin", trace.TaskID(2), trace.NoTask, "my task", testgen.NoStack)
+
+       g2 := t.Generation(2)
+
+       // That same goroutine emits a task end in the following generation.
+       b2 := g2.Batch(trace.ThreadID(0), 5)
+       b2.Event("ProcStatus", trace.ProcID(0), go122.ProcRunning)
+       b2.Event("GoStatus", trace.GoID(1), trace.ThreadID(0), go122.GoRunning)
+       b2.Event("UserTaskEnd", trace.TaskID(2), testgen.NoStack)
+}
diff --git a/src/internal/trace/v2/testdata/mktests.go b/src/internal/trace/v2/testdata/mktests.go
new file mode 100644 (file)
index 0000000..5242163
--- /dev/null
@@ -0,0 +1,66 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ignore
+
+package main
+
+import (
+       "fmt"
+       "log"
+       "os"
+       "os/exec"
+       "path/filepath"
+       "regexp"
+)
+
+func main() {
+       log.SetFlags(0)
+       if err := run(); err != nil {
+               log.Fatal(err)
+       }
+}
+
+func run() error {
+       generators, err := filepath.Glob("./generators/*.go")
+       if err != nil {
+               return fmt.Errorf("reading generators: %v", err)
+       }
+       genroot := "./tests"
+
+       // Grab a test name filter pattern from GOTRACETEST, if one is set.
+       var re *regexp.Regexp
+       if pattern := os.Getenv("GOTRACETEST"); pattern != "" {
+               re, err = regexp.Compile(pattern)
+               if err != nil {
+                       return fmt.Errorf("compiling regexp %q for GOTRACETEST: %v", pattern, err)
+               }
+       }
+
+       if err := os.MkdirAll(genroot, 0777); err != nil {
+               return fmt.Errorf("creating generated root: %v", err)
+       }
+       for _, path := range generators {
+               name := filepath.Base(path)
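+               // Derive the test name by stripping the .go extension.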
+               name = name[:len(name)-len(filepath.Ext(name))]
+
+               // Skip if we have a pattern and this test doesn't match.
+               if re != nil && !re.MatchString(name) {
+                       continue
+               }
+
+               fmt.Fprintf(os.Stderr, "generating %s... ", name)
+
+               // Get the test path.
+               testPath := filepath.Join(genroot, fmt.Sprintf("%s.test", name))
+
+               // Run generator.
+               cmd := exec.Command("go", "run", path, testPath)
+               if out, err := cmd.CombinedOutput(); err != nil {
+                       return fmt.Errorf("running generator %s: %v:\n%s", name, err, out)
+               }
+               fmt.Fprintln(os.Stderr)
+       }
+       return nil
+}
diff --git a/src/internal/trace/v2/testdata/testprog/annotations-stress.go b/src/internal/trace/v2/testdata/testprog/annotations-stress.go
new file mode 100644 (file)
index 0000000..511d6ed
--- /dev/null
@@ -0,0 +1,84 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests user tasks, regions, and logging.
+
+//go:build ignore
+
+package main
+
+import (
+       "context"
+       "fmt"
+       "log"
+       "os"
+       "runtime/trace"
+       "time"
+)
+
+func main() {
+       baseCtx := context.Background()
+
+       // Create a task that starts and ends entirely outside of the trace.
+       ctx0, t0 := trace.NewTask(baseCtx, "parent")
+
+       // Create a task that starts before the trace and ends during the trace.
+       ctx1, t1 := trace.NewTask(ctx0, "type1")
+
+       // Start tracing.
+       if err := trace.Start(os.Stdout); err != nil {
+               log.Fatalf("failed to start tracing: %v", err)
+       }
+       t1.End()
+
+       // Create a task that starts during the trace and ends after.
+       ctx2, t2 := trace.NewTask(ctx0, "type2")
+
+       // Create a task that starts and ends during the trace.
+       ctx3, t3 := trace.NewTask(baseCtx, "type3")
+
+       // Generate some events.
+       for i := 0; i < 2; i++ {
+               do(baseCtx, 4)
+               do(ctx0, 2)
+               do(ctx1, 3)
+               do(ctx2, 6)
+               do(ctx3, 5)
+       }
+
+       // Finish up tasks according to their lifetime relative to the trace.
+       t3.End()
+       trace.Stop()
+       t2.End()
+       t0.End()
+}
+
+func do(ctx context.Context, k int) {
+       trace.Log(ctx, "log", "before do")
+
+       var t *trace.Task
+       ctx, t = trace.NewTask(ctx, "do")
+       defer t.End()
+
+       trace.Log(ctx, "log2", "do")
+
+       // Create a region and spawn more tasks and more workers.
+       trace.WithRegion(ctx, "fanout", func() {
+               for i := 0; i < k; i++ {
+                       go func(i int) {
+                               trace.WithRegion(ctx, fmt.Sprintf("region%d", i), func() {
+                                       trace.Logf(ctx, "log", "fanout region%d", i)
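+                                       // Recurse once to nest another task and region inside this region.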
+                                       if i == 2 {
+                                               do(ctx, 0)
+                                               return
+                                       }
+                               })
+                       }(i)
+               }
+       })
+
+       // Sleep to let things happen, but also increase the chance that we
+       // advance a generation.
+       time.Sleep(10 * time.Millisecond)
+}
diff --git a/src/internal/trace/v2/testdata/testprog/annotations.go b/src/internal/trace/v2/testdata/testprog/annotations.go
new file mode 100644 (file)
index 0000000..33180d1
--- /dev/null
@@ -0,0 +1,56 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests user tasks, regions, and logging.
+
+//go:build ignore
+
+package main
+
+import (
+       "context"
+       "log"
+       "os"
+       "runtime/trace"
+       "sync"
+)
+
+func main() {
+       bgctx, cancel := context.WithCancel(context.Background())
+       defer cancel()
+
+       // Create a pre-existing region. This won't end up in the trace.
+       preExistingRegion := trace.StartRegion(bgctx, "pre-existing region")
+
+       // Start tracing.
+       if err := trace.Start(os.Stdout); err != nil {
+               log.Fatalf("failed to start tracing: %v", err)
+       }
+
+       // Beginning of traced execution.
+       var wg sync.WaitGroup
+       ctx, task := trace.NewTask(bgctx, "task0") // EvUserTaskCreate("task0")
+       wg.Add(1)
+       go func() {
+               defer wg.Done()
+               defer task.End() // EvUserTaskEnd("task0")
+
+               trace.WithRegion(ctx, "region0", func() {
+                       // EvUserRegionBegin("region0", start)
+                       trace.WithRegion(ctx, "region1", func() {
+                               trace.Log(ctx, "key0", "0123456789abcdef") // EvUserLog("task0", "key0", "0....f")
+                       })
+                       // EvUserRegionEnd("region0", end)
+               })
+       }()
+       wg.Wait()
+
+       preExistingRegion.End()
+       postExistingRegion := trace.StartRegion(bgctx, "post-existing region")
+
+       // End of traced execution.
+       trace.Stop()
+
+       postExistingRegion.End()
+}
diff --git a/src/internal/trace/v2/testdata/testprog/cgo-callback.go b/src/internal/trace/v2/testdata/testprog/cgo-callback.go
new file mode 100644 (file)
index 0000000..d636500
--- /dev/null
@@ -0,0 +1,80 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests cgo callbacks.
+
+//go:build ignore
+
+package main
+
+/*
+#include <pthread.h>
+
+void go_callback();
+void go_callback2();
+
+static void *thr(void *arg) {
+    go_callback();
+    return 0;
+}
+
+static void foo() {
+    pthread_t th;
+    pthread_attr_t attr;
+    pthread_attr_init(&attr);
+    pthread_attr_setstacksize(&attr, 256 << 10);
+    pthread_create(&th, &attr, thr, 0);
+    pthread_join(th, 0);
+}
+
+static void bar() {
+    go_callback2();
+}
+*/
+import "C"
+
+import (
+       "log"
+       "os"
+       "runtime"
+       "runtime/trace"
+)
+
+//export go_callback
+func go_callback() {
+       // Do another call into C, just to test that path too.
+       C.bar()
+}
+
+//export go_callback2
+func go_callback2() {
+       runtime.GC()
+}
+
+func main() {
+       // Start tracing.
+       if err := trace.Start(os.Stdout); err != nil {
+               log.Fatalf("failed to start tracing: %v", err)
+       }
+
+       // Do a whole bunch of cgocallbacks.
+       const n = 10
+       done := make(chan bool)
+       for i := 0; i < n; i++ {
+               go func() {
+                       C.foo()
+                       done <- true
+               }()
+       }
+       for i := 0; i < n; i++ {
+               <-done
+       }
+
+       // Do something to steal back any Ps from the Ms, just
+       // for coverage.
+       runtime.GC()
+
+       // End of traced execution.
+       trace.Stop()
+}
diff --git a/src/internal/trace/v2/testdata/testprog/cpu-profile.go b/src/internal/trace/v2/testdata/testprog/cpu-profile.go
new file mode 100644 (file)
index 0000000..293a2ac
--- /dev/null
@@ -0,0 +1,137 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests CPU profiling.
+
+//go:build ignore
+
+package main
+
+import (
+       "bytes"
+       "context"
+       "fmt"
+       "internal/profile"
+       "log"
+       "os"
+       "runtime"
+       "runtime/pprof"
+       "runtime/trace"
+       "strings"
+       "time"
+)
+
+func main() {
+       cpuBuf := new(bytes.Buffer)
+       if err := pprof.StartCPUProfile(cpuBuf); err != nil {
+               log.Fatalf("failed to start CPU profile: %v", err)
+       }
+
+       if err := trace.Start(os.Stdout); err != nil {
+               log.Fatalf("failed to start tracing: %v", err)
+       }
+
+       dur := 100 * time.Millisecond
+       func() {
+               // Create a region in the execution trace. Set and clear goroutine
+               // labels fully within that region, so we know that any CPU profile
+               // sample with the label must also be eligible for inclusion in the
+               // execution trace.
+               ctx := context.Background()
+               defer trace.StartRegion(ctx, "cpuHogger").End()
+               pprof.Do(ctx, pprof.Labels("tracing", "on"), func(ctx context.Context) {
+                       cpuHogger(cpuHog1, &salt1, dur)
+               })
+               // Be sure the execution trace's view, when filtered to this goroutine
+               // via the explicit goroutine ID in each event, gets many more samples
+               // than the CPU profiler when filtered to this goroutine via labels.
+               cpuHogger(cpuHog1, &salt1, dur)
+       }()
+
+       trace.Stop()
+       pprof.StopCPUProfile()
+
+       // Summarize the CPU profile to stderr so the test can check against it.
+
+       prof, err := profile.Parse(cpuBuf)
+       if err != nil {
+               log.Fatalf("failed to parse CPU profile: %v", err)
+       }
+       // Examine the CPU profiler's view. Filter it to only include samples from
+       // the single test goroutine. Use labels to execute that filter: they should
+       // apply to all work done while that goroutine is getg().m.curg, and they
+       // should apply to no other goroutines.
+       pprofStacks := make(map[string]int)
+       for _, s := range prof.Sample {
+               if s.Label["tracing"] != nil {
+                       var fns []string
+                       var leaf string
+                       for _, loc := range s.Location {
+                               for _, line := range loc.Line {
+                                       fns = append(fns, fmt.Sprintf("%s:%d", line.Function.Name, line.Line))
+                                       leaf = line.Function.Name
+                               }
+                       }
+                       // runtime.sigprof synthesizes call stacks when "normal traceback is
+                       // impossible or has failed", using particular placeholder functions
+                       // to represent common failure cases. Look for those functions in
+                       // the leaf position as a sign that the call stack and its
+                       // symbolization are more complex than this test can handle.
+                       //
+                       // TODO: Make the symbolization done by the execution tracer and CPU
+                       // profiler match up even in these harder cases. See #53378.
+                       switch leaf {
+                       case "runtime._System", "runtime._GC", "runtime._ExternalCode", "runtime._VDSO":
+                               continue
+                       }
+                       stack := strings.Join(fns, "|")
+                       samples := int(s.Value[0])
+                       pprofStacks[stack] += samples
+               }
+       }
+       for stack, samples := range pprofStacks {
+               fmt.Fprintf(os.Stderr, "%s\t%d\n", stack, samples)
+       }
+}
+
+func cpuHogger(f func(x int) int, y *int, dur time.Duration) {
+       // We only need to get one 100 Hz clock tick, so we've got
+       // a large safety buffer.
+       // But do at least 500 iterations (which should take about 100ms),
+       // otherwise TestCPUProfileMultithreaded can fail if only one
+       // thread is scheduled during the testing period.
+       t0 := time.Now()
+       accum := *y
+       for i := 0; i < 500 || time.Since(t0) < dur; i++ {
+               accum = f(accum)
+       }
+       *y = accum
+}
+
+var (
+       salt1 = 0
+)
+
+// The actual CPU hogging function.
+// Must not call other functions nor access heap/globals in the loop,
+// otherwise under race detector the samples will be in the race runtime.
+func cpuHog1(x int) int {
+       return cpuHog0(x, 1e5)
+}
+
+func cpuHog0(x, n int) int {
+       foo := x
+       for i := 0; i < n; i++ {
+               if i%1000 == 0 {
+                       // Spend time in mcall, stored as gp.m.curg, with g0 running
+                       runtime.Gosched()
+               }
+               if foo > 0 {
+                       foo *= foo
+               } else {
+                       foo *= foo + 1
+               }
+       }
+       return foo
+}
diff --git a/src/internal/trace/v2/testdata/testprog/futile-wakeup.go b/src/internal/trace/v2/testdata/testprog/futile-wakeup.go
new file mode 100644 (file)
index 0000000..cc48981
--- /dev/null
@@ -0,0 +1,84 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests that the runtime doesn't emit futile wakeups into the trace. For
+// example, a goroutine blocked on a channel send may unblock briefly only to
+// immediately go back to sleep; such a wakeup reveals no useful information
+// and is purely an artifact of the runtime implementation, so it must not
+// make it into the trace.
+
+//go:build ignore
+
+package main
+
+import (
+       "context"
+       "log"
+       "os"
+       "runtime"
+       "runtime/trace"
+       "sync"
+)
+
+func main() {
+       if err := trace.Start(os.Stdout); err != nil {
+               log.Fatalf("failed to start tracing: %v", err)
+       }
+
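+       // Bump GOMAXPROCS up to 8 for the test; the deferred call restores the
+       // previous value on exit.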
+       defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8))
+       c0 := make(chan int, 1)
+       c1 := make(chan int, 1)
+       c2 := make(chan int, 1)
+       const procs = 2
+       var done sync.WaitGroup
+       done.Add(4 * procs)
+       for p := 0; p < procs; p++ {
+               const iters = 1e3
+               go func() {
+                       trace.WithRegion(context.Background(), "special", func() {
+                               for i := 0; i < iters; i++ {
+                                       runtime.Gosched()
+                                       c0 <- 0
+                               }
+                               done.Done()
+                       })
+               }()
+               go func() {
+                       trace.WithRegion(context.Background(), "special", func() {
+                               for i := 0; i < iters; i++ {
+                                       runtime.Gosched()
+                                       <-c0
+                               }
+                               done.Done()
+                       })
+               }()
+               go func() {
+                       trace.WithRegion(context.Background(), "special", func() {
+                               for i := 0; i < iters; i++ {
+                                       runtime.Gosched()
+                                       select {
+                                       case c1 <- 0:
+                                       case c2 <- 0:
+                                       }
+                               }
+                               done.Done()
+                       })
+               }()
+               go func() {
+                       trace.WithRegion(context.Background(), "special", func() {
+                               for i := 0; i < iters; i++ {
+                                       runtime.Gosched()
+                                       select {
+                                       case <-c1:
+                                       case <-c2:
+                                       }
+                               }
+                               done.Done()
+                       })
+               }()
+       }
+       done.Wait()
+
+       trace.Stop()
+}
diff --git a/src/internal/trace/v2/testdata/testprog/gc-stress.go b/src/internal/trace/v2/testdata/testprog/gc-stress.go
new file mode 100644 (file)
index 0000000..7a1fdfd
--- /dev/null
@@ -0,0 +1,76 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests a GC-heavy program. This is useful for shaking out
+// all sorts of corner cases around GC-related ranges.
+
+//go:build ignore
+
+package main
+
+import (
+       "log"
+       "math/rand"
+       "os"
+       "runtime"
+       "runtime/trace"
+       "time"
+)
+
+type node struct {
+       children [4]*node
+       data     [128]byte
+}
+
+func makeTree(depth int) *node {
+       if depth == 0 {
+               return new(node)
+       }
+       return &node{
+               children: [4]*node{
+                       makeTree(depth - 1),
+                       makeTree(depth - 1),
+                       makeTree(depth - 1),
+                       makeTree(depth - 1),
+               },
+       }
+}
+
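+// trees and ballast form a large, pointer-dense live heap that gives the GC
+// plenty of mark work, while sink continuously churns allocations.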
+var trees [32]*node
+var ballast *[32]*[8192]*node
+var sink []byte
+
+func main() {
+       for i := range trees {
+               trees[i] = makeTree(6)
+       }
+       ballast = new([32]*[8192]*node)
+       for i := range ballast {
+               ballast[i] = new([8192]*node)
+               for j := range ballast[i] {
+                       ballast[i][j] = &node{
+                               data: [128]byte{1, 2, 3, 4},
+                       }
+               }
+       }
+       for i := 0; i < runtime.GOMAXPROCS(-1); i++ {
+               go func() {
+                       for {
+                               sink = make([]byte, rand.Intn(65536))
+                       }
+               }()
+       }
+       // Increase the chance that we end up starting and stopping
+       // mid-GC by only starting to trace after a few milliseconds.
+       time.Sleep(5 * time.Millisecond)
+
+       // Start tracing.
+       if err := trace.Start(os.Stdout); err != nil {
+               log.Fatalf("failed to start tracing: %v", err)
+       }
+       defer trace.Stop()
+
+       // Let the tracing happen for a bit.
+       time.Sleep(400 * time.Millisecond)
+}
diff --git a/src/internal/trace/v2/testdata/testprog/gomaxprocs.go b/src/internal/trace/v2/testdata/testprog/gomaxprocs.go
new file mode 100644 (file)
index 0000000..2651207
--- /dev/null
@@ -0,0 +1,46 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests increasing and decreasing GOMAXPROCS to try to
+// catch issues with stale proc state.
+
+//go:build ignore
+
+package main
+
+import (
+       "log"
+       "os"
+       "runtime"
+       "runtime/trace"
+       "time"
+)
+
+func main() {
+       // Start a goroutine that calls runtime.GC to try and
+       // introduce some interesting events in between the
+       // GOMAXPROCS calls.
+       go func() {
+               for {
+                       runtime.GC()
+                       time.Sleep(1 * time.Millisecond)
+               }
+       }()
+
+       // Start tracing.
+       if err := trace.Start(os.Stdout); err != nil {
+               log.Fatalf("failed to start tracing: %v", err)
+       }
+       // Run GOMAXPROCS a bunch of times, up and down.
+       for i := 1; i <= 16; i *= 2 {
+               runtime.GOMAXPROCS(i)
+               time.Sleep(1 * time.Millisecond)
+       }
+       for i := 16; i >= 1; i /= 2 {
+               runtime.GOMAXPROCS(i)
+               time.Sleep(1 * time.Millisecond)
+       }
+       // Stop tracing.
+       trace.Stop()
+}
diff --git a/src/internal/trace/v2/testdata/testprog/many-start-stop.go b/src/internal/trace/v2/testdata/testprog/many-start-stop.go
new file mode 100644 (file)
index 0000000..2d5d063
--- /dev/null
@@ -0,0 +1,38 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests simply starting and stopping tracing multiple times.
+//
+// This is useful for finding bugs in trace state reset.
+
+//go:build ignore
+
+package main
+
+import (
+       "bytes"
+       "log"
+       "os"
+       "runtime"
+       "runtime/trace"
+)
+
+func main() {
+       // Trace a few times.
+       for i := 0; i < 10; i++ {
+               var buf bytes.Buffer
+               if err := trace.Start(&buf); err != nil {
+                       log.Fatalf("failed to start tracing: %v", err)
+               }
+               runtime.GC()
+               trace.Stop()
+       }
+
+       // Start tracing again, this time writing out the result.
+       if err := trace.Start(os.Stdout); err != nil {
+               log.Fatalf("failed to start tracing: %v", err)
+       }
+       runtime.GC()
+       trace.Stop()
+}
diff --git a/src/internal/trace/v2/testdata/testprog/stacks.go b/src/internal/trace/v2/testdata/testprog/stacks.go
new file mode 100644 (file)
index 0000000..e64bc86
--- /dev/null
@@ -0,0 +1,129 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests stack symbolization.
+
+//go:build ignore
+
+package main
+
+import (
+       "log"
+       "net"
+       "os"
+       "runtime"
+       "runtime/trace"
+       "sync"
+       "time"
+)
+
+func main() {
+       if err := trace.Start(os.Stdout); err != nil {
+               log.Fatalf("failed to start tracing: %v", err)
+       }
+       defer trace.Stop() // in case of early return
+
+       // Now we will do a bunch of things for which we verify stacks later.
+       // It is impossible to ensure that a goroutine has actually blocked
+       // on a channel, in a select or otherwise. So we kick off goroutines
+       // that need to block first in the hope that while we are executing
+       // the rest of the test, they will block.
+       go func() { // func1
+               select {}
+       }()
+       go func() { // func2
+               var c chan int
+               c <- 0
+       }()
+       go func() { // func3
+               var c chan int
+               <-c
+       }()
+       done1 := make(chan bool)
+       go func() { // func4
+               <-done1
+       }()
+       done2 := make(chan bool)
+       go func() { // func5
+               done2 <- true
+       }()
+       c1 := make(chan int)
+       c2 := make(chan int)
+       go func() { // func6
+               select {
+               case <-c1:
+               case <-c2:
+               }
+       }()
+       var mu sync.Mutex
+       mu.Lock()
+       go func() { // func7
+               mu.Lock()
+               mu.Unlock()
+       }()
+       var wg sync.WaitGroup
+       wg.Add(1)
+       go func() { // func8
+               wg.Wait()
+       }()
+       cv := sync.NewCond(&sync.Mutex{})
+       go func() { // func9
+               cv.L.Lock()
+               cv.Wait()
+               cv.L.Unlock()
+       }()
+       ln, err := net.Listen("tcp", "127.0.0.1:0")
+       if err != nil {
+               log.Fatalf("failed to listen: %v", err)
+       }
+       go func() { // func10
+               c, err := ln.Accept()
+               if err != nil {
+                       log.Printf("failed to accept: %v", err)
+                       return
+               }
+               c.Close()
+       }()
+       rp, wp, err := os.Pipe()
+       if err != nil {
+               log.Fatalf("failed to create a pipe: %v", err)
+       }
+       defer rp.Close()
+       defer wp.Close()
+       pipeReadDone := make(chan bool)
+       go func() { // func11
+               var data [1]byte
+               rp.Read(data[:])
+               pipeReadDone <- true
+       }()
+
+       time.Sleep(100 * time.Millisecond)
+       runtime.GC()
+       runtime.Gosched()
+       time.Sleep(100 * time.Millisecond) // the last chance for the goroutines above to block
+       done1 <- true
+       <-done2
+       select {
+       case c1 <- 0:
+       case c2 <- 0:
+       }
+       mu.Unlock()
+       wg.Done()
+       cv.Signal()
+       c, err := net.Dial("tcp", ln.Addr().String())
+       if err != nil {
+               log.Fatalf("failed to dial: %v", err)
+       }
+       c.Close()
+       var data [1]byte
+       wp.Write(data[:])
+       <-pipeReadDone
+
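+       // Tweak GOMAXPROCS while tracing is still active so that the resize is
+       // visible in the trace.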
+       oldGoMaxProcs := runtime.GOMAXPROCS(0)
+       runtime.GOMAXPROCS(oldGoMaxProcs + 1)
+
+       trace.Stop()
+
+       runtime.GOMAXPROCS(oldGoMaxProcs)
+}
diff --git a/src/internal/trace/v2/testdata/testprog/stress-start-stop.go b/src/internal/trace/v2/testdata/testprog/stress-start-stop.go
new file mode 100644 (file)
index 0000000..72c1c59
--- /dev/null
@@ -0,0 +1,166 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests many interesting cases (network, syscalls, a little GC, busy goroutines, blocked
+// goroutines, LockOSThread, pipes, and GOMAXPROCS) across repeated trace start/stop cycles.
+
+//go:build ignore
+
+package main
+
+import (
+       "bytes"
+       "io"
+       "log"
+       "net"
+       "os"
+       "runtime"
+       "runtime/trace"
+       "sync"
+       "time"
+)
+
+func main() {
+       defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8))
+       outerDone := make(chan bool)
+
+       go func() {
+               defer func() {
+                       outerDone <- true
+               }()
+
+               var wg sync.WaitGroup
+               done := make(chan bool)
+
+               wg.Add(1)
+               go func() {
+                       <-done
+                       wg.Done()
+               }()
+
+               rp, wp, err := os.Pipe()
+               if err != nil {
+                       log.Fatalf("failed to create pipe: %v", err)
+                       return
+               }
+               defer func() {
+                       rp.Close()
+                       wp.Close()
+               }()
+               wg.Add(1)
+               go func() {
+                       var tmp [1]byte
+                       rp.Read(tmp[:])
+                       <-done
+                       wg.Done()
+               }()
+               time.Sleep(time.Millisecond)
+
+               go func() {
+                       runtime.LockOSThread()
+                       for {
+                               select {
+                               case <-done:
+                                       return
+                               default:
+                                       runtime.Gosched()
+                               }
+                       }
+               }()
+
+               runtime.GC()
+               // Trigger GC from malloc.
+               n := 512
+               for i := 0; i < n; i++ {
+                       _ = make([]byte, 1<<20)
+               }
+
+               // Create a bunch of busy goroutines to load all Ps.
+               for p := 0; p < 10; p++ {
+                       wg.Add(1)
+                       go func() {
+                               // Do something useful.
+                               tmp := make([]byte, 1<<16)
+                               for i := range tmp {
+                                       tmp[i]++
+                               }
+                               _ = tmp
+                               <-done
+                               wg.Done()
+                       }()
+               }
+
+               // Block in syscall.
+               wg.Add(1)
+               go func() {
+                       var tmp [1]byte
+                       rp.Read(tmp[:])
+                       <-done
+                       wg.Done()
+               }()
+
+               runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
+
+               // Test timers.
+               timerDone := make(chan bool)
+               go func() {
+                       time.Sleep(time.Millisecond)
+                       timerDone <- true
+               }()
+               <-timerDone
+
+               // A bit of network.
+               ln, err := net.Listen("tcp", "127.0.0.1:0")
+               if err != nil {
+                       log.Fatalf("listen failed: %v", err)
+                       return
+               }
+               defer ln.Close()
+               go func() {
+                       c, err := ln.Accept()
+                       if err != nil {
+                               return
+                       }
+                       time.Sleep(time.Millisecond)
+                       var buf [1]byte
+                       c.Write(buf[:])
+                       c.Close()
+               }()
+               c, err := net.Dial("tcp", ln.Addr().String())
+               if err != nil {
+                       log.Fatalf("dial failed: %v", err)
+                       return
+               }
+               var tmp [1]byte
+               c.Read(tmp[:])
+               c.Close()
+
+               go func() {
+                       runtime.Gosched()
+                       select {}
+               }()
+
+               // Unblock the helper goroutines and wait for them to finish.
+               wp.Write(tmp[:])
+               wp.Write(tmp[:])
+               close(done)
+               wg.Wait()
+       }()
+
+       const iters = 5
+       for i := 0; i < iters; i++ {
+               var w io.Writer
+               if i == iters-1 {
+                       w = os.Stdout
+               } else {
+                       w = new(bytes.Buffer)
+               }
+               if err := trace.Start(w); err != nil {
+                       log.Fatalf("failed to start tracing: %v", err)
+               }
+               time.Sleep(time.Millisecond)
+               trace.Stop()
+       }
+       <-outerDone
+}
diff --git a/src/internal/trace/v2/testdata/testprog/stress.go b/src/internal/trace/v2/testdata/testprog/stress.go
new file mode 100644 (file)
index 0000000..99696d1
--- /dev/null
@@ -0,0 +1,146 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tests many interesting cases (network, syscalls, a little GC, busy goroutines,
+// blocked goroutines, LockOSThread, pipes, and GOMAXPROCS).
+
+//go:build ignore
+
+package main
+
+import (
+       "log"
+       "net"
+       "os"
+       "runtime"
+       "runtime/trace"
+       "sync"
+       "time"
+)
+
+func main() {
+       var wg sync.WaitGroup
+       done := make(chan bool)
+
+       // Create a goroutine blocked before tracing.
+       wg.Add(1)
+       go func() {
+               <-done
+               wg.Done()
+       }()
+
+       // Create a goroutine blocked in syscall before tracing.
+       rp, wp, err := os.Pipe()
+       if err != nil {
+               log.Fatalf("failed to create pipe: %v", err)
+       }
+       defer func() {
+               rp.Close()
+               wp.Close()
+       }()
+       wg.Add(1)
+       go func() {
+               var tmp [1]byte
+               rp.Read(tmp[:])
+               <-done
+               wg.Done()
+       }()
+       time.Sleep(time.Millisecond) // give the goroutine above time to block
+
+       if err := trace.Start(os.Stdout); err != nil {
+               log.Fatalf("failed to start tracing: %v", err)
+       }
+       defer trace.Stop()
+
+       procs := runtime.GOMAXPROCS(10)
+       time.Sleep(50 * time.Millisecond) // test proc stop/start events
+
+       go func() {
+               runtime.LockOSThread()
+               for {
+                       select {
+                       case <-done:
+                               return
+                       default:
+                               runtime.Gosched()
+                       }
+               }
+       }()
+
+       runtime.GC()
+       // Trigger GC from malloc.
+       n := 512
+       for i := 0; i < n; i++ {
+               _ = make([]byte, 1<<20)
+       }
+
+       // Create a bunch of busy goroutines to load all Ps.
+       for p := 0; p < 10; p++ {
+               wg.Add(1)
+               go func() {
+                       // Do something useful.
+                       tmp := make([]byte, 1<<16)
+                       for i := range tmp {
+                               tmp[i]++
+                       }
+                       _ = tmp
+                       <-done
+                       wg.Done()
+               }()
+       }
+
+       // Block in syscall.
+       wg.Add(1)
+       go func() {
+               var tmp [1]byte
+               rp.Read(tmp[:])
+               <-done
+               wg.Done()
+       }()
+
+       // Test timers.
+       timerDone := make(chan bool)
+       go func() {
+               time.Sleep(time.Millisecond)
+               timerDone <- true
+       }()
+       <-timerDone
+
+       // A bit of network.
+       ln, err := net.Listen("tcp", "127.0.0.1:0")
+       if err != nil {
+               log.Fatalf("listen failed: %v", err)
+       }
+       defer ln.Close()
+       go func() {
+               c, err := ln.Accept()
+               if err != nil {
+                       return
+               }
+               time.Sleep(time.Millisecond)
+               var buf [1]byte
+               c.Write(buf[:])
+               c.Close()
+       }()
+       c, err := net.Dial("tcp", ln.Addr().String())
+       if err != nil {
+               log.Fatalf("dial failed: %v", err)
+       }
+       var tmp [1]byte
+       c.Read(tmp[:])
+       c.Close()
+
+       go func() {
+               runtime.Gosched()
+               select {}
+       }()
+
+       // Unblock the helper goroutines and wait for them to finish.
+       wp.Write(tmp[:])
+       wp.Write(tmp[:])
+       close(done)
+       wg.Wait()
+
+       runtime.GOMAXPROCS(procs)
+}
diff --git a/src/internal/trace/v2/testdata/tests/go122-confuse-seq-across-generations.test b/src/internal/trace/v2/testdata/tests/go122-confuse-seq-across-generations.test
new file mode 100644 (file)
index 0000000..c0d6f0d
--- /dev/null
@@ -0,0 +1,36 @@
+-- expect --
+SUCCESS
+-- trace --
+Trace Go1.22
+EventBatch gen=1 m=0 time=0 size=13
+ProcStatus dt=1 p=0 pstatus=1
+GoStatus dt=1 g=1 m=0 gstatus=2
+GoStop dt=1 reason_string=1 stack=0
+EventBatch gen=1 m=1 time=0 size=12
+ProcStatus dt=1 p=1 pstatus=1
+GoStart dt=1 g=1 g_seq=1
+GoStop dt=1 reason_string=1 stack=0
+EventBatch gen=1 m=18446744073709551615 time=0 size=5
+Frequency freq=15625000
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Stacks
+EventBatch gen=1 m=18446744073709551615 time=0 size=12
+Strings
+String id=1
+       data="whatever"
+EventBatch gen=2 m=1 time=3 size=8
+ProcStatus dt=1 p=1 pstatus=1
+GoStart dt=1 g=1 g_seq=2
+EventBatch gen=2 m=0 time=5 size=17
+ProcStatus dt=1 p=0 pstatus=1
+GoStatus dt=1 g=1 m=0 gstatus=1
+GoStart dt=1 g=1 g_seq=1
+GoStop dt=1 reason_string=1 stack=0
+EventBatch gen=2 m=18446744073709551615 time=0 size=5
+Frequency freq=15625000
+EventBatch gen=2 m=18446744073709551615 time=0 size=1
+Stacks
+EventBatch gen=2 m=18446744073709551615 time=0 size=12
+Strings
+String id=1
+       data="whatever"
diff --git a/src/internal/trace/v2/testdata/tests/go122-go-create-without-running-g.test b/src/internal/trace/v2/testdata/tests/go122-go-create-without-running-g.test
new file mode 100644 (file)
index 0000000..494c444
--- /dev/null
@@ -0,0 +1,17 @@
+-- expect --
+SUCCESS
+-- trace --
+Trace Go1.22
+EventBatch gen=1 m=0 time=0 size=17
+ProcStatus dt=1 p=0 pstatus=1
+GoCreate dt=1 new_g=5 new_stack=0 stack=0
+GoStart dt=1 g=5 g_seq=1
+GoStop dt=1 reason_string=1 stack=0
+EventBatch gen=1 m=18446744073709551615 time=0 size=5
+Frequency freq=15625000
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Stacks
+EventBatch gen=1 m=18446744073709551615 time=0 size=12
+Strings
+String id=1
+       data="whatever"
diff --git a/src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-gen-boundary-bare-m.test b/src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-gen-boundary-bare-m.test
new file mode 100644 (file)
index 0000000..bbfc9cc
--- /dev/null
@@ -0,0 +1,17 @@
+-- expect --
+SUCCESS
+-- trace --
+Trace Go1.22
+EventBatch gen=1 m=0 time=0 size=11
+ProcStatus dt=1 p=1 pstatus=1
+GoStatus dt=1 g=1 m=0 gstatus=3
+GoSyscallEndBlocked dt=1
+EventBatch gen=1 m=1 time=0 size=9
+ProcStatus dt=1 p=0 pstatus=4
+ProcSteal dt=1 p=0 p_seq=1 m=0
+EventBatch gen=1 m=18446744073709551615 time=0 size=5
+Frequency freq=15625000
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Stacks
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Strings
diff --git a/src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-gen-boundary-reacquire-new-proc-bare-m.test b/src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-gen-boundary-reacquire-new-proc-bare-m.test
new file mode 100644 (file)
index 0000000..8e29132
--- /dev/null
@@ -0,0 +1,18 @@
+-- expect --
+SUCCESS
+-- trace --
+Trace Go1.22
+EventBatch gen=1 m=0 time=0 size=15
+GoStatus dt=1 g=1 m=0 gstatus=3
+ProcStatus dt=1 p=1 pstatus=2
+ProcStart dt=1 p=1 p_seq=1
+GoSyscallEndBlocked dt=1
+EventBatch gen=1 m=1 time=0 size=9
+ProcStatus dt=1 p=0 pstatus=4
+ProcSteal dt=1 p=0 p_seq=1 m=0
+EventBatch gen=1 m=18446744073709551615 time=0 size=5
+Frequency freq=15625000
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Stacks
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Strings
diff --git a/src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-gen-boundary-reacquire-new-proc.test b/src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-gen-boundary-reacquire-new-proc.test
new file mode 100644 (file)
index 0000000..3b26e8f
--- /dev/null
@@ -0,0 +1,20 @@
+-- expect --
+SUCCESS
+-- trace --
+Trace Go1.22
+EventBatch gen=1 m=0 time=0 size=15
+GoStatus dt=1 g=1 m=0 gstatus=3
+ProcStatus dt=1 p=1 pstatus=2
+ProcStart dt=1 p=1 p_seq=1
+GoSyscallEndBlocked dt=1
+EventBatch gen=1 m=1 time=0 size=18
+ProcStatus dt=1 p=2 pstatus=1
+GoStatus dt=1 g=2 m=1 gstatus=2
+ProcStatus dt=1 p=0 pstatus=4
+ProcSteal dt=1 p=0 p_seq=1 m=0
+EventBatch gen=1 m=18446744073709551615 time=0 size=5
+Frequency freq=15625000
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Stacks
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Strings
diff --git a/src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-gen-boundary.test b/src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-gen-boundary.test
new file mode 100644 (file)
index 0000000..133d8a5
--- /dev/null
@@ -0,0 +1,19 @@
+-- expect --
+SUCCESS
+-- trace --
+Trace Go1.22
+EventBatch gen=1 m=0 time=0 size=11
+ProcStatus dt=1 p=1 pstatus=1
+GoStatus dt=1 g=1 m=0 gstatus=3
+GoSyscallEndBlocked dt=1
+EventBatch gen=1 m=1 time=0 size=18
+ProcStatus dt=1 p=2 pstatus=1
+GoStatus dt=1 g=2 m=1 gstatus=2
+ProcStatus dt=1 p=0 pstatus=4
+ProcSteal dt=1 p=0 p_seq=1 m=0
+EventBatch gen=1 m=18446744073709551615 time=0 size=5
+Frequency freq=15625000
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Stacks
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Strings
diff --git a/src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-reacquire-new-proc-bare-m.test b/src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-reacquire-new-proc-bare-m.test
new file mode 100644 (file)
index 0000000..638cc0d
--- /dev/null
@@ -0,0 +1,19 @@
+-- expect --
+SUCCESS
+-- trace --
+Trace Go1.22
+EventBatch gen=1 m=0 time=0 size=22
+ProcStatus dt=1 p=1 pstatus=2
+ProcStatus dt=1 p=0 pstatus=1
+GoStatus dt=1 g=1 m=0 gstatus=2
+GoSyscallBegin dt=1 stack=0
+ProcStart dt=1 p=1 p_seq=1
+GoSyscallEndBlocked dt=1
+EventBatch gen=1 m=1 time=0 size=5
+ProcSteal dt=1 p=0 p_seq=1 m=0
+EventBatch gen=1 m=18446744073709551615 time=0 size=5
+Frequency freq=15625000
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Stacks
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Strings
diff --git a/src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-reacquire-new-proc.test b/src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-reacquire-new-proc.test
new file mode 100644 (file)
index 0000000..78f20e5
--- /dev/null
@@ -0,0 +1,21 @@
+-- expect --
+SUCCESS
+-- trace --
+Trace Go1.22
+EventBatch gen=1 m=0 time=0 size=22
+ProcStatus dt=1 p=1 pstatus=2
+ProcStatus dt=1 p=0 pstatus=1
+GoStatus dt=1 g=1 m=0 gstatus=2
+GoSyscallBegin dt=1 stack=0
+ProcStart dt=1 p=1 p_seq=1
+GoSyscallEndBlocked dt=1
+EventBatch gen=1 m=1 time=0 size=14
+ProcStatus dt=1 p=2 pstatus=1
+GoStatus dt=1 g=2 m=1 gstatus=2
+ProcSteal dt=1 p=0 p_seq=1 m=0
+EventBatch gen=1 m=18446744073709551615 time=0 size=5
+Frequency freq=15625000
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Stacks
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Strings
diff --git a/src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-simple-bare-m.test b/src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-simple-bare-m.test
new file mode 100644 (file)
index 0000000..fe2d089
--- /dev/null
@@ -0,0 +1,17 @@
+-- expect --
+SUCCESS
+-- trace --
+Trace Go1.22
+EventBatch gen=1 m=0 time=0 size=14
+ProcStatus dt=1 p=0 pstatus=1
+GoStatus dt=1 g=1 m=0 gstatus=2
+GoSyscallBegin dt=1 stack=0
+GoSyscallEndBlocked dt=1
+EventBatch gen=1 m=1 time=0 size=5
+ProcSteal dt=1 p=0 p_seq=1 m=0
+EventBatch gen=1 m=18446744073709551615 time=0 size=5
+Frequency freq=15625000
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Stacks
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Strings
diff --git a/src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-simple.test b/src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-simple.test
new file mode 100644 (file)
index 0000000..2b33dce
--- /dev/null
@@ -0,0 +1,19 @@
+-- expect --
+SUCCESS
+-- trace --
+Trace Go1.22
+EventBatch gen=1 m=0 time=0 size=14
+ProcStatus dt=1 p=0 pstatus=1
+GoStatus dt=1 g=1 m=0 gstatus=2
+GoSyscallBegin dt=1 stack=0
+GoSyscallEndBlocked dt=1
+EventBatch gen=1 m=1 time=0 size=14
+ProcStatus dt=1 p=2 pstatus=1
+GoStatus dt=1 g=2 m=1 gstatus=2
+ProcSteal dt=1 p=0 p_seq=1 m=0
+EventBatch gen=1 m=18446744073709551615 time=0 size=5
+Frequency freq=15625000
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Stacks
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Strings
diff --git a/src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-sitting-in-syscall.test b/src/internal/trace/v2/testdata/tests/go122-syscall-steal-proc-sitting-in-syscall.test
new file mode 100644 (file)
index 0000000..58c41c5
--- /dev/null
@@ -0,0 +1,15 @@
+-- expect --
+SUCCESS
+-- trace --
+Trace Go1.22
+EventBatch gen=1 m=0 time=0 size=9
+ProcStatus dt=1 p=0 pstatus=4
+ProcSteal dt=1 p=0 p_seq=1 m=1
+EventBatch gen=1 m=18446744073709551615 time=0 size=5
+GoStatus dt=1 g=1 m=1 gstatus=3
+EventBatch gen=1 m=18446744073709551615 time=0 size=5
+Frequency freq=15625000
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Stacks
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Strings
diff --git a/src/internal/trace/v2/testdata/tests/go122-task-across-generations.test b/src/internal/trace/v2/testdata/tests/go122-task-across-generations.test
new file mode 100644 (file)
index 0000000..0b8abd7
--- /dev/null
@@ -0,0 +1,26 @@
+-- expect --
+SUCCESS
+-- trace --
+Trace Go1.22
+EventBatch gen=1 m=0 time=0 size=15
+ProcStatus dt=1 p=0 pstatus=1
+GoStatus dt=1 g=1 m=0 gstatus=2
+UserTaskBegin dt=1 task=2 parent_task=0 name_string=1 stack=0
+EventBatch gen=1 m=18446744073709551615 time=0 size=5
+Frequency freq=15625000
+EventBatch gen=1 m=18446744073709551615 time=0 size=1
+Stacks
+EventBatch gen=1 m=18446744073709551615 time=0 size=11
+Strings
+String id=1
+       data="my task"
+EventBatch gen=2 m=0 time=5 size=13
+ProcStatus dt=1 p=0 pstatus=1
+GoStatus dt=1 g=1 m=0 gstatus=2
+UserTaskEnd dt=1 task=2 stack=0
+EventBatch gen=2 m=18446744073709551615 time=0 size=5
+Frequency freq=15625000
+EventBatch gen=2 m=18446744073709551615 time=0 size=1
+Stacks
+EventBatch gen=2 m=18446744073709551615 time=0 size=1
+Strings
diff --git a/src/internal/trace/v2/testtrace/expectation.go b/src/internal/trace/v2/testtrace/expectation.go
new file mode 100644 (file)
index 0000000..3e5394a
--- /dev/null
@@ -0,0 +1,81 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testtrace
+
+import (
+       "bufio"
+       "bytes"
+       "fmt"
+       "regexp"
+       "strconv"
+       "strings"
+)
+
+// Expectation represents the expected result of some operation.
+type Expectation struct {
+       failure      bool
+       errorMatcher *regexp.Regexp
+}
+
+// ExpectSuccess returns an Expectation that trivially expects success.
+func ExpectSuccess() *Expectation {
+       return new(Expectation)
+}
+
+// Check validates whether err conforms to the expectation. Returns
+// an error if it does not conform.
+//
+// Conformance means that if failure is true, then err must be non-nil
+// and match errorMatcher; otherwise, err must be nil.
+func (e *Expectation) Check(err error) error {
+       if !e.failure && err != nil {
+               return fmt.Errorf("unexpected error while reading the trace: %v", err)
+       }
+       if e.failure && err == nil {
+               return fmt.Errorf("expected error while reading the trace: want something matching %q, got none", e.errorMatcher)
+       }
+       if e.failure && err != nil && !e.errorMatcher.MatchString(err.Error()) {
+               return fmt.Errorf("unexpected error while reading the trace: want something matching %q, got %s", e.errorMatcher, err.Error())
+       }
+       return nil
+}
+
+// ParseExpectation parses the serialized form of an Expectation.
+func ParseExpectation(data []byte) (*Expectation, error) {
+       exp := new(Expectation)
+       s := bufio.NewScanner(bytes.NewReader(data))
+       if s.Scan() {
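+               // The header line is either "SUCCESS" or "FAILURE" followed by
+               // a quoted regexp that the resulting error must match.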
+               c := strings.SplitN(s.Text(), " ", 2)
+               switch c[0] {
+               case "SUCCESS":
+               case "FAILURE":
+                       exp.failure = true
+                       if len(c) != 2 {
+                               return exp, fmt.Errorf("bad header line for FAILURE: %q", s.Text())
+                       }
+                       matcher, err := parseMatcher(c[1])
+                       if err != nil {
+                               return exp, err
+                       }
+                       exp.errorMatcher = matcher
+               default:
+                       return exp, fmt.Errorf("bad header line: %q", s.Text())
+               }
+               return exp, nil
+       }
+       return exp, s.Err()
+}
+
+func parseMatcher(quoted string) (*regexp.Regexp, error) {
+       pattern, err := strconv.Unquote(quoted)
+       if err != nil {
+               return nil, fmt.Errorf("malformed pattern: not correctly quoted: %s: %v", quoted, err)
+       }
+       matcher, err := regexp.Compile(pattern)
+       if err != nil {
+               return nil, fmt.Errorf("malformed pattern: not a valid regexp: %s: %v", pattern, err)
+       }
+       return matcher, nil
+}
diff --git a/src/internal/trace/v2/testtrace/format.go b/src/internal/trace/v2/testtrace/format.go
new file mode 100644 (file)
index 0000000..2e2e975
--- /dev/null
@@ -0,0 +1,56 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testtrace
+
+import (
+       "bytes"
+       "fmt"
+       "internal/trace/v2/raw"
+       "internal/txtar"
+       "io"
+)
+
+// ParseFile parses a test file generated by the testgen package.
+func ParseFile(testPath string) (io.Reader, *Expectation, error) {
+       ar, err := txtar.ParseFile(testPath)
+       if err != nil {
+               return nil, nil, fmt.Errorf("failed to read test file for %s: %v", testPath, err)
+       }
+       if len(ar.Files) != 2 {
+               return nil, nil, fmt.Errorf("malformed test %s: wrong number of files", testPath)
+       }
+       if ar.Files[0].Name != "expect" {
+               return nil, nil, fmt.Errorf("malformed test %s: bad filename %s", testPath, ar.Files[0].Name)
+       }
+       if ar.Files[1].Name != "trace" {
+               return nil, nil, fmt.Errorf("malformed test %s: bad filename %s", testPath, ar.Files[1].Name)
+       }
+       tr, err := raw.NewTextReader(bytes.NewReader(ar.Files[1].Data))
+       if err != nil {
+               return nil, nil, fmt.Errorf("malformed test %s: bad trace file: %v", testPath, err)
+       }
+       var buf bytes.Buffer
+       tw, err := raw.NewWriter(&buf, tr.Version())
+       if err != nil {
+               return nil, nil, fmt.Errorf("failed to create trace byte writer: %v", err)
+       }
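+       // Re-encode the human-readable text trace into the binary wire format
+       // that the trace parser consumes.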
+       for {
+               ev, err := tr.ReadEvent()
+               if err == io.EOF {
+                       break
+               }
+               if err != nil {
+                       return nil, nil, fmt.Errorf("malformed test %s: bad trace file: %v", testPath, err)
+               }
+               if err := tw.WriteEvent(ev); err != nil {
+                       return nil, nil, fmt.Errorf("internal error during %s: failed to write trace bytes: %v", testPath, err)
+               }
+       }
+       exp, err := ParseExpectation(ar.Files[0].Data)
+       if err != nil {
+               return nil, nil, fmt.Errorf("internal error during %s: failed to parse expectation %q: %v", testPath, string(ar.Files[0].Data), err)
+       }
+       return &buf, exp, nil
+}
diff --git a/src/internal/trace/v2/testtrace/validation.go b/src/internal/trace/v2/testtrace/validation.go
new file mode 100644 (file)
index 0000000..fcbc108
--- /dev/null
@@ -0,0 +1,351 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testtrace
+
+import (
+       "errors"
+       "fmt"
+       "internal/trace/v2"
+       "slices"
+       "strings"
+)
+
+// Validator is a type used for validating a stream of trace.Events.
+type Validator struct {
+       lastTs   trace.Time
+       gs       map[trace.GoID]*goState
+       ps       map[trace.ProcID]*procState
+       ms       map[trace.ThreadID]*schedContext
+       ranges   map[trace.ResourceID][]string
+       tasks    map[trace.TaskID]string
+       seenSync bool
+}
+
+type schedContext struct {
+       M trace.ThreadID
+       P trace.ProcID
+       G trace.GoID
+}
+
+type goState struct {
+       state   trace.GoState
+       binding *schedContext
+}
+
+type procState struct {
+       state   trace.ProcState
+       binding *schedContext
+}
+
+// NewValidator creates a new Validator.
+func NewValidator() *Validator {
+       return &Validator{
+               gs:     make(map[trace.GoID]*goState),
+               ps:     make(map[trace.ProcID]*procState),
+               ms:     make(map[trace.ThreadID]*schedContext),
+               ranges: make(map[trace.ResourceID][]string),
+               tasks:  make(map[trace.TaskID]string),
+       }
+}
+
+// Event validates ev as the next event in a stream of trace.Events.
+//
+// Returns an error if validation fails.
+func (v *Validator) Event(ev trace.Event) error {
+       e := new(errAccumulator)
+
+       // Validate timestamp order.
+       if v.lastTs != 0 {
+               if ev.Time() <= v.lastTs {
+                       e.Errorf("timestamp out-of-order for %+v", ev)
+               } else {
+                       v.lastTs = ev.Time()
+               }
+       } else {
+               v.lastTs = ev.Time()
+       }
+
+       // Validate event stack.
+       checkStack(e, ev.Stack())
+
+       switch ev.Kind() {
+       case trace.EventSync:
+               // Just record that we've seen a Sync at some point.
+               v.seenSync = true
+       case trace.EventMetric:
+               m := ev.Metric()
+               if !strings.Contains(m.Name, ":") {
+                       // Should have a ":" as per runtime/metrics convention.
+                       e.Errorf("invalid metric name %q", m.Name)
+               }
+               // Make sure the value is OK.
+               if m.Value.Kind() == trace.ValueBad {
+                       e.Errorf("invalid value")
+               }
+               switch m.Value.Kind() {
+               case trace.ValueUint64:
+                       // Just make sure it doesn't panic.
+                       _ = m.Value.Uint64()
+               }
+       case trace.EventLabel:
+               l := ev.Label()
+
+               // Check label.
+               if l.Label == "" {
+                       e.Errorf("invalid label %q", l.Label)
+               }
+
+               // Check label resource.
+               if l.Resource.Kind == trace.ResourceNone {
+                       e.Errorf("label resource none")
+               }
+               switch l.Resource.Kind {
+               case trace.ResourceGoroutine:
+                       id := l.Resource.Goroutine()
+                       if _, ok := v.gs[id]; !ok {
+                               e.Errorf("label for invalid goroutine %d", id)
+                       }
+               case trace.ResourceProc:
+                       id := l.Resource.Proc()
+                       if _, ok := v.ps[id]; !ok {
+                               e.Errorf("label for invalid proc %d", id)
+                       }
+               case trace.ResourceThread:
+                       id := l.Resource.Thread()
+                       if _, ok := v.ms[id]; !ok {
+                               e.Errorf("label for invalid thread %d", id)
+                       }
+               }
+       case trace.EventStackSample:
+               // Not much to check here. It's basically a sched context and a stack.
+               // The sched context is also not guaranteed to align with other events.
+               // We already checked the stack above.
+       case trace.EventStateTransition:
+               // Validate state transitions.
+               //
+               // TODO(mknyszek): A lot of logic is duplicated between goroutines and procs.
+               // The two are intentionally handled identically; from the perspective of the
+               // API, resources all have the same general properties. Consider making this
+               // code generic over resources and implementing validation just once.
+               tr := ev.StateTransition()
+               checkStack(e, tr.Stack)
+               switch tr.Resource.Kind {
+               case trace.ResourceGoroutine:
+                       // Basic state transition validation.
+                       id := tr.Resource.Goroutine()
+                       old, new := tr.Goroutine()
+                       if new == trace.GoUndetermined {
+                               e.Errorf("transition to undetermined state for goroutine %d", id)
+                       }
+                       if v.seenSync && old == trace.GoUndetermined {
+                               e.Errorf("undetermined goroutine %d after first global sync", id)
+                       }
+                       if new == trace.GoNotExist && v.hasAnyRange(trace.MakeResourceID(id)) {
+                               e.Errorf("goroutine %d died with active ranges", id)
+                       }
+                       state, ok := v.gs[id]
+                       if ok {
+                               if old != state.state {
+                                       e.Errorf("bad old state for goroutine %d: got %s, want %s", id, old, state.state)
+                               }
+                               state.state = new
+                       } else {
+                               if old != trace.GoUndetermined && old != trace.GoNotExist {
+                                       e.Errorf("bad old state for unregistered goroutine %d: %s", id, old)
+                               }
+                               state = &goState{state: new}
+                               v.gs[id] = state
+                       }
+                       // Validate sched context.
+                       if new.Executing() {
+                               ctx := v.getOrCreateThread(e, ev.Thread())
+                               if ctx != nil {
+                                       if ctx.G != trace.NoGoroutine && ctx.G != id {
+                                               e.Errorf("tried to run goroutine %d when one was already executing (%d) on thread %d", id, ctx.G, ev.Thread())
+                                       }
+                                       ctx.G = id
+                                       state.binding = ctx
+                               }
+                       } else if old.Executing() && !new.Executing() {
+                               ctx := state.binding
+                               if ctx != nil {
+                                       if ctx.G != id {
+                                               e.Errorf("tried to stop goroutine %d when it wasn't currently executing (currently executing %d) on thread %d", id, ctx.G, ev.Thread())
+                                       }
+                                       ctx.G = trace.NoGoroutine
+                                       state.binding = nil
+                               } else {
+                                       e.Errorf("stopping goroutine %d not bound to any active context", id)
+                               }
+                       }
+               case trace.ResourceProc:
+                       // Basic state transition validation.
+                       id := tr.Resource.Proc()
+                       old, new := tr.Proc()
+                       if new == trace.ProcUndetermined {
+                               e.Errorf("transition to undetermined state for proc %d", id)
+                       }
+                       if v.seenSync && old == trace.ProcUndetermined {
+                               e.Errorf("undetermined proc %d after first global sync", id)
+                       }
+                       if new == trace.ProcNotExist && v.hasAnyRange(trace.MakeResourceID(id)) {
+                               e.Errorf("proc %d died with active ranges", id)
+                       }
+                       state, ok := v.ps[id]
+                       if ok {
+                               if old != state.state {
+                                       e.Errorf("bad old state for proc %d: got %s, want %s", id, old, state.state)
+                               }
+                               state.state = new
+                       } else {
+                               if old != trace.ProcUndetermined && old != trace.ProcNotExist {
+                                       e.Errorf("bad old state for unregistered proc %d: %s", id, old)
+                               }
+                               state = &procState{state: new}
+                               v.ps[id] = state
+                       }
+                       // Validate sched context.
+                       if new.Executing() {
+                               ctx := v.getOrCreateThread(e, ev.Thread())
+                               if ctx != nil {
+                                       if ctx.P != trace.NoProc && ctx.P != id {
+                                               e.Errorf("tried to run proc %d when one was already executing (%d) on thread %d", id, ctx.P, ev.Thread())
+                                       }
+                                       ctx.P = id
+                                       state.binding = ctx
+                               }
+                       } else if old.Executing() && !new.Executing() {
+                               ctx := state.binding
+                               if ctx != nil {
+                                       if ctx.P != id {
+                                               e.Errorf("tried to stop proc %d when it wasn't currently executing (currently executing %d) on thread %d", id, ctx.P, ev.Thread())
+                                       }
+                                       ctx.P = trace.NoProc
+                                       state.binding = nil
+                               } else {
+                                       e.Errorf("stopping proc %d not bound to any active context", id)
+                               }
+                       }
+               }
+       case trace.EventRangeBegin, trace.EventRangeActive, trace.EventRangeEnd:
+               // Validate ranges.
+               r := ev.Range()
+               switch ev.Kind() {
+               case trace.EventRangeBegin:
+                       if v.hasRange(r.Scope, r.Name) {
+                               e.Errorf("already active range %q on %v begun again", r.Name, r.Scope)
+                       }
+                       v.addRange(r.Scope, r.Name)
+               case trace.EventRangeActive:
+                       if !v.hasRange(r.Scope, r.Name) {
+                               v.addRange(r.Scope, r.Name)
+                       }
+               case trace.EventRangeEnd:
+                       if !v.hasRange(r.Scope, r.Name) {
+                               e.Errorf("inactive range %q on %v ended", r.Name, r.Scope)
+                       }
+                       v.deleteRange(r.Scope, r.Name)
+               }
+       case trace.EventTaskBegin:
+               // Validate task begin.
+               t := ev.Task()
+               if t.ID == trace.NoTask {
+                       e.Errorf("found invalid task ID for task of type %s", t.Type)
+               }
+               // N.B. Don't check the task type. Empty string is a valid task type.
+               v.tasks[t.ID] = t.Type
+       case trace.EventTaskEnd:
+               // Validate task end.
+               // We can see a task end without a begin, so ignore a task without information.
+               // Instead, if we've seen the task begin, just make sure the task end lines up.
+               t := ev.Task()
+               if typ, ok := v.tasks[t.ID]; ok {
+                       if t.Type != typ {
+                               e.Errorf("task end type %q doesn't match task start type %q for task %d", t.Type, typ, t.ID)
+                       }
+                       delete(v.tasks, t.ID)
+               }
+       case trace.EventLog:
+               // There's really not much here to check, except that we can
+               // generate a Log. The category and message are entirely user-created,
+               // so we can't make any assumptions as to what they are. We also
+               // can't validate the task, because proving the task's existence is very
+               // much best-effort.
+               _ = ev.Log()
+       }
+       return e.Errors()
+}
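+
+// A minimal usage sketch (names assumed for illustration): given a reader r
+// from trace.NewReader and a Validator v, feed every event to v and stop at
+// the first violation.
+//
+//	for {
+//		ev, err := r.ReadEvent()
+//		if err == io.EOF {
+//			break
+//		}
+//		if err != nil {
+//			return err
+//		}
+//		if err := v.Event(ev); err != nil {
+//			return err
+//		}
+//	}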
+
+func (v *Validator) hasRange(r trace.ResourceID, name string) bool {
+       ranges, ok := v.ranges[r]
+       return ok && slices.Contains(ranges, name)
+}
+
+func (v *Validator) addRange(r trace.ResourceID, name string) {
+       v.ranges[r] = append(v.ranges[r], name)
+}
+
+func (v *Validator) hasAnyRange(r trace.ResourceID) bool {
+       ranges, ok := v.ranges[r]
+       return ok && len(ranges) != 0
+}
+
+func (v *Validator) deleteRange(r trace.ResourceID, name string) {
+       ranges, ok := v.ranges[r]
+       if !ok {
+               return
+       }
+       i := slices.Index(ranges, name)
+       if i < 0 {
+               return
+       }
+       v.ranges[r] = slices.Delete(ranges, i, i+1)
+}
+
+func (v *Validator) getOrCreateThread(e *errAccumulator, m trace.ThreadID) *schedContext {
+       if m == trace.NoThread {
+               e.Errorf("must have thread, but thread ID is none")
+               return nil
+       }
+       s, ok := v.ms[m]
+       if !ok {
+               s = &schedContext{M: m, P: trace.NoProc, G: trace.NoGoroutine}
+               v.ms[m] = s
+       }
+       return s
+}
+
+func checkStack(e *errAccumulator, stk trace.Stack) {
+       // Check frames for non-empty values; iterating also exercises the stack
+       // decoding, so crashes from bad data surface here.
+       i := 0
+       stk.Frames(func(f trace.StackFrame) bool {
+               if i == 0 {
+                       // Allow for one fully zero stack.
+                       //
+                       // TODO(mknyszek): Investigate why that happens.
+                       i++
+                       return true
+               }
+               if f.Func == "" || f.File == "" || f.PC == 0 || f.Line == 0 {
+                       e.Errorf("invalid stack frame %#v: missing information", f)
+               }
+               i++
+               return true
+       })
+}
+
+type errAccumulator struct {
+       errs []error
+}
+
+func (e *errAccumulator) Errorf(f string, args ...any) {
+       e.errs = append(e.errs, fmt.Errorf(f, args...))
+}
+
+func (e *errAccumulator) Errors() error {
+       return errors.Join(e.errs...)
+}
diff --git a/src/internal/trace/v2/trace_test.go b/src/internal/trace/v2/trace_test.go
new file mode 100644 (file)
index 0000000..af0d639
--- /dev/null
@@ -0,0 +1,583 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace_test
+
+import (
+       "bufio"
+       "bytes"
+       "fmt"
+       "internal/testenv"
+       "internal/trace/v2"
+       "internal/trace/v2/testtrace"
+       "io"
+       "os"
+       "path/filepath"
+       "runtime"
+       "strings"
+       "testing"
+)
+
+func TestTraceAnnotations(t *testing.T) {
+       testTraceProg(t, "annotations.go", func(t *testing.T, tb, _ []byte, _ bool) {
+               type evDesc struct {
+                       kind trace.EventKind
+                       task trace.TaskID
+                       args []string
+               }
+               want := []evDesc{
+                       {trace.EventTaskBegin, trace.TaskID(1), []string{"task0"}},
+                       {trace.EventRegionBegin, trace.TaskID(1), []string{"region0"}},
+                       {trace.EventRegionBegin, trace.TaskID(1), []string{"region1"}},
+                       {trace.EventLog, trace.TaskID(1), []string{"key0", "0123456789abcdef"}},
+                       {trace.EventRegionEnd, trace.TaskID(1), []string{"region1"}},
+                       {trace.EventRegionEnd, trace.TaskID(1), []string{"region0"}},
+                       {trace.EventTaskEnd, trace.TaskID(1), []string{"task0"}},
+                       // Currently, the pre-existing region is not recorded, to avoid allocations.
+                       {trace.EventRegionBegin, trace.NoTask, []string{"post-existing region"}},
+               }
+               r, err := trace.NewReader(bytes.NewReader(tb))
+               if err != nil {
+                       t.Error(err)
+               }
+               for {
+                       ev, err := r.ReadEvent()
+                       if err == io.EOF {
+                               break
+                       }
+                       if err != nil {
+                               t.Fatal(err)
+                       }
+                       for i, wantEv := range want {
+                               if wantEv.kind != ev.Kind() {
+                                       continue
+                               }
+                               match := false
+                               switch ev.Kind() {
+                               case trace.EventTaskBegin, trace.EventTaskEnd:
+                                       task := ev.Task()
+                                       match = task.ID == wantEv.task && task.Type == wantEv.args[0]
+                               case trace.EventRegionBegin, trace.EventRegionEnd:
+                                       reg := ev.Region()
+                                       match = reg.Task == wantEv.task && reg.Type == wantEv.args[0]
+                               case trace.EventLog:
+                                       log := ev.Log()
+                                       match = log.Task == wantEv.task && log.Category == wantEv.args[0] && log.Message == wantEv.args[1]
+                               }
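+                               // Matched expectations are removed via swap-with-last;
+                               // the order of entries in want doesn't matter.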
+                               if match {
+                                       want[i] = want[len(want)-1]
+                                       want = want[:len(want)-1]
+                                       break
+                               }
+                       }
+               }
+               if len(want) != 0 {
+                       for _, ev := range want {
+                               t.Errorf("no match for %s TaskID=%d Args=%#v", ev.kind, ev.task, ev.args)
+                       }
+               }
+       })
+}
+
+func TestTraceAnnotationsStress(t *testing.T) {
+       testTraceProg(t, "annotations-stress.go", nil)
+}
+
+func TestTraceCgoCallback(t *testing.T) {
+       switch runtime.GOOS {
+       case "plan9", "windows":
+               t.Skipf("cgo callback test requires pthreads and is not supported on %s", runtime.GOOS)
+       }
+       testTraceProg(t, "cgo-callback.go", nil)
+}
+
+func TestTraceCPUProfile(t *testing.T) {
+       testTraceProg(t, "cpu-profile.go", func(t *testing.T, tb, stderr []byte, _ bool) {
+               // Parse stderr which has a CPU profile summary, if everything went well.
+               // (If it didn't, we shouldn't even make it here.)
+               scanner := bufio.NewScanner(bytes.NewReader(stderr))
+               pprofSamples := 0
+               pprofStacks := make(map[string]int)
+               for scanner.Scan() {
+                       var stack string
+                       var samples int
+                       _, err := fmt.Sscanf(scanner.Text(), "%s\t%d", &stack, &samples)
+                       if err != nil {
+                               t.Fatalf("failed to parse CPU profile summary in stderr: %s\n\tfull:\n%s", scanner.Text(), stderr)
+                       }
+                       pprofStacks[stack] = samples
+                       pprofSamples += samples
+               }
+               if err := scanner.Err(); err != nil {
+                       t.Fatalf("failed to parse CPU profile summary in stderr: %v", err)
+               }
+               if pprofSamples == 0 {
+                       t.Skip("CPU profile did not include any samples while tracing was active")
+               }
+
+               // Examine the execution tracer's view of the CPU profile samples. Filter it
+               // to only include samples from the single test goroutine. Use the goroutine
+               // ID that was recorded in the events: that should reflect getg().m.curg,
+               // same as the profiler's labels (even when the M is using its g0 stack).
+               totalTraceSamples := 0
+               traceSamples := 0
+               traceStacks := make(map[string]int)
+               r, err := trace.NewReader(bytes.NewReader(tb))
+               if err != nil {
+                       t.Error(err)
+               }
+               var hogRegion *trace.Event
+               var hogRegionClosed bool
+               for {
+                       ev, err := r.ReadEvent()
+                       if err == io.EOF {
+                               break
+                       }
+                       if err != nil {
+                               t.Fatal(err)
+                       }
+                       if ev.Kind() == trace.EventRegionBegin && ev.Region().Type == "cpuHogger" {
+                               hogRegion = &ev
+                       }
+                       if ev.Kind() == trace.EventStackSample {
+                               totalTraceSamples++
+                               if hogRegion != nil && ev.Goroutine() == hogRegion.Goroutine() {
+                                       traceSamples++
+                                       var fns []string
+                                       ev.Stack().Frames(func(frame trace.StackFrame) bool {
+                                               if frame.Func != "runtime.goexit" {
+                                                       fns = append(fns, fmt.Sprintf("%s:%d", frame.Func, frame.Line))
+                                               }
+                                               return true
+                                       })
+                                       stack := strings.Join(fns, "|")
+                                       traceStacks[stack]++
+                               }
+                       }
+                       if ev.Kind() == trace.EventRegionEnd && ev.Region().Type == "cpuHogger" {
+                               hogRegionClosed = true
+                       }
+               }
+               if hogRegion == nil {
+                       t.Fatalf("execution trace did not identify cpuHogger goroutine")
+               } else if !hogRegionClosed {
+                       t.Fatalf("execution trace did not close cpuHogger region")
+               }
+
+               // The execution trace may drop CPU profile samples if the profiling buffer
+               // overflows. Based on the size of profBufWordCount, that takes a bit over
+               // 1900 CPU samples or 19 thread-seconds at a 100 Hz sample rate. If we've
+               // hit that case, then we definitely have at least one full buffer's worth
+               // of CPU samples, so we'll call that success.
+               overflowed := totalTraceSamples >= 1900
+               if traceSamples < pprofSamples {
+                       t.Logf("execution trace did not include all CPU profile samples; %d in profile, %d in trace", pprofSamples, traceSamples)
+                       if !overflowed {
+                               t.Fail()
+                       }
+               }
+
+               for stack, traceSamples := range traceStacks {
+                       pprofSamples := pprofStacks[stack]
+                       delete(pprofStacks, stack)
+                       if traceSamples < pprofSamples {
+                               t.Logf("execution trace did not include all CPU profile samples for stack %q; %d in profile, %d in trace",
+                                       stack, pprofSamples, traceSamples)
+                               if !overflowed {
+                                       t.Fail()
+                               }
+                       }
+               }
+               for stack, pprofSamples := range pprofStacks {
+                       t.Logf("CPU profile included %d samples at stack %q not present in execution trace", pprofSamples, stack)
+                       if !overflowed {
+                               t.Fail()
+                       }
+               }
+
+               if t.Failed() {
+                       t.Logf("execution trace CPU samples:")
+                       for stack, samples := range traceStacks {
+                               t.Logf("%d: %q", samples, stack)
+                       }
+                       t.Logf("CPU profile:\n%s", stderr)
+               }
+       })
+}
+
+func TestTraceFutileWakeup(t *testing.T) {
+       testTraceProg(t, "futile-wakeup.go", func(t *testing.T, tb, _ []byte, _ bool) {
+               // Check to make sure that no goroutine in the "special" trace region
+               // ends up blocking, unblocking, then immediately blocking again.
+               //
+               // The goroutines are careful to call runtime.Gosched in between blocking,
+               // so there should never be a clean block/unblock on the goroutine unless
+               // the runtime was generating extraneous events.
+               const (
+                       entered = iota
+                       blocked
+                       runnable
+                       running
+               )
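+               // Per-goroutine state machine: entered -> blocked -> runnable ->
+               // running; see the transition tracking below.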
+               gs := make(map[trace.GoID]int)
+               seenSpecialGoroutines := false
+               r, err := trace.NewReader(bytes.NewReader(tb))
+               if err != nil {
+                       t.Error(err)
+               }
+               for {
+                       ev, err := r.ReadEvent()
+                       if err == io.EOF {
+                               break
+                       }
+                       if err != nil {
+                               t.Fatal(err)
+                       }
+                       // Only track goroutines in the special region we control, so runtime
+                       // goroutines don't interfere (it's totally valid in traces for a
+                       // goroutine to block, run, and block again; that's not what we care about).
+                       if ev.Kind() == trace.EventRegionBegin && ev.Region().Type == "special" {
+                               seenSpecialGoroutines = true
+                               gs[ev.Goroutine()] = entered
+                       }
+                       if ev.Kind() == trace.EventRegionEnd && ev.Region().Type == "special" {
+                               delete(gs, ev.Goroutine())
+                       }
+                       // Track state transitions for goroutines we care about.
+                       //
+                       // The goroutines we care about will advance through the state machine
+                       // of entered -> blocked -> runnable -> running. If in the running state
+                       // we block, then we have a futile wakeup. Because of the runtime.Gosched
+                       // on these specially marked goroutines, we should end up back in runnable
+                       // first. If at any point we go to a different state, switch back to entered
+                       // and wait for the next time the goroutine blocks.
+                       if ev.Kind() != trace.EventStateTransition {
+                               continue
+                       }
+                       st := ev.StateTransition()
+                       if st.Resource.Kind != trace.ResourceGoroutine {
+                               continue
+                       }
+                       id := st.Resource.Goroutine()
+                       state, ok := gs[id]
+                       if !ok {
+                               continue
+                       }
+                       _, new := st.Goroutine()
+                       switch state {
+                       case entered:
+                               if new == trace.GoWaiting {
+                                       state = blocked
+                               } else {
+                                       state = entered
+                               }
+                       case blocked:
+                               if new == trace.GoRunnable {
+                                       state = runnable
+                               } else {
+                                       state = entered
+                               }
+                       case runnable:
+                               if new == trace.GoRunning {
+                                       state = running
+                               } else {
+                                       state = entered
+                               }
+                       case running:
+                               if new == trace.GoWaiting {
+                                       t.Fatalf("found futile wakeup on goroutine %d", id)
+                               } else {
+                                       state = entered
+                               }
+                       }
+                       gs[id] = state
+               }
+               if !seenSpecialGoroutines {
+                       t.Fatal("did not see a goroutine in the region 'special'")
+               }
+       })
+}
+
+func TestTraceGCStress(t *testing.T) {
+       testTraceProg(t, "gc-stress.go", nil)
+}
+
+func TestTraceGOMAXPROCS(t *testing.T) {
+       testTraceProg(t, "gomaxprocs.go", nil)
+}
+
+func TestTraceStacks(t *testing.T) {
+       testTraceProg(t, "stacks.go", func(t *testing.T, tb, _ []byte, stress bool) {
+               type frame struct {
+                       fn   string
+                       line int
+               }
+               type evDesc struct {
+                       kind   trace.EventKind
+                       match  string
+                       frames []frame
+               }
+               // mainLine is the line number of `func main()` in testprog/stacks.go.
+               const mainLine = 21
+               want := []evDesc{
+                       {trace.EventStateTransition, "Goroutine Running->Runnable", []frame{
+                               {"main.main", mainLine + 82},
+                       }},
+                       {trace.EventStateTransition, "Goroutine NotExist->Runnable", []frame{
+                               {"main.main", mainLine + 11},
+                       }},
+                       {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
+                               {"runtime.block", 0},
+                               {"main.main.func1", 0},
+                       }},
+                       {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
+                               {"runtime.chansend1", 0},
+                               {"main.main.func2", 0},
+                       }},
+                       {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
+                               {"runtime.chanrecv1", 0},
+                               {"main.main.func3", 0},
+                       }},
+                       {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
+                               {"runtime.chanrecv1", 0},
+                               {"main.main.func4", 0},
+                       }},
+                       {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
+                               {"runtime.chansend1", 0},
+                               {"main.main", mainLine + 84},
+                       }},
+                       {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
+                               {"runtime.chansend1", 0},
+                               {"main.main.func5", 0},
+                       }},
+                       {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
+                               {"runtime.chanrecv1", 0},
+                               {"main.main", mainLine + 85},
+                       }},
+                       {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
+                               {"runtime.selectgo", 0},
+                               {"main.main.func6", 0},
+                       }},
+                       {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
+                               {"runtime.selectgo", 0},
+                               {"main.main", mainLine + 86},
+                       }},
+                       {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
+                               {"sync.(*Mutex).Lock", 0},
+                               {"main.main.func7", 0},
+                       }},
+                       {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
+                               {"sync.(*Mutex).Unlock", 0},
+                               {"main.main", 0},
+                       }},
+                       {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
+                               {"sync.(*WaitGroup).Wait", 0},
+                               {"main.main.func8", 0},
+                       }},
+                       {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
+                               {"sync.(*WaitGroup).Add", 0},
+                               {"sync.(*WaitGroup).Done", 0},
+                               {"main.main", mainLine + 91},
+                       }},
+                       {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
+                               {"sync.(*Cond).Wait", 0},
+                               {"main.main.func9", 0},
+                       }},
+                       {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
+                               {"sync.(*Cond).Signal", 0},
+                               {"main.main", 0},
+                       }},
+                       {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
+                               {"time.Sleep", 0},
+                               {"main.main", 0},
+                       }},
+                       {trace.EventMetric, "/sched/gomaxprocs:threads", []frame{
+                               {"runtime.startTheWorld", 0}, // this is when the current gomaxprocs is logged.
+                               {"runtime.startTheWorldGC", 0},
+                               {"runtime.GOMAXPROCS", 0},
+                               {"main.main", 0},
+                       }},
+               }
+               if !stress {
+                       // Only check for this stack if !stress because traceAdvance alone could
+                       // allocate enough memory to trigger a GC if called frequently enough.
+                       // This might cause the runtime.GC call we're trying to match against to
+                       // coalesce with an active GC triggered by traceAdvance. In that case
+                       // we won't have an EventRangeBegin event that matches the stack trace we're
+                       // looking for, since runtime.GC will not have triggered the GC.
+                       gcEv := evDesc{trace.EventRangeBegin, "GC concurrent mark phase", []frame{
+                               {"runtime.GC", 0},
+                               {"main.main", 0},
+                       }}
+                       want = append(want, gcEv)
+               }
+               if runtime.GOOS != "windows" && runtime.GOOS != "plan9" {
+                       want = append(want, []evDesc{
+                               {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
+                                       {"internal/poll.(*FD).Accept", 0},
+                                       {"net.(*netFD).accept", 0},
+                                       {"net.(*TCPListener).accept", 0},
+                                       {"net.(*TCPListener).Accept", 0},
+                                       {"main.main.func10", 0},
+                               }},
+                               {trace.EventStateTransition, "Goroutine Running->Syscall", []frame{
+                                       {"syscall.read", 0},
+                                       {"syscall.Read", 0},
+                                       {"internal/poll.ignoringEINTRIO", 0},
+                                       {"internal/poll.(*FD).Read", 0},
+                                       {"os.(*File).read", 0},
+                                       {"os.(*File).Read", 0},
+                                       {"main.main.func11", 0},
+                               }},
+                       }...)
+               }
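+               // stackMatches reports whether the event's stack, leaf-first,
+               // matches the expected frames; an expected line of 0 matches any
+               // line number.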
+               stackMatches := func(stk trace.Stack, frames []frame) bool {
+                       i := 0
+                       match := true
+                       stk.Frames(func(f trace.StackFrame) bool {
+                               if f.Func != frames[i].fn {
+                                       match = false
+                                       return false
+                               }
+                               if line := uint64(frames[i].line); line != 0 && line != f.Line {
+                                       match = false
+                                       return false
+                               }
+                               i++
+                               return true
+                       })
+                       return match
+               }
+               r, err := trace.NewReader(bytes.NewReader(tb))
+               if err != nil {
+                       t.Error(err)
+               }
+               for {
+                       ev, err := r.ReadEvent()
+                       if err == io.EOF {
+                               break
+                       }
+                       if err != nil {
+                               t.Fatal(err)
+                       }
+                       for i, wantEv := range want {
+                               if wantEv.kind != ev.Kind() {
+                                       continue
+                               }
+                               match := false
+                               switch ev.Kind() {
+                               case trace.EventStateTransition:
+                                       st := ev.StateTransition()
+                                       str := ""
+                                       switch st.Resource.Kind {
+                                       case trace.ResourceGoroutine:
+                                               old, new := st.Goroutine()
+                                               str = fmt.Sprintf("%s %s->%s", st.Resource.Kind, old, new)
+                                       }
+                                       match = str == wantEv.match
+                               case trace.EventRangeBegin:
+                                       rng := ev.Range()
+                                       match = rng.Name == wantEv.match
+                               case trace.EventMetric:
+                                       metric := ev.Metric()
+                                       match = metric.Name == wantEv.match
+                               }
+                               match = match && stackMatches(ev.Stack(), wantEv.frames)
+                               if match {
+                                       want[i] = want[len(want)-1]
+                                       want = want[:len(want)-1]
+                                       break
+                               }
+                       }
+               }
+               if len(want) != 0 {
+                       for _, ev := range want {
+                               t.Errorf("no match for %s Match=%s Stack=%#v", ev.kind, ev.match, ev.frames)
+                       }
+               }
+       })
+}
+
+func TestTraceStress(t *testing.T) {
+       switch runtime.GOOS {
+       case "js", "wasip1":
+               t.Skip("no os.Pipe on " + runtime.GOOS)
+       }
+       testTraceProg(t, "stress.go", nil)
+}
+
+func TestTraceStressStartStop(t *testing.T) {
+       switch runtime.GOOS {
+       case "js", "wasip1":
+               t.Skip("no os.Pipe on " + runtime.GOOS)
+       }
+       testTraceProg(t, "stress-start-stop.go", nil)
+}
+
+func TestTraceManyStartStop(t *testing.T) {
+       testTraceProg(t, "many-start-stop.go", nil)
+}
+
+func testTraceProg(t *testing.T, progName string, extra func(t *testing.T, trace, stderr []byte, stress bool)) {
+       testenv.MustHaveGoRun(t)
+
+       // Check if we're on a builder.
+       onBuilder := testenv.Builder() != ""
+
+       testPath := filepath.Join("./testdata/testprog", progName)
+       testName := progName
+       runTest := func(t *testing.T, stress bool) {
+               // Run the program and capture the trace, which is always written to stdout.
+               cmd := testenv.Command(t, testenv.GoToolPath(t), "run", testPath)
+               cmd.Env = append(os.Environ(), "GOEXPERIMENT=exectracer2")
+               if stress {
+                       // Advance a generation constantly.
+                       cmd.Env = append(cmd.Env, "GODEBUG=traceadvanceperiod=0")
+               }
+               // Capture stdout and stderr.
+               //
+               // The protocol for these programs is that stdout contains the trace data
+               // and stderr is an expectation in string format.
+               var traceBuf, errBuf bytes.Buffer
+               cmd.Stdout = &traceBuf
+               cmd.Stderr = &errBuf
+               // Run the program.
+               if err := cmd.Run(); err != nil {
+                       if errBuf.Len() != 0 {
+                               t.Logf("stderr: %s", errBuf.String())
+                       }
+                       t.Fatal(err)
+               }
+               tb := traceBuf.Bytes()
+
+               // Test the trace and the parser.
+               testReader(t, bytes.NewReader(tb), testtrace.ExpectSuccess())
+
+               // Run some extra validation.
+               if !t.Failed() && extra != nil {
+                       extra(t, tb, errBuf.Bytes(), stress)
+               }
+
+               // Dump some more information on failure.
+               if t.Failed() && onBuilder {
+                       // Dump directly to the test log on the builder, since this
+                       // data is critical for debugging and this is the only way
+                       // we can currently make sure it's retained.
+                       t.Log("found bad trace; dumping to test log...")
+                       t.Log(dumpTraceToText(t, tb))
+               } else if t.Failed() || *dumpTraces {
+                       // We asked to dump the trace or failed. Write the trace to a file.
+                       t.Logf("wrote trace to file: %s", dumpTraceToFile(t, testName, stress, tb))
+               }
+       }
+       t.Run("Default", func(t *testing.T) {
+               runTest(t, false)
+       })
+       t.Run("Stress", func(t *testing.T) {
+               if testing.Short() {
+                       t.Skip("skipping trace reader stress tests in short mode")
+               }
+               runTest(t, true)
+       })
+}
diff --git a/src/internal/trace/v2/value.go b/src/internal/trace/v2/value.go
new file mode 100644 (file)
index 0000000..bd2cba7
--- /dev/null
@@ -0,0 +1,53 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import "fmt"
+
+// Value is a dynamically-typed value obtained from a trace.
+type Value struct {
+       kind   ValueKind
+       scalar uint64
+}
+
+// ValueKind is the type of a dynamically-typed value from a trace.
+type ValueKind uint8
+
+const (
+       ValueBad ValueKind = iota
+       ValueUint64
+)
+
+// Kind returns the ValueKind of the value.
+//
+// It represents the underlying structure of the value.
+//
+// New ValueKinds may be added in the future. Users of this type must be robust
+// to that possibility.
+func (v Value) Kind() ValueKind {
+       return v.kind
+}
+
+// Uint64 returns the uint64 value for a ValueUint64.
+//
+// Panics if this Value's Kind is not ValueUint64.
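+//
+// Callers are expected to check Kind first:
+//
+//	if v.Kind() == ValueUint64 {
+//		_ = v.Uint64() // safe
+//	}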
+func (v Value) Uint64() uint64 {
+       if v.kind != ValueUint64 {
+               panic("Uint64 called on Value of a different Kind")
+       }
+       return v.scalar
+}
+
+// valueAsString produces a debug string value.
+//
+// This isn't just Value.String because we may want to use that to store
+// string values in the future.
+func valueAsString(v Value) string {
+       switch v.Kind() {
+       case ValueUint64:
+               return fmt.Sprintf("Uint64(%d)", v.scalar)
+       }
+       return "Bad"
+}
diff --git a/src/internal/trace/v2/version/version.go b/src/internal/trace/v2/version/version.go
new file mode 100644 (file)
index 0000000..deb8f2b
--- /dev/null
@@ -0,0 +1,52 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package version
+
+import (
+       "fmt"
+       "io"
+
+       "internal/trace/v2/event"
+       "internal/trace/v2/event/go122"
+)
+
+// Version represents the version of a trace file.
+type Version uint32
+
+const (
+       Go122 Version = 22
+)
+
+var versions = map[Version][]event.Spec{
+       Go122: go122.Specs(),
+}
+
+// Specs returns the set of event.Specs for this version.
+func (v Version) Specs() []event.Spec {
+       return versions[v]
+}
+
+func (v Version) Valid() bool {
+       _, ok := versions[v]
+       return ok
+}
+
+// headerFmt is the format of the header of all Go execution traces.
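+//
+// For Go 1.22 (version 22) this renders as "go 1.22 trace\x00\x00\x00".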
+const headerFmt = "go 1.%d trace\x00\x00\x00"
+
+// ReadHeader reads the version of the trace out of the trace file's
+// header, which must be present at the start of r.
+func ReadHeader(r io.Reader) (Version, error) {
+       var v Version
+       _, err := fmt.Fscanf(r, headerFmt, &v)
+       if err != nil {
+               return v, fmt.Errorf("bad file format: not a Go execution trace?")
+       }
+       if !v.Valid() {
+               return v, fmt.Errorf("unknown or unsupported trace version go 1.%d", v)
+       }
+       return v, nil
+}
+
+// WriteHeader writes a header for a trace version v to w.
+func WriteHeader(w io.Writer, v Version) (int, error) {
+       return fmt.Fprintf(w, headerFmt, v)
+}
index 5d0750e8f45c82a6757771e64a6ddc3e06d3a778..304f1a75545c78adaf179a72057585bc46f69879 100644 (file)
@@ -8,6 +8,7 @@ package runtime_test
 
 import (
        "fmt"
+       "internal/goexperiment"
        "internal/goos"
        "internal/platform"
        "internal/testenv"
@@ -785,6 +786,9 @@ func TestCgoTraceParser(t *testing.T) {
        case "plan9", "windows":
                t.Skipf("no pthreads on %s", runtime.GOOS)
        }
+       if goexperiment.ExecTracer2 {
+               t.Skip("skipping test that is covered elsewhere for the new execution tracer")
+       }
        output := runTestProg(t, "testprogcgo", "CgoTraceParser")
        want := "OK\n"
        ErrTimeOrder := "ErrTimeOrder\n"
@@ -801,6 +805,9 @@ func TestCgoTraceParserWithOneProc(t *testing.T) {
        case "plan9", "windows":
                t.Skipf("no pthreads on %s", runtime.GOOS)
        }
+       if goexperiment.ExecTracer2 {
+               t.Skip("skipping test that is covered elsewhere for the new execution tracer")
+       }
        output := runTestProg(t, "testprogcgo", "CgoTraceParser", "GOMAXPROCS=1")
        want := "OK\n"
        ErrTimeOrder := "ErrTimeOrder\n"
index cc88a659ca1b32ea97c1f6544cae7672ffbcf017..f8db296e6ba67f73290b0cd69be39f68247f4d86 100644 (file)
@@ -188,6 +188,10 @@ It is a comma-separated list of name=val pairs setting these named variables:
        This increases tracer overhead, but could be helpful as a workaround or for
        debugging unexpected regressions caused by frame pointer unwinding.
 
+       traceadvanceperiod: the approximate period in nanoseconds between trace generations. Only
+       applies if a program is built with GOEXPERIMENT=exectracer2. Used primarily for testing
+       and debugging the execution tracer.
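+       For example, the tracer's own tests set GODEBUG=traceadvanceperiod=0 to force
+       the runtime to advance trace generations as often as possible.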
+
        asyncpreemptoff: asyncpreemptoff=1 disables signal-based
        asynchronous goroutine preemption. This makes some loops
        non-preemptible for long periods, which may delay GC and
index bf530eeb5bcf101c292263623f4fd5aa28464119..b1d99997947ae4443e5542dab471f5514a3ff2a3 100644 (file)
@@ -213,7 +213,9 @@ func releaseLockRank(rank lockRank) {
        })
 }
 
-// See comment on lockWithRank regarding stack splitting.
+// nosplit because it may be called from nosplit contexts.
+//
+//go:nosplit
 func lockWithRankMayAcquire(l *mutex, rank lockRank) {
        gp := getg()
        if gp.m.locksHeldLen == 0 {
index d015d6dbabaa8035f28b5debad991c9da3840195..b791acd163d67dfb11b9fd783bef96f12840f12b 100644 (file)
@@ -939,6 +939,9 @@ func gcMarkTermination() {
        mp.preemptoff = "gcing"
        mp.traceback = 2
        curgp := mp.curg
+       // N.B. The execution tracer is not aware of this status
+       // transition and handles it specially based on the
+       // wait reason.
        casGToWaiting(curgp, _Grunning, waitReasonGarbageCollection)
 
        // Run gc on the g0 stack. We do this so that the g stack
@@ -1359,6 +1362,10 @@ func gcBgMarkWorker() {
                        // the G stack. However, stack shrinking is
                        // disabled for mark workers, so it is safe to
                        // read from the G stack.
+                       //
+                       // N.B. The execution tracer is not aware of this status
+                       // transition and handles it specially based on the
+                       // wait reason.
                        casGToWaiting(gp, _Grunning, waitReasonGCWorkerActive)
                        switch pp.gcMarkWorkerMode {
                        default:
index 004dc88828a0a4efe7fc3b122583631bcad39a73..95ec069bcfd76628d31e88604bf22a71e3efca4f 100644 (file)
@@ -414,16 +414,47 @@ func gcAssistAlloc(gp *g) {
                return
        }
 
-       traced := false
+       // This extremely verbose boolean indicates whether we've
+       // entered mark assist from the perspective of the tracer.
+       //
+       // In the old tracer, this is just before we call gcAssistAlloc1
+       // *and* tracing is enabled. Because the old tracer doesn't
+       // do any extra tracking, we need to be careful to not emit an
+       // "end" event if there was no corresponding "begin" for the
+       // mark assist.
+       //
+       // In the new tracer, this is just before we call gcAssistAlloc1
+       // *regardless* of whether tracing is enabled. This is because
+       // the new tracer allows for tracing to begin (and advance
+       // generations) in the middle of a GC mark phase, so we need to
+       // record some state so that the tracer can pick it up to ensure
+       // a consistent trace result.
+       //
+       // TODO(mknyszek): Hide the details of inMarkAssist in tracer
+       // functions and simplify all the state tracking. This is a lot.
+       enteredMarkAssistForTracing := false
 retry:
        if gcCPULimiter.limiting() {
                // If the CPU limiter is enabled, intentionally don't
                // assist to reduce the amount of CPU time spent in the GC.
-               if traced {
+               if enteredMarkAssistForTracing {
                        trace := traceAcquire()
                        if trace.ok() {
                                trace.GCMarkAssistDone()
+                               // Set this *after* we trace the end to make sure
+                               // that we emit an in-progress event if this is
+                               // the first event for the goroutine in the trace
+                               // or trace generation. Also, do this between
+                               // acquire/release because this is part of the
+                               // goroutine's trace state, and it must be atomic
+                               // with respect to the tracer.
+                               gp.inMarkAssist = false
                                traceRelease(trace)
+                       } else {
+                               // This state is tracked even if tracing isn't enabled.
+                               // It's only used by the new tracer.
+                               // See the comment on enteredMarkAssistForTracing.
+                               gp.inMarkAssist = false
                        }
                }
                return
@@ -464,22 +495,59 @@ retry:
                if scanWork == 0 {
                        // We were able to steal all of the credit we
                        // needed.
-                       if traced {
+                       if enteredMarkAssistForTracing {
                                trace := traceAcquire()
                                if trace.ok() {
                                        trace.GCMarkAssistDone()
+                                       // Set this *after* we trace the end to make sure
+                                       // that we emit an in-progress event if this is
+                                       // the first event for the goroutine in the trace
+                                       // or trace generation. Also, do this between
+                                       // acquire/release because this is part of the
+                                       // goroutine's trace state, and it must be atomic
+                                       // with respect to the tracer.
+                                       gp.inMarkAssist = false
                                        traceRelease(trace)
+                               } else {
+                                       // This state is tracked even if tracing isn't enabled.
+                                       // It's only used by the new tracer.
+                                       // See the comment on enteredMarkAssistForTracing.
+                                       gp.inMarkAssist = false
                                }
                        }
                        return
                }
        }
-       if traceEnabled() && !traced {
+       if !enteredMarkAssistForTracing {
                trace := traceAcquire()
                if trace.ok() {
-                       traced = true
+                       if !goexperiment.ExecTracer2 {
+                               // In the old tracer, enter mark assist tracing only
+                               // if we actually traced an event. Otherwise a goroutine
+                               // waking up from mark assist post-GC might end up
+                               // writing a stray "end" event.
+                               //
+                               // This means inMarkAssist will not be meaningful
+                               // in the old tracer; that's OK, it's unused.
+                               //
+                               // See the comment on enteredMarkAssistForTracing.
+                               enteredMarkAssistForTracing = true
+                       }
                        trace.GCMarkAssistStart()
+                       // Set this *after* we trace the start, otherwise we may
+                       // emit an in-progress event for an assist we're about to start.
+                       gp.inMarkAssist = true
                        traceRelease(trace)
+               } else {
+                       gp.inMarkAssist = true
+               }
+               if goexperiment.ExecTracer2 {
+                       // In the new tracer, mark that we've entered mark assist
+                       // for tracing whenever we pass this point, because we must
+                       // manage inMarkAssist correctly.
+                       //
+                       // See the comment on enteredMarkAssistForTracing.
+                       enteredMarkAssistForTracing = true
                }
        }
 
@@ -525,11 +593,24 @@ retry:
                // At this point either background GC has satisfied
                // this G's assist debt, or the GC cycle is over.
        }
-       if traced {
+       if enteredMarkAssistForTracing {
                trace := traceAcquire()
                if trace.ok() {
                        trace.GCMarkAssistDone()
+                       // Set this *after* we trace the end to make sure
+                       // that we emit an in-progress event if this is
+                       // the first event for the goroutine in the trace
+                       // or trace generation. Also, do this between
+                       // acquire/release because this is part of the
+                       // goroutine's trace state, and it must be atomic
+                       // with respect to the tracer.
+                       gp.inMarkAssist = false
                        traceRelease(trace)
+               } else {
+                       // This state is tracked even if tracing isn't enabled.
+                       // It's only used by the new tracer.
+                       // See the comment on enteredMarkAssistForTracing.
+                       gp.inMarkAssist = false
                }
        }
 }
index d0cb998cffd89acaf5185d288a1c73fc8aaf6e5e..4f6e3a3ced2d000789d5bd070fe761f83cc4856c 100644 (file)
@@ -8,6 +8,7 @@ import (
        "internal/abi"
        "internal/cpu"
        "internal/goarch"
+       "internal/goexperiment"
        "internal/goos"
        "runtime/internal/atomic"
        "runtime/internal/sys"
@@ -1344,7 +1345,10 @@ func stopTheWorld(reason stwReason) {
                // must have preempted all goroutines, including any attempting
                // to scan our stack, in which case, any stack shrinking will
                // have already completed by the time we exit.
-               // Don't provide a wait reason because we're still executing.
+               //
+               // N.B. The execution tracer is not aware of this status
+               // transition and handles it specially based on the
+               // wait reason.
                casGToWaiting(gp, _Grunning, waitReasonStoppingTheWorld)
                stopTheWorldWithSema(reason)
                casgstatus(gp, _Gwaiting, _Grunning)
@@ -1451,7 +1455,7 @@ func stopTheWorldWithSema(reason stwReason) {
                if s == _Psyscall && atomic.Cas(&pp.status, s, _Pgcstop) {
                        if trace.ok() {
                                trace.GoSysBlock(pp)
-                               trace.ProcStop(pp)
+                               trace.ProcSteal(pp, false)
                        }
                        pp.syscalltick++
                        sched.stopwait--
@@ -1772,6 +1776,8 @@ func mexit(osStack bool) {
        }
        throw("m not found in allm")
 found:
+       // Events must not be traced after this point.
+
        // Delay reaping m until it's done with the stack.
        //
        // Put mp on the free list, though it will not be reaped while freeWait
@@ -1781,6 +1787,9 @@ found:
        //
        // Note that the free list must not be linked through alllink because
        // some functions walk allm without locking, so may be using alllink.
+       //
+       // N.B. It's important that the M appears on the free list at the same time
+       // as it is removed from allm, so that the tracer can still find it.
        mp.freeWait.Store(freeMWait)
        mp.freelink = sched.freem
        sched.freem = mp
@@ -1904,21 +1913,25 @@ func forEachPInternal(fn func(*p)) {
 
        // Force Ps currently in _Psyscall into _Pidle and hand them
        // off to induce safe point function execution.
-       trace := traceAcquire()
        for _, p2 := range allp {
                s := p2.status
+
+               // We need to be fine-grained about tracing here, since handoffp
+               // might call into the tracer, and the tracer is non-reentrant.
+               trace := traceAcquire()
                if s == _Psyscall && p2.runSafePointFn == 1 && atomic.Cas(&p2.status, s, _Pidle) {
                        if trace.ok() {
+                               // It's important that we traceRelease before we call handoffp, which may also traceAcquire.
                                trace.GoSysBlock(p2)
-                               trace.ProcStop(p2)
+                               trace.ProcSteal(p2, false)
+                               traceRelease(trace)
                        }
                        p2.syscalltick++
                        handoffp(p2)
+               } else if trace.ok() {
+                       traceRelease(trace)
                }
        }
-       if trace.ok() {
-               traceRelease(trace)
-       }
 
        // Wait for remaining Ps to run fn.
        if wait {
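The per-iteration traceAcquire/traceRelease above guards against re-entering the tracer from handoffp. A minimal, self-contained sketch of the hazard (illustrative names only, with sync.Mutex standing in for the tracer's non-reentrant seqlock):

        package main

        import "sync"

        var traceMu sync.Mutex // stands in for the tracer's non-reentrant lock

        // emitEvent would deadlock if its caller still held traceMu.
        func emitEvent() {
                traceMu.Lock()
                defer traceMu.Unlock()
                // ... write a trace event ...
        }

        // handoff models handoffp: it may call back into the tracer.
        func handoff() {
                emitEvent()
        }

        func main() {
                for i := 0; i < 4; i++ {
                        traceMu.Lock()
                        // ... trace this iteration's state change ...
                        traceMu.Unlock() // must release before handoff re-acquires
                        handoff()
                }
        }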
@@ -2016,6 +2029,7 @@ func allocm(pp *p, fn func(), id int64) *m {
                lock(&sched.lock)
                var newList *m
                for freem := sched.freem; freem != nil; {
+                       // Wait for freeWait to indicate that freem's stack is unused.
                        wait := freem.freeWait.Load()
                        if wait == freeMWait {
                                next := freem.freelink
@@ -2024,6 +2038,12 @@ func allocm(pp *p, fn func(), id int64) *m {
                                freem = next
                                continue
                        }
+                       // Drop any remaining trace resources.
+                       // Ms can continue to emit events all the way until wait != freeMWait,
+                       // so it's only safe to call traceThreadDestroy at this point.
+                       if traceEnabled() || traceShuttingDown() {
+                               traceThreadDestroy(freem)
+                       }
                        // Free the stack if needed. For freeMRef, there is
                        // nothing to do except drop freem from the sched.freem
                        // list.
@@ -2162,9 +2182,27 @@ func needm(signal bool) {
        asminit()
        minit()
 
+       // Emit a trace event for this dead -> syscall transition,
+       // but only in the new tracer and only if we're not in a signal handler.
+       //
+       // N.B. the tracer can run on a bare M just fine; we just have
+       // to make sure to do this before setg(nil) and unminit.
+       var trace traceLocker
+       if goexperiment.ExecTracer2 && !signal {
+               trace = traceAcquire()
+       }
+
        // mp.curg is now a real goroutine.
        casgstatus(mp.curg, _Gdead, _Gsyscall)
        sched.ngsys.Add(-1)
+
+       if goexperiment.ExecTracer2 && !signal {
+               if trace.ok() {
+                       trace.GoCreateSyscall(mp.curg)
+                       traceRelease(trace)
+               }
+       }
+       mp.isExtraInSig = signal
 }
 
 // Acquire an extra m and bind it to the C thread when a pthread key has been created.
@@ -2284,11 +2322,57 @@ func dropm() {
        // with no pointer manipulation.
        mp := getg().m
 
+       // Emit a trace event for this syscall -> dead transition,
+       // but only in the new tracer.
+       //
+       // N.B. the tracer can run on a bare M just fine; we just have
+       // to make sure to do this before setg(nil) and unminit.
+       var trace traceLocker
+       if goexperiment.ExecTracer2 && !mp.isExtraInSig {
+               trace = traceAcquire()
+       }
+
        // Return mp.curg to dead state.
        casgstatus(mp.curg, _Gsyscall, _Gdead)
        mp.curg.preemptStop = false
        sched.ngsys.Add(1)
 
+       if goexperiment.ExecTracer2 && !mp.isExtraInSig {
+               if trace.ok() {
+                       trace.GoDestroySyscall()
+                       traceRelease(trace)
+               }
+       }
+
+       if goexperiment.ExecTracer2 {
+               // Trash syscalltick so that it doesn't line up with mp.old.syscalltick anymore.
+               //
+               // In the new tracer, we model needm and dropm as a goroutine being created and
+               // destroyed, respectively. The m then might get reused with a different procid but
+               // still with a reference to oldp, and still with the same syscalltick. The next
+               // time a G is "created" in needm, it'll return and quietly reacquire its P from a
+               // different m with a different procid, which will confuse the trace parser. By
+               // trashing syscalltick, we ensure that to the trace parser it'll appear as if we
+               // lost the P and just reacquired it.
+               //
+               // Trash the value by decrementing because that gets us as far away from the value
+               // the syscall exit code expects as possible. Setting to zero is risky because
+               // syscalltick could already be zero (and in fact, is initialized to zero).
+               mp.syscalltick--
+       }
+
+       // Reset trace state unconditionally. This goroutine is being 'destroyed'
+       // from the perspective of the tracer.
+       mp.curg.trace.reset()
+
+       // Flush all the M's buffers. This is necessary because the M might
+       // be used on a different thread with a different procid, so we have
+       // to make sure we don't write into the same buffer.
+       if traceEnabled() || traceShuttingDown() {
+               traceThreadDestroy(mp)
+       }
+       mp.isExtraInSig = false
+
        // Block signals before unminit.
        // Unminit unregisters the signal handling stack (but needs g on some systems).
        // Setg(nil) clears g, which is the signal handler's cue not to run Go handlers.
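For context, the needm/dropm pair above runs whenever a thread not created by the Go runtime calls into Go. A minimal sketch of such a program (requires cgo; illustrative, not part of this CL). The //export file must keep its cgo preamble declaration-only, hence two files:

        // callback.go
        package main

        import "C"

        //export GoCallback
        func GoCallback() {
                // Runs on an extra M: needm on the way in, dropm on the way out.
                // With exectracer2 this shows up as a goroutine being created
                // and destroyed around the call.
        }

        // main.go
        package main

        /*
        #include <pthread.h>

        extern void GoCallback();

        static void *threadMain(void *arg) {
                GoCallback();
                return NULL;
        }

        static void startCThread(void) {
                pthread_t t;
                pthread_create(&t, NULL, threadMain, NULL);
                pthread_join(t, NULL);
        }
        */
        import "C"

        func main() {
                C.startCThread()
        }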
@@ -2982,8 +3066,8 @@ func execute(gp *g, inheritTime bool) {
        if trace.ok() {
                // GoSysExit has to happen when we have a P, but before GoStart.
                // So we emit it here.
-               if gp.syscallsp != 0 {
-                       trace.GoSysExit()
+               if !goexperiment.ExecTracer2 && gp.syscallsp != 0 {
+                       trace.GoSysExit(true)
                }
                trace.GoStart()
                traceRelease(trace)
@@ -4154,7 +4238,7 @@ func save(pc, sp uintptr) {
 // must always point to a valid stack frame. entersyscall below is the normal
 // entry point for syscalls, which obtains the SP and PC from the caller.
 //
-// Syscall tracing:
+// Syscall tracing (old tracer):
 // At the start of a syscall we emit traceGoSysCall to capture the stack trace.
 // If the syscall does not block, that is it, we do not emit any other events.
 // If the syscall blocks (that is, P is retaken), retaker emits traceGoSysBlock;
@@ -4264,6 +4348,8 @@ func entersyscall_gcwait() {
                trace := traceAcquire()
                if trace.ok() {
                        trace.GoSysBlock(pp)
+                       // N.B. ProcSteal is not necessary because if we succeed we're
+                       // always stopping the P we just put into the syscall status.
                        trace.ProcStop(pp)
                        traceRelease(trace)
                }
@@ -4364,11 +4450,23 @@ func exitsyscall() {
                }
                trace := traceAcquire()
                if trace.ok() {
-                       if oldp != gp.m.p.ptr() || gp.m.syscalltick != gp.m.p.ptr().syscalltick {
-                               systemstack(func() {
+                       lostP := oldp != gp.m.p.ptr() || gp.m.syscalltick != gp.m.p.ptr().syscalltick
+                       systemstack(func() {
+                               if goexperiment.ExecTracer2 {
+                                       // Write out syscall exit eagerly in the experiment.
+                                       //
+                                       // It's important that we write this *after* we know whether we
+                                       // lost our P or not (determined by exitsyscallfast).
+                                       trace.GoSysExit(lostP)
+                               }
+                               if lostP {
+                                       // We lost the P at some point, even though we got it back here.
+                                       // Trace that we're starting again, because there was a traceGoSysBlock
+                                       // call somewhere in exitsyscallfast (indicating that this goroutine
+                                       // had blocked) and we're about to start running again.
                                        trace.GoStart()
-                               })
-                       }
+                               }
+                       })
                }
                // There's a cpu for us, so we can run.
                gp.m.p.ptr().syscalltick++
@@ -4399,19 +4497,15 @@ func exitsyscall() {
                return
        }
 
-       trace := traceAcquire()
-       if trace.ok() {
-               // Wait till traceGoSysBlock event is emitted.
-               // This ensures consistency of the trace (the goroutine is started after it is blocked).
-               for oldp != nil && oldp.syscalltick == gp.m.syscalltick {
-                       osyield()
+       if !goexperiment.ExecTracer2 {
+               // In the old tracer, we can't emit the syscall exit event
+               // right now because we don't have a P. Record the exit time
+               // so the event can be emitted later, in execute, once we have one.
+               trace := traceAcquire()
+               if trace.ok() {
+                       trace.RecordSyscallExitedTime(gp, oldp)
+                       traceRelease(trace)
                }
-               // We can't trace syscall exit right now because we don't have a P.
-               // Tracing code can invoke write barriers that cannot run without a P.
-               // So instead we remember the syscall exit time and emit the event
-               // in execute when we have a P.
-               gp.trace.sysExitTime = traceClockNow()
-               traceRelease(trace)
        }
 
        gp.m.locks--
@@ -4452,7 +4546,7 @@ func exitsyscallfast(oldp *p) bool {
                var ok bool
                systemstack(func() {
                        ok = exitsyscallfast_pidle()
-                       if ok {
+                       if ok && !goexperiment.ExecTracer2 {
                                trace := traceAcquire()
                                if trace.ok() {
                                        if oldp != nil {
@@ -4462,7 +4556,9 @@ func exitsyscallfast(oldp *p) bool {
                                                        osyield()
                                                }
                                        }
-                                       trace.GoSysExit()
+                                       // In the experiment, we write this in exitsyscall.
+                                       // Don't write it here unless the experiment is off.
+                                       trace.GoSysExit(true)
                                        traceRelease(trace)
                                }
                        }
@@ -4488,10 +4584,17 @@ func exitsyscallfast_reacquired() {
                        // traceGoSysBlock for this syscall was already emitted,
                        // but here we effectively retake the p from the new syscall running on the same p.
                        systemstack(func() {
-                               // Denote blocking of the new syscall.
-                               trace.GoSysBlock(gp.m.p.ptr())
-                               // Denote completion of the current syscall.
-                               trace.GoSysExit()
+                               if goexperiment.ExecTracer2 {
+                                       // In the experiment, we're stealing the P. It's treated
+                                       // as if it temporarily stopped running, then started running again.
+                                       trace.ProcSteal(gp.m.p.ptr(), true)
+                                       trace.ProcStart()
+                               } else {
+                                       // Denote blocking of the new syscall.
+                                       trace.GoSysBlock(gp.m.p.ptr())
+                                       // Denote completion of the current syscall.
+                                       trace.GoSysExit(true)
+                               }
                                traceRelease(trace)
                        })
                }
@@ -4521,7 +4624,23 @@ func exitsyscallfast_pidle() bool {
 //
 //go:nowritebarrierrec
 func exitsyscall0(gp *g) {
+       var trace traceLocker
+       if goexperiment.ExecTracer2 {
+               traceExitingSyscall()
+               trace = traceAcquire()
+       }
        casgstatus(gp, _Gsyscall, _Grunnable)
+       if goexperiment.ExecTracer2 {
+               traceExitedSyscall()
+               if trace.ok() {
+                       // Write out syscall exit eagerly in the experiment.
+                       //
+                       // It's important that we write this *after* we know whether we
+                       // lost our P or not (determined by exitsyscallfast).
+                       trace.GoSysExit(true)
+                       traceRelease(trace)
+               }
+       }
        dropg()
        lock(&sched.lock)
        var pp *p
@@ -4772,6 +4891,7 @@ func newproc1(fn *funcval, callergp *g, callerpc uintptr) *g {
        }
        newg.goid = pp.goidcache
        pp.goidcache++
+       newg.trace.reset()
        if trace.ok() {
                trace.GoCreate(newg, newg.startpc)
                traceRelease(trace)
@@ -5204,14 +5324,16 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
                cpuprof.add(tagPtr, stk[:n])
 
                gprof := gp
+               var mp *m
                var pp *p
                if gp != nil && gp.m != nil {
                        if gp.m.curg != nil {
                                gprof = gp.m.curg
                        }
+                       mp = gp.m
                        pp = gp.m.p.ptr()
                }
-               traceCPUSample(gprof, pp, stk[:n])
+               traceCPUSample(gprof, mp, pp, stk[:n])
        }
        getg().m.mallocing--
 }
@@ -5580,6 +5702,16 @@ func wirep(pp *p) {
 
 // Disassociate p and the current m.
 func releasep() *p {
+       trace := traceAcquire()
+       if trace.ok() {
+               trace.ProcStop(getg().m.p.ptr())
+               traceRelease(trace)
+       }
+       return releasepNoTrace()
+}
+
+// Disassociate p and the current m without tracing an event.
+func releasepNoTrace() *p {
        gp := getg()
 
        if gp.m.p == 0 {
@@ -5590,11 +5722,6 @@ func releasep() *p {
                print("releasep: m=", gp.m, " m->p=", gp.m.p.ptr(), " p->m=", hex(pp.m), " p->status=", pp.status, "\n")
                throw("releasep: invalid p state")
        }
-       trace := traceAcquire()
-       if trace.ok() {
-               trace.ProcStop(gp.m.p.ptr())
-               traceRelease(trace)
-       }
        gp.m.p = 0
        pp.m = 0
        pp.status = _Pidle
@@ -5943,7 +6070,7 @@ func retake(now int64) uint32 {
                                trace := traceAcquire()
                                if trace.ok() {
                                        trace.GoSysBlock(pp)
-                                       trace.ProcStop(pp)
+                                       trace.ProcSteal(pp, false)
                                        traceRelease(trace)
                                }
                                n++
index 92a7e021ee23b5728a539eb8f2b2f9b88a38a799..489dcdd79ca4bf7eb8636296645b5eecb5e9e5bc 100644 (file)
@@ -326,6 +326,7 @@ var debug struct {
        harddecommit       int32
        adaptivestackstart int32
        tracefpunwindoff   int32
+       traceadvanceperiod int32
 
        // debug.malloc is used as a combined debug check
        // in the malloc function and should be set
@@ -362,6 +363,7 @@ var dbgvars = []*dbgVar{
        {name: "adaptivestackstart", value: &debug.adaptivestackstart},
        {name: "tracefpunwindoff", value: &debug.tracefpunwindoff},
        {name: "panicnil", atomic: &debug.panicnil},
+       {name: "traceadvanceperiod", value: &debug.traceadvanceperiod},
 }
 
 func parsedebugvars() {
@@ -380,6 +382,7 @@ func parsedebugvars() {
                // Hence, default to MADV_DONTNEED.
                debug.madvdontneed = 1
        }
+       debug.traceadvanceperiod = defaultTraceAdvancePeriod
 
        godebug := gogetenv("GODEBUG")
 
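A sketch of exercising the new knob from a parent process. The period's units are an assumption (nanoseconds, mirroring defaultTraceAdvancePeriod), and ./myprog is a hypothetical traced binary:

        package main

        import (
                "os"
                "os/exec"
        )

        func main() {
                cmd := exec.Command("./myprog") // hypothetical binary that collects a trace
                // Value assumed to be in nanoseconds: advance generations every 100ms.
                cmd.Env = append(os.Environ(), "GODEBUG=traceadvanceperiod=100000000")
                cmd.Stdout = os.Stdout
                cmd.Stderr = os.Stderr
                if err := cmd.Run(); err != nil {
                        panic(err)
                }
        }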
index e7a3d4ed1ba2e4857bcc43fca70e3b33f9e01582..60942dcdb15b097778a30a3c33e5c68d78fafb4a 100644 (file)
@@ -479,6 +479,9 @@ type g struct {
        // park on a chansend or chanrecv. Used to signal an unsafe point
        // for stack shrinking.
        parkingOnChan atomic.Bool
+       // inMarkAssist indicates whether the goroutine is in mark assist.
+       // Used by the execution tracer.
+       inMarkAssist bool
 
        raceignore    int8  // ignore race detection events
        nocgocallback bool  // whether disable callback from C
@@ -572,6 +575,7 @@ type m struct {
        incgo         bool          // m is executing a cgo call
        isextra       bool          // m is an extra m
        isExtraInC    bool          // m is an extra m that is not executing Go code
+       isExtraInSig  bool          // m is an extra m in a signal handler
        freeWait      atomic.Uint32 // Whether it is safe to free g0 and delete m (one of freeMRef, freeMStack, freeMWait)
        fastrand      uint64
        needextram    bool
@@ -1113,6 +1117,8 @@ const (
        waitReasonGCMarkTermination                       // "GC mark termination"
        waitReasonStoppingTheWorld                        // "stopping the world"
        waitReasonFlushProcCaches                         // "flushing proc caches"
+       waitReasonTraceGoroutineStatus                    // "trace goroutine status"
+       waitReasonTraceProcStatus                         // "trace proc status"
 )
 
 var waitReasonStrings = [...]string{
@@ -1149,6 +1155,8 @@ var waitReasonStrings = [...]string{
        waitReasonGCMarkTermination:     "GC mark termination",
        waitReasonStoppingTheWorld:      "stopping the world",
        waitReasonFlushProcCaches:       "flushing proc caches",
+       waitReasonTraceGoroutineStatus:  "trace goroutine status",
+       waitReasonTraceProcStatus:       "trace proc status",
 }
 
 func (w waitReason) String() string {
index fb9195481aa3018b22a614e9954af18ecbff3734..ccc0864ca9d7e04364eac1659f2bc435c87ec80e 100644 (file)
@@ -5,6 +5,7 @@
 package runtime_test
 
 import (
+       "internal/goexperiment"
        "reflect"
        "runtime"
        "testing"
@@ -16,13 +17,18 @@ import (
 func TestSizeof(t *testing.T) {
        const _64bit = unsafe.Sizeof(uintptr(0)) == 8
 
+       g32bit := uintptr(252)
+       if goexperiment.ExecTracer2 {
+               g32bit = uintptr(256)
+       }
+
        var tests = []struct {
                val    any     // type as a value
                _32bit uintptr // size on 32bit platforms
                _64bit uintptr // size on 64bit platforms
        }{
-               {runtime.G{}, 252, 408},   // g, but exported for testing
-               {runtime.Sudog{}, 56, 88}, // sudog, but exported for testing
+               {runtime.G{}, g32bit, 408}, // g, but exported for testing
+               {runtime.Sudog{}, 56, 88},  // sudog, but exported for testing
        }
 
        for _, tt := range tests {
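The goexperiment.ExecTracer2 constant consulted above comes from the build-tagged pair of generated files added in internal/goexperiment (see the file list). A sketch of their shape, assuming they follow the package's usual generated pattern (the Int variants in particular are an assumption):

        // exp_exectracer2_on.go
        //go:build goexperiment.exectracer2

        package goexperiment

        const ExecTracer2 = true
        const ExecTracer2Int = 1

        // exp_exectracer2_off.go
        //go:build !goexperiment.exectracer2

        package goexperiment

        const ExecTracer2 = false
        const ExecTracer2Int = 0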
index a3bcdee010695eed1d3fd56ae75d1742ebb70106..48e939e38f5c5cdf09a7caa9dd6da4c9f3a73991 100644 (file)
@@ -2,6 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
+//go:build !goexperiment.exectracer2
+
 // Go execution tracer.
 // The tracer captures a wide range of execution events like goroutine
 // creation/blocking/unblocking, syscall enter/exit/block, GC-related events,
@@ -198,6 +200,9 @@ type gTraceState struct {
        lastP              puintptr  // last P emitted an event for this goroutine
 }
 
+// reset is a no-op in the old tracer; it exists for compatibility with the new tracer.
+func (s *gTraceState) reset() {}
+
 // mTraceState is per-M state for the tracer.
 type mTraceState struct {
        startingTrace  bool // this M is in TraceStart, potentially before traceEnabled is true
@@ -770,6 +775,13 @@ func traceProcFree(pp *p) {
        unlock(&trace.lock)
 }
 
+// traceThreadDestroy is a no-op in the old tracer. It exists as a stub to support the new tracer.
+//
+// This must run on the system stack, just to match the new tracer.
+func traceThreadDestroy(_ *m) {
+       // No-op in old tracer.
+}
+
 // traceFullQueue queues buf into queue of full buffers.
 func traceFullQueue(buf traceBufPtr) {
        buf.ptr().link = 0
@@ -902,7 +914,7 @@ func traceEventLocked(extraBytes int, mp *m, pid int32, bufp *traceBufPtr, ev by
 // traceCPUSample writes a CPU profile sample stack to the execution tracer's
 // profiling buffer. It is called from a signal handler, so is limited in what
 // it can do.
-func traceCPUSample(gp *g, pp *p, stk []uintptr) {
+func traceCPUSample(gp *g, _ *m, pp *p, stk []uintptr) {
        if !traceEnabled() {
                // Tracing is usually turned off; don't spend time acquiring the signal
                // lock unless it's active.
@@ -1702,7 +1714,10 @@ func (_ traceLocker) GoSysCall() {
        traceEvent(traceEvGoSysCall, skip)
 }
 
-func (_ traceLocker) GoSysExit() {
+func (_ traceLocker) GoSysExit(lostP bool) {
+       if !lostP {
+               throw("lostP must always be true in the old tracer for GoSysExit")
+       }
        gp := getg().m.curg
        if !gp.trace.tracedSyscallEnter {
                // There was no syscall entry traced for us at all, so there's definitely
@@ -1729,6 +1744,22 @@ func (_ traceLocker) GoSysExit() {
        traceEvent(traceEvGoSysExit, -1, gp.goid, gp.trace.seq, uint64(ts))
 }
 
+// nosplit because it's called from exitsyscall without a P.
+//
+//go:nosplit
+func (_ traceLocker) RecordSyscallExitedTime(gp *g, oldp *p) {
+       // Wait till traceGoSysBlock event is emitted.
+       // This ensures consistency of the trace (the goroutine is started after it is blocked).
+       for oldp != nil && oldp.syscalltick == gp.m.syscalltick {
+               osyield()
+       }
+       // We can't trace syscall exit right now because we don't have a P.
+       // Tracing code can invoke write barriers that cannot run without a P.
+       // So instead we remember the syscall exit time and emit the event
+       // in execute when we have a P.
+       gp.trace.sysExitTime = traceClockNow()
+}
+
 func (_ traceLocker) GoSysBlock(pp *p) {
        // Sysmon and stopTheWorld can declare syscalls running on remote Ps as blocked,
        // to handle this we temporary employ the P.
@@ -1740,6 +1771,10 @@ func (_ traceLocker) GoSysBlock(pp *p) {
        releasem(mp)
 }
 
+func (t traceLocker) ProcSteal(pp *p, forMe bool) {
+       t.ProcStop(pp)
+}
+
 func (_ traceLocker) HeapAlloc(live uint64) {
        traceEvent(traceEvHeapAlloc, -1, live)
 }
@@ -1858,6 +1893,14 @@ func (tl traceLocker) OneNewExtraM(gp *g) {
        traceEvent(traceEvGoInSyscall, -1, gp.goid)
 }
 
+// Used only in the new tracer.
+func (tl traceLocker) GoCreateSyscall(gp *g) {
+}
+
+// Used only in the new tracer.
+func (tl traceLocker) GoDestroySyscall() {
+}
+
 // traceTime represents a timestamp for the trace.
 type traceTime uint64
 
@@ -1871,3 +1914,12 @@ type traceTime uint64
 func traceClockNow() traceTime {
        return traceTime(cputicks() / traceTimeDiv)
 }
+
+func traceExitingSyscall() {
+}
+
+func traceExitedSyscall() {
+}
+
+// Not used in the old tracer. Defined for compatibility.
+const defaultTraceAdvancePeriod = 0
index 69ea8f2d3b6e63854c624f8c43b3c11ba71e2a57..1bfe28e6b81298de5e82d18d36668c9c0283cfb8 100644 (file)
@@ -8,6 +8,7 @@ import (
        "bytes"
        "context"
        "fmt"
+       "internal/goexperiment"
        "internal/trace"
        "reflect"
        . "runtime/trace"
@@ -42,6 +43,10 @@ func BenchmarkNewTask(b *testing.B) {
 }
 
 func TestUserTaskRegion(t *testing.T) {
+       if goexperiment.ExecTracer2 {
+               // An equivalent test exists in internal/trace/v2.
+               t.Skip("skipping because this test is incompatible with the new tracer")
+       }
        if IsEnabled() {
                t.Skip("skipping because -test.trace is set")
        }
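This same three-line skip recurs throughout the trace tests below; a hypothetical helper (not part of this CL) that would centralize it:

        func skipIfExecTracer2(t *testing.T) {
                t.Helper()
                if goexperiment.ExecTracer2 {
                        // An equivalent test exists in internal/trace/v2.
                        t.Skip("skipping because this test is incompatible with the new tracer")
                }
        }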
index be3adc98017e7eba5d3e4323ca75360cb9c05d3c..f427e578dc7d4700af432fa66ddefa3db48242c7 100644 (file)
@@ -7,6 +7,7 @@ package trace_test
 import (
        "bytes"
        "fmt"
+       "internal/goexperiment"
        "internal/testenv"
        "internal/trace"
        "net"
@@ -152,11 +153,11 @@ func TestTraceSymbolize(t *testing.T) {
                        {"runtime/trace_test.TestTraceSymbolize.func1", 0},
                }},
                {trace.EvGoSched, []frame{
-                       {"runtime/trace_test.TestTraceSymbolize", 111},
+                       {"runtime/trace_test.TestTraceSymbolize", 112},
                        {"testing.tRunner", 0},
                }},
                {trace.EvGoCreate, []frame{
-                       {"runtime/trace_test.TestTraceSymbolize", 40},
+                       {"runtime/trace_test.TestTraceSymbolize", 41},
                        {"testing.tRunner", 0},
                }},
                {trace.EvGoStop, []frame{
@@ -177,7 +178,7 @@ func TestTraceSymbolize(t *testing.T) {
                }},
                {trace.EvGoUnblock, []frame{
                        {"runtime.chansend1", 0},
-                       {"runtime/trace_test.TestTraceSymbolize", 113},
+                       {"runtime/trace_test.TestTraceSymbolize", 114},
                        {"testing.tRunner", 0},
                }},
                {trace.EvGoBlockSend, []frame{
@@ -186,7 +187,7 @@ func TestTraceSymbolize(t *testing.T) {
                }},
                {trace.EvGoUnblock, []frame{
                        {"runtime.chanrecv1", 0},
-                       {"runtime/trace_test.TestTraceSymbolize", 114},
+                       {"runtime/trace_test.TestTraceSymbolize", 115},
                        {"testing.tRunner", 0},
                }},
                {trace.EvGoBlockSelect, []frame{
@@ -195,7 +196,7 @@ func TestTraceSymbolize(t *testing.T) {
                }},
                {trace.EvGoUnblock, []frame{
                        {"runtime.selectgo", 0},
-                       {"runtime/trace_test.TestTraceSymbolize", 115},
+                       {"runtime/trace_test.TestTraceSymbolize", 116},
                        {"testing.tRunner", 0},
                }},
                {trace.EvGoBlockSync, []frame{
@@ -214,7 +215,7 @@ func TestTraceSymbolize(t *testing.T) {
                {trace.EvGoUnblock, []frame{
                        {"sync.(*WaitGroup).Add", 0},
                        {"sync.(*WaitGroup).Done", 0},
-                       {"runtime/trace_test.TestTraceSymbolize", 120},
+                       {"runtime/trace_test.TestTraceSymbolize", 121},
                        {"testing.tRunner", 0},
                }},
                {trace.EvGoBlockCond, []frame{
@@ -289,6 +290,10 @@ func TestTraceSymbolize(t *testing.T) {
 
 func skipTraceSymbolizeTestIfNecessary(t *testing.T) {
        testenv.MustHaveGoBuild(t)
+       if goexperiment.ExecTracer2 {
+               // An equivalent test exists in internal/trace/v2.
+               t.Skip("skipping because this test is incompatible with the new tracer")
+       }
        if IsEnabled() {
                t.Skip("skipping because -test.trace is set")
        }
index 04a43a05f4535f2f4afbc3233c15135901df5d11..23a8d11c6f26237b7140a08d8703b024b0d00594 100644 (file)
@@ -9,6 +9,7 @@ import (
        "context"
        "flag"
        "fmt"
+       "internal/goexperiment"
        "internal/profile"
        "internal/race"
        "internal/trace"
@@ -41,6 +42,9 @@ func TestEventBatch(t *testing.T) {
        if testing.Short() {
                t.Skip("skipping in short mode")
        }
+       if goexperiment.ExecTracer2 {
+               t.Skip("skipping because this test is incompatible with the new tracer")
+       }
        // During Start, a bunch of records are written to reflect the current
        // snapshot of the program, including the state of each goroutine.
        // And some string constants are written to the trace to aid trace
@@ -127,6 +131,10 @@ func TestTrace(t *testing.T) {
        if IsEnabled() {
                t.Skip("skipping because -test.trace is set")
        }
+       if goexperiment.ExecTracer2 {
+               // An equivalent test exists in internal/trace/v2.
+               t.Skip("skipping because this test is incompatible with the new tracer")
+       }
        buf := new(bytes.Buffer)
        if err := Start(buf); err != nil {
                t.Fatalf("failed to start tracing: %v", err)
@@ -194,6 +202,10 @@ func TestTraceStress(t *testing.T) {
        if testing.Short() {
                t.Skip("skipping in -short mode")
        }
+       if goexperiment.ExecTracer2 {
+               // An equivalent test exists in internal/trace/v2.
+               t.Skip("skipping because this test is incompatible with the new tracer")
+       }
 
        var wg sync.WaitGroup
        done := make(chan bool)
@@ -356,6 +368,10 @@ func TestTraceStressStartStop(t *testing.T) {
        if IsEnabled() {
                t.Skip("skipping because -test.trace is set")
        }
+       if goexperiment.ExecTracer2 {
+               // An equivalent test exists in internal/trace/v2.
+               t.Skip("skipping because this test is incompatible with the new tracer")
+       }
        defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8))
        outerDone := make(chan bool)
 
@@ -506,6 +522,9 @@ func TestTraceFutileWakeup(t *testing.T) {
        if IsEnabled() {
                t.Skip("skipping because -test.trace is set")
        }
+       if goexperiment.ExecTracer2 {
+               t.Skip("skipping because this test is incompatible with the new tracer")
+       }
        buf := new(bytes.Buffer)
        if err := Start(buf); err != nil {
                t.Fatalf("failed to start tracing: %v", err)
@@ -592,6 +611,10 @@ func TestTraceCPUProfile(t *testing.T) {
        if IsEnabled() {
                t.Skip("skipping because -test.trace is set")
        }
+       if goexperiment.ExecTracer2 {
+               // An equivalent test exists in internal/trace/v2.
+               t.Skip("skipping because this test is incompatible with the new tracer")
+       }
 
        cpuBuf := new(bytes.Buffer)
        if err := pprof.StartCPUProfile(cpuBuf); err != nil {
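Everything in the new trace2.go below is compiled only when the experiment is on. Assuming a checkout of this tree, a typical invocation to build and test with it enabled is:

        GOEXPERIMENT=exectracer2 go test runtime/trace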
diff --git a/src/runtime/trace2.go b/src/runtime/trace2.go
new file mode 100644 (file)
index 0000000..7d95eea
--- /dev/null
@@ -0,0 +1,944 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.exectracer2
+
+// Go execution tracer.
+// The tracer captures a wide range of execution events like goroutine
+// creation/blocking/unblocking, syscall enter/exit/block, GC-related events,
+// changes of heap size, processor start/stop, etc., and writes them to a buffer
+// in a compact form. A nanosecond-precision timestamp and a stack trace are
+// captured for most events.
+//
+// Tracer invariants (to keep the synchronization sound):
+// - An m that has a trace buffer must be on either the allm or sched.freem lists.
+// - Any trace buffer mutation must either be happening in traceAdvance or between
+//   a traceAcquire and a subsequent traceRelease.
+// - traceAdvance cannot return until the previous generation's buffers are all flushed.
+//
+// See https://go.dev/issue/60773 for a link to the full design.
+
+package runtime
+
+import (
+       "runtime/internal/atomic"
+       "unsafe"
+)
+
+// Trace state.
+
+// trace is global tracing context.
+var trace struct {
+       // trace.lock must only be acquired on the system stack where
+       // stack splits cannot happen while it is held.
+       lock mutex
+
+       // Trace buffer management.
+       //
+       // First we check the empty list for any free buffers. If none are
+       // available, buffers are allocated directly from the OS. Once they're
+       // filled up and/or flushed, they end up on the full queue for trace.gen%2.
+       //
+       // The trace reader takes buffers off the full list one-by-one and
+       // places them into reading until they're finished being read from.
+       // Then they're placed onto the empty list.
+       //
+       // Protected by trace.lock.
+       reading       *traceBuf // buffer currently handed off to user
+       empty         *traceBuf // stack of empty buffers
+       full          [2]traceBufQueue
+       workAvailable atomic.Bool
+
+       // State for the trace reader goroutine.
+       //
+       // Protected by trace.lock.
+       readerGen     atomic.Uintptr // the generation the reader is currently reading for
+       flushedGen    atomic.Uintptr // the last completed generation
+       headerWritten bool           // whether ReadTrace has emitted trace header
+
+       // doneSema is used to synchronize the reader and traceAdvance. Specifically,
+       // it notifies traceAdvance that the reader is done with a generation.
+       // Both semaphores are 0 by default (so, acquires block). traceAdvance
+       // attempts to acquire for gen%2 after flushing the last buffers for gen.
+       // Meanwhile the reader releases the sema for gen%2 when it has finished
+       // processing gen.
+       doneSema [2]uint32
+
+       // Trace data tables for deduplicating data going into the trace.
+       // There are 2 of each: one for gen%2, one for 1-gen%2.
+       stackTab  [2]traceStackTable  // maps stack traces to unique ids
+       stringTab [2]traceStringTable // maps strings to unique ids
+
+       // cpuLogRead accepts CPU profile samples from the signal handler where
+       // they're generated. It uses a three-word header to hold the IDs of the P, G,
+       // and M (respectively) that were active at the time of the sample. Because
+       // profBuf uses a record with all zeros in its header to indicate overflow,
+       // we make sure the P field is always non-zero: the ID of a real P will
+       // start at bit 1, and bit 0 will be set. Samples that arrive while no P is
+       // running (such as near syscalls) will set the first header field to 0b10.
+       // This careful handling of the first header field allows us to store ID of
+       // the active G directly in the second field, even though that will be 0
+       // when sampling g0.
+       //
+       // Initialization and teardown of these fields is protected by traceAdvanceSema.
+       cpuLogRead  *profBuf
+       signalLock  atomic.Uint32           // protects use of the following member, only usable in signal handlers
+       cpuLogWrite atomic.Pointer[profBuf] // copy of cpuLogRead for use in signal handlers, set without signalLock
+       cpuSleep    wakeableSleep
+       cpuLogDone  <-chan struct{}
+       cpuBuf      [2]*traceBuf
+
+       reader atomic.Pointer[g] // goroutine that called ReadTrace, or nil
+
+       // Fast mappings from enumerations to string IDs that are prepopulated
+       // in the trace.
+       markWorkerLabels [2][len(gcMarkWorkerModeStrings)]traceArg
+       goStopReasons    [2][len(traceGoStopReasonStrings)]traceArg
+       goBlockReasons   [2][len(traceBlockReasonStrings)]traceArg
+
+       // Trace generation counter.
+       gen            atomic.Uintptr
+       lastNonZeroGen uintptr // last non-zero value of gen
+
+       // shutdown is set when we are waiting for the trace reader to finish after setting gen to 0.
+       //
+       // Writes protected by trace.lock.
+       shutdown atomic.Bool
+
+       // Number of goroutines in syscall exiting slow path.
+       exitingSyscall atomic.Int32
+
+       // seqGC is the sequence counter for GC begin/end.
+       //
+       // Mutated only during stop-the-world.
+       seqGC uint64
+}
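A self-contained sketch of the first-header-word encoding described in the cpuLogRead comment, derived only from that comment (so treat it as illustrative):

        package main

        import "fmt"

        // encodeP packs a P into the sample header's first word: a real P's ID
        // starts at bit 1 with bit 0 set; "no P" is 0b10, so the word is never
        // all-zero (profBuf reserves an all-zero header for overflow records).
        func encodeP(pid int32, haveP bool) uint64 {
                if !haveP {
                        return 0b10
                }
                return uint64(pid)<<1 | 1
        }

        func decodeP(word uint64) (pid int32, haveP bool) {
                if word&1 == 0 {
                        return -1, false // the 0b10 "no P" marker
                }
                return int32(word >> 1), true
        }

        func main() {
                fmt.Println(encodeP(0, true)) // 1: P 0 stays distinguishable from overflow
                fmt.Println(encodeP(5, true)) // 11
                fmt.Println(decodeP(0b10))    // -1 false
        }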
+
+// Trace public API.
+
+var (
+       traceAdvanceSema  uint32 = 1
+       traceShutdownSema uint32 = 1
+)
+
+// StartTrace enables tracing for the current process.
+// While tracing, the data will be buffered and available via ReadTrace.
+// StartTrace returns an error if tracing is already enabled.
+// Most clients should use the runtime/trace package or the testing package's
+// -test.trace flag instead of calling StartTrace directly.
+func StartTrace() error {
+       if traceEnabled() || traceShuttingDown() {
+               return errorString("tracing is already enabled")
+       }
+       // Block until cleanup of the last trace is done.
+       semacquire(&traceShutdownSema)
+       semrelease(&traceShutdownSema)
+
+       // Hold traceAdvanceSema across trace start, since we'll want it on
+       // the other side of tracing being enabled globally.
+       semacquire(&traceAdvanceSema)
+
+       // Initialize CPU profile -> trace ingestion.
+       traceInitReadCPU()
+
+       // Compute the first generation for this StartTrace.
+       //
+       // Note: we start from the last non-zero generation rather than 1 so we
+       // can avoid resetting all the arrays indexed by gen%2 or gen%3. There's
+       // more than one of each per m, p, and goroutine.
+       firstGen := traceNextGen(trace.lastNonZeroGen)
+
+       // Reset GC sequencer.
+       trace.seqGC = 1
+
+       // Reset trace reader state.
+       trace.headerWritten = false
+       trace.readerGen.Store(firstGen)
+       trace.flushedGen.Store(0)
+
+       // Register some basic strings in the string tables.
+       traceRegisterLabelsAndReasons(firstGen)
+
+       // Stop the world.
+       //
+       // The purpose of stopping the world is to make sure that no goroutine is in a
+       // context where it could emit an event by bringing all goroutines to a safe point
+       // with no opportunity to transition.
+       //
+       // The exceptions to this rule are goroutines that are concurrently exiting a syscall.
+       // Those will all be forced into the syscall exit slow path, and we'll just make sure
+       // that we don't observe any goroutines in that critical section before starting
+       // the world again.
+       //
+       // A good follow-up question to this is why stopping the world is necessary at all
+       // given that we have traceAcquire and traceRelease. Unfortunately, those only help
+       // us when tracing is already active (for performance, so when tracing is off the
+       // tracing seqlock is left untouched). The main issue here is subtle: we're going to
+       // want to obtain a correct starting status for each goroutine, but there are windows
+       // of time in which we could read and emit an incorrect status. Specifically:
+       //
+       //      trace := traceAcquire()
+       //      // <----> problem window
+       //      casgstatus(gp, _Gwaiting, _Grunnable)
+       //      if trace.ok() {
+       //              trace.GoUnpark(gp, 2)
+       //              traceRelease(trace)
+       //      }
+       //
+       // More precisely, if we readgstatus for a gp while another goroutine is in the problem
+       // window and that goroutine didn't observe that tracing had begun, then we might write
+       // a GoStatus(GoWaiting) event for that goroutine, but it won't trace an event marking
+       // the transition from GoWaiting to GoRunnable. The trace will then be broken, because
+       // future events will be emitted assuming the tracer sees GoRunnable.
+       //
+       // In short, what we really need here is to make sure that the next time *any goroutine*
+       // hits a traceAcquire, it sees that the trace is enabled.
+       //
+       // Note also that stopping the world is necessary to make sure sweep-related events are
+       // coherent. Since the world is stopped and sweeps are non-preemptible, we can never start
+       // the world and see an unpaired sweep 'end' event. Other parts of the tracer rely on this.
+       stopTheWorld(stwStartTrace)
+
+       // Prevent sysmon from running any code that could generate events.
+       lock(&sched.sysmonlock)
+
+       // Reset mSyscallID on all Ps while we have them stationary and the trace is disabled.
+       for _, pp := range allp {
+               pp.trace.mSyscallID = -1
+       }
+
+       // Start tracing.
+       //
+       // After this executes, other Ms may start creating trace buffers and emitting
+       // data into them.
+       trace.gen.Store(firstGen)
+
+       // Wait for exitingSyscall to drain.
+       //
+       // It may not monotonically decrease to zero, but in the limit it will always become
+       // zero because the world is stopped and there are no available Ps for syscall-exited
+       // goroutines to run on.
+       //
+       // Because we set gen before checking this, and because exitingSyscall is always incremented
+       // *after* traceAcquire (which checks gen), we can be certain that once exitingSyscall is zero,
+       // any goroutine that goes to exit a syscall from then on *must* observe the new gen.
+       //
+       // The critical section on each goroutine here is going to be quite short, so the likelihood
+       // that we observe a zero value is high.
+       for trace.exitingSyscall.Load() != 0 {
+               osyield()
+       }
+
+       // Record some initial pieces of information.
+       //
+       // N.B. This will also emit a status event for this goroutine.
+       tl := traceAcquire()
+       tl.Gomaxprocs(gomaxprocs)  // Get this as early in the trace as possible. See comment in traceAdvance.
+       tl.STWStart(stwStartTrace) // We didn't trace this above, so trace it now.
+
+       // Record the fact that a GC is active, if applicable.
+       if gcphase == _GCmark || gcphase == _GCmarktermination {
+               tl.GCActive()
+       }
+
+       // Record the heap goal so we have it at the very beginning of the trace.
+       tl.HeapGoal()
+
+       // Make sure a ProcStatus is emitted for every P, while we're here.
+       for _, pp := range allp {
+               tl.writer().writeProcStatusForP(pp, pp == tl.mp.p.ptr()).end()
+       }
+       traceRelease(tl)
+
+       unlock(&sched.sysmonlock)
+       startTheWorld()
+
+       traceStartReadCPU()
+       traceAdvancer.start()
+
+       semrelease(&traceAdvanceSema)
+       return nil
+}
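As the doc comment notes, most clients reach StartTrace/StopTrace through runtime/trace rather than calling them directly. The usual pattern:

        package main

        import (
                "os"
                "runtime/trace"
        )

        func main() {
                f, err := os.Create("trace.out")
                if err != nil {
                        panic(err)
                }
                defer f.Close()

                if err := trace.Start(f); err != nil { // runtime.StartTrace under the hood
                        panic(err)
                }
                defer trace.Stop() // runtime.StopTrace; returns once all reads complete

                // ... workload to be traced ...
        }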
+
+// StopTrace stops tracing, if it was previously enabled.
+// StopTrace only returns after all the reads for the trace have completed.
+func StopTrace() {
+       traceAdvance(true)
+}
+
+// traceAdvance moves tracing to the next generation, and cleans up the current generation,
+// ensuring that it's flushed out before returning. If stopTrace is true, it disables tracing
+// altogether instead of advancing to the next generation.
+//
+// traceAdvanceSema must not be held.
+func traceAdvance(stopTrace bool) {
+       semacquire(&traceAdvanceSema)
+
+       // Get the gen that we're advancing from. In this function we don't really care much
+       // about the generation we're advancing _into_ since we'll do all the cleanup in this
+       // generation for the next advancement.
+       gen := trace.gen.Load()
+       if gen == 0 {
+               // We may end up here if traceAdvance is called concurrently with StopTrace.
+               semrelease(&traceAdvanceSema)
+               return
+       }
+
+       // Write an EvFrequency event for this generation.
+       //
+       // N.B. This may block for quite a while to get a good frequency estimate, so make sure we do
+       // this here and not e.g. on the trace reader.
+       traceFrequency(gen)
+
+       // Collect all the untraced Gs.
+       type untracedG struct {
+               gp           *g
+               goid         uint64
+               mid          int64
+               status       uint32
+               waitreason   waitReason
+               inMarkAssist bool
+       }
+       var untracedGs []untracedG
+       forEachGRace(func(gp *g) {
+               // Make absolutely sure all Gs are ready for the next
+               // generation. We need to do this even for dead Gs because
+               // they may come alive with a new identity, and their status-traced
+               // bookkeeping might end up being stale.
+               // We may miss totally new goroutines, but they'll always
+               // have clean bookkeeping.
+               gp.trace.readyNextGen(gen)
+               // If the status was traced, nothing else to do.
+               if gp.trace.statusWasTraced(gen) {
+                       return
+               }
+               // Scribble down information about this goroutine.
+               ug := untracedG{gp: gp, mid: -1}
+               systemstack(func() {
+                       me := getg().m.curg
+                       // We don't have to handle this G status transition because we
+                       // already eliminated ourselves from consideration above.
+                       casGToWaiting(me, _Grunning, waitReasonTraceGoroutineStatus)
+                       // We need to suspend and take ownership of the G to safely read its
+                       // goid. Note that we can't actually emit the event at this point
+                       // because we might stop the G in a window where it's unsafe to write
+                       // events based on the G's status. We need the global trace buffer flush
+                       // coming up to make sure we're not racing with the G.
+                       //
+                       // It should be very unlikely that we try to preempt a running G here.
+                       // The only situation that we might is that we're racing with a G
+                       // that's running for the first time in this generation. Therefore,
+                       // this should be relatively fast.
+                       s := suspendG(gp)
+                       if !s.dead {
+                               ug.goid = s.g.goid
+                               if s.g.m != nil {
+                                       ug.mid = s.g.m.id
+                               }
+                               ug.status = readgstatus(s.g) &^ _Gscan
+                               ug.waitreason = s.g.waitreason
+                               ug.inMarkAssist = s.g.inMarkAssist
+                       }
+                       resumeG(s)
+                       casgstatus(me, _Gwaiting, _Grunning)
+               })
+               if ug.goid != 0 {
+                       untracedGs = append(untracedGs, ug)
+               }
+       })
+
+       if !stopTrace {
+               // Re-register runtime goroutine labels and stop/block reasons.
+               traceRegisterLabelsAndReasons(traceNextGen(gen))
+       }
+
+       // Now that we've done some of the heavy stuff, prevent the world from stopping.
+       // This is necessary to ensure the consistency of the STW events. If we're feeling
+       // adventurous we could lift this restriction and add a STWActive event, but the
+       // cost of maintaining this consistency is low. We're not going to hold this semaphore
+       // for very long and most STW periods are very short.
+       // Once we hold worldsema, prevent preemption as well so we're not interrupted partway
+       // through this. We want to get this done as soon as possible.
+       semacquire(&worldsema)
+       mp := acquirem()
+
+       // Advance the generation or stop the trace.
+       trace.lastNonZeroGen = gen
+       if stopTrace {
+               systemstack(func() {
+                       // Ordering is important here. Set shutdown first, then disable tracing,
+                       // so that conditions like (traceEnabled() || traceShuttingDown()) have
+                       // no opportunity to be false. Hold the trace lock so this update appears
+                       // atomic to the trace reader.
+                       lock(&trace.lock)
+                       trace.shutdown.Store(true)
+                       trace.gen.Store(0)
+                       unlock(&trace.lock)
+               })
+       } else {
+               trace.gen.Store(traceNextGen(gen))
+       }
+
+       // Emit a ProcsChange event so we have one on record for each generation.
+       // Let's emit it as soon as possible so that downstream tools can rely on the value
+       // being there fairly soon in a generation.
+       //
+       // It's important that we do this before allowing stop-the-worlds again,
+       // because the procs count could change.
+       if !stopTrace {
+               tl := traceAcquire()
+               tl.Gomaxprocs(gomaxprocs)
+               traceRelease(tl)
+       }
+
+       // Emit a GCActive event in the new generation if necessary.
+       //
+       // It's important that we do this before allowing stop-the-worlds again,
+       // because that could emit global GC-related events.
+       if !stopTrace && (gcphase == _GCmark || gcphase == _GCmarktermination) {
+               tl := traceAcquire()
+               tl.GCActive()
+               traceRelease(tl)
+       }
+
+       // Preemption is OK again after this. If the world stops or whatever it's fine.
+       // We're just cleaning up the last generation after this point.
+       //
+       // We also don't care if the GC starts again after this for the same reasons.
+       releasem(mp)
+       semrelease(&worldsema)
+
+       // Snapshot allm and freem.
+       //
+       // Snapshotting after the generation counter update is sufficient.
+       // Because an m must be on either allm or sched.freem if it has an active trace
+       // buffer, new threads added to allm after this point must necessarily observe
+       // the new generation number (sched.lock acts as a barrier).
+       //
+       // Threads that exit before this point and are on neither list explicitly
+       // flush their own buffers in traceThreadDestroy.
+       //
+       // Snapshotting freem is necessary because Ms can continue to emit events
+       // while they're still on that list. Removal from sched.freem is serialized with
+       // this snapshot, so either we'll capture an m on sched.freem and race with
+       // the removal to flush its buffers (resolved by traceThreadDestroy acquiring
+       // the thread's seqlock, which one of us must win, so at least its old gen buffer
+       // will be flushed in time for the new generation) or it will have flushed its
+       // buffers before we snapshotted it to begin with.
+       lock(&sched.lock)
+       mToFlush := allm
+       for mp := mToFlush; mp != nil; mp = mp.alllink {
+               mp.trace.link = mp.alllink
+       }
+       for mp := sched.freem; mp != nil; mp = mp.freelink {
+               mp.trace.link = mToFlush
+               mToFlush = mp
+       }
+       unlock(&sched.lock)
+
+       // Iterate over our snapshot, flushing every buffer until we're done.
+       //
+       // Because trace writers read the generation while the seqlock is
+       // held, we can be certain that when there are no writers there are
+       // also no stale generation values left. Therefore, it's safe to flush
+       // any buffers that remain in that generation's slot.
+       systemstack(func() {
+               // Track iterations for some rudimentary deadlock detection.
+               i := 0
+               detectedDeadlock := false
+
+               for mToFlush != nil {
+                       prev := &mToFlush
+                       for mp := *prev; mp != nil; {
+                               if mp.trace.seqlock.Load()%2 != 0 {
+                                       // The M is writing. Come back to it later.
+                                       prev = &mp.trace.link
+                                       mp = mp.trace.link
+                                       continue
+                               }
+                               // Flush the trace buffer.
+                               //
+                               // trace.lock needed for traceBufFlush, but also to synchronize
+                               // with traceThreadDestroy, which flushes both buffers unconditionally.
+                               lock(&trace.lock)
+                               bufp := &mp.trace.buf[gen%2]
+                               if *bufp != nil {
+                                       traceBufFlush(*bufp, gen)
+                                       *bufp = nil
+                               }
+                               unlock(&trace.lock)
+
+                               // Remove the m from the flush list.
+                               *prev = mp.trace.link
+                               mp.trace.link = nil
+                               mp = *prev
+                       }
+                       // Yield only if we're going to be going around the loop again.
+                       if mToFlush != nil {
+                               osyield()
+                       }
+
+                       // Try to detect a deadlock. We probably shouldn't loop here
+                       // this many times.
+                       if i > 100000 && !detectedDeadlock {
+                               detectedDeadlock = true
+                               println("runtime: failing to flush")
+                               for mp := mToFlush; mp != nil; mp = mp.trace.link {
+                                       print("runtime: m=", mp.id, "\n")
+                               }
+                       }
+                       i++
+               }
+       })
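The flush loop above keys off each M's trace seqlock. A simplified, self-contained sketch of that protocol, with sync/atomic standing in for the runtime's internal atomics:

        package main

        import (
                "fmt"
                "sync/atomic"
        )

        type traceSeq struct{ n atomic.Uint64 }

        func (s *traceSeq) beginWrite() { s.n.Add(1) } // odd: a writer is active
        func (s *traceSeq) endWrite()   { s.n.Add(1) } // even again: quiescent

        // canFlush reports whether a flusher may safely take the M's buffer:
        // only when the seqlock value is even, i.e. no write is in progress.
        func canFlush(s *traceSeq) bool { return s.n.Load()%2 == 0 }

        func main() {
                var s traceSeq
                fmt.Println(canFlush(&s)) // true
                s.beginWrite()
                fmt.Println(canFlush(&s)) // false: skip this M and come back later
                s.endWrite()
                fmt.Println(canFlush(&s)) // true: safe to flush its buffer
        }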
+
+       // At this point, the old generation is fully flushed minus stack and string
+       // tables, CPU samples, and goroutines that haven't run at all during the last
+       // generation.
+
+       // Check to see if any Gs still haven't had events written out for them.
+       statusWriter := unsafeTraceWriter(gen, nil)
+       for _, ug := range untracedGs {
+               if ug.gp.trace.statusWasTraced(gen) {
+                       // It was traced, we don't need to do anything.
+                       continue
+               }
+               // It still wasn't traced. Because we ensured all Ms stopped writing trace
+               // events to the last generation, that must mean the G never had its status
+               // traced in gen between when we recorded it and now. If that's true, the goid
+               // and status we recorded then are exactly what we want right now.
+               status := goStatusToTraceGoStatus(ug.status, ug.waitreason)
+               statusWriter = statusWriter.writeGoStatus(ug.goid, ug.mid, status, ug.inMarkAssist)
+       }
+       statusWriter.flush().end()
+
+       systemstack(func() {
+               // Flush CPU samples, stacks, and strings for the last generation. This is safe,
+               // because we're now certain no M is writing to the last generation.
+               //
+               // Ordering is important here. traceCPUFlush may generate new stacks and dumping
+               // stacks may generate new strings.
+               traceCPUFlush(gen)
+               trace.stackTab[gen%2].dump(gen)
+               trace.stringTab[gen%2].reset(gen)
+
+               // That's it. This generation is done producing buffers.
+               lock(&trace.lock)
+               trace.flushedGen.Store(gen)
+               unlock(&trace.lock)
+       })
+
+       if stopTrace {
+               semacquire(&traceShutdownSema)
+
+               // Finish off CPU profile reading.
+               traceStopReadCPU()
+       } else {
+               // Go over each P and emit a status event for it if necessary.
+               //
+               // We do this at the beginning of the new generation instead of the
+               // end like we do for goroutines because forEachP doesn't give us a
+               // hook to skip Ps that have already been traced. Since we have to
+               // preempt all Ps anyway, might as well stay consistent with StartTrace
+               // which does this during the STW.
+               semacquire(&worldsema)
+               forEachP(waitReasonTraceProcStatus, func(pp *p) {
+                       tl := traceAcquire()
+                       if !pp.trace.statusWasTraced(tl.gen) {
+                               tl.writer().writeProcStatusForP(pp, false).end()
+                       }
+                       traceRelease(tl)
+               })
+               // Perform status reset on dead Ps because they just appear as idle.
+               //
+               // Holding worldsema prevents allp from changing.
+               //
+               // TODO(mknyszek): Consider explicitly emitting ProcCreate and ProcDestroy
+               // events to indicate whether a P exists, rather than just making its
+               // existence implicit.
+               for _, pp := range allp[len(allp):cap(allp)] {
+                       pp.trace.readyNextGen(traceNextGen(gen))
+               }
+               semrelease(&worldsema)
+       }
+
+       // Block until the trace reader has finished processing the last generation.
+       semacquire(&trace.doneSema[gen%2])
+       if raceenabled {
+               raceacquire(unsafe.Pointer(&trace.doneSema[gen%2]))
+       }
+
+       // Double-check that things look as we expect after advancing and perform some
+       // final cleanup if the trace has fully stopped.
+       systemstack(func() {
+               lock(&trace.lock)
+               if !trace.full[gen%2].empty() {
+                       throw("trace: non-empty full trace buffer for done generation")
+               }
+               if stopTrace {
+                       if !trace.full[1-(gen%2)].empty() {
+                               throw("trace: non-empty full trace buffer for next generation")
+                       }
+                       if trace.reading != nil || trace.reader.Load() != nil {
+                               throw("trace: reading after shutdown")
+                       }
+                       // Free all the empty buffers.
+                       for trace.empty != nil {
+                               buf := trace.empty
+                               trace.empty = buf.link
+                               sysFree(unsafe.Pointer(buf), unsafe.Sizeof(*buf), &memstats.other_sys)
+                       }
+                       // Clear trace.shutdown and other flags.
+                       trace.headerWritten = false
+                       trace.shutdown.Store(false)
+               }
+               unlock(&trace.lock)
+       })
+
+       if stopTrace {
+               // Clear the sweep state on every P for the next time tracing is enabled.
+               //
+               // It may be stale in the next trace because we may have ended tracing in
+               // the middle of a sweep on a P.
+               //
+               // It's fine not to call forEachP here because tracing is disabled and we
+               // know at this point that nothing is calling into the tracer, but we do
+               // need to look at dead Ps too just because GOMAXPROCS could have been called
+               // at any point since we stopped tracing, and we have to ensure there's no
+               // bad state on dead Ps too. Prevent a STW and a concurrent GOMAXPROCS that
+               // might mutate allp by making ourselves briefly non-preemptible.
+               mp := acquirem()
+               for _, pp := range allp[:cap(allp)] {
+                       pp.trace.inSweep = false
+                       pp.trace.maySweep = false
+                       pp.trace.swept = 0
+                       pp.trace.reclaimed = 0
+               }
+               releasem(mp)
+       }
+
+       // Release the advance semaphore. If stopTrace is true we're still holding onto
+       // traceShutdownSema.
+       //
+       // Do a direct handoff. Don't let one caller of traceAdvance starve
+       // other calls to traceAdvance.
+       semrelease1(&traceAdvanceSema, true, 0)
+
+       if stopTrace {
+               // Stop the traceAdvancer. We can't be holding traceAdvanceSema here because
+               // we'll deadlock (we're blocked on the advancer goroutine exiting, but it
+               // may be currently trying to acquire traceAdvanceSema).
+               traceAdvancer.stop()
+               semrelease(&traceShutdownSema)
+       }
+}
+
+func traceNextGen(gen uintptr) uintptr {
+       if gen == ^uintptr(0) {
+               // gen is used both %2 and %3 and we want both patterns to continue when we loop around.
+               // ^uint32(0) and ^uint64(0) are both odd and multiples of 3. Therefore the next generation
+               // we want is even and one more than a multiple of 3. The smallest such number is 4.
+               return 4
+       }
+       return gen + 1
+}
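+
+// As an illustrative aside (a sketch, not part of the tracer itself), the
+// wraparound property is easy to check directly: ^uint64(0) is odd and a
+// multiple of 3, so the generation that follows it must be even and one
+// more than a multiple of 3, and 4 is the smallest such value.
+//
+//	func checkGenWrap() bool {
+//		const maxGen = ^uint64(0)
+//		return maxGen%2 == 1 && maxGen%3 == 0 && // odd, multiple of 3
+//			4%2 == 0 && 4%3 == 1 // even, one more than a multiple of 3
+//	}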
+
+// traceRegisterLabelsAndReasons re-registers mark worker labels and
+// goroutine stop/block reasons in the string table for the provided
+// generation. Note: the provided generation must not have started yet.
+func traceRegisterLabelsAndReasons(gen uintptr) {
+       for i, label := range gcMarkWorkerModeStrings[:] {
+               trace.markWorkerLabels[gen%2][i] = traceArg(trace.stringTab[gen%2].put(gen, label))
+       }
+       for i, str := range traceBlockReasonStrings[:] {
+               trace.goBlockReasons[gen%2][i] = traceArg(trace.stringTab[gen%2].put(gen, str))
+       }
+       for i, str := range traceGoStopReasonStrings[:] {
+               trace.goStopReasons[gen%2][i] = traceArg(trace.stringTab[gen%2].put(gen, str))
+       }
+}
+
+// ReadTrace returns the next chunk of binary tracing data, blocking until data
+// is available. If tracing is turned off and all the data accumulated while it
+// was on has been returned, ReadTrace returns nil. The caller must copy the
+// returned data before calling ReadTrace again.
+// ReadTrace must be called from one goroutine at a time.
+func ReadTrace() []byte {
+top:
+       var buf []byte
+       var park bool
+       systemstack(func() {
+               buf, park = readTrace0()
+       })
+       if park {
+               gopark(func(gp *g, _ unsafe.Pointer) bool {
+                       if !trace.reader.CompareAndSwapNoWB(nil, gp) {
+                               // We're racing with another reader.
+                               // Wake up and handle this case.
+                               return false
+                       }
+
+                       if g2 := traceReader(); gp == g2 {
+                               // New data arrived between unlocking
+                               // and the CAS and we won the wake-up
+                               // race, so wake up directly.
+                               return false
+                       } else if g2 != nil {
+                               printlock()
+                               println("runtime: got trace reader", g2, g2.goid)
+                               throw("unexpected trace reader")
+                       }
+
+                       return true
+               }, nil, waitReasonTraceReaderBlocked, traceBlockSystemGoroutine, 2)
+               goto top
+       }
+
+       return buf
+}
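+
+// A minimal consumer sketch (illustrative only; processChunk is a
+// hypothetical callback): a single goroutine drains the trace, copying
+// each chunk before the next call, per the contract above.
+//
+//	go func() {
+//		for {
+//			data := runtime.ReadTrace()
+//			if data == nil {
+//				return // tracing stopped and fully drained
+//			}
+//			processChunk(append([]byte(nil), data...)) // copy before next call
+//		}
+//	}()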
+
+// readTrace0 is ReadTrace's continuation on g0. This must run on the
+// system stack because it acquires trace.lock.
+//
+//go:systemstack
+func readTrace0() (buf []byte, park bool) {
+       if raceenabled {
+               // g0 doesn't have a race context. Borrow the user G's.
+               if getg().racectx != 0 {
+                       throw("expected racectx == 0")
+               }
+               getg().racectx = getg().m.curg.racectx
+               // (This defer should get open-coded, which is safe on
+               // the system stack.)
+               defer func() { getg().racectx = 0 }()
+       }
+
+       // This function must not allocate while holding trace.lock:
+       // allocation can call into the heap allocator, which will try to emit a
+       // trace event while holding the heap lock.
+       lock(&trace.lock)
+
+       if trace.reader.Load() != nil {
+               // More than one goroutine is reading the trace. This is bad.
+               // But we would rather not crash the program because of tracing,
+               // since tracing can be enabled at runtime on production servers.
+               unlock(&trace.lock)
+               println("runtime: ReadTrace called from multiple goroutines simultaneously")
+               return nil, false
+       }
+       // Recycle the old buffer.
+       if buf := trace.reading; buf != nil {
+               buf.link = trace.empty
+               trace.empty = buf
+               trace.reading = nil
+       }
+       // Write trace header.
+       if !trace.headerWritten {
+               trace.headerWritten = true
+               unlock(&trace.lock)
+               return []byte("go 1.22 trace\x00\x00\x00"), false
+       }
+
+       // Read the next buffer.
+
+       if trace.readerGen.Load() == 0 {
+               trace.readerGen.Store(1)
+       }
+       var gen uintptr
+       for {
+               assertLockHeld(&trace.lock)
+               gen = trace.readerGen.Load()
+
+               // Check to see if we need to block for more data in this generation
+               // or if we need to move our generation forward.
+               if !trace.full[gen%2].empty() {
+                       break
+               }
+               // Most of the time readerGen is one generation ahead of flushedGen, as the
+               // current generation is being read from. Then, once the last buffer is flushed
+               // into readerGen, flushedGen will rise to meet it. At this point, the tracer
+               // is waiting on the reader to finish flushing the last generation so that it
+               // can continue to advance.
+               if trace.flushedGen.Load() == gen {
+                       if trace.shutdown.Load() {
+                               // Wake up anyone waiting for us to be done with this generation.
+                               //
+                               // Do this after reading trace.shutdown, because the thread we're
+                               // waking up is going to clear trace.shutdown.
+                               if raceenabled {
+                                       // Model synchronization on trace.doneSema, which the race
+                                       // detector does not see. This is required to avoid false
+                                       // race reports on the writer passed to trace.Start.
+                                       racerelease(unsafe.Pointer(&trace.doneSema[gen%2]))
+                               }
+                               semrelease(&trace.doneSema[gen%2])
+
+                               // We're shutting down, and the last generation is fully
+                               // read. We're done.
+                               unlock(&trace.lock)
+                               return nil, false
+                       }
+                       // The previous gen has had all of its buffers flushed, and
+                       // there's nothing else for us to read. Advance the generation
+                       // we're reading from and try again.
+                       trace.readerGen.Store(trace.gen.Load())
+
+                       // Wake up anyone waiting for us to be done with this generation.
+                       //
+                       // Do this after reading gen to make sure we can't have the trace
+                       // advance until we've read it.
+                       if raceenabled {
+                               // See comment above in the shutdown case.
+                               racerelease(unsafe.Pointer(&trace.doneSema[gen%2]))
+                       }
+                       semrelease(&trace.doneSema[gen%2])
+                       continue
+               }
+               // Wait for new data.
+               //
+               // We don't simply use a note because the scheduler
+               // executes this goroutine directly when it wakes up
+               // (also a note would consume an M).
+               //
+               // Before we drop the lock, clear the workAvailable flag. Work can
+               // only be queued with trace.lock held, so this is at least true until
+               // we drop the lock.
+               trace.workAvailable.Store(false)
+               unlock(&trace.lock)
+               return nil, true
+       }
+       // Pull a buffer.
+       tbuf := trace.full[gen%2].pop()
+       trace.reading = tbuf
+       unlock(&trace.lock)
+       return tbuf.arr[:tbuf.pos], false
+}
+
+// traceReader returns the trace reader that should be woken up, if any.
+// Callers should first check (traceEnabled() || traceShuttingDown()).
+//
+// This must run on the system stack because it acquires trace.lock.
+//
+//go:systemstack
+func traceReader() *g {
+       gp := traceReaderAvailable()
+       if gp == nil || !trace.reader.CompareAndSwapNoWB(gp, nil) {
+               return nil
+       }
+       return gp
+}
+
+// traceReaderAvailable returns the trace reader if it is not currently
+// scheduled and should be. Callers should first check that
+// (traceEnabled() || traceShuttingDown()) is true.
+func traceReaderAvailable() *g {
+       // There are three conditions under which we definitely want to schedule
+       // the reader:
+       // - The reader is lagging behind in finishing off the last generation.
+       //   In this case, trace buffers could even be empty, but the trace
+       //   advancer will be waiting on the reader, so we have to make sure
+       //   to schedule the reader ASAP.
+       // - The reader has pending work to process for its reader generation
+       //   (assuming readerGen is not lagging behind). Note that we also want
+       //   to be careful *not* to schedule the reader if there's no work to do.
+       // - The trace is shutting down. The trace stopper blocks on the reader
+       //   to finish, much like trace advancement.
+       //
+       // We also want to be careful not to schedule the reader if there's no
+       // reason to.
+       if trace.flushedGen.Load() == trace.readerGen.Load() || trace.workAvailable.Load() || trace.shutdown.Load() {
+               return trace.reader.Load()
+       }
+       return nil
+}
+
+// Trace advancer goroutine.
+var traceAdvancer traceAdvancerState
+
+type traceAdvancerState struct {
+       timer wakeableSleep
+       done  chan struct{}
+}
+
+// start starts a new traceAdvancer.
+func (s *traceAdvancerState) start() {
+       // Start a goroutine to periodically advance the trace generation.
+       s.done = make(chan struct{})
+       s.timer.init()
+       go func() {
+               for traceEnabled() {
+                       // Set a timer to wake us up
+                       s.timer.sleep(int64(debug.traceadvanceperiod))
+
+                       // Try to advance the trace.
+                       traceAdvance(false)
+               }
+               s.done <- struct{}{}
+       }()
+}
+
+// stop stops a traceAdvancer and blocks until it exits.
+func (s *traceAdvancerState) stop() {
+       s.timer.wake()
+       <-s.done
+       close(s.done)
+       s.timer.close()
+}
+
+// defaultTraceAdvancePeriod is the default approximate period between
+// new generations.
+const defaultTraceAdvancePeriod = 1e9 // 1 second.
+
+// wakeableSleep manages a wakeable goroutine sleep.
+//
+// Users of this type must call init before first use and
+// close to free up resources. Once close is called, init
+// must be called before another use.
+type wakeableSleep struct {
+       timer  *timer
+       wakeup chan struct{}
+}
+
+// init initializes the timer.
+func (s *wakeableSleep) init() {
+       s.wakeup = make(chan struct{}, 1)
+       s.timer = new(timer)
+       s.timer.arg = s
+       s.timer.f = func(s any, _ uintptr) {
+               s.(*wakeableSleep).wake()
+       }
+}
+
+// sleep sleeps for the provided duration in nanoseconds or until
+// another goroutine calls wake.
+//
+// Must not be called by more than one goroutine at a time.
+func (s *wakeableSleep) sleep(ns int64) {
+       resetTimer(s.timer, nanotime()+ns)
+       <-s.wakeup
+       stopTimer(s.timer)
+}
+
+// wake awakens any goroutine sleeping on the timer.
+//
+// Safe for concurrent use.
+func (s *wakeableSleep) wake() {
+       // Non-blocking send.
+       //
+       // Others may also write to this channel and we don't
+       // want to block on the receiver waking up. This also
+       // effectively batches together wakeup notifications.
+       select {
+       case s.wakeup <- struct{}{}:
+       default:
+       }
+}
+
+// close wakes any goroutine sleeping on the timer and prevents
+// further sleeping on it.
+//
+// It must only be called once no goroutine is sleeping on the
+// timer *and* nothing else will call wake concurrently.
+func (s *wakeableSleep) close() {
+       close(s.wakeup)
+}
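+
+// Illustrative usage of wakeableSleep (a sketch, not part of this file),
+// mirroring how the trace advancer and the CPU log reader use it: one
+// sleeper loops on a bounded sleep that another goroutine can cut short.
+//
+//	var s wakeableSleep
+//	s.init()
+//	done := make(chan struct{})
+//	go func() {
+//		for stillRunning() { // hypothetical loop condition
+//			s.sleep(1e9) // wake after 1s, or earlier via wake
+//			// ... periodic work ...
+//		}
+//		close(done)
+//	}()
+//	s.wake() // cut the sleep short
+//	<-done   // sleeper has exited; nothing will sleep or wake again
+//	s.close()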
diff --git a/src/runtime/trace2buf.go b/src/runtime/trace2buf.go
new file mode 100644 (file)
index 0000000..54de5e1
--- /dev/null
@@ -0,0 +1,259 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.exectracer2
+
+// Trace buffer management.
+
+package runtime
+
+import (
+       "runtime/internal/sys"
+       "unsafe"
+)
+
+// Maximum number of bytes required to encode uint64 in base-128.
+const traceBytesPerNumber = 10
+
+// traceWriter is the interface for writing all trace data.
+//
+// This type is passed around as a value, and all of its methods return
+// a new traceWriter. This allows for chaining together calls in a fluent-style
+// API. This is partly stylistic, and very slightly for performance, since
+// the compiler can destructure this value and pass it between calls as
+// just regular arguments. However, this style is not load-bearing, and
+// we can change it if it's deemed too error-prone.
+type traceWriter struct {
+       traceLocker
+       *traceBuf
+}
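+
+// An illustrative sketch of the fluent style described above (the event
+// value and argument here are arbitrary, not a real call site): each call
+// returns the possibly-updated writer, and end writes the state back.
+//
+//	w := tl.writer()                         // pick up the M's current buffer
+//	w, _ = w.ensure(1 + traceBytesPerNumber) // flush and refill if needed
+//	w.byte(byte(traceEvProcStop))            // append an event type byte
+//	w.varint(42)                             // ...and a varint argument
+//	w.end()                                  // store the buffer back on the M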
+
+// writer returns a traceWriter that writes into the current M's stream.
+func (tl traceLocker) writer() traceWriter {
+       return traceWriter{traceLocker: tl, traceBuf: tl.mp.trace.buf[tl.gen%2]}
+}
+
+// unsafeTraceWriter produces a traceWriter that doesn't lock the trace.
+//
+// It should only be used in contexts where either:
+// - Another traceLocker is held.
+// - trace.gen is prevented from advancing.
+//
+// buf may be nil.
+func unsafeTraceWriter(gen uintptr, buf *traceBuf) traceWriter {
+       return traceWriter{traceLocker: traceLocker{gen: gen}, traceBuf: buf}
+}
+
+// end writes the buffer back into the m.
+func (w traceWriter) end() {
+       if w.mp == nil {
+               // Tolerate a nil mp. It makes code that creates traceWriters directly
+               // less error-prone.
+               return
+       }
+       w.mp.trace.buf[w.gen%2] = w.traceBuf
+}
+
+// ensure makes sure that at least maxSize bytes are available to write.
+//
+// Returns whether the buffer was flushed.
+func (w traceWriter) ensure(maxSize int) (traceWriter, bool) {
+       refill := w.traceBuf == nil || !w.available(maxSize)
+       if refill {
+               w = w.refill()
+       }
+       return w, refill
+}
+
+// flush puts w.traceBuf on the queue of full buffers.
+func (w traceWriter) flush() traceWriter {
+       systemstack(func() {
+               lock(&trace.lock)
+               if w.traceBuf != nil {
+                       traceBufFlush(w.traceBuf, w.gen)
+               }
+               unlock(&trace.lock)
+       })
+       w.traceBuf = nil
+       return w
+}
+
+// refill puts w.traceBuf on the queue of full buffers and refreshes w's buffer.
+func (w traceWriter) refill() traceWriter {
+       systemstack(func() {
+               lock(&trace.lock)
+               if w.traceBuf != nil {
+                       traceBufFlush(w.traceBuf, w.gen)
+               }
+               if trace.empty != nil {
+                       w.traceBuf = trace.empty
+                       trace.empty = w.traceBuf.link
+                       unlock(&trace.lock)
+               } else {
+                       unlock(&trace.lock)
+                       w.traceBuf = (*traceBuf)(sysAlloc(unsafe.Sizeof(traceBuf{}), &memstats.other_sys))
+                       if w.traceBuf == nil {
+                               throw("trace: out of memory")
+                       }
+               }
+       })
+       // Initialize the buffer.
+       ts := traceClockNow()
+       if ts <= w.traceBuf.lastTime {
+               ts = w.traceBuf.lastTime + 1
+       }
+       w.traceBuf.lastTime = ts
+       w.traceBuf.link = nil
+       w.traceBuf.pos = 0
+
+       // Tolerate a nil mp.
+       mID := ^uint64(0)
+       if w.mp != nil {
+               mID = uint64(w.mp.procid)
+       }
+
+       // Write the buffer's header.
+       w.byte(byte(traceEvEventBatch))
+       w.varint(uint64(w.gen))
+       w.varint(uint64(mID))
+       w.varint(uint64(ts))
+       w.traceBuf.lenPos = w.varintReserve()
+       return w
+}
+
+// traceBufQueue is a FIFO of traceBufs.
+type traceBufQueue struct {
+       head, tail *traceBuf
+}
+
+// push queues buf into queue of buffers.
+func (q *traceBufQueue) push(buf *traceBuf) {
+       buf.link = nil
+       if q.head == nil {
+               q.head = buf
+       } else {
+               q.tail.link = buf
+       }
+       q.tail = buf
+}
+
+// pop dequeues from the queue of buffers.
+func (q *traceBufQueue) pop() *traceBuf {
+       buf := q.head
+       if buf == nil {
+               return nil
+       }
+       q.head = buf.link
+       if q.head == nil {
+               q.tail = nil
+       }
+       buf.link = nil
+       return buf
+}
+
+func (q *traceBufQueue) empty() bool {
+       return q.head == nil
+}
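+
+// Illustrative only: the queue is a plain singly-linked FIFO, so buffers
+// come back out in push order (a and b are hypothetical *traceBuf values).
+//
+//	var q traceBufQueue
+//	q.push(a)
+//	q.push(b)
+//	q.pop()   // a
+//	q.pop()   // b
+//	q.empty() // true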
+
+// traceBufHeader is per-P tracing buffer.
+type traceBufHeader struct {
+       link     *traceBuf // in trace.empty/full
+       lastTime traceTime // when we wrote the last event
+       pos      int       // next write offset in arr
+       lenPos   int       // position of batch length value
+}
+
+// traceBuf is per-M tracing buffer.
+//
+// TODO(mknyszek): Rename traceBuf to traceBatch, since they map 1:1 with event batches.
+type traceBuf struct {
+       _ sys.NotInHeap
+       traceBufHeader
+       arr [64<<10 - unsafe.Sizeof(traceBufHeader{})]byte // underlying buffer for trace event data
+}
+
+// byte appends v to buf.
+func (buf *traceBuf) byte(v byte) {
+       buf.arr[buf.pos] = v
+       buf.pos++
+}
+
+// varint appends v to buf in little-endian-base-128 encoding.
+func (buf *traceBuf) varint(v uint64) {
+       pos := buf.pos
+       arr := buf.arr[pos : pos+traceBytesPerNumber]
+       for i := range arr {
+               if v < 0x80 {
+                       pos += i + 1
+                       arr[i] = byte(v)
+                       break
+               }
+               arr[i] = 0x80 | byte(v)
+               v >>= 7
+       }
+       buf.pos = pos
+}
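+
+// For reference, a matching decoder (an illustrative sketch; the real
+// decoding lives in the trace parser): consume 7 bits per byte, least
+// significant group first, until a byte without the 0x80 continuation bit.
+//
+//	func decodeVarint(b []byte) (v uint64, n int) {
+//		for shift := uint(0); n < len(b); shift += 7 {
+//			c := b[n]
+//			n++
+//			v |= uint64(c&0x7f) << shift
+//			if c&0x80 == 0 {
+//				break
+//			}
+//		}
+//		return v, n
+//	}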
+
+// varintReserve reserves enough space in buf to hold any varint.
+//
+// Space reserved this way can be filled in with the varintAt method.
+func (buf *traceBuf) varintReserve() int {
+       p := buf.pos
+       buf.pos += traceBytesPerNumber
+       return p
+}
+
+// stringData appends s's data directly to buf.
+func (buf *traceBuf) stringData(s string) {
+       buf.pos += copy(buf.arr[buf.pos:], s)
+}
+
+func (buf *traceBuf) available(size int) bool {
+       return len(buf.arr)-buf.pos >= size
+}
+
+// varintAt writes varint v at byte position pos in buf. This always
+// consumes traceBytesPerNumber bytes. This is intended for when the caller
+// needs to reserve space for a varint but can't populate it until later.
+// Use varintReserve to reserve this space.
+func (buf *traceBuf) varintAt(pos int, v uint64) {
+       for i := 0; i < traceBytesPerNumber; i++ {
+               if i < traceBytesPerNumber-1 {
+                       buf.arr[pos] = 0x80 | byte(v)
+               } else {
+                       buf.arr[pos] = byte(v)
+               }
+               v >>= 7
+               pos++
+       }
+       if v != 0 {
+               throw("v could not fit in traceBytesPerNumber")
+       }
+}
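+
+// Together, varintReserve and varintAt enable a reserve-then-backfill
+// pattern, sketched here for illustration (writeBody is hypothetical);
+// traceBufFlush below uses exactly this for the batch length:
+//
+//	lenPos := buf.varintReserve() // hold traceBytesPerNumber bytes
+//	writeBody(buf)                // append events of as-yet-unknown size
+//	buf.varintAt(lenPos, uint64(buf.pos-(lenPos+traceBytesPerNumber)))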
+
+// traceBufFlush flushes a trace buffer.
+//
+// Must run on the system stack because trace.lock must be held.
+//
+//go:systemstack
+func traceBufFlush(buf *traceBuf, gen uintptr) {
+       assertLockHeld(&trace.lock)
+
+       // Write out the non-header length of the batch in the header.
+       //
+       // Note: the length of the header is not included to make it easier
+       // to calculate this value when deserializing and reserializing the
+       // trace. Varints can have additional padding of zero bits that is
+       // quite difficult to preserve, and if we include the header we
+       // force serializers to do more work. Nothing else actually needs
+       // padding.
+       buf.varintAt(buf.lenPos, uint64(buf.pos-(buf.lenPos+traceBytesPerNumber)))
+       trace.full[gen%2].push(buf)
+
+       // Notify the scheduler that there's work available and that the trace
+       // reader should be scheduled.
+       if !trace.workAvailable.Load() {
+               trace.workAvailable.Store(true)
+       }
+}
diff --git a/src/runtime/trace2cpu.go b/src/runtime/trace2cpu.go
new file mode 100644 (file)
index 0000000..4c9bad4
--- /dev/null
@@ -0,0 +1,242 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.exectracer2
+
+// CPU profile -> trace
+
+package runtime
+
+// traceInitReadCPU initializes CPU profile -> tracer state for tracing.
+//
+// It stores the profBuf to read CPU samples from in trace.cpuLogRead.
+func traceInitReadCPU() {
+       if traceEnabled() {
+               throw("traceInitReadCPU called with trace enabled")
+       }
+       // Create new profBuf for CPU samples that will be emitted as events.
+       profBuf := newProfBuf(3, profBufWordCount, profBufTagCount) // after the timestamp, header is [pp.id, gp.goid, mp.procid]
+       trace.cpuLogRead = profBuf
+       // We must not acquire trace.signalLock outside of a signal handler: a
+       // profiling signal may arrive at any time and try to acquire it, leading to
+       // deadlock. Because we can't use that lock to protect updates to
+       // trace.cpuLogWrite (the only use of the structure it references), reads and
+       // writes of the pointer must be atomic. (And although this field is never
+       // the sole pointer to the profBuf value, it's best to allow a write barrier
+       // here.)
+       trace.cpuLogWrite.Store(profBuf)
+}
+
+// traceStartReadCPU creates a goroutine to start reading CPU profile
+// data into an active trace.
+//
+// traceAdvanceSema must be held.
+func traceStartReadCPU() {
+       if !traceEnabled() {
+               throw("traceStartReadCPU called with trace disabled")
+       }
+       // Spin up the logger goroutine.
+       trace.cpuSleep.init()
+       done := make(chan struct{}, 1)
+       go func() {
+               for traceEnabled() {
+                       // Sleep here because traceReadCPU is non-blocking. This mirrors
+                       // how the runtime/pprof package obtains CPU profile data.
+                       //
+                       // We can't do a blocking read here because Darwin can't do a
+                       // wakeup from a signal handler, so all CPU profiling is just
+                       // non-blocking. See #61768 for more details.
+                       //
+                       // Like the runtime/pprof package, even if that bug didn't exist
+                       // we would still want to do a goroutine-level sleep in between
+                       // reads to avoid frequent wakeups.
+                       trace.cpuSleep.sleep(100_000_000)
+                       if !traceReadCPU(trace.cpuLogRead) {
+                               break
+                       }
+               }
+               done <- struct{}{}
+       }()
+       trace.cpuLogDone = done
+}
+
+// traceStopReadCPU blocks until the trace CPU reading goroutine exits.
+//
+// traceAdvanceSema must be held, and tracing must be disabled.
+func traceStopReadCPU() {
+       if traceEnabled() {
+               throw("traceStopReadCPU called with trace enabled")
+       }
+
+       // Once we close the profbuf, we'll be in one of two situations:
+       // - The logger goroutine has already exited because it observed
+       //   that the trace is disabled.
+       // - The logger goroutine is asleep.
+       //
+       // Wake the goroutine so it can observe that the buffer is
+       // closed and exit.
+       trace.cpuLogWrite.Store(nil)
+       trace.cpuLogRead.close()
+       trace.cpuSleep.wake()
+
+       // Wait until the logger goroutine exits.
+       <-trace.cpuLogDone
+
+       // Clear state for the next trace.
+       trace.cpuLogDone = nil
+       trace.cpuLogRead = nil
+       trace.cpuSleep.close()
+}
+
+// traceReadCPU attempts to read from the provided profBuf and write
+// into the trace. Returns true if there might be more to read or false
+// if the profBuf is closed or the caller should otherwise stop reading.
+//
+// No more than one goroutine may be in traceReadCPU for the same
+// profBuf at a time.
+func traceReadCPU(pb *profBuf) bool {
+       var pcBuf [traceStackSize]uintptr
+
+       data, tags, eof := pb.read(profBufNonBlocking)
+       for len(data) > 0 {
+               if len(data) < 4 || data[0] > uint64(len(data)) {
+                       break // truncated profile
+               }
+               if data[0] < 4 || tags != nil && len(tags) < 1 {
+                       break // malformed profile
+               }
+               if len(tags) < 1 {
+                       break // mismatched profile records and tags
+               }
+
+               // Deserialize the data in the profile buffer.
+               recordLen := data[0]
+               timestamp := data[1]
+               ppid := data[2] >> 1
+               if hasP := (data[2] & 0b1) != 0; !hasP {
+                       ppid = ^uint64(0)
+               }
+               goid := data[3]
+               mpid := data[4]
+               stk := data[5:recordLen]
+
+               // Overflow records always have their headers contain
+               // all zeroes.
+               isOverflowRecord := len(stk) == 1 && data[2] == 0 && data[3] == 0 && data[4] == 0
+
+               // Move the data iterator forward.
+               data = data[recordLen:]
+               // No support here for reporting goroutine tags at the moment; if
+               // that information is to be part of the execution trace, we'd
+               // probably want to see when the tags are applied and when they
+               // change, instead of only seeing them when we get a CPU sample.
+               tags = tags[1:]
+
+               if isOverflowRecord {
+                       // Looks like an overflow record from the profBuf. Not much to
+                       // do here, we only want to report full records.
+                       continue
+               }
+
+               // Construct the stack for insertion to the stack table.
+               nstk := 1
+               pcBuf[0] = logicalStackSentinel
+               for ; nstk < len(pcBuf) && nstk-1 < len(stk); nstk++ {
+                       pcBuf[nstk] = uintptr(stk[nstk-1])
+               }
+
+               // Write out a trace event.
+               tl := traceAcquire()
+               if !tl.ok() {
+                       // Tracing disabled, exit without continuing.
+                       return false
+               }
+               w := unsafeTraceWriter(tl.gen, trace.cpuBuf[tl.gen%2])
+
+               // Ensure we have a place to write to.
+               var flushed bool
+               w, flushed = w.ensure(2 + 5*traceBytesPerNumber /* traceEvCPUSamples + traceEvCPUSample + timestamp + g + m + p + stack ID */)
+               if flushed {
+                       // Annotate the batch as containing CPU samples.
+                       w.byte(byte(traceEvCPUSamples))
+               }
+
+               // Add the stack to the table.
+               stackID := trace.stackTab[tl.gen%2].put(pcBuf[:nstk])
+
+               // Write out the CPU sample.
+               w.byte(byte(traceEvCPUSample))
+               w.varint(timestamp)
+               w.varint(mpid)
+               w.varint(ppid)
+               w.varint(goid)
+               w.varint(stackID)
+
+               trace.cpuBuf[tl.gen%2] = w.traceBuf
+               traceRelease(tl)
+       }
+       return !eof
+}
+
+// traceCPUFlush flushes trace.cpuBuf[gen%2]. The caller must be certain that gen
+// has completed and that there are no more writers to it.
+//
+// Must run on the systemstack because it flushes buffers and acquires trace.lock
+// to do so.
+//
+//go:systemstack
+func traceCPUFlush(gen uintptr) {
+       if buf := trace.cpuBuf[gen%2]; buf != nil {
+               lock(&trace.lock)
+               traceBufFlush(buf, gen)
+               unlock(&trace.lock)
+               trace.cpuBuf[gen%2] = nil
+       }
+}
+
+// traceCPUSample writes a CPU profile sample stack to the execution tracer's
+// profiling buffer. It is called from a signal handler, so is limited in what
+// it can do.
+func traceCPUSample(gp *g, mp *m, pp *p, stk []uintptr) {
+       if !traceEnabled() {
+               // Tracing is usually turned off; don't spend time acquiring the signal
+               // lock unless it's active.
+               return
+       }
+
+       now := traceClockNow()
+       // The "header" here is the ID of the M that was running the profiled code,
+       // followed by the IDs of the P and goroutine. (For normal CPU profiling, it's
+       // usually the number of samples with the given stack.) Near syscalls, pp
+       // may be nil. Reporting goid of 0 is fine for either g0 or a nil gp.
+       var hdr [3]uint64
+       if pp != nil {
+               // Overflow records in profBuf have all header values set to zero. Make
+               // sure that real headers have at least one bit set.
+               hdr[0] = uint64(pp.id)<<1 | 0b1
+       } else {
+               hdr[0] = 0b10
+       }
+       if gp != nil {
+               hdr[1] = gp.goid
+       }
+       if mp != nil {
+               hdr[2] = uint64(mp.procid)
+       }
+
+       // Allow only one writer at a time.
+       for !trace.signalLock.CompareAndSwap(0, 1) {
+               // TODO: Is it safe to osyield here? https://go.dev/issue/52672
+               osyield()
+       }
+
+       if log := trace.cpuLogWrite.Load(); log != nil {
+               // Note: we don't pass a tag pointer here (how should profiling tags
+               // interact with the execution tracer?), but if we did we'd need to be
+               // careful about write barriers. See the long comment in profBuf.write.
+               log.write(nil, int64(now), hdr[:], stk)
+       }
+
+       trace.signalLock.Store(0)
+}
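+
+// The hdr[0] encoding above, summarized as an illustrative sketch
+// (traceReadCPU performs the matching decode; pid is a hypothetical P ID):
+//
+//	enc := uint64(pid)<<1 | 0b1 // P present: ID in the high bits, low bit set
+//	// No P: 0b10, which keeps a bit set so a real header can never look
+//	// like an all-zero overflow record.
+//	pid, hasP := enc>>1, enc&0b1 != 0 // decode: low bit clear means no P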
diff --git a/src/runtime/trace2event.go b/src/runtime/trace2event.go
new file mode 100644 (file)
index 0000000..f7abf60
--- /dev/null
@@ -0,0 +1,194 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.exectracer2
+
+// Trace event writing API for trace2runtime.go.
+
+package runtime
+
+import (
+       "runtime/internal/sys"
+)
+
+// Event types in the trace, args are given in square brackets.
+//
+// Naming scheme:
+//   - Time range event pairs have suffixes "Begin" and "End".
+//   - "Start", "Stop", "Create", "Destroy", "Block", "Unblock"
+//     are suffixes reserved for scheduling resources.
+//
+// NOTE: If you add an event type, make sure you also update all
+// tables in this file!
+type traceEv uint8
+
+const (
+       traceEvNone traceEv = iota // unused
+
+       // Structural events.
+       traceEvEventBatch // start of per-M batch of events [generation, M ID, timestamp, batch length]
+       traceEvStacks     // start of a section of the stack table [...traceEvStack]
+       traceEvStack      // stack table entry [ID, ...{PC, func string ID, file string ID, line #}]
+       traceEvStrings    // start of a section of the string dictionary [...traceEvString]
+       traceEvString     // string dictionary entry [ID, length, string]
+       traceEvCPUSamples // start of a section of CPU samples [...traceEvCPUSample]
+       traceEvCPUSample  // CPU profiling sample [timestamp, M ID, P ID, goroutine ID, stack ID]
+       traceEvFrequency  // timestamp units per sec [freq]
+
+       // Procs.
+       traceEvProcsChange // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack ID]
+       traceEvProcStart   // start of P [timestamp, P ID, P seq]
+       traceEvProcStop    // stop of P [timestamp]
+       traceEvProcSteal   // P was stolen [timestamp, P ID, P seq, M ID]
+       traceEvProcStatus  // P status at the start of a generation [timestamp, P ID, status]
+
+       // Goroutines.
+       traceEvGoCreate            // goroutine creation [timestamp, new goroutine ID, new stack ID, stack ID]
+       traceEvGoCreateSyscall     // goroutine appears in syscall (cgo callback) [timestamp, new goroutine ID]
+       traceEvGoStart             // goroutine starts running [timestamp, goroutine ID, goroutine seq]
+       traceEvGoDestroy           // goroutine ends [timestamp]
+       traceEvGoDestroySyscall    // goroutine ends in syscall (cgo callback) [timestamp]
+       traceEvGoStop              // goroutine yields its time, but is runnable [timestamp, reason, stack ID]
+       traceEvGoBlock             // goroutine blocks [timestamp, reason, stack ID]
+       traceEvGoUnblock           // goroutine is unblocked [timestamp, goroutine ID, goroutine seq, stack ID]
+       traceEvGoSyscallBegin      // syscall enter [timestamp, stack ID]
+       traceEvGoSyscallEnd        // syscall exit [timestamp]
+       traceEvGoSyscallEndBlocked // syscall exit and it blocked at some point [timestamp]
+       traceEvGoStatus            // goroutine status at the start of a generation [timestamp, goroutine ID, M ID, status]
+
+       // STW.
+       traceEvSTWBegin // STW start [timestamp, kind]
+       traceEvSTWEnd   // STW done [timestamp]
+
+       // GC events.
+       traceEvGCActive           // GC active [timestamp, seq]
+       traceEvGCBegin            // GC start [timestamp, seq, stack ID]
+       traceEvGCEnd              // GC done [timestamp, seq]
+       traceEvGCSweepActive      // GC sweep active [timestamp, P ID]
+       traceEvGCSweepBegin       // GC sweep start [timestamp, stack ID]
+       traceEvGCSweepEnd         // GC sweep done [timestamp, swept bytes, reclaimed bytes]
+       traceEvGCMarkAssistActive // GC mark assist active [timestamp, goroutine ID]
+       traceEvGCMarkAssistBegin  // GC mark assist start [timestamp, stack ID]
+       traceEvGCMarkAssistEnd    // GC mark assist done [timestamp]
+       traceEvHeapAlloc          // gcController.heapLive change [timestamp, heap alloc in bytes]
+       traceEvHeapGoal           // gcController.heapGoal() change [timestamp, heap goal in bytes]
+
+       // Annotations.
+       traceEvGoLabel         // apply string label to current running goroutine [timestamp, label string ID]
+       traceEvUserTaskBegin   // trace.NewTask [timestamp, internal task ID, internal parent task ID, name string ID, stack ID]
+       traceEvUserTaskEnd     // end of a task [timestamp, internal task ID, stack ID]
+       traceEvUserRegionBegin // trace.{Start,With}Region [timestamp, internal task ID, name string ID, stack ID]
+       traceEvUserRegionEnd   // trace.{End,With}Region [timestamp, internal task ID, name string ID, stack ID]
+       traceEvUserLog         // trace.Log [timestamp, internal task ID, key string ID, stack, value string ID]
+)
+
+// traceArg is a simple wrapper type to help ensure that arguments passed
+// to traces are well-formed.
+type traceArg uint64
+
+// traceEventWriter is the high-level API for writing trace events.
+//
+// See the comment on traceWriter about style for more details as to why
+// this type and its methods are structured the way they are.
+type traceEventWriter struct {
+       w traceWriter
+}
+
+// eventWriter creates a new traceEventWriter. It is the main entrypoint for writing trace events.
+//
+// Before creating the event writer, this method will emit a status for the current goroutine
+// or proc if it exists, and if it hasn't had its status emitted yet. goStatus and procStatus indicate
+// what the status of goroutine or P should be immediately *before* the events that are about to
+// be written using the eventWriter (if they exist). No status will be written if there's no active
+// goroutine or P.
+//
+// Callers can elect to pass a constant value here if the status is clear (e.g. a goroutine must have
+// been Runnable before a GoStart). Otherwise, callers can query the status of either the goroutine
+// or P and pass the appropriate status.
+//
+// In this case, the default status should be traceGoBad or traceProcBad to help identify bugs sooner.
+func (tl traceLocker) eventWriter(goStatus traceGoStatus, procStatus traceProcStatus) traceEventWriter {
+       w := tl.writer()
+       if pp := tl.mp.p.ptr(); pp != nil && !pp.trace.statusWasTraced(tl.gen) && pp.trace.acquireStatus(tl.gen) {
+               w = w.writeProcStatus(uint64(pp.id), procStatus, pp.trace.inSweep)
+       }
+       if gp := tl.mp.curg; gp != nil && !gp.trace.statusWasTraced(tl.gen) && gp.trace.acquireStatus(tl.gen) {
+               w = w.writeGoStatus(uint64(gp.goid), int64(tl.mp.procid), goStatus, gp.inMarkAssist)
+       }
+       return traceEventWriter{w}
+}
+
+// commit writes out a trace event and calls end. It's a helper to make the
+// common case of writing out a single event less error-prone.
+func (e traceEventWriter) commit(ev traceEv, args ...traceArg) {
+       e = e.write(ev, args...)
+       e.end()
+}
+
+// write writes an event into the trace.
+func (e traceEventWriter) write(ev traceEv, args ...traceArg) traceEventWriter {
+       e.w = e.w.event(ev, args...)
+       return e
+}
+
+// end finishes writing to the trace. The traceEventWriter must not be used after this call.
+func (e traceEventWriter) end() {
+       e.w.end()
+}
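+
+// A typical call site, sketched for illustration (the statuses, event type,
+// and arguments here are arbitrary, not a real emitter): emit any pending
+// status events, then one event, and finish.
+//
+//	tl.eventWriter(traceGoRunning, traceProcRunning).
+//		commit(traceEvGoStop, tl.string("reason"), tl.stack(1))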
+
+// traceEventWrite is the part of traceEvent that actually writes the event.
+func (w traceWriter) event(ev traceEv, args ...traceArg) traceWriter {
+       // Make sure we have room.
+       w, _ = w.ensure(1 + (len(args)+1)*traceBytesPerNumber)
+
+       // Compute the timestamp diff that we'll put in the trace.
+       ts := traceClockNow()
+       if ts <= w.traceBuf.lastTime {
+               ts = w.traceBuf.lastTime + 1
+       }
+       tsDiff := uint64(ts - w.traceBuf.lastTime)
+       w.traceBuf.lastTime = ts
+
+       // Write out event.
+       w.byte(byte(ev))
+       w.varint(tsDiff)
+       for _, arg := range args {
+               w.varint(uint64(arg))
+       }
+       return w
+}
+
+// stack takes a stack trace skipping the provided number of frames.
+// It then returns a traceArg representing that stack which may be
+// passed to write.
+func (tl traceLocker) stack(skip int) traceArg {
+       return traceArg(traceStack(skip, tl.mp, tl.gen))
+}
+
+// startPC takes a start PC for a goroutine and produces a unique
+// stack ID for it.
+//
+// It then returns a traceArg representing that stack which may be
+// passed to write.
+func (tl traceLocker) startPC(pc uintptr) traceArg {
+       // +PCQuantum because makeTraceFrame expects return PCs and subtracts PCQuantum.
+       return traceArg(trace.stackTab[tl.gen%2].put([]uintptr{
+               logicalStackSentinel,
+               startPCForTrace(pc) + sys.PCQuantum,
+       }))
+}
+
+// string returns a traceArg representing s which may be passed to write.
+// The string is assumed to be relatively short and popular, so it may be
+// stored for a while in the string dictionary.
+func (tl traceLocker) string(s string) traceArg {
+       return traceArg(trace.stringTab[tl.gen%2].put(tl.gen, s))
+}
+
+// uniqueString returns a traceArg representing s which may be passed to write.
+// The string is assumed to be unique or long, so it will be written out to
+// the trace eagerly.
+func (tl traceLocker) uniqueString(s string) traceArg {
+       return traceArg(trace.stringTab[tl.gen%2].emit(tl.gen, s))
+}
diff --git a/src/runtime/trace2map.go b/src/runtime/trace2map.go
new file mode 100644 (file)
index 0000000..4a5a7ec
--- /dev/null
@@ -0,0 +1,145 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.exectracer2
+
+// Simple hash table for tracing. Provides a mapping
+// between variable-length data and a unique ID. Subsequent
+// puts of the same data will return the same ID.
+//
+// Uses a region-based allocation scheme and assumes that the
+// table doesn't ever grow very big.
+//
+// This is definitely not a general-purpose hash table! It avoids
+// doing any high-level Go operations so it's safe to use even in
+// sensitive contexts.
+
+package runtime
+
+import (
+       "runtime/internal/atomic"
+       "runtime/internal/sys"
+       "unsafe"
+)
+
+type traceMap struct {
+       lock mutex // Must be acquired on the system stack
+       seq  atomic.Uint64
+       mem  traceRegionAlloc
+       tab  [1 << 13]atomic.UnsafePointer // *traceMapNode (can't use generics because it's notinheap)
+}
+
+type traceMapNode struct {
+       _    sys.NotInHeap
+       link atomic.UnsafePointer // *traceMapNode (can't use generics because it's notinheap)
+       hash uintptr
+       id   uint64
+       data []byte
+}
+
+// next is a type-safe wrapper around link.
+func (n *traceMapNode) next() *traceMapNode {
+       return (*traceMapNode)(n.link.Load())
+}
+
+// stealID steals an ID from the table, ensuring that it will not
+// appear in the table anymore.
+func (tab *traceMap) stealID() uint64 {
+       return tab.seq.Add(1)
+}
+
+// put inserts the data into the table.
+//
+// It's always safe to noescape data because its bytes are always copied.
+//
+// Returns a unique ID for the data and whether this is the first time
+// the data has been added to the map.
+func (tab *traceMap) put(data unsafe.Pointer, size uintptr) (uint64, bool) {
+       if size == 0 {
+               return 0, false
+       }
+       hash := memhash(data, 0, size)
+       // First, search the hashtable w/o the mutex.
+       if id := tab.find(data, size, hash); id != 0 {
+               return id, false
+       }
+       // Now, double-check under the mutex.
+       // Switch to the system stack so we can acquire tab.lock.
+       var id uint64
+       var added bool
+       systemstack(func() {
+               lock(&tab.lock)
+               if id = tab.find(data, size, hash); id != 0 {
+                       unlock(&tab.lock)
+                       return
+               }
+               // Create new record.
+               id = tab.seq.Add(1)
+               vd := tab.newTraceMapNode(data, size, hash, id)
+
+               // Insert it into the table.
+               //
+               // Update the link first, since the node isn't published yet.
+               // Then, store the node in the table as the new first node
+               // for the bucket.
+               part := int(hash % uintptr(len(tab.tab)))
+               vd.link.StoreNoWB(tab.tab[part].Load())
+               tab.tab[part].StoreNoWB(unsafe.Pointer(vd))
+               unlock(&tab.lock)
+
+               added = true
+       })
+       return id, added
+}
+
+// find looks up data in the table, assuming hash is a hash of data.
+//
+// Returns 0 if the data is not found, and the unique ID for it if it is.
+func (tab *traceMap) find(data unsafe.Pointer, size, hash uintptr) uint64 {
+       part := int(hash % uintptr(len(tab.tab)))
+       for vd := tab.bucket(part); vd != nil; vd = vd.next() {
+               // Synchronization not necessary. Once published to the table, these
+               // values are immutable.
+               if vd.hash == hash && uintptr(len(vd.data)) == size {
+                       if memequal(unsafe.Pointer(&vd.data[0]), data, size) {
+                               return vd.id
+                       }
+               }
+       }
+       return 0
+}
+
+// bucket is a type-safe wrapper for looking up a value in tab.tab.
+func (tab *traceMap) bucket(part int) *traceMapNode {
+       return (*traceMapNode)(tab.tab[part].Load())
+}
+
+func (tab *traceMap) newTraceMapNode(data unsafe.Pointer, size, hash uintptr, id uint64) *traceMapNode {
+       // Create data array.
+       sl := notInHeapSlice{
+               array: tab.mem.alloc(size),
+               len:   int(size),
+               cap:   int(size),
+       }
+       memmove(unsafe.Pointer(sl.array), data, size)
+
+       // Create metadata structure.
+       meta := (*traceMapNode)(unsafe.Pointer(tab.mem.alloc(unsafe.Sizeof(traceMapNode{}))))
+       *(*notInHeapSlice)(unsafe.Pointer(&meta.data)) = sl
+       meta.id = id
+       meta.hash = hash
+       return meta
+}
+
+// reset drops all allocated memory from the table and resets it.
+//
+// tab.lock must be held. Must run on the system stack because of this.
+//
+//go:systemstack
+func (tab *traceMap) reset() {
+       assertLockHeld(&tab.lock)
+       tab.mem.drop()
+       tab.seq.Store(0)
+       tab.tab = [1 << 13]atomic.UnsafePointer{}
+}
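+
+// Illustrative usage (a sketch; the real callers are the string and stack
+// tables): interning the same bytes twice yields the same ID, and only the
+// first put reports an insertion.
+//
+//	data := []byte("runtime.main")
+//	id1, first := tab.put(unsafe.Pointer(&data[0]), uintptr(len(data))) // first == true
+//	id2, again := tab.put(unsafe.Pointer(&data[0]), uintptr(len(data))) // id2 == id1, again == false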
diff --git a/src/runtime/trace2region.go b/src/runtime/trace2region.go
new file mode 100644 (file)
index 0000000..b514d12
--- /dev/null
@@ -0,0 +1,62 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.exectracer2
+
+// Simple not-in-heap bump-pointer traceRegion allocator.
+
+package runtime
+
+import (
+       "internal/goarch"
+       "runtime/internal/sys"
+       "unsafe"
+)
+
+// traceRegionAlloc is a non-thread-safe region allocator.
+// It holds a linked list of traceRegionAllocBlock.
+type traceRegionAlloc struct {
+       head *traceRegionAllocBlock
+       off  uintptr
+}
+
+// traceRegionAllocBlock is a block in traceRegionAlloc.
+//
+// traceRegionAllocBlock is allocated from non-GC'd memory, so it must not
+// contain heap pointers. Writes to pointers to traceRegionAllocBlocks do
+// not need write barriers.
+type traceRegionAllocBlock struct {
+       _    sys.NotInHeap
+       next *traceRegionAllocBlock
+       data [64<<10 - goarch.PtrSize]byte
+}
+
+// alloc allocates n-byte block.
+func (a *traceRegionAlloc) alloc(n uintptr) *notInHeap {
+       n = alignUp(n, goarch.PtrSize)
+       if a.head == nil || a.off+n > uintptr(len(a.head.data)) {
+               if n > uintptr(len(a.head.data)) {
+                       throw("traceRegion: alloc too large")
+               }
+               block := (*traceRegionAllocBlock)(sysAlloc(unsafe.Sizeof(traceRegionAllocBlock{}), &memstats.other_sys))
+               if block == nil {
+                       throw("traceRegion: out of memory")
+               }
+               block.next = a.head
+               a.head = block
+               a.off = 0
+       }
+       p := &a.head.data[a.off]
+       a.off += n
+       return (*notInHeap)(unsafe.Pointer(p))
+}
+
+// drop frees all previously allocated memory and resets the allocator.
+func (a *traceRegionAlloc) drop() {
+       for a.head != nil {
+               block := a.head
+               a.head = block.next
+               sysFree(unsafe.Pointer(block), unsafe.Sizeof(traceRegionAllocBlock{}), &memstats.other_sys)
+       }
+}
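+
+// Illustrative usage (a sketch): allocations bump a cursor through 64 KiB
+// blocks, and memory is only ever released in bulk via drop.
+//
+//	var a traceRegionAlloc
+//	p := a.alloc(24) // pointer-aligned, carved from the current block
+//	q := a.alloc(40) // typically from the same block as p
+//	a.drop()         // frees all blocks at once; p and q are now invalid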
diff --git a/src/runtime/trace2runtime.go b/src/runtime/trace2runtime.go
new file mode 100644 (file)
index 0000000..74aeb57
--- /dev/null
@@ -0,0 +1,695 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.exectracer2
+
+// Runtime -> tracer API.
+
+package runtime
+
+import (
+       "runtime/internal/atomic"
+       _ "unsafe" // for go:linkname
+)
+
+// gTraceState is per-G state for the tracer.
+type gTraceState struct {
+       traceSchedResourceState
+}
+
+// reset resets the gTraceState for a new goroutine.
+func (s *gTraceState) reset() {
+       s.seq = [2]uint64{}
+       // N.B. s.statusTraced is managed and cleared separately.
+}
+
+// mTraceState is per-M state for the tracer.
+type mTraceState struct {
+       seqlock atomic.Uintptr // seqlock indicating that this M is writing to a trace buffer.
+       buf     [2]*traceBuf   // Per-M traceBuf for writing. Indexed by trace.gen%2.
+       link    *m             // Snapshot of alllink or freelink.
+}
+
+// pTraceState is per-P state for the tracer.
+type pTraceState struct {
+       traceSchedResourceState
+
+       // mSyscallID is the ID of the M this was bound to before entering a syscall.
+       mSyscallID int64
+
+       // maySweep indicates that sweep events should be traced.
+       // This is used to defer the sweep start event until a span
+       // has actually been swept.
+       maySweep bool
+
+       // inSweep indicates that at least one sweep event has been traced.
+       inSweep bool
+
+       // swept and reclaimed track the number of bytes swept and reclaimed
+       // by sweeping in the current sweep loop (while maySweep was true).
+       swept, reclaimed uintptr
+}
+
+// traceLockInit initializes global trace locks.
+func traceLockInit() {
+       // Sharing a lock rank here is fine because they should never be accessed
+       // together. If they are, we want to find out immediately.
+       lockInit(&trace.stringTab[0].lock, lockRankTraceStrings)
+       lockInit(&trace.stringTab[0].tab.lock, lockRankTraceStrings)
+       lockInit(&trace.stringTab[1].lock, lockRankTraceStrings)
+       lockInit(&trace.stringTab[1].tab.lock, lockRankTraceStrings)
+       lockInit(&trace.stackTab[0].tab.lock, lockRankTraceStackTab)
+       lockInit(&trace.stackTab[1].tab.lock, lockRankTraceStackTab)
+       lockInit(&trace.lock, lockRankTrace)
+}
+
+// lockRankMayTraceFlush records the lock ranking effects of a
+// potential call to traceFlush.
+//
+// nosplit because traceAcquire is nosplit.
+//
+//go:nosplit
+func lockRankMayTraceFlush() {
+       lockWithRankMayAcquire(&trace.lock, getLockRank(&trace.lock))
+}
+
+// traceBlockReason is an enumeration of reasons a goroutine might block.
+// This is the interface the rest of the runtime uses to tell the
+// tracer why a goroutine blocked. The tracer then propagates this information
+// into the trace however it sees fit.
+//
+// Note that traceBlockReasons should not be compared, since reasons that are
+// distinct by name may *not* be distinct by value.
+type traceBlockReason uint8
+
+const (
+       traceBlockGeneric traceBlockReason = iota
+       traceBlockForever
+       traceBlockNet
+       traceBlockSelect
+       traceBlockCondWait
+       traceBlockSync
+       traceBlockChanSend
+       traceBlockChanRecv
+       traceBlockGCMarkAssist
+       traceBlockGCSweep
+       traceBlockSystemGoroutine
+       traceBlockPreempted
+       traceBlockDebugCall
+       traceBlockUntilGCEnds
+       traceBlockSleep
+)
+
+var traceBlockReasonStrings = [...]string{
+       traceBlockGeneric:         "unspecified",
+       traceBlockForever:         "forever",
+       traceBlockNet:             "network",
+       traceBlockSelect:          "select",
+       traceBlockCondWait:        "sync.(*Cond).Wait",
+       traceBlockSync:            "sync",
+       traceBlockChanSend:        "chan send",
+       traceBlockChanRecv:        "chan receive",
+       traceBlockGCMarkAssist:    "GC mark assist wait for work",
+       traceBlockGCSweep:         "GC background sweeper wait",
+       traceBlockSystemGoroutine: "system goroutine wait",
+       traceBlockPreempted:       "preempted",
+       traceBlockDebugCall:       "wait for debug call",
+       traceBlockUntilGCEnds:     "wait until GC ends",
+       traceBlockSleep:           "sleep",
+}
+
+// traceGoStopReason is an enumeration of reasons a goroutine might yield.
+//
+// Note that traceGoStopReasons should not be compared, since reasons that are
+// distinct by name may *not* be distinct by value.
+type traceGoStopReason uint8
+
+const (
+       traceGoStopGeneric traceGoStopReason = iota
+       traceGoStopGoSched
+       traceGoStopPreempted
+)
+
+var traceGoStopReasonStrings = [...]string{
+       traceGoStopGeneric:   "unspecified",
+       traceGoStopGoSched:   "runtime.GoSched",
+       traceGoStopPreempted: "preempted",
+}
+
+// traceEnabled returns true if the trace is currently enabled.
+//
+//go:nosplit
+func traceEnabled() bool {
+       return trace.gen.Load() != 0
+}
+
+// traceShuttingDown returns true if the trace is currently shutting down.
+func traceShuttingDown() bool {
+       return trace.shutdown.Load()
+}
+
+// traceLocker represents an M writing trace events. While a traceLocker value
+// is valid, the tracer observes all operations on the G/M/P or trace events being
+// written as happening atomically.
+type traceLocker struct {
+       mp  *m
+       gen uintptr
+}
+
+// debugTraceReentrancy enables checks for reentrant use of the tracer.
+//
+// This is optional because throwing in a function makes it instantly
+// not inlineable, and we want traceAcquire to be inlineable for
+// low overhead when the trace is disabled.
+const debugTraceReentrancy = false
+
+// traceAcquire prepares this M for writing one or more trace events.
+//
+// nosplit because it's called on the syscall path when stack movement is forbidden.
+//
+//go:nosplit
+func traceAcquire() traceLocker {
+       if !traceEnabled() {
+               return traceLocker{}
+       }
+       return traceAcquireEnabled()
+}
+
+// traceAcquireEnabled is the traceEnabled path for traceAcquire. It's explicitly
+// broken out so that traceAcquire remains inlineable, which keeps the overhead
+// of the tracer low when it's disabled.
+//
+// nosplit because it's called by traceAcquire, which is nosplit.
+//
+//go:nosplit
+func traceAcquireEnabled() traceLocker {
+       // Any time we acquire a traceLocker, we may flush a trace buffer. But
+       // buffer flushes are rare. Record the lock edge even if it doesn't happen
+       // this time.
+       lockRankMayTraceFlush()
+
+       // Prevent preemption.
+       mp := acquirem()
+
+       // Acquire the trace seqlock.
+       seq := mp.trace.seqlock.Add(1)
+       if debugTraceReentrancy && seq%2 != 1 {
+               throw("bad use of trace.seqlock or tracer is reentrant")
+       }
+
+       // N.B. This load of gen appears redundant with the one in traceEnabled.
+       // However, it's very important that the gen we use for writing to the trace
+       // is acquired under a traceLocker so traceAdvance can make sure no stale
+       // gen values are being used.
+       //
+       // Because we're doing this load again, it also means that the trace
+       // might end up being disabled when we load it. In that case we need to undo
+       // what we did and bail.
+       gen := trace.gen.Load()
+       if gen == 0 {
+               mp.trace.seqlock.Add(1)
+               releasem(mp)
+               return traceLocker{}
+       }
+       return traceLocker{mp, gen}
+}
+
+// ok returns true if the traceLocker is valid (i.e. tracing is enabled).
+//
+// nosplit because it's called on the syscall path when stack movement is forbidden.
+//
+//go:nosplit
+func (tl traceLocker) ok() bool {
+       return tl.gen != 0
+}
+
+// traceRelease indicates that this M is done writing trace events.
+//
+// nosplit because it's called on the syscall path when stack movement is forbidden.
+//
+//go:nosplit
+func traceRelease(tl traceLocker) {
+       seq := tl.mp.trace.seqlock.Add(1)
+       if debugTraceReentrancy && seq%2 != 0 {
+               print("runtime: seq=", seq, "\n")
+               throw("bad use of trace.seqlock")
+       }
+       releasem(tl.mp)
+}
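
Taken together, traceAcquire, ok, and traceRelease implement a per-M seqlock around event emission. A minimal sketch of the call pattern a runtime emission site is expected to follow (the site itself is hypothetical; GoSched is one of the event methods defined below):

    tl := traceAcquire()
    if tl.ok() {
            // The M's seqlock is now odd; traceAdvance waits for it to
            // become even again before declaring this generation done.
            tl.GoSched()
            traceRelease(tl)
    }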
+
+// traceExitingSyscall marks a goroutine as exiting the syscall slow path.
+//
+// Must be paired with a traceExitedSyscall call.
+func traceExitingSyscall() {
+       trace.exitingSyscall.Add(1)
+}
+
+// traceExitedSyscall marks a goroutine as having exited the syscall slow path.
+func traceExitedSyscall() {
+       trace.exitingSyscall.Add(-1)
+}
+
+// Gomaxprocs emits a ProcsChange event.
+func (tl traceLocker) Gomaxprocs(procs int32) {
+       tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvProcsChange, traceArg(procs), tl.stack(1))
+}
+
+// ProcStart traces a ProcStart event.
+//
+// Must be called with a valid P.
+func (tl traceLocker) ProcStart() {
+       pp := tl.mp.p.ptr()
+       // Procs are typically started within the scheduler when there is no user goroutine. If there is a user goroutine,
+       // it must be in _Gsyscall because the only time a goroutine is allowed to have its Proc moved around from under it
+       // is during a syscall.
+       tl.eventWriter(traceGoSyscall, traceProcIdle).commit(traceEvProcStart, traceArg(pp.id), pp.trace.nextSeq(tl.gen))
+}
+
+// ProcStop traces a ProcStop event.
+func (tl traceLocker) ProcStop(pp *p) {
+       // The only time a goroutine is allowed to have its Proc moved around
+       // from under it is during a syscall.
+       tl.eventWriter(traceGoSyscall, traceProcRunning).commit(traceEvProcStop)
+}
+
+// GCActive traces a GCActive event.
+//
+// Must be emitted by an actively running goroutine on an active P. This restriction can be changed
+// easily and only depends on where it's currently called.
+func (tl traceLocker) GCActive() {
+       tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvGCActive, traceArg(trace.seqGC))
+       // N.B. Only one GC can be running at a time, so this is naturally
+       // serialized by the caller.
+       trace.seqGC++
+}
+
+// GCStart traces a GCBegin event.
+//
+// Must be emitted by an actively running goroutine on an active P. This restriction can be changed
+// easily and only depends on where it's currently called.
+func (tl traceLocker) GCStart() {
+       tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvGCBegin, traceArg(trace.seqGC), tl.stack(3))
+       // N.B. Only one GC can be running at a time, so this is naturally
+       // serialized by the caller.
+       trace.seqGC++
+}
+
+// GCDone traces a GCEnd event.
+//
+// Must be emitted by an actively running goroutine on an active P. This restriction can be changed
+// easily and only depends on where it's currently called.
+func (tl traceLocker) GCDone() {
+       tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvGCEnd, traceArg(trace.seqGC))
+       // N.B. Only one GC can be running at a time, so this is naturally
+       // serialized by the caller.
+       trace.seqGC++
+}
+
+// STWStart traces a STWBegin event.
+func (tl traceLocker) STWStart(reason stwReason) {
+       // Although the current P may be in _Pgcstop here, we model the P as running during the STW. This deviates from the
+       // runtime's state tracking, but it's more accurate and doesn't result in any loss of information.
+       tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvSTWBegin, tl.string(reason.String()), tl.stack(2))
+}
+
+// STWDone traces a STWEnd event.
+func (tl traceLocker) STWDone() {
+       // Although the current P may be in _Pgcstop here, we model the P as running during the STW. This deviates from the
+       // runtime's state tracking, but it's more accurate and doesn't result in any loss of information.
+       tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvSTWEnd)
+}
+
+// GCSweepStart prepares to trace a sweep loop. This does not
+// emit any events until traceGCSweepSpan is called.
+//
+// GCSweepStart must be paired with traceGCSweepDone and there
+// must be no preemption points between these two calls.
+//
+// Must be called with a valid P.
+func (tl traceLocker) GCSweepStart() {
+       // Delay the actual GCSweepBegin event until the first span
+       // sweep. If we don't sweep anything, don't emit any events.
+       pp := tl.mp.p.ptr()
+       if pp.trace.maySweep {
+               throw("double traceGCSweepStart")
+       }
+       pp.trace.maySweep, pp.trace.swept, pp.trace.reclaimed = true, 0, 0
+}
+
+// GCSweepSpan traces the sweep of a single span. If this is
+// the first span swept since traceGCSweepStart was called, this
+// will emit a GCSweepBegin event.
+//
+// This may be called outside a traceGCSweepStart/traceGCSweepDone
+// pair; however, it will not emit any trace events in this case.
+//
+// Must be called with a valid P.
+func (tl traceLocker) GCSweepSpan(bytesSwept uintptr) {
+       pp := tl.mp.p.ptr()
+       if pp.trace.maySweep {
+               if pp.trace.swept == 0 {
+                       tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvGCSweepBegin, tl.stack(1))
+                       pp.trace.inSweep = true
+               }
+               pp.trace.swept += bytesSwept
+       }
+}
+
+// GCSweepDone finishes tracing a sweep loop. If any memory was
+// swept (i.e. traceGCSweepSpan emitted an event) then this will emit
+// a GCSweepEnd event.
+//
+// Must be called with a valid P.
+func (tl traceLocker) GCSweepDone() {
+       pp := tl.mp.p.ptr()
+       if !pp.trace.maySweep {
+               throw("missing traceGCSweepStart")
+       }
+       if pp.trace.inSweep {
+               tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvGCSweepEnd, traceArg(pp.trace.swept), traceArg(pp.trace.reclaimed))
+               pp.trace.inSweep = false
+       }
+       pp.trace.maySweep = false
+}
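
A hedged sketch of the intended pairing (spansToSweep and pageSize are illustrative names, not from this CL). GCSweepBegin is emitted lazily by the first GCSweepSpan, and GCSweepEnd only if something was actually swept:

    tl := traceAcquire()
    if tl.ok() {
            tl.GCSweepStart() // arms maySweep; emits nothing yet
            for _, s := range spansToSweep { // hypothetical sweep loop
                    tl.GCSweepSpan(s.npages * pageSize) // first call emits GCSweepBegin
            }
            tl.GCSweepDone() // emits GCSweepEnd iff a span was swept
            traceRelease(tl)
    }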
+
+// GCMarkAssistStart emits a MarkAssistBegin event.
+func (tl traceLocker) GCMarkAssistStart() {
+       tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvGCMarkAssistBegin, tl.stack(1))
+}
+
+// GCMarkAssistDone emits a MarkAssistEnd event.
+func (tl traceLocker) GCMarkAssistDone() {
+       tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvGCMarkAssistEnd)
+}
+
+// GoCreate emits a GoCreate event.
+func (tl traceLocker) GoCreate(newg *g, pc uintptr) {
+       newg.trace.setStatusTraced(tl.gen)
+       tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvGoCreate, traceArg(newg.goid), tl.startPC(pc), tl.stack(2))
+}
+
+// GoStart emits a GoStart event.
+//
+// Must be called with a valid P.
+func (tl traceLocker) GoStart() {
+       gp := getg().m.curg
+       pp := gp.m.p
+       w := tl.eventWriter(traceGoRunnable, traceProcRunning)
+       w = w.write(traceEvGoStart, traceArg(gp.goid), gp.trace.nextSeq(tl.gen))
+       if pp.ptr().gcMarkWorkerMode != gcMarkWorkerNotWorker {
+               w = w.write(traceEvGoLabel, trace.markWorkerLabels[tl.gen%2][pp.ptr().gcMarkWorkerMode])
+       }
+       w.end()
+}
+
+// GoEnd emits a GoDestroy event.
+//
+// TODO(mknyszek): Rename this to GoDestroy.
+func (tl traceLocker) GoEnd() {
+       tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvGoDestroy)
+}
+
+// GoSched emits a GoStop event with a GoSched reason.
+func (tl traceLocker) GoSched() {
+       tl.GoStop(traceGoStopGoSched)
+}
+
+// GoPreempt emits a GoStop event with a GoPreempted reason.
+func (tl traceLocker) GoPreempt() {
+       tl.GoStop(traceGoStopPreempted)
+}
+
+// GoStop emits a GoStop event with the provided reason.
+func (tl traceLocker) GoStop(reason traceGoStopReason) {
+       tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvGoStop, traceArg(trace.goStopReasons[tl.gen%2][reason]), tl.stack(1))
+}
+
+// GoPark emits a GoBlock event with the provided reason.
+//
+// TODO(mknyszek): Replace traceBlockReason with waitReason. It's silly
+// that we have both, and waitReason is way more descriptive.
+func (tl traceLocker) GoPark(reason traceBlockReason, skip int) {
+       tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvGoBlock, traceArg(trace.goBlockReasons[tl.gen%2][reason]), tl.stack(skip))
+}
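
For example, a hypothetical blocking site (not from this CL) for a channel send would look like the following; the reason is resolved through trace.goBlockReasons[tl.gen%2][reason] to a per-generation string ID, and skip hides the parking helpers from the recorded stack:

    tl := traceAcquire()
    if tl.ok() {
            tl.GoPark(traceBlockChanSend, 2) // skip=2: hide this helper and its caller
            traceRelease(tl)
    }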
+
+// GoUnpark emits a GoUnblock event.
+func (tl traceLocker) GoUnpark(gp *g, skip int) {
+       // Emit a GoWaiting status if necessary for the unblocked goroutine.
+       w := tl.eventWriter(traceGoRunning, traceProcRunning)
+       if !gp.trace.statusWasTraced(tl.gen) && gp.trace.acquireStatus(tl.gen) {
+               // Careful: don't use the event writer. We never want status or in-progress events
+               // to trigger more in-progress events.
+               w.w = w.w.writeGoStatus(gp.goid, -1, traceGoWaiting, gp.inMarkAssist)
+       }
+       w.commit(traceEvGoUnblock, traceArg(gp.goid), gp.trace.nextSeq(tl.gen), tl.stack(skip))
+}
+
+// GoSysCall emits a GoSyscallBegin event.
+//
+// Must be called with a valid P.
+func (tl traceLocker) GoSysCall() {
+       var skip int
+       switch {
+       case tracefpunwindoff():
+               // Unwind by skipping 1 frame relative to gp.syscallsp which is captured 3
+               // frames above this frame. For frame pointer unwinding we produce the same
+               // results by hard coding the number of frames in between our caller and the
+               // actual syscall, see cases below.
+               // TODO(felixge): Implement gp.syscallbp to avoid this workaround?
+               skip = 1
+       case GOOS == "solaris" || GOOS == "illumos":
+               // These platforms don't use a libc_read_trampoline.
+               skip = 3
+       default:
+               // Skip the extra trampoline frame used on most systems.
+               skip = 4
+       }
+       // Scribble down the M that the P is currently attached to.
+       tl.mp.p.ptr().trace.mSyscallID = int64(tl.mp.procid)
+       tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvGoSyscallBegin, tl.stack(skip))
+}
+
+// GoSysExit emits a GoSyscallEnd event, or a GoSyscallEndBlocked event in its
+// place if lostP is true.
+//
+// lostP must be true in all cases that a goroutine loses its P during a syscall.
+// This means it's not sufficient to check if it has no P. In particular, it needs to be
+// true in the following cases:
+// - The goroutine lost its P, it ran some other code, and then got it back. It's now running with that P.
+// - The goroutine lost its P and was unable to reacquire it, and is now running without a P.
+// - The goroutine lost its P and acquired a different one, and is now running with that P.
+func (tl traceLocker) GoSysExit(lostP bool) {
+       ev := traceEvGoSyscallEnd
+       procStatus := traceProcSyscall // Procs implicitly enter traceProcSyscall on GoSyscallBegin.
+       if lostP {
+               ev = traceEvGoSyscallEndBlocked
+               procStatus = traceProcRunning // If a G has a P when emitting this event, it reacquired a P and is indeed running.
+       } else {
+               tl.mp.p.ptr().trace.mSyscallID = -1
+       }
+       tl.eventWriter(traceGoSyscall, procStatus).commit(ev)
+}
+
+// ProcSteal indicates that our current M stole a P from another M.
+//
+// forMe indicates that the caller is stealing pp to wire it up to itself.
+//
+// The caller must have ownership of pp.
+func (tl traceLocker) ProcSteal(pp *p, forMe bool) {
+       // Grab the M ID we stole from.
+       mStolenFrom := pp.trace.mSyscallID
+       pp.trace.mSyscallID = -1
+
+       // The status of the proc and goroutine, if we need to emit one here, is not evident from the
+       // context of just emitting this event alone. There are two cases. Either we're trying to steal
+       // the P just to get its attention (e.g. STW or sysmon retake) or we're trying to steal a P for
+       // ourselves specifically to keep running. The two contexts look different, but can be summarized
+       // fairly succinctly. In the former, we're a regular running goroutine and proc, if we have either.
+       // In the latter, we're a goroutine in a syscall.
+       goStatus := traceGoRunning
+       procStatus := traceProcRunning
+       if forMe {
+               goStatus = traceGoSyscall
+               procStatus = traceProcSyscallAbandoned
+       }
+       w := tl.eventWriter(goStatus, procStatus)
+
+       // Emit the status of the P we're stealing. We may have *just* done this, but we may not have,
+       // even if forMe is true, depending on whether we wired the P to ourselves already.
+       if !pp.trace.statusWasTraced(tl.gen) && pp.trace.acquireStatus(tl.gen) {
+               // Careful: don't use the event writer. We never want status or in-progress events
+               // to trigger more in-progress events.
+               w.w = w.w.writeProcStatus(uint64(pp.id), traceProcSyscallAbandoned, pp.trace.inSweep)
+       }
+       w.commit(traceEvProcSteal, traceArg(pp.id), pp.trace.nextSeq(tl.gen), traceArg(mStolenFrom))
+}
+
+// GoSysBlock is a no-op in the new tracer.
+func (tl traceLocker) GoSysBlock(pp *p) {
+}
+
+// HeapAlloc emits a HeapAlloc event.
+func (tl traceLocker) HeapAlloc(live uint64) {
+       tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvHeapAlloc, traceArg(live))
+}
+
+// HeapGoal reads the current heap goal and emits a HeapGoal event.
+func (tl traceLocker) HeapGoal() {
+       heapGoal := gcController.heapGoal()
+       if heapGoal == ^uint64(0) {
+               // Heap-based triggering is disabled.
+               heapGoal = 0
+       }
+       tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvHeapGoal, traceArg(heapGoal))
+}
+
+// OneNewExtraM is a no-op in the new tracer. This is worth keeping around though because
+// it's a good place to insert a thread-level event about the new extra M.
+func (tl traceLocker) OneNewExtraM(_ *g) {
+}
+
+// GoCreateSyscall indicates that a goroutine has transitioned from dead to GoSyscall.
+//
+// Unlike GoCreate, the caller must be running on gp.
+//
+// This occurs when C code calls into Go. On pthread platforms it occurs only when
+// a C thread calls into Go code for the first time.
+func (tl traceLocker) GoCreateSyscall(gp *g) {
+       // N.B. We should never trace a status for this goroutine (which we're currently running on),
+       // since we want this to appear like goroutine creation.
+       gp.trace.setStatusTraced(tl.gen)
+       tl.eventWriter(traceGoBad, traceProcBad).commit(traceEvGoCreateSyscall, traceArg(gp.goid))
+}
+
+// GoDestroySyscall indicates that a goroutine has transitioned from GoSyscall to dead.
+//
+// Must not have a P.
+//
+// This occurs when Go code returns back to C. On pthread platforms it occurs only when
+// the C thread is destroyed.
+func (tl traceLocker) GoDestroySyscall() {
+       // N.B. If we trace a status here, we must never have a P, and we must be on a goroutine
+       // that is in the syscall state.
+       tl.eventWriter(traceGoSyscall, traceProcBad).commit(traceEvGoDestroySyscall)
+}
+
+// To access runtime functions from runtime/trace.
+// See runtime/trace/annotation.go
+
+// trace_userTaskCreate emits a UserTaskCreate event.
+//
+//go:linkname trace_userTaskCreate runtime/trace.userTaskCreate
+func trace_userTaskCreate(id, parentID uint64, taskType string) {
+       tl := traceAcquire()
+       if !tl.ok() {
+               // Need to do this check because the caller won't have it.
+               return
+       }
+       tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvUserTaskBegin, traceArg(id), traceArg(parentID), tl.string(taskType), tl.stack(3))
+       traceRelease(tl)
+}
+
+// trace_userTaskEnd emits a UserTaskEnd event.
+//
+//go:linkname trace_userTaskEnd runtime/trace.userTaskEnd
+func trace_userTaskEnd(id uint64) {
+       tl := traceAcquire()
+       if !tl.ok() {
+               // Need to do this check because the caller won't have it.
+               return
+       }
+       tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvUserTaskEnd, traceArg(id), tl.stack(2))
+       traceRelease(tl)
+}
+
+// trace_userRegion emits a UserRegionBegin or UserRegionEnd event,
+// depending on mode (0 == Begin, 1 == End).
+//
+// TODO(mknyszek): Just make this two functions.
+//
+//go:linkname trace_userRegion runtime/trace.userRegion
+func trace_userRegion(id, mode uint64, name string) {
+       tl := traceAcquire()
+       if !tl.ok() {
+               // Need to do this check because the caller won't have it.
+               return
+       }
+       var ev traceEv
+       switch mode {
+       case 0:
+               ev = traceEvUserRegionBegin
+       case 1:
+               ev = traceEvUserRegionEnd
+       default:
+               return
+       }
+       tl.eventWriter(traceGoRunning, traceProcRunning).commit(ev, traceArg(id), tl.string(name), tl.stack(3))
+       traceRelease(tl)
+}
+
+// trace_userLog emits a UserLog event.
+//
+//go:linkname trace_userLog runtime/trace.userLog
+func trace_userLog(id uint64, category, message string) {
+       tl := traceAcquire()
+       if !tl.ok() {
+               // Need to do this check because the caller won't have it.
+               return
+       }
+       tl.eventWriter(traceGoRunning, traceProcRunning).commit(traceEvUserLog, traceArg(id), tl.string(category), tl.uniqueString(message), tl.stack(3))
+       traceRelease(tl)
+}
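
On the runtime/trace side, each of these symbols corresponds to a bodyless function declaration that the linker binds to the implementation above; a hedged sketch of the consuming side (see runtime/trace/annotation.go for the real declarations):

    // In package runtime/trace (sketch):
    // func userTaskCreate(id, parent uint64, taskType string) // no body; provided by the runtime via linkname
    // func userLog(id uint64, category, message string)       // likewise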
+
+// traceProcFree is called when a P is destroyed.
+//
+// This must run on the system stack to match the old tracer.
+//
+//go:systemstack
+func traceProcFree(_ *p) {
+}
+
+// traceThreadDestroy is called when a thread is removed from
+// sched.freem.
+//
+// mp must not be able to emit trace events anymore.
+//
+// sched.lock must be held to synchronize with traceAdvance.
+func traceThreadDestroy(mp *m) {
+       assertLockHeld(&sched.lock)
+
+       // Flush all outstanding buffers to maintain the invariant
+       // that an M only has active buffers while on sched.freem
+       // or allm.
+       //
+       // Perform a traceAcquire/traceRelease on behalf of mp to
+       // synchronize with the tracer trying to flush our buffer
+       // as well.
+       seq := mp.trace.seqlock.Add(1)
+       if debugTraceReentrancy && seq%2 != 1 {
+               throw("bad use of trace.seqlock or tracer is reentrant")
+       }
+       systemstack(func() {
+               lock(&trace.lock)
+               for i := range mp.trace.buf {
+                       if mp.trace.buf[i] != nil {
+                               // N.B. traceBufFlush accepts a generation, but it
+                               // really just cares about gen%2.
+                               traceBufFlush(mp.trace.buf[i], uintptr(i))
+                               mp.trace.buf[i] = nil
+                       }
+               }
+               unlock(&trace.lock)
+       })
+       seq1 := mp.trace.seqlock.Add(1)
+       if seq1 != seq+1 {
+               print("runtime: seq1=", seq1, "\n")
+               throw("bad use of trace.seqlock")
+       }
+}
+
+// Not used in the new tracer; solely for compatibility with the old tracer.
+// nosplit because it's called from exitsyscall without a P.
+//
+//go:nosplit
+func (_ traceLocker) RecordSyscallExitedTime(_ *g, _ *p) {
+}
diff --git a/src/runtime/trace2stack.go b/src/runtime/trace2stack.go
new file mode 100644 (file)
index 0000000..ebfe7c5
--- /dev/null
@@ -0,0 +1,282 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.exectracer2
+
+// Trace stack table and acquisition.
+
+package runtime
+
+import (
+       "internal/abi"
+       "internal/goarch"
+       "unsafe"
+)
+
+const (
+       // Maximum number of PCs in a single stack trace.
+       // Since events contain only stack id rather than whole stack trace,
+       // we can allow quite large values here.
+       traceStackSize = 128
+
+       // logicalStackSentinel is a sentinel value at pcBuf[0] signifying that
+       // pcBuf[1:] holds a logical stack requiring no further processing. Any other
+       // value at pcBuf[0] represents a skip value to apply to the physical stack in
+       // pcBuf[1:] after inline expansion.
+       logicalStackSentinel = ^uintptr(0)
+)
+
+// traceStack captures a stack trace and registers it in the trace stack table.
+// It then returns its unique ID.
+//
+// skip controls the number of leaf frames to omit in order to hide tracer internals
+// from stack traces, see CL 5523.
+//
+// Avoid calling this function directly. gen needs to be the current generation
+// that this stack trace is being written out for, which needs to be synchronized with
+// generations moving forward. Prefer traceEventWriter.stack.
+func traceStack(skip int, mp *m, gen uintptr) uint64 {
+       var pcBuf [traceStackSize]uintptr
+
+       gp := getg()
+       curgp := gp.m.curg
+       nstk := 1
+       if tracefpunwindoff() || mp.hasCgoOnStack() {
+               // Slow path: Unwind using default unwinder. Used when frame pointer
+               // unwinding is unavailable or disabled (tracefpunwindoff), or might
+               // produce incomplete results or crashes (hasCgoOnStack). Note that no
+               // cgo callback related crashes have been observed yet. The main
+               // motivation is to take advantage of a potentially registered cgo
+               // symbolizer.
+               pcBuf[0] = logicalStackSentinel
+               if curgp == gp {
+                       nstk += callers(skip+1, pcBuf[1:])
+               } else if curgp != nil {
+                       nstk += gcallers(curgp, skip, pcBuf[1:])
+               }
+       } else {
+               // Fast path: Unwind using frame pointers.
+               pcBuf[0] = uintptr(skip)
+               if curgp == gp {
+                       nstk += fpTracebackPCs(unsafe.Pointer(getfp()), pcBuf[1:])
+               } else if curgp != nil {
+                       // We're called on the g0 stack through mcall(fn) or systemstack(fn). To
+                       // behave like gcallers above, we start unwinding from sched.bp, which
+                       // points to the caller frame of the leaf frame on g's stack. The return
+                       // address of the leaf frame is stored in sched.pc, which we manually
+                       // capture here.
+                       pcBuf[1] = curgp.sched.pc
+                       nstk += 1 + fpTracebackPCs(unsafe.Pointer(curgp.sched.bp), pcBuf[2:])
+               }
+       }
+       if nstk > 0 {
+               nstk-- // skip runtime.goexit
+       }
+       if nstk > 0 && curgp.goid == 1 {
+               nstk-- // skip runtime.main
+       }
+       id := trace.stackTab[gen%2].put(pcBuf[:nstk])
+       return id
+}
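
The two pcBuf encodings produced above are distinguished later by fpunwindExpand solely via pcBuf[0]; schematically (PC values hypothetical):

    // Slow path (default unwinder): logical frames, skip already applied.
    //   pcBuf = [logicalStackSentinel, pc1, pc2, ...]
    // Fast path (frame pointers): physical frames, skip still pending.
    //   pcBuf = [skip, retPC1, retPC2, ...]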
+
+// traceStackTable maps stack traces (arrays of PC's) to unique uint64 ids.
+// It is lock-free for reading.
+type traceStackTable struct {
+       tab traceMap
+}
+
+// put returns a unique id for the stack trace pcs and caches it in the table,
+// if it sees the trace for the first time.
+func (t *traceStackTable) put(pcs []uintptr) uint64 {
+       if len(pcs) == 0 {
+               return 0
+       }
+       id, _ := t.tab.put(noescape(unsafe.Pointer(&pcs[0])), uintptr(len(pcs))*unsafe.Sizeof(uintptr(0)))
+       return id
+}
+
+// dump writes all previously cached stacks to trace buffers,
+// releases all memory and resets state.
+//
+// This must run on the system stack because it flushes buffers and thus
+// may acquire trace.lock.
+//
+//go:systemstack
+func (t *traceStackTable) dump(gen uintptr) {
+       w := unsafeTraceWriter(gen, nil)
+
+       // Iterate over the table.
+       lock(&t.tab.lock)
+       for i := range t.tab.tab {
+               stk := t.tab.bucket(i)
+               for ; stk != nil; stk = stk.next() {
+                       stack := unsafe.Slice((*uintptr)(unsafe.Pointer(&stk.data[0])), uintptr(len(stk.data))/unsafe.Sizeof(uintptr(0)))
+
+                       // N.B. This might allocate, but that's OK because we're not writing to the M's buffer,
+                       // but one we're about to create (with ensure).
+                       frames := makeTraceFrames(gen, fpunwindExpand(stack))
+
+                       // maxBytes is the maximum number of bytes required to hold the encoded
+                       // stack, given that it contains len(frames) frames.
+                       maxBytes := 1 + (2+4*len(frames))*traceBytesPerNumber
+
+                       // Estimate the size of this record. This
+                       // bound is pretty loose, but avoids counting
+                       // lots of varint sizes.
+                       //
+                       // Add 1 because we might also write traceEvStacks.
+                       var flushed bool
+                       w, flushed = w.ensure(1 + maxBytes)
+                       if flushed {
+                               w.byte(byte(traceEvStacks))
+                       }
+
+                       // Emit stack event.
+                       w.byte(byte(traceEvStack))
+                       w.varint(uint64(stk.id))
+                       w.varint(uint64(len(frames)))
+                       for _, frame := range frames {
+                               w.varint(uint64(frame.PC))
+                               w.varint(frame.funcID)
+                               w.varint(frame.fileID)
+                               w.varint(frame.line)
+                       }
+               }
+       }
+       t.tab.reset()
+       unlock(&t.tab.lock)
+
+       w.flush().end()
+}
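
As a worked example of the bound above, assuming traceBytesPerNumber is the 10-byte worst case for a base-128 varint: a 10-frame stack gives maxBytes = 1 + (2+4*10)*10 = 421 bytes, and ensure reserves one byte beyond that for the possible traceEvStacks batch header.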
+
+// makeTraceFrames returns the frames corresponding to pcs. It may
+// allocate and may emit trace events.
+func makeTraceFrames(gen uintptr, pcs []uintptr) []traceFrame {
+       frames := make([]traceFrame, 0, len(pcs))
+       ci := CallersFrames(pcs)
+       for {
+               f, more := ci.Next()
+               frames = append(frames, makeTraceFrame(gen, f))
+               if !more {
+                       return frames
+               }
+       }
+}
+
+type traceFrame struct {
+       PC     uintptr
+       funcID uint64
+       fileID uint64
+       line   uint64
+}
+
+// makeTraceFrame sets up a traceFrame for a frame.
+func makeTraceFrame(gen uintptr, f Frame) traceFrame {
+       var frame traceFrame
+       frame.PC = f.PC
+
+       fn := f.Function
+       const maxLen = 1 << 10
+       if len(fn) > maxLen {
+               fn = fn[len(fn)-maxLen:]
+       }
+       frame.funcID = trace.stringTab[gen%2].put(gen, fn)
+       frame.line = uint64(f.Line)
+       file := f.File
+       if len(file) > maxLen {
+               file = file[len(file)-maxLen:]
+       }
+       frame.fileID = trace.stringTab[gen%2].put(gen, file)
+       return frame
+}
+
+// tracefpunwindoff returns true if frame pointer unwinding for the tracer is
+// disabled via GODEBUG or not supported by the architecture.
+func tracefpunwindoff() bool {
+       return debug.tracefpunwindoff != 0 || (goarch.ArchFamily != goarch.AMD64 && goarch.ArchFamily != goarch.ARM64)
+}
+
+// fpTracebackPCs populates pcBuf with the return addresses for each frame and
+// returns the number of PCs written to pcBuf. The returned PCs correspond to
+// "physical frames" rather than "logical frames"; that is if A is inlined into
+// B, this will return a PC for only B.
+func fpTracebackPCs(fp unsafe.Pointer, pcBuf []uintptr) (i int) {
+       for i = 0; i < len(pcBuf) && fp != nil; i++ {
+               // return addr sits one word above the frame pointer
+               pcBuf[i] = *(*uintptr)(unsafe.Pointer(uintptr(fp) + goarch.PtrSize))
+               // follow the frame pointer to the next one
+               fp = unsafe.Pointer(*(*uintptr)(fp))
+       }
+       return i
+}
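
The walk relies on the conventional frame layout on amd64 and arm64, the only architectures on which tracefpunwindoff permits it; with goarch.PtrSize == 8 each step reads:

    // fp+8: return address -> recorded into pcBuf[i]
    // fp+0: caller's fp    -> next value of fp (a nil fp terminates the walk)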
+
+// fpunwindExpand checks if pcBuf contains logical frames (which include inlined
+// frames) or physical frames (produced by frame pointer unwinding) using a
+// sentinel value in pcBuf[0]. Logical frames are simply returned without the
+// sentinel. Physical frames are turned into logical frames via inline unwinding
+// and by applying the skip value that's stored in pcBuf[0].
+func fpunwindExpand(pcBuf []uintptr) []uintptr {
+       if len(pcBuf) > 0 && pcBuf[0] == logicalStackSentinel {
+               // pcBuf contains logical rather than inlined frames, skip has already been
+               // applied, just return it without the sentinel value in pcBuf[0].
+               return pcBuf[1:]
+       }
+
+       var (
+               lastFuncID = abi.FuncIDNormal
+               newPCBuf   = make([]uintptr, 0, traceStackSize)
+               skip       = pcBuf[0]
+               // skipOrAdd skips or appends retPC to newPCBuf and returns true if more
+               // pcs can be added.
+               skipOrAdd = func(retPC uintptr) bool {
+                       if skip > 0 {
+                               skip--
+                       } else {
+                               newPCBuf = append(newPCBuf, retPC)
+                       }
+                       return len(newPCBuf) < cap(newPCBuf)
+               }
+       )
+
+outer:
+       for _, retPC := range pcBuf[1:] {
+               callPC := retPC - 1
+               fi := findfunc(callPC)
+               if !fi.valid() {
+                       // There is no funcInfo if callPC belongs to a C function. In this case
+                       // we still keep the pc, but don't attempt to expand inlined frames.
+                       if more := skipOrAdd(retPC); !more {
+                               break outer
+                       }
+                       continue
+               }
+
+               u, uf := newInlineUnwinder(fi, callPC)
+               for ; uf.valid(); uf = u.next(uf) {
+                       sf := u.srcFunc(uf)
+                       if sf.funcID == abi.FuncIDWrapper && elideWrapperCalling(lastFuncID) {
+                               // ignore wrappers
+                       } else if more := skipOrAdd(uf.pc + 1); !more {
+                               break outer
+                       }
+                       lastFuncID = sf.funcID
+               }
+       }
+       return newPCBuf
+}
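
Schematically (values hypothetical, and assuming each physical frame expands to a single logical frame): given the fast-path input pcBuf = [2, rA, rB, rC], the skip of 2 consumes the frames for rA and rB, and rC-1 is pushed through the inline unwinder, so a function inlined at that call site contributes an additional logical PC to the output.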
+
+// startPCForTrace returns the start PC of a goroutine for tracing purposes.
+// If pc is a wrapper, it returns the PC of the wrapped function. Otherwise it
+// returns pc.
+func startPCForTrace(pc uintptr) uintptr {
+       f := findfunc(pc)
+       if !f.valid() {
+               return pc // may happen for locked g in extra M since its pc is 0.
+       }
+       w := funcdata(f, abi.FUNCDATA_WrapInfo)
+       if w == nil {
+               return pc // not a wrapper
+       }
+       return f.datap.textAddr(*(*uint32)(w))
+}
diff --git a/src/runtime/trace2status.go b/src/runtime/trace2status.go
new file mode 100644 (file)
index 0000000..0f64452
--- /dev/null
@@ -0,0 +1,204 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.exectracer2
+
+// Trace goroutine and P status management.
+
+package runtime
+
+import "runtime/internal/atomic"
+
+// traceGoStatus is the status of a goroutine.
+//
+// They correspond directly to the various goroutine
+// statuses.
+type traceGoStatus uint8
+
+const (
+       traceGoBad traceGoStatus = iota
+       traceGoRunnable
+       traceGoRunning
+       traceGoSyscall
+       traceGoWaiting
+)
+
+// traceProcStatus is the status of a P.
+//
+// They mostly correspond to the various P statuses.
+type traceProcStatus uint8
+
+const (
+       traceProcBad traceProcStatus = iota
+       traceProcRunning
+       traceProcIdle
+       traceProcSyscall
+
+       // traceProcSyscallAbandoned is a special case of
+       // traceProcSyscall. It's used in the very specific case
+       // where the first a P is mentioned in a generation is
+       // part of a ProcSteal event. If that's the first time
+       // it's mentioned, then there's no GoSyscallBegin to
+       // connect the P stealing back to at that point. This
+       // special state indicates this to the parser, so it
+       // doesn't try to find a GoSyscallEndBlocked that
+       // corresponds with the ProcSteal.
+       traceProcSyscallAbandoned
+)
+
+// writeGoStatus emits a GoStatus event as well as any active ranges on the goroutine.
+func (w traceWriter) writeGoStatus(goid uint64, mid int64, status traceGoStatus, markAssist bool) traceWriter {
+       // The status should never be bad. Some invariant must have been violated.
+       if status == traceGoBad {
+               print("runtime: goid=", goid, "\n")
+               throw("attempted to trace a bad status for a goroutine")
+       }
+
+       // Trace the status.
+       w = w.event(traceEvGoStatus, traceArg(goid), traceArg(uint64(mid)), traceArg(status))
+
+       // Trace any special ranges that are in-progress.
+       if markAssist {
+               w = w.event(traceEvGCMarkAssistActive, traceArg(goid))
+       }
+       return w
+}
+
+// writeProcStatusForP emits a ProcStatus event for the provided p based on its status.
+//
+// The caller must fully own pp and it must be prevented from transitioning (e.g. this can be
+// called by a forEachP callback or from a STW).
+func (w traceWriter) writeProcStatusForP(pp *p, inSTW bool) traceWriter {
+       if !pp.trace.acquireStatus(w.gen) {
+               return w
+       }
+       var status traceProcStatus
+       switch pp.status {
+       case _Pidle, _Pgcstop:
+               status = traceProcIdle
+               if pp.status == _Pgcstop && inSTW {
+                       // N.B. a P that is running and currently has the world stopped will be
+                       // in _Pgcstop, but we model it as running in the tracer.
+                       status = traceProcRunning
+               }
+       case _Prunning, _Psyscall:
+               status = traceProcRunning
+       default:
+               throw("attempt to trace invalid or unsupported P status")
+       }
+       w = w.writeProcStatus(uint64(pp.id), status, pp.trace.inSweep)
+       return w
+}
+
+// writeProcStatus emits a ProcStatus event with all the provided information.
+//
+// The caller must have taken ownership of a P's status writing, and the P must be
+// prevented from transitioning.
+func (w traceWriter) writeProcStatus(pid uint64, status traceProcStatus, inSweep bool) traceWriter {
+       // The status should never be bad. Some invariant must have been violated.
+       if status == traceProcBad {
+               print("runtime: pid=", pid, "\n")
+               throw("attempted to trace a bad status for a proc")
+       }
+
+       // Trace the status.
+       w = w.event(traceEvProcStatus, traceArg(pid), traceArg(status))
+
+       // Trace any special ranges that are in-progress.
+       if inSweep {
+               w = w.event(traceEvGCSweepActive, traceArg(pid))
+       }
+       return w
+}
+
+// goStatusToTraceGoStatus translates the internal status to traceGoStatus.
+//
+// status must not be _Gdead or any status whose name has the suffix "_unused."
+func goStatusToTraceGoStatus(status uint32, wr waitReason) traceGoStatus {
+       // N.B. Ignore the _Gscan bit. We don't model it in the tracer.
+       var tgs traceGoStatus
+       switch status &^ _Gscan {
+       case _Grunnable:
+               tgs = traceGoRunnable
+       case _Grunning, _Gcopystack:
+               tgs = traceGoRunning
+       case _Gsyscall:
+               tgs = traceGoSyscall
+       case _Gwaiting, _Gpreempted:
+               // There are a number of cases where a G might end up in
+               // _Gwaiting even though it's actually running in a
+               // non-preemptive state and merely needs to present itself
+               // as preempted to the garbage collector. In these cases,
+               // we're not going to emit an event, and we want these
+               // goroutines to appear in the final trace as if they're
+               // running, not blocked.
+               tgs = traceGoWaiting
+               if status == _Gwaiting &&
+                       (wr == waitReasonStoppingTheWorld ||
+                               wr == waitReasonGCMarkTermination ||
+                               wr == waitReasonGarbageCollection ||
+                               wr == waitReasonTraceProcStatus ||
+                               wr == waitReasonGCWorkerActive) {
+                       tgs = traceGoRunning
+               }
+       case _Gdead:
+               throw("tried to trace dead goroutine")
+       default:
+               throw("tried to trace goroutine with invalid or unsupported status")
+       }
+       return tgs
+}
+
+// traceSchedResourceState is shared state for scheduling resources (i.e. fields common to
+// both Gs and Ps).
+type traceSchedResourceState struct {
+       // statusTraced indicates whether a status event was traced for this resource
+       // in a particular generation.
+       //
+       // There are 3 of these because when transitioning across generations, traceAdvance
+       // needs to be able to reliably observe whether a status was traced for the previous
+       // generation, while we need to clear the value for the next generation.
+       statusTraced [3]atomic.Uint32
+
+       // seq is the sequence counter for this scheduling resource's events.
+       // The purpose of the sequence counter is to establish a partial order between
+       // events that don't obviously happen serially (same M) in the stream of events.
+       //
+       // There are two of these so that we can reset the counter on each generation.
+       // This saves space in the resulting trace by keeping the counter small and allows
+       // GoStatus and GoCreate events to omit a sequence number (implicitly 0).
+       seq [2]uint64
+}
+
+// acquireStatus acquires the right to emit a Status event for the scheduling resource.
+func (r *traceSchedResourceState) acquireStatus(gen uintptr) bool {
+       if !r.statusTraced[gen%3].CompareAndSwap(0, 1) {
+               return false
+       }
+       r.readyNextGen(gen)
+       return true
+}
+
+// readyNextGen readies r for the generation following gen.
+func (r *traceSchedResourceState) readyNextGen(gen uintptr) {
+       nextGen := traceNextGen(gen)
+       r.seq[nextGen%2] = 0
+       r.statusTraced[nextGen%3].Store(0)
+}
+
+// statusWasTraced returns true if the sched resource's status was already acquired for tracing.
+func (r *traceSchedResourceState) statusWasTraced(gen uintptr) bool {
+       return r.statusTraced[gen%3].Load() != 0
+}
+
+// setStatusTraced indicates that the resource's status was already traced, for example
+// when a goroutine is created.
+func (r *traceSchedResourceState) setStatusTraced(gen uintptr) {
+       r.statusTraced[gen%3].Store(1)
+}
+
+// nextSeq returns the next sequence number for the resource.
+func (r *traceSchedResourceState) nextSeq(gen uintptr) traceArg {
+       r.seq[gen%2]++
+       return traceArg(r.seq[gen%2])
+}
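
A sketch of the generation-modular indexing these methods implement, with G as the current generation:

    // seq[G%2]:          event sequence counter for generation G
    // statusTraced[G%3]: 'status emitted' flag for generation G
    //
    // readyNextGen(G) clears the slots for G+1 while traceAdvance may still
    // inspect the slot for G-1; that overlap is why statusTraced needs three
    // slots while seq gets away with two.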
diff --git a/src/runtime/trace2string.go b/src/runtime/trace2string.go
new file mode 100644 (file)
index 0000000..cbb0ecf
--- /dev/null
@@ -0,0 +1,104 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.exectracer2
+
+// Trace string management.
+
+package runtime
+
+// Trace strings.
+
+const maxTraceStringLen = 1024
+
+// traceStringTable is a map of string -> unique ID that also manages
+// writing strings out into the trace.
+type traceStringTable struct {
+       // lock protects buf.
+       lock mutex
+       buf  *traceBuf // string batches to write out to the trace.
+
+       // tab is a mapping of string -> unique ID.
+       tab traceMap
+}
+
+// put adds a string to the table, emits it, and returns a unique ID for it.
+func (t *traceStringTable) put(gen uintptr, s string) uint64 {
+       // Put the string in the table.
+       ss := stringStructOf(&s)
+       id, added := t.tab.put(ss.str, uintptr(ss.len))
+       if added {
+               // Write the string to the buffer.
+               systemstack(func() {
+                       t.writeString(gen, id, s)
+               })
+       }
+       return id
+}
+
+// emit emits a string and creates an ID for it, but doesn't add it to the table. Returns the ID.
+func (t *traceStringTable) emit(gen uintptr, s string) uint64 {
+       // Grab an ID and write the string to the buffer.
+       id := t.tab.stealID()
+       systemstack(func() {
+               t.writeString(gen, id, s)
+       })
+       return id
+}
+
+// writeString writes the string to t.buf.
+//
+// Must run on the systemstack because it may flush buffers and thus could acquire trace.lock.
+//
+//go:systemstack
+func (t *traceStringTable) writeString(gen uintptr, id uint64, s string) {
+       // Truncate the string if necessary.
+       if len(s) > maxTraceStringLen {
+               s = s[:maxTraceStringLen]
+       }
+
+       lock(&t.lock)
+       w := unsafeTraceWriter(gen, t.buf)
+
+       // Ensure we have a place to write to.
+       var flushed bool
+       w, flushed = w.ensure(2 + 2*traceBytesPerNumber + len(s) /* traceEvStrings + traceEvString + ID + len + string data */)
+       if flushed {
+               // Annotate the batch as containing strings.
+               w.byte(byte(traceEvStrings))
+       }
+
+       // Write out the string.
+       w.byte(byte(traceEvString))
+       w.varint(id)
+       w.varint(uint64(len(s)))
+       w.stringData(s)
+
+       // Store back buf if it was updated during ensure.
+       t.buf = w.traceBuf
+       unlock(&t.lock)
+}
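
The resulting batch layout, byte for byte as written above (the numeric fields are varints, so their widths vary):

    // [traceEvStrings]                once per freshly flushed batch
    // then, for each string:
    // [traceEvString][varint id][varint len][string data: len bytes]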
+
+// reset clears the string table and flushes any buffers it has.
+//
+// Must be called only once the caller is certain nothing else will be
+// added to this table.
+//
+// Because it flushes buffers, this may acquire trace.lock and thus
+// must run on the systemstack.
+//
+//go:systemstack
+func (t *traceStringTable) reset(gen uintptr) {
+       if t.buf != nil {
+               lock(&trace.lock)
+               traceBufFlush(t.buf, gen)
+               unlock(&trace.lock)
+               t.buf = nil
+       }
+
+       // Reset the table.
+       lock(&t.tab.lock)
+       t.tab.reset()
+       unlock(&t.tab.lock)
+}
diff --git a/src/runtime/trace2time.go b/src/runtime/trace2time.go
new file mode 100644 (file)
index 0000000..8a4499e
--- /dev/null
@@ -0,0 +1,90 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.exectracer2
+
+// Trace time and clock.
+
+package runtime
+
+import "internal/goarch"
+
+// Timestamps in trace are produced through either nanotime or cputicks
+// and divided by traceTimeDiv. nanotime is used everywhere except on
+// platforms where osHasLowResClock is true, because the system clock
+// isn't granular enough to get useful information out of a trace in
+// many cases.
+//
+// This makes absolute values of timestamp diffs smaller, and so they are
+// encoded in fewer bytes.
+//
+// The target resolution in all cases is 64 nanoseconds.
+// This is based on the fact that fundamentally the execution tracer won't emit
+// events more frequently than roughly every 200 ns or so, because that's roughly
+// how long it takes to call through the scheduler.
+// We could be more aggressive and bump this up to 128 ns while still getting
+// useful data, but the extra bit doesn't save us that much and the headroom is
+// nice to have.
+//
+// Hitting this target resolution is easy in the nanotime case: just pick a
+// division of 64. In the cputicks case it's a bit more complex.
+//
+// For x86, on a 3 GHz machine, we'd want to divide by 3*64 to hit our target.
+// To keep the division operation efficient, we round that up to 4*64, or 256.
+// Given what cputicks represents, we use this on all other platforms except
+// for PowerPC.
+// The suggested increment frequency for PowerPC's time base register is
+// 512 MHz according to Power ISA v2.07 section 6.2, so we use 32 on ppc64
+// and ppc64le.
+const traceTimeDiv = (1-osHasLowResClockInt)*64 + osHasLowResClockInt*(256-224*(goarch.IsPpc64|goarch.IsPpc64le))
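
Evaluated per platform: with osHasLowResClockInt == 0 the expression reduces to 64 (the nanotime case); with osHasLowResClockInt == 1 it reduces to 256 on most cputicks platforms, and to 256 - 224 = 32 on ppc64 and ppc64le.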
+
+// traceTime represents a timestamp for the trace.
+type traceTime uint64
+
+// traceClockNow returns a monotonic timestamp. The clock this function gets
+// the timestamp from is specific to tracing, and shouldn't be mixed with other
+// clock sources.
+//
+// nosplit because it's called from exitsyscall, which is nosplit.
+//
+//go:nosplit
+func traceClockNow() traceTime {
+       if osHasLowResClock {
+               return traceTime(cputicks() / traceTimeDiv)
+       }
+       return traceTime(nanotime() / traceTimeDiv)
+}
+
+// traceClockUnitsPerSecond estimates the number of trace clock units that
+// elapse per second.
+func traceClockUnitsPerSecond() uint64 {
+       if osHasLowResClock {
+               // We're using cputicks as our clock, so we need a real estimate.
+               return uint64(ticksPerSecond())
+       }
+       // Our clock is nanotime, so it's just the constant time division.
+       // (trace clock units / nanoseconds) * (1e9 nanoseconds / 1 second)
+       return uint64(1.0 / float64(traceTimeDiv) * 1e9)
+}
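
For example, on a nanotime platform with traceTimeDiv = 64 this returns 1e9/64 = 15,625,000 trace clock units per second.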
+
+// traceFrequency writes a batch with a single EvFrequency event.
+//
+// freq is the number of trace clock units per second.
+func traceFrequency(gen uintptr) {
+       w := unsafeTraceWriter(gen, nil)
+
+       // Ensure we have a place to write to.
+       w, _ = w.ensure(1 + traceBytesPerNumber /* traceEvFrequency + frequency */)
+
+       // Write out the frequency event.
+       w.byte(byte(traceEvFrequency))
+       w.varint(traceClockUnitsPerSecond())
+
+       // Immediately flush the buffer.
+       systemstack(func() {
+               lock(&trace.lock)
+               traceBufFlush(w.traceBuf, gen)
+               unlock(&trace.lock)
+       })
+}
index 3f207aabc8e363a8e5f38768f31c807572a77335..d6357b12bb34076e2c1f8b2349011478dd92b960 100644 (file)
@@ -9,8 +9,10 @@ package runtime_test
 import (
        "bytes"
        "fmt"
+       "internal/goexperiment"
        "internal/testenv"
        "internal/trace"
+       tracev2 "internal/trace/v2"
        "io"
        "os"
        "runtime"
@@ -33,9 +35,17 @@ func TestTraceUnwindCGO(t *testing.T) {
                t.Fatal(err)
        }
 
-       logs := map[string]*trace.Event{
-               "goCalledFromC":       nil,
-               "goCalledFromCThread": nil,
+       wantLogs := []string{
+               "goCalledFromC",
+               "goCalledFromCThread",
+       }
+       logs := make(map[string]*trace.Event)
+       for _, category := range wantLogs {
+               logs[category] = nil
+       }
+       logsV2 := make(map[string]*tracev2.Event)
+       for _, category := range wantLogs {
+               logsV2[category] = nil
        }
        for _, tracefpunwindoff := range []int{1, 0} {
                env := fmt.Sprintf("GODEBUG=tracefpunwindoff=%d", tracefpunwindoff)
@@ -50,14 +60,25 @@ func TestTraceUnwindCGO(t *testing.T) {
                if err != nil {
                        t.Fatalf("failed to read trace: %s", err)
                }
-               events := parseTrace(t, bytes.NewReader(traceData))
+               if goexperiment.ExecTracer2 {
+                       for category := range logs {
+                               event := mustFindLogV2(t, bytes.NewReader(traceData), category)
+                               if wantEvent := logsV2[category]; wantEvent == nil {
+                                       logsV2[category] = &event
+                               } else if got, want := dumpStackV2(&event), dumpStackV2(wantEvent); got != want {
+                                       t.Errorf("%q: got stack:\n%s\nwant stack:\n%s\n", category, got, want)
+                               }
+                       }
+               } else {
+                       events := parseTrace(t, bytes.NewReader(traceData))
 
-               for category := range logs {
-                       event := mustFindLog(t, events, category)
-                       if wantEvent := logs[category]; wantEvent == nil {
-                               logs[category] = event
-                       } else if got, want := dumpStack(event), dumpStack(wantEvent); got != want {
-                               t.Errorf("%q: got stack:\n%s\nwant stack:\n%s\n", category, got, want)
+                       for category := range logs {
+                               event := mustFindLog(t, events, category)
+                               if wantEvent := logs[category]; wantEvent == nil {
+                                       logs[category] = event
+                               } else if got, want := dumpStack(event), dumpStack(wantEvent); got != want {
+                                       t.Errorf("%q: got stack:\n%s\nwant stack:\n%s\n", category, got, want)
+                               }
                        }
                }
        }
@@ -103,3 +124,40 @@ func parseTrace(t *testing.T, r io.Reader) []*trace.Event {
        }
        return res.Events
 }
+
+func mustFindLogV2(t *testing.T, trace io.Reader, category string) tracev2.Event {
+       r, err := tracev2.NewReader(trace)
+       if err != nil {
+               t.Fatalf("bad trace: %v", err)
+       }
+       var candidates []tracev2.Event
+       for {
+               ev, err := r.ReadEvent()
+               if err == io.EOF {
+                       break
+               }
+               if err != nil {
+                       t.Fatalf("failed to parse trace: %v", err)
+               }
+               if ev.Kind() == tracev2.EventLog && ev.Log().Category == category {
+                       candidates = append(candidates, ev)
+               }
+       }
+       if len(candidates) == 0 {
+               t.Fatalf("could not find log with category: %q", category)
+       } else if len(candidates) > 1 {
+               t.Fatalf("found more than one log with category: %q", category)
+       }
+       return candidates[0]
+}
+
+// dumpStackV2 returns e.Stack() as a string.
+func dumpStackV2(e *tracev2.Event) string {
+       var buf bytes.Buffer
+       e.Stack().Frames(func(f tracev2.StackFrame) bool {
+               file := strings.TrimPrefix(f.File, runtime.GOROOT())
+               fmt.Fprintf(&buf, "%s\n\t%s:%d\n", f.Func, file, f.Line)
+               return true
+       })
+       return buf.String()
+}