runtime: add execution tracer v2 behind GOEXPERIMENT=exectracer2

[gostls13.git] / src / runtime / trace / trace_test.go
diff --git a/src/runtime/trace/trace_test.go b/src/runtime/trace/trace_test.go

index 76d3d44ae70d1074d23659924e370d44855631c0..23a8d11c6f26237b7140a08d8703b024b0d00594 100644 (file)
--- a/src/runtime/trace/trace_test.go
+++ b/src/runtime/trace/trace_test.go
@@ -1,4 +1,4 @@
-// Copyright 2014 The Go Authors.  All rights reserved.
+// Copyright 2014 The Go Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.
  
@@ -6,38 +6,95 @@ package trace_test
  
  import (
         "bytes"
+       "context"
+       "flag"
+       "fmt"
+       "internal/goexperiment"
+       "internal/profile"
+       "internal/race"
         "internal/trace"
         "io"
         "net"
         "os"
         "runtime"
+       "runtime/pprof"
         . "runtime/trace"
+       "strconv"
+       "strings"
         "sync"
         "testing"
         "time"
  )
  
-func skipTraceTestsIfNeeded(t *testing.T) {
-       switch runtime.GOOS {
-       case "solaris":
-               t.Skip("skipping: solaris timer can go backwards (https://golang.org/issue/8976)")
-       case "darwin":
-               switch runtime.GOARCH {
-               case "arm", "arm64":
-                       // TODO(rsc): What does this have to do with the trace tests?
-                       // There is no forking here.
-                       t.Skipf("skipping on %s/%s, cannot fork", runtime.GOOS, runtime.GOARCH)
-               }
+var (
+       saveTraces = flag.Bool("savetraces", false, "save traces collected by tests")
+)
+
+// TestEventBatch tests Flush calls that happen during Start
+// don't produce corrupted traces.
+func TestEventBatch(t *testing.T) {
+       if race.Enabled {
+               t.Skip("skipping in race mode")
         }
+       if IsEnabled() {
+               t.Skip("skipping because -test.trace is set")
+       }
+       if testing.Short() {
+               t.Skip("skipping in short mode")
+       }
+       if goexperiment.ExecTracer2 {
+               t.Skip("skipping because this test is incompatible with the new tracer")
+       }
+       // During Start, bunch of records are written to reflect the current
+       // snapshot of the program, including state of each goroutines.
+       // And some string constants are written to the trace to aid trace
+       // parsing. This test checks Flush of the buffer occurred during
+       // this process doesn't cause corrupted traces.
+       // When a Flush is called during Start is complicated
+       // so we test with a range of number of goroutines hoping that one
+       // of them triggers Flush.
+       // This range was chosen to fill up a ~64KB buffer with traceEvGoCreate
+       // and traceEvGoWaiting events (12~13bytes per goroutine).
+       for g := 4950; g < 5050; g++ {
+               n := g
+               t.Run("G="+strconv.Itoa(n), func(t *testing.T) {
+                       var wg sync.WaitGroup
+                       wg.Add(n)
+
+                       in := make(chan bool, 1000)
+                       for i := 0; i < n; i++ {
+                               go func() {
+                                       <-in
+                                       wg.Done()
+                               }()
+                       }
+                       buf := new(bytes.Buffer)
+                       if err := Start(buf); err != nil {
+                               t.Fatalf("failed to start tracing: %v", err)
+                       }
  
-       switch runtime.GOARCH {
-       case "arm":
-               t.Skip("skipping: arm tests fail with 'failed to parse trace' (https://golang.org/issue/9725)")
+                       for i := 0; i < n; i++ {
+                               in <- true
+                       }
+                       wg.Wait()
+                       Stop()
+
+                       _, err := trace.Parse(buf, "")
+                       if err == trace.ErrTimeOrder {
+                               t.Skipf("skipping trace: %v", err)
+                       }
+
+                       if err != nil {
+                               t.Fatalf("failed to parse trace: %v", err)
+                       }
+               })
         }
  }
  
  func TestTraceStartStop(t *testing.T) {
-       skipTraceTestsIfNeeded(t)
+       if IsEnabled() {
+               t.Skip("skipping because -test.trace is set")
+       }
         buf := new(bytes.Buffer)
         if err := Start(buf); err != nil {
                 t.Fatalf("failed to start tracing: %v", err)
@@ -51,10 +108,13 @@ func TestTraceStartStop(t *testing.T) {
         if size != buf.Len() {
                 t.Fatalf("trace writes after stop: %v -> %v", size, buf.Len())
         }
+       saveTrace(t, buf, "TestTraceStartStop")
  }
  
  func TestTraceDoubleStart(t *testing.T) {
-       skipTraceTestsIfNeeded(t)
+       if IsEnabled() {
+               t.Skip("skipping because -test.trace is set")
+       }
         Stop()
         buf := new(bytes.Buffer)
         if err := Start(buf); err != nil {
@@ -68,13 +128,20 @@ func TestTraceDoubleStart(t *testing.T) {
  }
  
  func TestTrace(t *testing.T) {
-       skipTraceTestsIfNeeded(t)
+       if IsEnabled() {
+               t.Skip("skipping because -test.trace is set")
+       }
+       if goexperiment.ExecTracer2 {
+               // An equivalent test exists in internal/trace/v2.
+               t.Skip("skipping because this test is incompatible with the new tracer")
+       }
         buf := new(bytes.Buffer)
         if err := Start(buf); err != nil {
                 t.Fatalf("failed to start tracing: %v", err)
         }
         Stop()
-       _, err := trace.Parse(buf)
+       saveTrace(t, buf, "TestTrace")
+       _, err := trace.Parse(buf, "")
         if err == trace.ErrTimeOrder {
                 t.Skipf("skipping trace: %v", err)
         }
@@ -83,25 +150,62 @@ func TestTrace(t *testing.T) {
         }
  }
  
-func parseTrace(t *testing.T, r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc, error) {
-       events, err := trace.Parse(r)
+func parseTrace(t *testing.T, r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc) {
+       res, err := trace.Parse(r, "")
         if err == trace.ErrTimeOrder {
                 t.Skipf("skipping trace: %v", err)
         }
         if err != nil {
-               return nil, nil, err
+               t.Fatalf("failed to parse trace: %v", err)
         }
-       gs := trace.GoroutineStats(events)
+       gs := trace.GoroutineStats(res.Events)
         for goid := range gs {
                 // We don't do any particular checks on the result at the moment.
                 // But still check that RelatedGoroutines does not crash, hang, etc.
-               _ = trace.RelatedGoroutines(events, goid)
+               _ = trace.RelatedGoroutines(res.Events, goid)
+       }
+       return res.Events, gs
+}
+
+func testBrokenTimestamps(t *testing.T, data []byte) {
+       // On some processors cputicks (used to generate trace timestamps)
+       // produce non-monotonic timestamps. It is important that the parser
+       // distinguishes logically inconsistent traces (e.g. missing, excessive
+       // or misordered events) from broken timestamps. The former is a bug
+       // in tracer, the latter is a machine issue.
+       // So now that we have a consistent trace, test that (1) parser does
+       // not return a logical error in case of broken timestamps
+       // and (2) broken timestamps are eventually detected and reported.
+       trace.BreakTimestampsForTesting = true
+       defer func() {
+               trace.BreakTimestampsForTesting = false
+       }()
+       for i := 0; i < 1e4; i++ {
+               _, err := trace.Parse(bytes.NewReader(data), "")
+               if err == trace.ErrTimeOrder {
+                       return
+               }
+               if err != nil {
+                       t.Fatalf("failed to parse trace: %v", err)
+               }
         }
-       return events, gs, nil
  }
  
  func TestTraceStress(t *testing.T) {
-       skipTraceTestsIfNeeded(t)
+       switch runtime.GOOS {
+       case "js", "wasip1":
+               t.Skip("no os.Pipe on " + runtime.GOOS)
+       }
+       if IsEnabled() {
+               t.Skip("skipping because -test.trace is set")
+       }
+       if testing.Short() {
+               t.Skip("skipping in -short mode")
+       }
+       if goexperiment.ExecTracer2 {
+               // An equivalent test exists in internal/trace/v2.
+               t.Skip("skipping because this test is incompatible with the new tracer")
+       }
  
         var wg sync.WaitGroup
         done := make(chan bool)
@@ -153,7 +257,13 @@ func TestTraceStress(t *testing.T) {
  
         runtime.GC()
         // Trigger GC from malloc.
-       for i := 0; i < 1e3; i++ {
+       n := int(1e3)
+       if isMemoryConstrained() {
+               // Reduce allocation to avoid running out of
+               // memory on the builder - see issue/12032.
+               n = 512
+       }
+       for i := 0; i < n; i++ {
                 _ = make([]byte, 1<<20)
         }
  
@@ -227,17 +337,41 @@ func TestTraceStress(t *testing.T) {
         runtime.GOMAXPROCS(procs)
  
         Stop()
-       _, _, err = parseTrace(t, buf)
-       if err != nil {
-               t.Fatalf("failed to parse trace: %v", err)
+       saveTrace(t, buf, "TestTraceStress")
+       trace := buf.Bytes()
+       parseTrace(t, buf)
+       testBrokenTimestamps(t, trace)
+}
+
+// isMemoryConstrained reports whether the current machine is likely
+// to be memory constrained.
+// This was originally for the openbsd/arm builder (Issue 12032).
+// TODO: move this to testenv? Make this look at memory? Look at GO_BUILDER_NAME?
+func isMemoryConstrained() bool {
+       if runtime.GOOS == "plan9" {
+               return true
+       }
+       switch runtime.GOARCH {
+       case "arm", "mips", "mipsle":
+               return true
         }
+       return false
  }
  
  // Do a bunch of various stuff (timers, GC, network, etc) in a separate goroutine.
  // And concurrently with all that start/stop trace 3 times.
  func TestTraceStressStartStop(t *testing.T) {
-       skipTraceTestsIfNeeded(t)
-
+       switch runtime.GOOS {
+       case "js", "wasip1":
+               t.Skip("no os.Pipe on " + runtime.GOOS)
+       }
+       if IsEnabled() {
+               t.Skip("skipping because -test.trace is set")
+       }
+       if goexperiment.ExecTracer2 {
+               // An equivalent test exists in internal/trace/v2.
+               t.Skip("skipping because this test is incompatible with the new tracer")
+       }
         defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8))
         outerDone := make(chan bool)
  
@@ -257,7 +391,8 @@ func TestTraceStressStartStop(t *testing.T) {
  
                 rp, wp, err := os.Pipe()
                 if err != nil {
-                       t.Fatalf("failed to create pipe: %v", err)
+                       t.Errorf("failed to create pipe: %v", err)
+                       return
                 }
                 defer func() {
                         rp.Close()
@@ -286,7 +421,13 @@ func TestTraceStressStartStop(t *testing.T) {
  
                 runtime.GC()
                 // Trigger GC from malloc.
-               for i := 0; i < 1e3; i++ {
+               n := int(1e3)
+               if isMemoryConstrained() {
+                       // Reduce allocation to avoid running out of
+                       // memory on the builder.
+                       n = 512
+               }
+               for i := 0; i < n; i++ {
                         _ = make([]byte, 1<<20)
                 }
  
@@ -327,7 +468,8 @@ func TestTraceStressStartStop(t *testing.T) {
                 // A bit of network.
                 ln, err := net.Listen("tcp", "127.0.0.1:0")
                 if err != nil {
-                       t.Fatalf("listen failed: %v", err)
+                       t.Errorf("listen failed: %v", err)
+                       return
                 }
                 defer ln.Close()
                 go func() {
@@ -342,7 +484,8 @@ func TestTraceStressStartStop(t *testing.T) {
                 }()
                 c, err := net.Dial("tcp", ln.Addr().String())
                 if err != nil {
-                       t.Fatalf("dial failed: %v", err)
+                       t.Errorf("dial failed: %v", err)
+                       return
                 }
                 var tmp [1]byte
                 c.Read(tmp[:])
@@ -367,21 +510,21 @@ func TestTraceStressStartStop(t *testing.T) {
                 }
                 time.Sleep(time.Millisecond)
                 Stop()
-               if _, _, err := parseTrace(t, buf); err != nil {
-                       t.Fatalf("failed to parse trace: %v", err)
-               }
+               saveTrace(t, buf, "TestTraceStressStartStop")
+               trace := buf.Bytes()
+               parseTrace(t, buf)
+               testBrokenTimestamps(t, trace)
         }
         <-outerDone
  }
  
  func TestTraceFutileWakeup(t *testing.T) {
-       // The test generates a full-load of futile wakeups on channels,
-       // and ensures that the trace is consistent after their removal.
-       skipTraceTestsIfNeeded(t)
-       if runtime.GOOS == "linux" && runtime.GOARCH == "ppc64le" {
-               t.Skip("test is unreliable; issue #10512")
+       if IsEnabled() {
+               t.Skip("skipping because -test.trace is set")
+       }
+       if goexperiment.ExecTracer2 {
+               t.Skip("skipping because this test is incompatible with the new tracer")
         }
-
         buf := new(bytes.Buffer)
         if err := Start(buf); err != nil {
                 t.Fatalf("failed to start tracing: %v", err)
@@ -434,10 +577,8 @@ func TestTraceFutileWakeup(t *testing.T) {
         done.Wait()
  
         Stop()
-       events, _, err := parseTrace(t, buf)
-       if err != nil {
-               t.Fatalf("failed to parse trace: %v", err)
-       }
+       saveTrace(t, buf, "TestTraceFutileWakeup")
+       events, _ := parseTrace(t, buf)
         // Check that (1) trace does not contain EvFutileWakeup events and
         // (2) there are no consecutive EvGoBlock/EvGCStart/EvGoBlock events
         // (we call runtime.Gosched between all operations, so these would be futile wakeups).
@@ -465,3 +606,212 @@ func TestTraceFutileWakeup(t *testing.T) {
                 }
         }
  }
+
+func TestTraceCPUProfile(t *testing.T) {
+       if IsEnabled() {
+               t.Skip("skipping because -test.trace is set")
+       }
+       if goexperiment.ExecTracer2 {
+               // An equivalent test exists in internal/trace/v2.
+               t.Skip("skipping because this test is incompatible with the new tracer")
+       }
+
+       cpuBuf := new(bytes.Buffer)
+       if err := pprof.StartCPUProfile(cpuBuf); err != nil {
+               t.Skipf("failed to start CPU profile: %v", err)
+       }
+
+       buf := new(bytes.Buffer)
+       if err := Start(buf); err != nil {
+               t.Fatalf("failed to start tracing: %v", err)
+       }
+
+       dur := 100 * time.Millisecond
+       func() {
+               // Create a region in the execution trace. Set and clear goroutine
+               // labels fully within that region, so we know that any CPU profile
+               // sample with the label must also be eligible for inclusion in the
+               // execution trace.
+               ctx := context.Background()
+               defer StartRegion(ctx, "cpuHogger").End()
+               pprof.Do(ctx, pprof.Labels("tracing", "on"), func(ctx context.Context) {
+                       cpuHogger(cpuHog1, &salt1, dur)
+               })
+               // Be sure the execution trace's view, when filtered to this goroutine
+               // via the explicit goroutine ID in each event, gets many more samples
+               // than the CPU profiler when filtered to this goroutine via labels.
+               cpuHogger(cpuHog1, &salt1, dur)
+       }()
+
+       Stop()
+       pprof.StopCPUProfile()
+       saveTrace(t, buf, "TestTraceCPUProfile")
+
+       prof, err := profile.Parse(cpuBuf)
+       if err != nil {
+               t.Fatalf("failed to parse CPU profile: %v", err)
+       }
+       // Examine the CPU profiler's view. Filter it to only include samples from
+       // the single test goroutine. Use labels to execute that filter: they should
+       // apply to all work done while that goroutine is getg().m.curg, and they
+       // should apply to no other goroutines.
+       pprofSamples := 0
+       pprofStacks := make(map[string]int)
+       for _, s := range prof.Sample {
+               if s.Label["tracing"] != nil {
+                       var fns []string
+                       var leaf string
+                       for _, loc := range s.Location {
+                               for _, line := range loc.Line {
+                                       fns = append(fns, fmt.Sprintf("%s:%d", line.Function.Name, line.Line))
+                                       leaf = line.Function.Name
+                               }
+                       }
+                       // runtime.sigprof synthesizes call stacks when "normal traceback is
+                       // impossible or has failed", using particular placeholder functions
+                       // to represent common failure cases. Look for those functions in
+                       // the leaf position as a sign that the call stack and its
+                       // symbolization are more complex than this test can handle.
+                       //
+                       // TODO: Make the symbolization done by the execution tracer and CPU
+                       // profiler match up even in these harder cases. See #53378.
+                       switch leaf {
+                       case "runtime._System", "runtime._GC", "runtime._ExternalCode", "runtime._VDSO":
+                               continue
+                       }
+                       stack := strings.Join(fns, " ")
+                       samples := int(s.Value[0])
+                       pprofSamples += samples
+                       pprofStacks[stack] += samples
+               }
+       }
+       if pprofSamples == 0 {
+               t.Skipf("CPU profile did not include any samples while tracing was active\n%s", prof)
+       }
+
+       // Examine the execution tracer's view of the CPU profile samples. Filter it
+       // to only include samples from the single test goroutine. Use the goroutine
+       // ID that was recorded in the events: that should reflect getg().m.curg,
+       // same as the profiler's labels (even when the M is using its g0 stack).
+       totalTraceSamples := 0
+       traceSamples := 0
+       traceStacks := make(map[string]int)
+       events, _ := parseTrace(t, buf)
+       var hogRegion *trace.Event
+       for _, ev := range events {
+               if ev.Type == trace.EvUserRegion && ev.Args[1] == 0 && ev.SArgs[0] == "cpuHogger" {
+                       // mode "0" indicates region start
+                       hogRegion = ev
+               }
+       }
+       if hogRegion == nil {
+               t.Fatalf("execution trace did not identify cpuHogger goroutine")
+       } else if hogRegion.Link == nil {
+               t.Fatalf("execution trace did not close cpuHogger region")
+       }
+       for _, ev := range events {
+               if ev.Type == trace.EvCPUSample {
+                       totalTraceSamples++
+                       if ev.G == hogRegion.G {
+                               traceSamples++
+                               var fns []string
+                               for _, frame := range ev.Stk {
+                                       if frame.Fn != "runtime.goexit" {
+                                               fns = append(fns, fmt.Sprintf("%s:%d", frame.Fn, frame.Line))
+                                       }
+                               }
+                               stack := strings.Join(fns, " ")
+                               traceStacks[stack]++
+                       }
+               }
+       }
+
+       // The execution trace may drop CPU profile samples if the profiling buffer
+       // overflows. Based on the size of profBufWordCount, that takes a bit over
+       // 1900 CPU samples or 19 thread-seconds at a 100 Hz sample rate. If we've
+       // hit that case, then we definitely have at least one full buffer's worth
+       // of CPU samples, so we'll call that success.
+       overflowed := totalTraceSamples >= 1900
+       if traceSamples < pprofSamples {
+               t.Logf("execution trace did not include all CPU profile samples; %d in profile, %d in trace", pprofSamples, traceSamples)
+               if !overflowed {
+                       t.Fail()
+               }
+       }
+
+       for stack, traceSamples := range traceStacks {
+               pprofSamples := pprofStacks[stack]
+               delete(pprofStacks, stack)
+               if traceSamples < pprofSamples {
+                       t.Logf("execution trace did not include all CPU profile samples for stack %q; %d in profile, %d in trace",
+                               stack, pprofSamples, traceSamples)
+                       if !overflowed {
+                               t.Fail()
+                       }
+               }
+       }
+       for stack, pprofSamples := range pprofStacks {
+               t.Logf("CPU profile included %d samples at stack %q not present in execution trace", pprofSamples, stack)
+               if !overflowed {
+                       t.Fail()
+               }
+       }
+
+       if t.Failed() {
+               t.Logf("execution trace CPU samples:")
+               for stack, samples := range traceStacks {
+                       t.Logf("%d: %q", samples, stack)
+               }
+               t.Logf("CPU profile:\n%v", prof)
+       }
+}
+
+func cpuHogger(f func(x int) int, y *int, dur time.Duration) {
+       // We only need to get one 100 Hz clock tick, so we've got
+       // a large safety buffer.
+       // But do at least 500 iterations (which should take about 100ms),
+       // otherwise TestCPUProfileMultithreaded can fail if only one
+       // thread is scheduled during the testing period.
+       t0 := time.Now()
+       accum := *y
+       for i := 0; i < 500 || time.Since(t0) < dur; i++ {
+               accum = f(accum)
+       }
+       *y = accum
+}
+
+var (
+       salt1 = 0
+)
+
+// The actual CPU hogging function.
+// Must not call other functions nor access heap/globals in the loop,
+// otherwise under race detector the samples will be in the race runtime.
+func cpuHog1(x int) int {
+       return cpuHog0(x, 1e5)
+}
+
+func cpuHog0(x, n int) int {
+       foo := x
+       for i := 0; i < n; i++ {
+               if i%1000 == 0 {
+                       // Spend time in mcall, stored as gp.m.curg, with g0 running
+                       runtime.Gosched()
+               }
+               if foo > 0 {
+                       foo *= foo
+               } else {
+                       foo *= foo + 1
+               }
+       }
+       return foo
+}
+
+func saveTrace(t *testing.T, buf *bytes.Buffer, name string) {
+       if !*saveTraces {
+               return
+       }
+       if err := os.WriteFile(name+".trace", buf.Bytes(), 0600); err != nil {
+               t.Errorf("failed to write trace file: %s", err)
+       }
+}