1 // Copyright 2023 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
13 "internal/trace/v2/testtrace"
// TestTraceAnnotations runs testdata/testprog/annotations.go and verifies
// that the user annotation events it emits (task begin/end, region begin/end,
// log) appear in the trace with the expected task IDs and string arguments.
// NOTE(review): this listing is elided — the `want` slice declaration, error
// handling, and closing braces are not visible here.
22 func TestTraceAnnotations(t *testing.T) {
23 testTraceProg(t, "annotations.go", func(t *testing.T, tb, _ []byte, _ bool) {
// Expected events: each entry pairs an event kind with its owning task
// and the string arguments (type names / log category+message).
30 {trace.EventTaskBegin, trace.TaskID(1), []string{"task0"}},
31 {trace.EventRegionBegin, trace.TaskID(1), []string{"region0"}},
32 {trace.EventRegionBegin, trace.TaskID(1), []string{"region1"}},
33 {trace.EventLog, trace.TaskID(1), []string{"key0", "0123456789abcdef"}},
34 {trace.EventRegionEnd, trace.TaskID(1), []string{"region1"}},
35 {trace.EventRegionEnd, trace.TaskID(1), []string{"region0"}},
36 {trace.EventTaskEnd, trace.TaskID(1), []string{"task0"}},
37 // Currently, pre-existing region is not recorded to avoid allocations.
38 {trace.EventRegionBegin, trace.NoTask, []string{"post-existing region"}},
// Parse the raw trace bytes and scan every event, matching each against
// the outstanding `want` entries.
40 r, err := trace.NewReader(bytes.NewReader(tb))
45 ev, err := r.ReadEvent()
52 for i, wantEv := range want {
53 if wantEv.kind != ev.Kind() {
// Compare the payload by kind: tasks match on ID and type name, regions
// on owning task and type name, logs on task, category, and message.
58 case trace.EventTaskBegin, trace.EventTaskEnd:
60 match = task.ID == wantEv.task && task.Type == wantEv.args[0]
61 case trace.EventRegionBegin, trace.EventRegionEnd:
63 match = reg.Task == wantEv.task && reg.Type == wantEv.args[0]
66 match = log.Task == wantEv.task && log.Category == wantEv.args[0] && log.Message == wantEv.args[1]
// A matched expectation is removed via swap-with-last truncation
// (order of `want` doesn't matter).
69 want[i] = want[len(want)-1]
70 want = want[:len(want)-1]
// Anything still left in `want` was never observed in the trace.
76 for _, ev := range want {
77 t.Errorf("no match for %s TaskID=%d Args=%#v", ev.kind, ev.task, ev.args)
// TestTraceAnnotationsStress runs the annotation stress program and relies
// entirely on testTraceProg's built-in trace/parser validation (no extra
// per-event checks).
83 func TestTraceAnnotationsStress(t *testing.T) {
84 testTraceProg(t, "annotations-stress.go", nil)
// TestTraceCgoCallback exercises tracing across cgo callbacks. Skipped on
// platforms without pthreads.
// NOTE(review): the `switch runtime.GOOS {` header line is elided from this
// listing; only the skip case is visible.
87 func TestTraceCgoCallback(t *testing.T) {
89 case "plan9", "windows":
90 t.Skipf("cgo callback test requires pthreads and is not supported on %s", runtime.GOOS)
92 testTraceProg(t, "cgo-callback.go", nil)
// TestTraceCPUProfile cross-checks the CPU profile samples embedded in the
// execution trace against the pprof summary the test program prints to
// stderr: every pprof sample attributed to the cpuHogger goroutine should
// also appear in the trace (unless the trace's profiling buffer overflowed).
95 func TestTraceCPUProfile(t *testing.T) {
96 testTraceProg(t, "cpu-profile.go", func(t *testing.T, tb, stderr []byte, _ bool) {
97 // Parse stderr which has a CPU profile summary, if everything went well.
98 // (If it didn't, we shouldn't even make it here.)
99 scanner := bufio.NewScanner(bytes.NewReader(stderr))
// Aggregate pprof's view: stack string -> sample count, plus a total.
101 pprofStacks := make(map[string]int)
105 _, err := fmt.Sscanf(scanner.Text(), "%s\t%d", &stack, &samples)
107 t.Fatalf("failed to parse CPU profile summary in stderr: %s\n\tfull:\n%s", scanner.Text(), stderr)
109 pprofStacks[stack] = samples
110 pprofSamples += samples
112 if err := scanner.Err(); err != nil {
113 t.Fatalf("failed to parse CPU profile summary in stderr: %v", err)
// With no samples at all there is nothing to compare; that can happen on
// lightly-loaded or slow machines, so skip rather than fail.
115 if pprofSamples == 0 {
116 t.Skip("CPU profile did not include any samples while tracing was active")
119 // Examine the execution tracer's view of the CPU profile samples. Filter it
120 // to only include samples from the single test goroutine. Use the goroutine
121 // ID that was recorded in the events: that should reflect getg().m.curg,
122 // same as the profiler's labels (even when the M is using its g0 stack).
123 totalTraceSamples := 0
// NOTE(review): the elided line between these two presumably declares
// `traceSamples := 0` (used below) — confirm against the full source.
125 traceStacks := make(map[string]int)
126 r, err := trace.NewReader(bytes.NewReader(tb))
// hogRegion remembers the EventRegionBegin for the "cpuHogger" region so we
// can filter samples by its goroutine; hogRegionClosed confirms we saw the
// matching EventRegionEnd.
130 var hogRegion *trace.Event
131 var hogRegionClosed bool
133 ev, err := r.ReadEvent()
140 if ev.Kind() == trace.EventRegionBegin && ev.Region().Type == "cpuHogger" {
143 if ev.Kind() == trace.EventStackSample {
145 if hogRegion != nil && ev.Goroutine() == hogRegion.Goroutine() {
// Render the sample's stack as "func:line|func:line|...", dropping the
// runtime.goexit frame so it matches the pprof summary's format.
148 ev.Stack().Frames(func(frame trace.StackFrame) bool {
149 if frame.Func != "runtime.goexit" {
150 fns = append(fns, fmt.Sprintf("%s:%d", frame.Func, frame.Line))
154 stack := strings.Join(fns, "|")
158 if ev.Kind() == trace.EventRegionEnd && ev.Region().Type == "cpuHogger" {
159 hogRegionClosed = true
162 if hogRegion == nil {
163 t.Fatalf("execution trace did not identify cpuHogger goroutine")
164 } else if !hogRegionClosed {
165 t.Fatalf("execution trace did not close cpuHogger region")
168 // The execution trace may drop CPU profile samples if the profiling buffer
169 // overflows. Based on the size of profBufWordCount, that takes a bit over
170 // 1900 CPU samples or 19 thread-seconds at a 100 Hz sample rate. If we've
171 // hit that case, then we definitely have at least one full buffer's worth
172 // of CPU samples, so we'll call that success.
173 overflowed := totalTraceSamples >= 1900
174 if traceSamples < pprofSamples {
175 t.Logf("execution trace did not include all CPU profile samples; %d in profile, %d in trace", pprofSamples, traceSamples)
// Per-stack comparison: each pprof stack's count should be covered by the
// trace. Entries are deleted as they are checked so the loop below can
// report pprof-only leftovers.
181 for stack, traceSamples := range traceStacks {
182 pprofSamples := pprofStacks[stack]
183 delete(pprofStacks, stack)
184 if traceSamples < pprofSamples {
185 t.Logf("execution trace did not include all CPU profile samples for stack %q; %d in profile, %d in trace",
186 stack, pprofSamples, traceSamples)
192 for stack, pprofSamples := range pprofStacks {
193 t.Logf("CPU profile included %d samples at stack %q not present in execution trace", pprofSamples, stack)
// Dump both views to aid debugging when the counts disagree.
200 t.Logf("execution trace CPU samples:")
201 for stack, samples := range traceStacks {
202 t.Logf("%d: %q", samples, stack)
204 t.Logf("CPU profile:\n%s", stderr)
// TestTraceFutileWakeup verifies that goroutines inside the "special" trace
// region never exhibit a futile wakeup: blocking, becoming runnable, running,
// and then immediately blocking again without an intervening reschedule.
209 func TestTraceFutileWakeup(t *testing.T) {
210 testTraceProg(t, "futile-wakeup.go", func(t *testing.T, tb, _ []byte, _ bool) {
211 // Check to make sure that no goroutine in the "special" trace region
212 // ends up blocking, unblocking, then immediately blocking again.
214 // The goroutines are careful to call runtime.Gosched in between blocking,
215 // so there should never be a clean block/unblock on the goroutine unless
216 // the runtime was generating extraneous events.
// gs maps each tracked goroutine to its current position in the state
// machine described below (the state constants are elided from this listing).
223 gs := make(map[trace.GoID]int)
224 seenSpecialGoroutines := false
225 r, err := trace.NewReader(bytes.NewReader(tb))
230 ev, err := r.ReadEvent()
237 // Only track goroutines in the special region we control, so runtime
238 // goroutines don't interfere (it's totally valid in traces for a
239 // goroutine to block, run, and block again; that's not what we care about).
240 if ev.Kind() == trace.EventRegionBegin && ev.Region().Type == "special" {
241 seenSpecialGoroutines = true
242 gs[ev.Goroutine()] = entered
244 if ev.Kind() == trace.EventRegionEnd && ev.Region().Type == "special" {
245 delete(gs, ev.Goroutine())
247 // Track state transitions for goroutines we care about.
249 // The goroutines we care about will advance through the state machine
250 // of entered -> blocked -> runnable -> running. If in the running state
251 // we block, then we have a futile wakeup. Because of the runtime.Gosched
252 // on these specially marked goroutines, we should end up back in runnable
253 // first. If at any point we go to a different state, switch back to entered
254 // and wait for the next time the goroutine blocks.
255 if ev.Kind() != trace.EventStateTransition {
258 st := ev.StateTransition()
259 if st.Resource.Kind != trace.ResourceGoroutine {
262 id := st.Resource.Goroutine()
// Only the destination state matters for advancing the state machine.
267 _, new := st.Goroutine()
270 if new == trace.GoWaiting {
276 if new == trace.GoRunnable {
282 if new == trace.GoRunning {
// Running -> Waiting without passing through Runnable again: futile wakeup.
288 if new == trace.GoWaiting {
289 t.Fatalf("found futile wakeup on goroutine %d", id)
296 if !seenSpecialGoroutines {
// NOTE(review): runtime string contains a typo ("in a the region") — left
// byte-identical here; fixing it would be a behavior (output) change.
297 t.Fatal("did not see a goroutine in a the region 'special'")
// TestTraceGCStress runs the GC stress program; validation is entirely the
// generic trace/parser checking done by testTraceProg.
302 func TestTraceGCStress(t *testing.T) {
303 testTraceProg(t, "gc-stress.go", nil)
// TestTraceGOMAXPROCS runs a program that changes GOMAXPROCS while tracing;
// validation is the generic trace/parser checking done by testTraceProg.
306 func TestTraceGOMAXPROCS(t *testing.T) {
307 testTraceProg(t, "gomaxprocs.go", nil)
// TestTraceStacks runs testdata/testprog/stacks.go and checks that specific
// events (goroutine state transitions, a range begin, a metric) appear in the
// trace with the expected stack traces. A frame line of 0 means "don't check
// the line number"; nonzero lines are relative to mainLine.
310 func TestTraceStacks(t *testing.T) {
311 testTraceProg(t, "stacks.go", func(t *testing.T, tb, _ []byte, stress bool) {
321 // mainLine is the line number of `func main()` in testprog/stacks.go.
// Expected events: (kind, human-readable match string, innermost frames).
324 {trace.EventStateTransition, "Goroutine Running->Runnable", []frame{
325 {"main.main", mainLine + 82},
327 {trace.EventStateTransition, "Goroutine NotExist->Runnable", []frame{
328 {"main.main", mainLine + 11},
330 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
331 {"runtime.block", 0},
332 {"main.main.func1", 0},
334 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
335 {"runtime.chansend1", 0},
336 {"main.main.func2", 0},
338 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
339 {"runtime.chanrecv1", 0},
340 {"main.main.func3", 0},
342 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
343 {"runtime.chanrecv1", 0},
344 {"main.main.func4", 0},
346 {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
347 {"runtime.chansend1", 0},
348 {"main.main", mainLine + 84},
350 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
351 {"runtime.chansend1", 0},
352 {"main.main.func5", 0},
354 {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
355 {"runtime.chanrecv1", 0},
356 {"main.main", mainLine + 85},
358 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
359 {"runtime.selectgo", 0},
360 {"main.main.func6", 0},
362 {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
363 {"runtime.selectgo", 0},
364 {"main.main", mainLine + 86},
366 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
367 {"sync.(*Mutex).Lock", 0},
368 {"main.main.func7", 0},
370 {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
371 {"sync.(*Mutex).Unlock", 0},
374 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
375 {"sync.(*WaitGroup).Wait", 0},
376 {"main.main.func8", 0},
378 {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
379 {"sync.(*WaitGroup).Add", 0},
380 {"sync.(*WaitGroup).Done", 0},
381 {"main.main", mainLine + 91},
383 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
384 {"sync.(*Cond).Wait", 0},
385 {"main.main.func9", 0},
387 {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
388 {"sync.(*Cond).Signal", 0},
391 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
395 {trace.EventMetric, "/sched/gomaxprocs:threads", []frame{
396 {"runtime.startTheWorld", 0}, // this is when the current gomaxprocs is logged.
397 {"runtime.startTheWorldGC", 0},
398 {"runtime.GOMAXPROCS", 0},
403 // Only check for this stack if !stress because traceAdvance alone could
404 // allocate enough memory to trigger a GC if called frequently enough.
405 // This might cause the runtime.GC call we're trying to match against to
406 // coalesce with an active GC triggered by traceAdvance. In that case
407 // we won't have an EventRangeBegin event that matches the stack trace we're
408 // looking for, since runtime.GC will not have triggered the GC.
409 gcEv := evDesc{trace.EventRangeBegin, "GC concurrent mark phase", []frame{
413 want = append(want, gcEv)
// Network/file-poll stacks only exist where the net poller is in use.
415 if runtime.GOOS != "windows" && runtime.GOOS != "plan9" {
416 want = append(want, []evDesc{
417 {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
418 {"internal/poll.(*FD).Accept", 0},
419 {"net.(*netFD).accept", 0},
420 {"net.(*TCPListener).accept", 0},
421 {"net.(*TCPListener).Accept", 0},
422 {"main.main.func10", 0},
424 {trace.EventStateTransition, "Goroutine Running->Syscall", []frame{
427 {"internal/poll.ignoringEINTRIO", 0},
428 {"internal/poll.(*FD).Read", 0},
429 {"os.(*File).read", 0},
430 {"os.(*File).Read", 0},
431 {"main.main.func11", 0},
// stackMatches reports whether the innermost frames of stk equal `frames`
// (function name always; line number only when the expectation is nonzero).
435 stackMatches := func(stk trace.Stack, frames []frame) bool {
438 stk.Frames(func(f trace.StackFrame) bool {
439 if f.Func != frames[i].fn {
443 if line := uint64(frames[i].line); line != 0 && line != f.Line {
// Scan every event and match it against the outstanding expectations.
452 r, err := trace.NewReader(bytes.NewReader(tb))
457 ev, err := r.ReadEvent()
464 for i, wantEv := range want {
465 if wantEv.kind != ev.Kind() {
470 case trace.EventStateTransition:
471 st := ev.StateTransition()
473 switch st.Resource.Kind {
474 case trace.ResourceGoroutine:
// Render e.g. "Goroutine Running->Waiting" for comparison with wantEv.match.
475 old, new := st.Goroutine()
476 str = fmt.Sprintf("%s %s->%s", st.Resource.Kind, old, new)
478 match = str == wantEv.match
479 case trace.EventRangeBegin:
481 match = rng.Name == wantEv.match
482 case trace.EventMetric:
483 metric := ev.Metric()
484 match = metric.Name == wantEv.match
// The stack must match too; matched entries are removed via swap-with-last.
486 match = match && stackMatches(ev.Stack(), wantEv.frames)
488 want[i] = want[len(want)-1]
489 want = want[:len(want)-1]
// Anything left in `want` was never observed in the trace.
495 for _, ev := range want {
496 t.Errorf("no match for %s Match=%s Stack=%#v", ev.kind, ev.match, ev.frames)
// TestTraceStress runs the general stress program. Skipped where os.Pipe is
// unavailable (the skip case's platform list is elided from this listing).
502 func TestTraceStress(t *testing.T) {
503 switch runtime.GOOS {
505 t.Skip("no os.Pipe on " + runtime.GOOS)
507 testTraceProg(t, "stress.go", nil)
// TestTraceStressStartStop stresses repeatedly starting and stopping tracing.
// Skipped where os.Pipe is unavailable (the platform list is elided here).
510 func TestTraceStressStartStop(t *testing.T) {
511 switch runtime.GOOS {
513 t.Skip("no os.Pipe on " + runtime.GOOS)
515 testTraceProg(t, "stress-start-stop.go", nil)
// TestTraceManyStartStop runs a program that starts/stops tracing many times;
// validation is the generic trace/parser checking done by testTraceProg.
518 func TestTraceManyStartStop(t *testing.T) {
519 testTraceProg(t, "many-start-stop.go", nil)
// testTraceProg is the shared driver: it `go run`s testdata/testprog/<progName>,
// captures the trace from stdout, validates it with testReader, and then runs
// the optional `extra` callback with the trace bytes, stderr, and whether this
// is the stress variant. It registers both a "Default" and a "Stress" subtest.
522 func testTraceProg(t *testing.T, progName string, extra func(t *testing.T, trace, stderr []byte, stress bool)) {
523 testenv.MustHaveGoRun(t)
525 // Check if we're on a builder.
526 onBuilder := testenv.Builder() != ""
528 testPath := filepath.Join("./testdata/testprog", progName)
530 runTest := func(t *testing.T, stress bool) {
531 // Run the program and capture the trace, which is always written to stdout.
532 cmd := testenv.Command(t, testenv.GoToolPath(t), "run", testPath)
533 cmd.Env = append(os.Environ(), "GOEXPERIMENT=exectracer2")
535 // Advance a generation constantly.
536 cmd.Env = append(cmd.Env, "GODEBUG=traceadvanceperiod=0")
538 // Capture stdout and stderr.
540 // The protocol for these programs is that stdout contains the trace data
541 // and stderr is an expectation in string format.
542 var traceBuf, errBuf bytes.Buffer
543 cmd.Stdout = &traceBuf
546 if err := cmd.Run(); err != nil {
547 if errBuf.Len() != 0 {
// NOTE(review): string(errBuf.Bytes()) could simply be errBuf.String();
// left unchanged here to keep this a comments-only edit.
548 t.Logf("stderr: %s", string(errBuf.Bytes()))
552 tb := traceBuf.Bytes()
554 // Test the trace and the parser.
555 testReader(t, bytes.NewReader(tb), testtrace.ExpectSuccess())
557 // Run some extra validation.
558 if !t.Failed() && extra != nil {
559 extra(t, tb, errBuf.Bytes(), stress)
562 // Dump some more information on failure.
563 if t.Failed() && onBuilder {
564 // Dump directly to the test log on the builder, since this
565 // data is critical for debugging and this is the only way
566 // we can currently make sure it's retained.
567 t.Log("found bad trace; dumping to test log...")
568 t.Log(dumpTraceToText(t, tb))
569 } else if t.Failed() || *dumpTraces {
570 // We asked to dump the trace or failed. Write the trace to a file.
571 t.Logf("wrote trace to file: %s", dumpTraceToFile(t, testName, stress, tb))
574 t.Run("Default", func(t *testing.T) {
577 t.Run("Stress", func(t *testing.T) {
579 t.Skip("skipping trace reader stress tests in short mode")