// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
	"internal/trace/v2/testtrace"
func TestTraceAnnotations(t *testing.T) {
	testTraceProg(t, "annotations.go", func(t *testing.T, tb, _ []byte, _ bool) {
			{trace.EventTaskBegin, trace.TaskID(1), []string{"task0"}},
			{trace.EventRegionBegin, trace.TaskID(1), []string{"region0"}},
			{trace.EventRegionBegin, trace.TaskID(1), []string{"region1"}},
			{trace.EventLog, trace.TaskID(1), []string{"key0", "0123456789abcdef"}},
			{trace.EventRegionEnd, trace.TaskID(1), []string{"region1"}},
			{trace.EventRegionEnd, trace.TaskID(1), []string{"region0"}},
			{trace.EventTaskEnd, trace.TaskID(1), []string{"task0"}},
			// Currently, the pre-existing region is not recorded, to avoid allocations.
			{trace.EventRegionBegin, trace.NoTask, []string{"post-existing region"}},
		r, err := trace.NewReader(bytes.NewReader(tb))
			ev, err := r.ReadEvent()
			for i, wantEv := range want {
				if wantEv.kind != ev.Kind() {
				case trace.EventTaskBegin, trace.EventTaskEnd:
					match = task.ID == wantEv.task && task.Type == wantEv.args[0]
				case trace.EventRegionBegin, trace.EventRegionEnd:
					match = reg.Task == wantEv.task && reg.Type == wantEv.args[0]
					match = log.Task == wantEv.task && log.Category == wantEv.args[0] && log.Message == wantEv.args[1]
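				// Once an expectation matches, it is removed from want by swapping
				// in the last element and truncating the slice.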
					want[i] = want[len(want)-1]
					want = want[:len(want)-1]
		for _, ev := range want {
			t.Errorf("no match for %s TaskID=%d Args=%#v", ev.kind, ev.task, ev.args)
func TestTraceAnnotationsStress(t *testing.T) {
	testTraceProg(t, "annotations-stress.go", nil)
func TestTraceCgoCallback(t *testing.T) {
	testenv.MustHaveCGO(t)
	case "plan9", "windows":
		t.Skipf("cgo callback test requires pthreads and is not supported on %s", runtime.GOOS)
	testTraceProg(t, "cgo-callback.go", nil)
func TestTraceCPUProfile(t *testing.T) {
	testTraceProg(t, "cpu-profile.go", func(t *testing.T, tb, stderr []byte, _ bool) {
		// Parse stderr, which has a CPU profile summary, if everything went well.
		// (If it didn't, we shouldn't even make it here.)
		scanner := bufio.NewScanner(bytes.NewReader(stderr))
		pprofStacks := make(map[string]int)
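		// Each summary line is expected to hold a stack and its sample count
		// separated by a tab, matching the Sscanf format below.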
			_, err := fmt.Sscanf(scanner.Text(), "%s\t%d", &stack, &samples)
				t.Fatalf("failed to parse CPU profile summary in stderr: %s\n\tfull:\n%s", scanner.Text(), stderr)
			pprofStacks[stack] = samples
			pprofSamples += samples
		if err := scanner.Err(); err != nil {
			t.Fatalf("failed to parse CPU profile summary in stderr: %v", err)
		if pprofSamples == 0 {
			t.Skip("CPU profile did not include any samples while tracing was active")
		// Examine the execution tracer's view of the CPU profile samples. Filter it
		// to only include samples from the single test goroutine. Use the goroutine
		// ID that was recorded in the events: that should reflect getg().m.curg,
		// same as the profiler's labels (even when the M is using its g0 stack).
		totalTraceSamples := 0
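		// traceStacks counts only the samples attributed to the cpuHogger
		// goroutine, keyed by a rendering of their stack.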
		traceStacks := make(map[string]int)
		r, err := trace.NewReader(bytes.NewReader(tb))
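		// hogRegion remembers the EventRegionBegin event for the "cpuHogger"
		// region; its goroutine identifies the test goroutine whose samples
		// we want to count.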
		var hogRegion *trace.Event
		var hogRegionClosed bool
			ev, err := r.ReadEvent()
			if ev.Kind() == trace.EventRegionBegin && ev.Region().Type == "cpuHogger" {
			if ev.Kind() == trace.EventStackSample {
				if hogRegion != nil && ev.Goroutine() == hogRegion.Goroutine() {
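					// Render the sample's stack as "func:line|func:line|..." so it
					// can be compared against the stacks in the pprof summary.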
					ev.Stack().Frames(func(frame trace.StackFrame) bool {
						if frame.Func != "runtime.goexit" {
							fns = append(fns, fmt.Sprintf("%s:%d", frame.Func, frame.Line))
					stack := strings.Join(fns, "|")
			if ev.Kind() == trace.EventRegionEnd && ev.Region().Type == "cpuHogger" {
				hogRegionClosed = true
		if hogRegion == nil {
			t.Fatalf("execution trace did not identify cpuHogger goroutine")
		} else if !hogRegionClosed {
			t.Fatalf("execution trace did not close cpuHogger region")
		// The execution trace may drop CPU profile samples if the profiling buffer
		// overflows. Based on the size of profBufWordCount, that takes a bit over
		// 1900 CPU samples or 19 thread-seconds at a 100 Hz sample rate. If we've
		// hit that case, then we definitely have at least one full buffer's worth
		// of CPU samples, so we'll call that success.
		overflowed := totalTraceSamples >= 1900
		if traceSamples < pprofSamples {
			t.Logf("execution trace did not include all CPU profile samples; %d in profile, %d in trace", pprofSamples, traceSamples)
		for stack, traceSamples := range traceStacks {
			pprofSamples := pprofStacks[stack]
			delete(pprofStacks, stack)
			if traceSamples < pprofSamples {
				t.Logf("execution trace did not include all CPU profile samples for stack %q; %d in profile, %d in trace",
					stack, pprofSamples, traceSamples)
		for stack, pprofSamples := range pprofStacks {
			t.Logf("CPU profile included %d samples at stack %q not present in execution trace", pprofSamples, stack)
			t.Logf("execution trace CPU samples:")
			for stack, samples := range traceStacks {
				t.Logf("%d: %q", samples, stack)
			t.Logf("CPU profile:\n%s", stderr)
func TestTraceFutileWakeup(t *testing.T) {
	testTraceProg(t, "futile-wakeup.go", func(t *testing.T, tb, _ []byte, _ bool) {
		// Check to make sure that no goroutine in the "special" trace region
		// ends up blocking, unblocking, then immediately blocking again.
		// The goroutines are careful to call runtime.Gosched in between blocking,
		// so there should never be a clean block/unblock on the goroutine unless
		// the runtime was generating extraneous events.
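		// gs tracks, for each goroutine seen in the "special" region, how far it
		// has advanced through the entered -> blocked -> runnable -> running
		// state machine described below.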
		gs := make(map[trace.GoID]int)
		seenSpecialGoroutines := false
		r, err := trace.NewReader(bytes.NewReader(tb))
			ev, err := r.ReadEvent()
			// Only track goroutines in the special region we control, so runtime
			// goroutines don't interfere (it's totally valid in traces for a
			// goroutine to block, run, and block again; that's not what we care about).
			if ev.Kind() == trace.EventRegionBegin && ev.Region().Type == "special" {
				seenSpecialGoroutines = true
				gs[ev.Goroutine()] = entered
			if ev.Kind() == trace.EventRegionEnd && ev.Region().Type == "special" {
				delete(gs, ev.Goroutine())
			// Track state transitions for goroutines we care about.
			// The goroutines we care about will advance through the state machine
			// of entered -> blocked -> runnable -> running. If in the running state
			// we block, then we have a futile wakeup. Because of the runtime.Gosched
			// on these specially marked goroutines, we should end up back in runnable
			// first. If at any point we go to a different state, switch back to entered
			// and wait for the next time the goroutine blocks.
			if ev.Kind() != trace.EventStateTransition {
			st := ev.StateTransition()
			if st.Resource.Kind != trace.ResourceGoroutine {
			id := st.Resource.Goroutine()
			_, new := st.Goroutine()
				if new == trace.GoWaiting {
				if new == trace.GoRunnable {
				if new == trace.GoRunning {
				if new == trace.GoWaiting {
					t.Fatalf("found futile wakeup on goroutine %d", id)
		if !seenSpecialGoroutines {
			t.Fatal("did not see a goroutine in the 'special' region")
func TestTraceGCStress(t *testing.T) {
	testTraceProg(t, "gc-stress.go", nil)
func TestTraceGOMAXPROCS(t *testing.T) {
	testTraceProg(t, "gomaxprocs.go", nil)
func TestTraceStacks(t *testing.T) {
	testTraceProg(t, "stacks.go", func(t *testing.T, tb, _ []byte, stress bool) {
		// mainLine is the line number of `func main()` in testprog/stacks.go.
			{trace.EventStateTransition, "Goroutine Running->Runnable", []frame{
				{"main.main", mainLine + 82},
			{trace.EventStateTransition, "Goroutine NotExist->Runnable", []frame{
				{"main.main", mainLine + 11},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
				{"runtime.block", 0},
				{"main.main.func1", 0},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
				{"runtime.chansend1", 0},
				{"main.main.func2", 0},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
				{"runtime.chanrecv1", 0},
				{"main.main.func3", 0},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
				{"runtime.chanrecv1", 0},
				{"main.main.func4", 0},
			{trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
				{"runtime.chansend1", 0},
				{"main.main", mainLine + 84},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
				{"runtime.chansend1", 0},
				{"main.main.func5", 0},
			{trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
				{"runtime.chanrecv1", 0},
				{"main.main", mainLine + 85},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
				{"runtime.selectgo", 0},
				{"main.main.func6", 0},
			{trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
				{"runtime.selectgo", 0},
				{"main.main", mainLine + 86},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
				{"sync.(*Mutex).Lock", 0},
				{"main.main.func7", 0},
			{trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
				{"sync.(*Mutex).Unlock", 0},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
				{"sync.(*WaitGroup).Wait", 0},
				{"main.main.func8", 0},
			{trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
				{"sync.(*WaitGroup).Add", 0},
				{"sync.(*WaitGroup).Done", 0},
				{"main.main", mainLine + 91},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
				{"sync.(*Cond).Wait", 0},
				{"main.main.func9", 0},
			{trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
				{"sync.(*Cond).Signal", 0},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
			{trace.EventMetric, "/sched/gomaxprocs:threads", []frame{
				{"runtime.startTheWorld", 0}, // this is when the current gomaxprocs is logged.
				{"runtime.startTheWorldGC", 0},
				{"runtime.GOMAXPROCS", 0},
			// Only check for this stack if !stress because traceAdvance alone could
			// allocate enough memory to trigger a GC if called frequently enough.
			// This might cause the runtime.GC call we're trying to match against to
			// coalesce with an active GC triggered by traceAdvance. In that case
			// we won't have an EventRangeBegin event that matches the stack trace we're
			// looking for, since runtime.GC will not have triggered the GC.
			gcEv := evDesc{trace.EventRangeBegin, "GC concurrent mark phase", []frame{
			want = append(want, gcEv)
		if runtime.GOOS != "windows" && runtime.GOOS != "plan9" {
			want = append(want, []evDesc{
				{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
					{"internal/poll.(*FD).Accept", 0},
					{"net.(*netFD).accept", 0},
					{"net.(*TCPListener).accept", 0},
					{"net.(*TCPListener).Accept", 0},
					{"main.main.func10", 0},
				{trace.EventStateTransition, "Goroutine Running->Syscall", []frame{
					{"internal/poll.ignoringEINTRIO", 0},
					{"internal/poll.(*FD).Read", 0},
					{"os.(*File).read", 0},
					{"os.(*File).Read", 0},
					{"main.main.func11", 0},
		stackMatches := func(stk trace.Stack, frames []frame) bool {
			stk.Frames(func(f trace.StackFrame) bool {
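				// Compare function names frame by frame; a line number of 0 in the
				// expectation means any line is acceptable.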
				if f.Func != frames[i].fn {
				if line := uint64(frames[i].line); line != 0 && line != f.Line {
		r, err := trace.NewReader(bytes.NewReader(tb))
			ev, err := r.ReadEvent()
			for i, wantEv := range want {
				if wantEv.kind != ev.Kind() {
				case trace.EventStateTransition:
					st := ev.StateTransition()
					switch st.Resource.Kind {
					case trace.ResourceGoroutine:
						old, new := st.Goroutine()
						str = fmt.Sprintf("%s %s->%s", st.Resource.Kind, old, new)
					match = str == wantEv.match
				case trace.EventRangeBegin:
					match = rng.Name == wantEv.match
				case trace.EventMetric:
					metric := ev.Metric()
					match = metric.Name == wantEv.match
				match = match && stackMatches(ev.Stack(), wantEv.frames)
					want[i] = want[len(want)-1]
					want = want[:len(want)-1]
		for _, ev := range want {
			t.Errorf("no match for %s Match=%s Stack=%#v", ev.kind, ev.match, ev.frames)
func TestTraceStress(t *testing.T) {
	switch runtime.GOOS {
		t.Skip("no os.Pipe on " + runtime.GOOS)
	testTraceProg(t, "stress.go", nil)
func TestTraceStressStartStop(t *testing.T) {
	switch runtime.GOOS {
		t.Skip("no os.Pipe on " + runtime.GOOS)
	testTraceProg(t, "stress-start-stop.go", nil)
func TestTraceManyStartStop(t *testing.T) {
	testTraceProg(t, "many-start-stop.go", nil)
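// testTraceProg runs the named program from testdata/testprog, captures the
// trace it writes to stdout, validates it with the trace reader, and, if extra
// is non-nil, runs extra checks against the trace bytes and the program's
// stderr. The test runs in a Default and a Stress configuration; the latter is
// skipped in short mode.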
func testTraceProg(t *testing.T, progName string, extra func(t *testing.T, trace, stderr []byte, stress bool)) {
	testenv.MustHaveGoRun(t)
	if runtime.GOOS == "windows" {
		t.Skip("temporarily disabled on Windows for #64061")
	// Check if we're on a builder.
	onBuilder := testenv.Builder() != ""
	testPath := filepath.Join("./testdata/testprog", progName)
	runTest := func(t *testing.T, stress bool) {
		// Run the program and capture the trace, which is always written to stdout.
		cmd := testenv.Command(t, testenv.GoToolPath(t), "run", testPath)
		cmd.Env = append(os.Environ(), "GOEXPERIMENT=exectracer2")
		// Advance a generation constantly.
		cmd.Env = append(cmd.Env, "GODEBUG=traceadvanceperiod=0")
		// Capture stdout and stderr.
		// The protocol for these programs is that stdout contains the trace data
		// and stderr is an expectation in string format.
		var traceBuf, errBuf bytes.Buffer
		cmd.Stdout = &traceBuf
		if err := cmd.Run(); err != nil {
			if errBuf.Len() != 0 {
				t.Logf("stderr: %s", errBuf.String())
		tb := traceBuf.Bytes()
		// Test the trace and the parser.
		testReader(t, bytes.NewReader(tb), testtrace.ExpectSuccess())
		// Run some extra validation.
		if !t.Failed() && extra != nil {
			extra(t, tb, errBuf.Bytes(), stress)
		// Dump some more information on failure.
		if t.Failed() && onBuilder {
			// Dump directly to the test log on the builder, since this
			// data is critical for debugging and this is the only way
			// we can currently make sure it's retained.
			t.Log("found bad trace; dumping to test log...")
			t.Log(dumpTraceToText(t, tb))
		} else if t.Failed() || *dumpTraces {
			// We asked to dump the trace or failed. Write the trace to a file.
			t.Logf("wrote trace to file: %s", dumpTraceToFile(t, testName, stress, tb))
	t.Run("Default", func(t *testing.T) {
	t.Run("Stress", func(t *testing.T) {
			t.Skip("skipping trace reader stress tests in short mode")