]> Cypherpunks.ru repositories - gostls13.git/blobdiff - src/runtime/proc.go
runtime: add execution tracer v2 behind GOEXPERIMENT=exectracer2
[gostls13.git] / src / runtime / proc.go
index d0cb998cffd89acaf5185d288a1c73fc8aaf6e5e..4f6e3a3ced2d000789d5bd070fe761f83cc4856c 100644 (file)
@@ -8,6 +8,7 @@ import (
        "internal/abi"
        "internal/cpu"
        "internal/goarch"
+       "internal/goexperiment"
        "internal/goos"
        "runtime/internal/atomic"
        "runtime/internal/sys"
@@ -1344,7 +1345,10 @@ func stopTheWorld(reason stwReason) {
                // must have preempted all goroutines, including any attempting
                // to scan our stack, in which case, any stack shrinking will
                // have already completed by the time we exit.
-               // Don't provide a wait reason because we're still executing.
+               //
+               // N.B. The execution tracer is not aware of this status
+               // transition and handles it specially based on the
+               // wait reason.
                casGToWaiting(gp, _Grunning, waitReasonStoppingTheWorld)
                stopTheWorldWithSema(reason)
                casgstatus(gp, _Gwaiting, _Grunning)
@@ -1451,7 +1455,7 @@ func stopTheWorldWithSema(reason stwReason) {
                if s == _Psyscall && atomic.Cas(&pp.status, s, _Pgcstop) {
                        if trace.ok() {
                                trace.GoSysBlock(pp)
-                               trace.ProcStop(pp)
+                               trace.ProcSteal(pp, false)
                        }
                        pp.syscalltick++
                        sched.stopwait--
@@ -1772,6 +1776,8 @@ func mexit(osStack bool) {
        }
        throw("m not found in allm")
 found:
+       // Events must not be traced after this point.
+
        // Delay reaping m until it's done with the stack.
        //
        // Put mp on the free list, though it will not be reaped while freeWait
@@ -1781,6 +1787,9 @@ found:
        //
        // Note that the free list must not be linked through alllink because
        // some functions walk allm without locking, so may be using alllink.
+       //
+       // N.B. It's important that the M appears on the free list simultaneously
+       // with it being removed so that the tracer can find it.
        mp.freeWait.Store(freeMWait)
        mp.freelink = sched.freem
        sched.freem = mp
@@ -1904,21 +1913,25 @@ func forEachPInternal(fn func(*p)) {
 
        // Force Ps currently in _Psyscall into _Pidle and hand them
        // off to induce safe point function execution.
-       trace := traceAcquire()
        for _, p2 := range allp {
                s := p2.status
+
+               // We need to be fine-grained about tracing here, since handoffp
+               // might call into the tracer, and the tracer is non-reentrant.
+               trace := traceAcquire()
                if s == _Psyscall && p2.runSafePointFn == 1 && atomic.Cas(&p2.status, s, _Pidle) {
                        if trace.ok() {
+                               // It's important that we traceRelease before we call handoffp, which may also traceAcquire.
                                trace.GoSysBlock(p2)
-                               trace.ProcStop(p2)
+                               trace.ProcSteal(p2, false)
+                               traceRelease(trace)
                        }
                        p2.syscalltick++
                        handoffp(p2)
+               } else if trace.ok() {
+                       traceRelease(trace)
                }
        }
-       if trace.ok() {
-               traceRelease(trace)
-       }
 
        // Wait for remaining Ps to run fn.
        if wait {
@@ -2016,6 +2029,7 @@ func allocm(pp *p, fn func(), id int64) *m {
                lock(&sched.lock)
                var newList *m
                for freem := sched.freem; freem != nil; {
+                       // Wait for freeWait to indicate that freem's stack is unused.
                        wait := freem.freeWait.Load()
                        if wait == freeMWait {
                                next := freem.freelink
@@ -2024,6 +2038,12 @@ func allocm(pp *p, fn func(), id int64) *m {
                                freem = next
                                continue
                        }
+                       // Drop any remaining trace resources.
+                       // Ms can continue to emit events all the way until wait != freeMWait,
+                       // so it's only safe to call traceThreadDestroy at this point.
+                       if traceEnabled() || traceShuttingDown() {
+                               traceThreadDestroy(freem)
+                       }
                        // Free the stack if needed. For freeMRef, there is
                        // nothing to do except drop freem from the sched.freem
                        // list.
@@ -2162,9 +2182,27 @@ func needm(signal bool) {
        asminit()
        minit()
 
+       // Emit a trace event for this dead -> syscall transition,
+       // but only in the new tracer and only if we're not in a signal handler.
+       //
+       // N.B. the tracer can run on a bare M just fine, we just have
+       // to make sure to do this before setg(nil) and unminit.
+       var trace traceLocker
+       if goexperiment.ExecTracer2 && !signal {
+               trace = traceAcquire()
+       }
+
        // mp.curg is now a real goroutine.
        casgstatus(mp.curg, _Gdead, _Gsyscall)
        sched.ngsys.Add(-1)
+
+       if goexperiment.ExecTracer2 && !signal {
+               if trace.ok() {
+                       trace.GoCreateSyscall(mp.curg)
+                       traceRelease(trace)
+               }
+       }
+       mp.isExtraInSig = signal
 }
 
 // Acquire an extra m and bind it to the C thread when a pthread key has been created.
@@ -2284,11 +2322,57 @@ func dropm() {
        // with no pointer manipulation.
        mp := getg().m
 
+       // Emit a trace event for this syscall -> dead transition,
+       // but only in the new tracer.
+       //
+       // N.B. the tracer can run on a bare M just fine, we just have
+       // to make sure to do this before setg(nil) and unminit.
+       var trace traceLocker
+       if goexperiment.ExecTracer2 && !mp.isExtraInSig {
+               trace = traceAcquire()
+       }
+
        // Return mp.curg to dead state.
        casgstatus(mp.curg, _Gsyscall, _Gdead)
        mp.curg.preemptStop = false
        sched.ngsys.Add(1)
 
+       if goexperiment.ExecTracer2 && !mp.isExtraInSig {
+               if trace.ok() {
+                       trace.GoDestroySyscall()
+                       traceRelease(trace)
+               }
+       }
+
+       if goexperiment.ExecTracer2 {
+               // Trash syscalltick so that it doesn't line up with mp.old.syscalltick anymore.
+               //
+       // In the new tracer, we model needm and dropm as a goroutine being created and
+               // destroyed respectively. The m then might get reused with a different procid but
+               // still with a reference to oldp, and still with the same syscalltick. The next
+               // time a G is "created" in needm, it'll return and quietly reacquire its P from a
+               // different m with a different procid, which will confuse the trace parser. By
+               // trashing syscalltick, we ensure that it'll appear as if we lost the P to the
+       // trace parser and that we just reacquired it.
+               //
+               // Trash the value by decrementing because that gets us as far away from the value
+               // the syscall exit code expects as possible. Setting to zero is risky because
+               // syscalltick could already be zero (and in fact, is initialized to zero).
+               mp.syscalltick--
+       }
+
+       // Reset trace state unconditionally. This goroutine is being 'destroyed'
+       // from the perspective of the tracer.
+       mp.curg.trace.reset()
+
+       // Flush all the M's buffers. This is necessary because the M might
+       // be used on a different thread with a different procid, so we have
+       // to make sure we don't write into the same buffer.
+       if traceEnabled() || traceShuttingDown() {
+               traceThreadDestroy(mp)
+       }
+       mp.isExtraInSig = false
+
        // Block signals before unminit.
        // Unminit unregisters the signal handling stack (but needs g on some systems).
        // Setg(nil) clears g, which is the signal handler's cue not to run Go handlers.
@@ -2982,8 +3066,8 @@ func execute(gp *g, inheritTime bool) {
        if trace.ok() {
                // GoSysExit has to happen when we have a P, but before GoStart.
                // So we emit it here.
-               if gp.syscallsp != 0 {
-                       trace.GoSysExit()
+               if !goexperiment.ExecTracer2 && gp.syscallsp != 0 {
+                       trace.GoSysExit(true)
                }
                trace.GoStart()
                traceRelease(trace)
@@ -4154,7 +4238,7 @@ func save(pc, sp uintptr) {
 // must always point to a valid stack frame. entersyscall below is the normal
 // entry point for syscalls, which obtains the SP and PC from the caller.
 //
-// Syscall tracing:
+// Syscall tracing (old tracer):
 // At the start of a syscall we emit traceGoSysCall to capture the stack trace.
 // If the syscall does not block, that is it, we do not emit any other events.
 // If the syscall blocks (that is, P is retaken), retaker emits traceGoSysBlock;
@@ -4264,6 +4348,8 @@ func entersyscall_gcwait() {
                trace := traceAcquire()
                if trace.ok() {
                        trace.GoSysBlock(pp)
+                       // N.B. ProcSteal not necessary because if we succeed we're
+                       // always stopping the P we just put into the syscall status.
                        trace.ProcStop(pp)
                        traceRelease(trace)
                }
@@ -4364,11 +4450,23 @@ func exitsyscall() {
                }
                trace := traceAcquire()
                if trace.ok() {
-                       if oldp != gp.m.p.ptr() || gp.m.syscalltick != gp.m.p.ptr().syscalltick {
-                               systemstack(func() {
+                       lostP := oldp != gp.m.p.ptr() || gp.m.syscalltick != gp.m.p.ptr().syscalltick
+                       systemstack(func() {
+                               if goexperiment.ExecTracer2 {
+                                       // Write out syscall exit eagerly in the experiment.
+                                       //
+                                       // It's important that we write this *after* we know whether we
+                                       // lost our P or not (determined by exitsyscallfast).
+                                       trace.GoSysExit(lostP)
+                               }
+                               if lostP {
+                                       // We lost the P at some point, even though we got it back here.
+                                       // Trace that we're starting again, because there was a traceGoSysBlock
+                                       // call somewhere in exitsyscallfast (indicating that this goroutine
+                                       // had blocked) and we're about to start running again.
                                        trace.GoStart()
-                               })
-                       }
+                               }
+                       })
                }
                // There's a cpu for us, so we can run.
                gp.m.p.ptr().syscalltick++
@@ -4399,19 +4497,15 @@ func exitsyscall() {
                return
        }
 
-       trace := traceAcquire()
-       if trace.ok() {
-               // Wait till traceGoSysBlock event is emitted.
-               // This ensures consistency of the trace (the goroutine is started after it is blocked).
-               for oldp != nil && oldp.syscalltick == gp.m.syscalltick {
-                       osyield()
+       if !goexperiment.ExecTracer2 {
+               // In the old tracer, because we don't have a P we can't
+               // actually record the true time we exited the syscall.
+               // Record it.
+               trace := traceAcquire()
+               if trace.ok() {
+                       trace.RecordSyscallExitedTime(gp, oldp)
+                       traceRelease(trace)
                }
-               // We can't trace syscall exit right now because we don't have a P.
-               // Tracing code can invoke write barriers that cannot run without a P.
-               // So instead we remember the syscall exit time and emit the event
-               // in execute when we have a P.
-               gp.trace.sysExitTime = traceClockNow()
-               traceRelease(trace)
        }
 
        gp.m.locks--
@@ -4452,7 +4546,7 @@ func exitsyscallfast(oldp *p) bool {
                var ok bool
                systemstack(func() {
                        ok = exitsyscallfast_pidle()
-                       if ok {
+                       if ok && !goexperiment.ExecTracer2 {
                                trace := traceAcquire()
                                if trace.ok() {
                                        if oldp != nil {
@@ -4462,7 +4556,9 @@ func exitsyscallfast(oldp *p) bool {
                                                        osyield()
                                                }
                                        }
-                                       trace.GoSysExit()
+                                       // In the experiment, we write this in exitsyscall.
+                                       // Don't write it here unless the experiment is off.
+                                       trace.GoSysExit(true)
                                        traceRelease(trace)
                                }
                        }
@@ -4488,10 +4584,17 @@ func exitsyscallfast_reacquired() {
                        // traceGoSysBlock for this syscall was already emitted,
                        // but here we effectively retake the p from the new syscall running on the same p.
                        systemstack(func() {
-                               // Denote blocking of the new syscall.
-                               trace.GoSysBlock(gp.m.p.ptr())
-                               // Denote completion of the current syscall.
-                               trace.GoSysExit()
+                               if goexperiment.ExecTracer2 {
+                                       // In the experiment, we're stealing the P. It's treated
+                                       // as if it temporarily stopped running. Then, start running.
+                                       trace.ProcSteal(gp.m.p.ptr(), true)
+                                       trace.ProcStart()
+                               } else {
+                                       // Denote blocking of the new syscall.
+                                       trace.GoSysBlock(gp.m.p.ptr())
+                                       // Denote completion of the current syscall.
+                                       trace.GoSysExit(true)
+                               }
                                traceRelease(trace)
                        })
                }
@@ -4521,7 +4624,23 @@ func exitsyscallfast_pidle() bool {
 //
 //go:nowritebarrierrec
 func exitsyscall0(gp *g) {
+       var trace traceLocker
+       if goexperiment.ExecTracer2 {
+               traceExitingSyscall()
+               trace = traceAcquire()
+       }
        casgstatus(gp, _Gsyscall, _Grunnable)
+       if goexperiment.ExecTracer2 {
+               traceExitedSyscall()
+               if trace.ok() {
+                       // Write out syscall exit eagerly in the experiment.
+                       //
+                       // It's important that we write this *after* we know whether we
+                       // lost our P or not (determined by exitsyscallfast).
+                       trace.GoSysExit(true)
+                       traceRelease(trace)
+               }
+       }
        dropg()
        lock(&sched.lock)
        var pp *p
@@ -4772,6 +4891,7 @@ func newproc1(fn *funcval, callergp *g, callerpc uintptr) *g {
        }
        newg.goid = pp.goidcache
        pp.goidcache++
+       newg.trace.reset()
        if trace.ok() {
                trace.GoCreate(newg, newg.startpc)
                traceRelease(trace)
@@ -5204,14 +5324,16 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
                cpuprof.add(tagPtr, stk[:n])
 
                gprof := gp
+               var mp *m
                var pp *p
                if gp != nil && gp.m != nil {
                        if gp.m.curg != nil {
                                gprof = gp.m.curg
                        }
+                       mp = gp.m
                        pp = gp.m.p.ptr()
                }
-               traceCPUSample(gprof, pp, stk[:n])
+               traceCPUSample(gprof, mp, pp, stk[:n])
        }
        getg().m.mallocing--
 }
@@ -5580,6 +5702,16 @@ func wirep(pp *p) {
 
 // Disassociate p and the current m.
 func releasep() *p {
+       trace := traceAcquire()
+       if trace.ok() {
+               trace.ProcStop(getg().m.p.ptr())
+               traceRelease(trace)
+       }
+       return releasepNoTrace()
+}
+
+// Disassociate p and the current m without tracing an event.
+func releasepNoTrace() *p {
        gp := getg()
 
        if gp.m.p == 0 {
@@ -5590,11 +5722,6 @@ func releasep() *p {
                print("releasep: m=", gp.m, " m->p=", gp.m.p.ptr(), " p->m=", hex(pp.m), " p->status=", pp.status, "\n")
                throw("releasep: invalid p state")
        }
-       trace := traceAcquire()
-       if trace.ok() {
-               trace.ProcStop(gp.m.p.ptr())
-               traceRelease(trace)
-       }
        gp.m.p = 0
        pp.m = 0
        pp.status = _Pidle
@@ -5943,7 +6070,7 @@ func retake(now int64) uint32 {
                                trace := traceAcquire()
                                if trace.ok() {
                                        trace.GoSysBlock(pp)
-                                       trace.ProcStop(pp)
+                                       trace.ProcSteal(pp, false)
                                        traceRelease(trace)
                                }
                                n++