From: Michael Anthony Knyszek
Date: Thu, 27 Jul 2023 19:04:04 +0000 (+0000)
Subject: runtime: refactor runtime->tracer API to appear more like a lock
X-Git-Tag: go1.22rc1~360
X-Git-Url: http://www.git.cypherpunks.ru/?a=commitdiff_plain;h=f119abb65dbe42f6cb40db698b54be3668357934;hp=e3585c67576bc1b0b161448b617eb2725e9c9d69;p=gostls13.git

runtime: refactor runtime->tracer API to appear more like a lock

Currently the execution tracer synchronizes with itself using very
heavyweight operations. As a result, it's totally fine for most of the
tracer code to look like:

	if traceEnabled() {
		traceXXX(...)
	}

However, if we want to make that synchronization more lightweight (as
issue #60773 proposes), then this is insufficient. In particular, we
need to make sure the tracer can't observe an inconsistency between g
atomicstatus and the event that would be emitted for a particular g
transition. This means making the g status change appear to happen
atomically with the corresponding trace event being written out from
the perspective of the tracer.

This requires a change in API to something more like a lock. While
we're here, we might as well make sure that trace events can *only* be
emitted while this lock is held. This change introduces such an API:
traceAcquire, which returns a value that can emit events, and
traceRelease, which requires the value that was returned by
traceAcquire. In practice, this won't be a real lock, it'll be more
like a seqlock.

For the current tracer, this API is completely overkill and the value
returned by traceAcquire basically just checks trace.enabled. But it's
necessary for the tracer described in #60773 and we can implement that
more cleanly if we do this refactoring now instead of later.

For #60773.

Change-Id: Ibb9ff5958376339fafc2b5180aef65cf2ba18646
Reviewed-on: https://go-review.googlesource.com/c/go/+/515635
LUCI-TryBot-Result: Go LUCI
Auto-Submit: Michael Knyszek
Reviewed-by: Michael Pratt
---

diff --git a/src/runtime/debugcall.go b/src/runtime/debugcall.go
index 98ab413ff4..5dd83063ff 100644
--- a/src/runtime/debugcall.go
+++ b/src/runtime/debugcall.go
@@ -166,10 +166,12 @@ func debugCallWrap(dispatch uintptr) {
 		gp.schedlink = 0
 
 		// Park the calling goroutine.
-		if traceEnabled() {
-			traceGoPark(traceBlockDebugCall, 1)
-		}
+		trace := traceAcquire()
 		casGToWaiting(gp, _Grunning, waitReasonDebugCall)
+		if trace.ok() {
+			trace.GoPark(traceBlockDebugCall, 1)
+			traceRelease(trace)
+		}
 		dropg()
 
 		// Directly execute the new goroutine. The debug
@@ -225,19 +227,23 @@ func debugCallWrap1() {
 		// Switch back to the calling goroutine. At some point
 		// the scheduler will schedule us again and we'll
 		// finish exiting.
-		if traceEnabled() {
-			traceGoSched()
-		}
+		trace := traceAcquire()
 		casgstatus(gp, _Grunning, _Grunnable)
+		if trace.ok() {
+			trace.GoSched()
+			traceRelease(trace)
+		}
 		dropg()
 		lock(&sched.lock)
 		globrunqput(gp)
 		unlock(&sched.lock)
 
-		if traceEnabled() {
-			traceGoUnpark(callingG, 0)
-		}
+		trace = traceAcquire()
 		casgstatus(callingG, _Gwaiting, _Grunnable)
+		if trace.ok() {
+			trace.GoUnpark(callingG, 0)
+			traceRelease(trace)
+		}
 		execute(callingG, true)
 	})
 }
diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go
index b1dcbd7e7d..e190b56c86 100644
--- a/src/runtime/mcentral.go
+++ b/src/runtime/mcentral.go
@@ -84,8 +84,10 @@ func (c *mcentral) cacheSpan() *mspan {
 	deductSweepCredit(spanBytes, 0)
 
 	traceDone := false
-	if traceEnabled() {
-		traceGCSweepStart()
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.GCSweepStart()
+		traceRelease(trace)
 	}
 
 	// If we sweep spanBudget spans without finding any free
@@ -157,9 +159,11 @@ func (c *mcentral) cacheSpan() *mspan {
 		}
 		sweep.active.end(sl)
 	}
-	if traceEnabled() {
-		traceGCSweepDone()
+	trace = traceAcquire()
+	if trace.ok() {
+		trace.GCSweepDone()
 		traceDone = true
+		traceRelease(trace)
 	}
 
 	// We failed to get a span from the mcentral so get one from mheap.
@@ -170,8 +174,12 @@ func (c *mcentral) cacheSpan() *mspan {
 
 	// At this point s is a span that should have free slots.
 havespan:
-	if traceEnabled() && !traceDone {
-		traceGCSweepDone()
+	if !traceDone {
+		trace := traceAcquire()
+		if trace.ok() {
+			trace.GCSweepDone()
+			traceRelease(trace)
+		}
 	}
 	n := int(s.nelems) - int(s.allocCount)
 	if n == 0 || s.freeindex == s.nelems || s.allocCount == s.nelems {
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 8a4c58888e..30d2f1d385 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -647,8 +647,10 @@ func gcStart(trigger gcTrigger) {
 	// Update it under gcsema to avoid gctrace getting wrong values.
 	work.userForced = trigger.kind == gcTriggerCycle
 
-	if traceEnabled() {
-		traceGCStart()
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.GCStart()
+		traceRelease(trace)
 	}
 
 	// Check that all Ps have finished deferred mcache flushes.
@@ -989,8 +991,10 @@ func gcMarkTermination() {
 	mp.traceback = 0
 	casgstatus(curgp, _Gwaiting, _Grunning)
 
-	if traceEnabled() {
-		traceGCDone()
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.GCDone()
+		traceRelease(trace)
 	}
 
 	// all done
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 96f88f782f..17412bf723 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -420,7 +420,11 @@ retry:
 		// If the CPU limiter is enabled, intentionally don't
 		// assist to reduce the amount of CPU time spent in the GC.
 		if traced {
-			traceGCMarkAssistDone()
+			trace := traceAcquire()
+			if trace.ok() {
+				trace.GCMarkAssistDone()
+				traceRelease(trace)
+			}
 		}
 		return
 	}
@@ -461,15 +465,23 @@ retry:
 			// We were able to steal all of the credit we
 			// needed.
 			if traced {
-				traceGCMarkAssistDone()
+				trace := traceAcquire()
+				if trace.ok() {
+					trace.GCMarkAssistDone()
+					traceRelease(trace)
+				}
 			}
 			return
 		}
 	}
 
-	if traceEnabled() && !traced {
-		traced = true
-		traceGCMarkAssistStart()
+	if !traced {
+		trace := traceAcquire()
+		if trace.ok() {
+			traced = true
+			trace.GCMarkAssistStart()
+			traceRelease(trace)
+		}
 	}
 
 	// Perform assist work
@@ -515,7 +526,11 @@ retry:
 		// At this point either background GC has satisfied
 		// this G's assist debt, or the GC cycle is over.
 	}
 	if traced {
-		traceGCMarkAssistDone()
+		trace := traceAcquire()
+		if trace.ok() {
+			trace.GCMarkAssistDone()
+			traceRelease(trace)
+		}
 	}
 }
diff --git a/src/runtime/mgcpacer.go b/src/runtime/mgcpacer.go
index 10767df26c..716e3efccc 100644
--- a/src/runtime/mgcpacer.go
+++ b/src/runtime/mgcpacer.go
@@ -807,9 +807,11 @@ func (c *gcControllerState) findRunnableGCWorker(pp *p, now int64) (*g, int64) {
 
 	// Run the background mark worker.
 	gp := node.gp.ptr()
+	trace := traceAcquire()
 	casgstatus(gp, _Gwaiting, _Grunnable)
-	if traceEnabled() {
-		traceGoUnpark(gp, 0)
+	if trace.ok() {
+		trace.GoUnpark(gp, 0)
+		traceRelease(trace)
 	}
 	return gp, now
 }
@@ -828,8 +830,10 @@ func (c *gcControllerState) resetLive(bytesMarked uint64) {
 	c.triggered = ^uint64(0) // Reset triggered.
 
 	// heapLive was updated, so emit a trace event.
-	if traceEnabled() {
-		traceHeapAlloc(bytesMarked)
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.HeapAlloc(bytesMarked)
+		traceRelease(trace)
 	}
 }
@@ -856,10 +860,12 @@ func (c *gcControllerState) markWorkerStop(mode gcMarkWorkerMode, duration int64
 
 func (c *gcControllerState) update(dHeapLive, dHeapScan int64) {
 	if dHeapLive != 0 {
+		trace := traceAcquire()
 		live := gcController.heapLive.Add(dHeapLive)
-		if traceEnabled() {
+		if trace.ok() {
 			// gcController.heapLive changed.
-			traceHeapAlloc(live)
+			trace.HeapAlloc(live)
+			traceRelease(trace)
 		}
 	}
 	if gcBlackenEnabled == 0 {
@@ -1428,8 +1434,10 @@ func gcControllerCommit() {
 
 	// TODO(mknyszek): This isn't really accurate any longer because the heap
 	// goal is computed dynamically. Still useful to snapshot, but not as useful.
-	if traceEnabled() {
-		traceHeapGoal()
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.HeapGoal()
+		traceRelease(trace)
 	}
 
 	trigger, heapGoal := gcController.trigger()
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index 03772d22bf..2d84c0d07c 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -516,8 +516,10 @@ func (sl *sweepLocked) sweep(preserve bool) bool {
 		throw("mspan.sweep: bad span state")
 	}
 
-	if traceEnabled() {
-		traceGCSweepSpan(s.npages * _PageSize)
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.GCSweepSpan(s.npages * _PageSize)
+		traceRelease(trace)
 	}
 
 	mheap_.pagesSwept.Add(int64(s.npages))
@@ -889,8 +891,10 @@ func deductSweepCredit(spanBytes uintptr, callerSweepPages uintptr) {
 		return
 	}
 
-	if traceEnabled() {
-		traceGCSweepStart()
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.GCSweepStart()
+		traceRelease(trace)
 	}
 
 	// Fix debt if necessary.
@@ -929,8 +933,10 @@ retry:
 		}
 	}
 
-	if traceEnabled() {
-		traceGCSweepDone()
+	trace = traceAcquire()
+	if trace.ok() {
+		trace.GCSweepDone()
+		traceRelease(trace)
 	}
 }
 
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index ab8f5e34e2..0bbda4aa3b 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -791,8 +791,10 @@ func (h *mheap) reclaim(npage uintptr) {
 	// traceGCSweepStart/Done pair on the P.
 	mp := acquirem()
-	if traceEnabled() {
-		traceGCSweepStart()
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.GCSweepStart()
+		traceRelease(trace)
 	}
 
 	arenas := h.sweepArenas
@@ -839,8 +841,10 @@ func (h *mheap) reclaim(npage uintptr) {
 		unlock(&h.lock)
 	}
 
-	if traceEnabled() {
-		traceGCSweepDone()
+	trace = traceAcquire()
+	if trace.ok() {
+		trace.GCSweepDone()
+		traceRelease(trace)
 	}
 	releasem(mp)
 }
@@ -911,10 +915,12 @@ func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr {
 		n -= uintptr(len(inUse) * 8)
 	}
 	sweep.active.end(sl)
-	if traceEnabled() {
+	trace := traceAcquire()
+	if trace.ok() {
 		unlock(&h.lock)
 		// Account for pages scanned but not reclaimed.
-		traceGCSweepSpan((n0 - nFreed) * pageSize)
+		trace.GCSweepSpan((n0 - nFreed) * pageSize)
+		traceRelease(trace)
 		lock(&h.lock)
 	}
 
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 7ed3068063..ae2562a5b7 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -946,10 +946,6 @@ func fastrandinit() {
 
 // Mark gp ready to run.
 func ready(gp *g, traceskip int, next bool) {
-	if traceEnabled() {
-		traceGoUnpark(gp, traceskip)
-	}
-
 	status := readgstatus(gp)
 
 	// Mark runnable.
@@ -960,7 +956,12 @@ func ready(gp *g, traceskip int, next bool) {
 	}
 
 	// status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
+	trace := traceAcquire()
 	casgstatus(gp, _Gwaiting, _Grunnable)
+	if trace.ok() {
+		trace.GoUnpark(gp, traceskip)
+		traceRelease(trace)
+	}
 	runqput(mp.p.ptr(), gp, next)
 	wakep()
 	releasem(mp)
@@ -1407,8 +1408,10 @@ var gcsema uint32 = 1
 // Holding worldsema causes any other goroutines invoking
 // stopTheWorld to block.
 func stopTheWorldWithSema(reason stwReason) {
-	if traceEnabled() {
-		traceSTWStart(reason)
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.STWStart(reason)
+		traceRelease(trace)
 	}
 	gp := getg()
 
@@ -1426,17 +1429,22 @@ func stopTheWorldWithSema(reason stwReason) {
 	gp.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic.
 	sched.stopwait--
 	// try to retake all P's in Psyscall status
+	trace = traceAcquire()
 	for _, pp := range allp {
 		s := pp.status
 		if s == _Psyscall && atomic.Cas(&pp.status, s, _Pgcstop) {
-			if traceEnabled() {
-				traceGoSysBlock(pp)
-				traceProcStop(pp)
+			if trace.ok() {
+				trace.GoSysBlock(pp)
+				trace.ProcStop(pp)
 			}
 			pp.syscalltick++
 			sched.stopwait--
 		}
 	}
+	if trace.ok() {
+		traceRelease(trace)
+	}
+
 	// stop idle P's
 	now := nanotime()
 	for {
@@ -1533,8 +1541,10 @@ func startTheWorldWithSema() int64 {
 
 	// Capture start-the-world time before doing clean-up tasks.
 	startTime := nanotime()
-	if traceEnabled() {
-		traceSTWDone()
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.STWDone()
+		traceRelease(trace)
 	}
 
 	// Wakeup an additional proc in case we have excessive runnable goroutines
@@ -1853,17 +1863,21 @@ func forEachP(fn func(*p)) {
 
 	// Force Ps currently in _Psyscall into _Pidle and hand them
 	// off to induce safe point function execution.
+	trace := traceAcquire()
 	for _, p2 := range allp {
 		s := p2.status
 		if s == _Psyscall && p2.runSafePointFn == 1 && atomic.Cas(&p2.status, s, _Pidle) {
-			if traceEnabled() {
-				traceGoSysBlock(p2)
-				traceProcStop(p2)
+			if trace.ok() {
+				trace.GoSysBlock(p2)
+				trace.ProcStop(p2)
 			}
 			p2.syscalltick++
 			handoffp(p2)
 		}
 	}
+	if trace.ok() {
+		traceRelease(trace)
+	}
 
 	// Wait for remaining Ps to run fn.
 	if wait {
@@ -2172,8 +2186,10 @@ func oneNewExtraM() {
 	if raceenabled {
 		gp.racectx = racegostart(abi.FuncPCABIInternal(newextram) + sys.PCQuantum)
 	}
-	if traceEnabled() {
-		traceOneNewExtraM(gp)
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.OneNewExtraM(gp)
+		traceRelease(trace)
 	}
 	// put on allg for garbage collector
 	allgadd(gp)
@@ -2921,13 +2937,15 @@ func execute(gp *g, inheritTime bool) {
 		setThreadCPUProfiler(hz)
 	}
 
-	if traceEnabled() {
+	trace := traceAcquire()
+	if trace.ok() {
 		// GoSysExit has to happen when we have a P, but before GoStart.
 		// So we emit it here.
 		if gp.syscallsp != 0 {
-			traceGoSysExit()
+			trace.GoSysExit()
 		}
-		traceGoStart()
+		trace.GoStart()
+		traceRelease(trace)
 	}
 
 	gogo(&gp.sched)
@@ -2964,8 +2982,12 @@ top:
 	if traceEnabled() || traceShuttingDown() {
 		gp := traceReader()
 		if gp != nil {
+			trace := traceAcquire()
 			casgstatus(gp, _Gwaiting, _Grunnable)
-			traceGoUnpark(gp, 0)
+			if trace.ok() {
+				trace.GoUnpark(gp, 0)
+				traceRelease(trace)
+			}
 			return gp, false, true
 		}
 	}
@@ -3028,9 +3050,11 @@ top:
 		gp := list.pop()
 		injectglist(&list)
 		netpollAdjustWaiters(delta)
+		trace := traceAcquire()
 		casgstatus(gp, _Gwaiting, _Grunnable)
-		if traceEnabled() {
-			traceGoUnpark(gp, 0)
+		if trace.ok() {
+			trace.GoUnpark(gp, 0)
+			traceRelease(trace)
 		}
 		return gp, false, false
 	}
@@ -3073,9 +3097,12 @@ top:
 		if node != nil {
 			pp.gcMarkWorkerMode = gcMarkWorkerIdleMode
 			gp := node.gp.ptr()
+
+			trace := traceAcquire()
 			casgstatus(gp, _Gwaiting, _Grunnable)
-			if traceEnabled() {
-				traceGoUnpark(gp, 0)
+			if trace.ok() {
+				trace.GoUnpark(gp, 0)
+				traceRelease(trace)
 			}
 			return gp, false, false
 		}
@@ -3088,9 +3115,11 @@ top:
 	// until a callback was triggered.
 	gp, otherReady := beforeIdle(now, pollUntil)
 	if gp != nil {
+		trace := traceAcquire()
 		casgstatus(gp, _Gwaiting, _Grunnable)
-		if traceEnabled() {
-			traceGoUnpark(gp, 0)
+		if trace.ok() {
+			trace.GoUnpark(gp, 0)
+			traceRelease(trace)
 		}
 		return gp, false, false
 	}
@@ -3216,9 +3245,11 @@ top:
 
 		// Run the idle worker.
 		pp.gcMarkWorkerMode = gcMarkWorkerIdleMode
+		trace := traceAcquire()
 		casgstatus(gp, _Gwaiting, _Grunnable)
-		if traceEnabled() {
-			traceGoUnpark(gp, 0)
+		if trace.ok() {
+			trace.GoUnpark(gp, 0)
+			traceRelease(trace)
 		}
 		return gp, false, false
 	}
@@ -3278,9 +3309,11 @@ top:
 		gp := list.pop()
 		injectglist(&list)
 		netpollAdjustWaiters(delta)
+		trace := traceAcquire()
 		casgstatus(gp, _Gwaiting, _Grunnable)
-		if traceEnabled() {
-			traceGoUnpark(gp, 0)
+		if trace.ok() {
+			trace.GoUnpark(gp, 0)
+			traceRelease(trace)
 		}
 		return gp, false, false
 	}
@@ -3548,10 +3581,12 @@ func injectglist(glist *gList) {
 	if glist.empty() {
 		return
 	}
-	if traceEnabled() {
+	trace := traceAcquire()
+	if trace.ok() {
 		for gp := glist.head.ptr(); gp != nil; gp = gp.schedlink.ptr() {
-			traceGoUnpark(gp, 0)
+			trace.GoUnpark(gp, 0)
 		}
+		traceRelease(trace)
 	}
 
 	// Mark all the goroutines as runnable before we put them
@@ -3791,13 +3826,16 @@ func parkunlock_c(gp *g, lock unsafe.Pointer) bool {
 func park_m(gp *g) {
 	mp := getg().m
 
-	if traceEnabled() {
-		traceGoPark(mp.waitTraceBlockReason, mp.waitTraceSkip)
-	}
+	trace := traceAcquire()
 
 	// N.B. Not using casGToWaiting here because the waitreason is
 	// set by park_m's caller.
 	casgstatus(gp, _Grunning, _Gwaiting)
+	if trace.ok() {
+		trace.GoPark(mp.waitTraceBlockReason, mp.waitTraceSkip)
+		traceRelease(trace)
+	}
+
 	dropg()
 
 	if fn := mp.waitunlockf; fn != nil {
@@ -3805,23 +3843,35 @@ func park_m(gp *g) {
 		mp.waitunlockf = nil
 		mp.waitlock = nil
 		if !ok {
-			if traceEnabled() {
-				traceGoUnpark(gp, 2)
-			}
+			trace := traceAcquire()
 			casgstatus(gp, _Gwaiting, _Grunnable)
+			if trace.ok() {
+				trace.GoUnpark(gp, 2)
+				traceRelease(trace)
+			}
 			execute(gp, true) // Schedule it back, never returns.
 		}
 	}
 	schedule()
 }
 
-func goschedImpl(gp *g) {
+func goschedImpl(gp *g, preempted bool) {
+	trace := traceAcquire()
 	status := readgstatus(gp)
 	if status&^_Gscan != _Grunning {
 		dumpgstatus(gp)
 		throw("bad g status")
 	}
 	casgstatus(gp, _Grunning, _Grunnable)
+	if trace.ok() {
+		if preempted {
+			trace.GoPreempt()
+		} else {
+			trace.GoSched()
+		}
+		traceRelease(trace)
+	}
+
 	dropg()
 	lock(&sched.lock)
 	globrunqput(gp)
@@ -3836,39 +3886,25 @@ func goschedImpl(gp *g) {
 
 // Gosched continuation on g0.
 func gosched_m(gp *g) {
-	if traceEnabled() {
-		traceGoSched()
-	}
-	goschedImpl(gp)
+	goschedImpl(gp, false)
 }
 
 // goschedguarded is a forbidden-states-avoided version of gosched_m.
 func goschedguarded_m(gp *g) {
-
 	if !canPreemptM(gp.m) {
 		gogo(&gp.sched) // never return
 	}
-
-	if traceEnabled() {
-		traceGoSched()
-	}
-	goschedImpl(gp)
+	goschedImpl(gp, false)
 }
 
 func gopreempt_m(gp *g) {
-	if traceEnabled() {
-		traceGoPreempt()
-	}
-	goschedImpl(gp)
+	goschedImpl(gp, true)
 }
 
 // preemptPark parks gp and puts it in _Gpreempted.
 //
 //go:systemstack
 func preemptPark(gp *g) {
-	if traceEnabled() {
-		traceGoPark(traceBlockPreempted, 0)
-	}
 	status := readgstatus(gp)
 	if status&^_Gscan != _Grunning {
 		dumpgstatus(gp)
@@ -3897,7 +3933,30 @@ func preemptPark(gp *g) {
 	// transitions until we can dropg.
 	casGToPreemptScan(gp, _Grunning, _Gscan|_Gpreempted)
 	dropg()
+
+	// Be careful about how we trace this next event. The ordering
+	// is subtle.
+	//
+	// The moment we CAS into _Gpreempted, suspendG could CAS to
+	// _Gwaiting, do its work, and ready the goroutine. All of
+	// this could happen before we even get the chance to emit
+	// an event. The end result is that the events could appear
+	// out of order, and the tracer generally assumes the scheduler
+	// takes care of the ordering between GoPark and GoUnpark.
+	//
+	// The answer here is simple: emit the event while we still hold
+	// the _Gscan bit on the goroutine. We still need to traceAcquire
+	// and traceRelease across the CAS because the tracer could be
+	// what's calling suspendG in the first place, and we want the
+	// CAS and event emission to appear atomic to the tracer.
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.GoPark(traceBlockPreempted, 0)
+	}
 	casfrom_Gscanstatus(gp, _Gscan|_Gpreempted, _Gpreempted)
+	if trace.ok() {
+		traceRelease(trace)
+	}
 	schedule()
 }
 
@@ -3910,11 +3969,13 @@ func goyield() {
 }
 
 func goyield_m(gp *g) {
-	if traceEnabled() {
-		traceGoPreempt()
-	}
+	trace := traceAcquire()
 	pp := gp.m.p.ptr()
 	casgstatus(gp, _Grunning, _Grunnable)
+	if trace.ok() {
+		trace.GoPreempt()
+		traceRelease(trace)
+	}
 	dropg()
 	runqput(pp, gp, false)
 	schedule()
@@ -3925,8 +3986,10 @@ func goexit1() {
 	if raceenabled {
 		racegoend()
 	}
-	if traceEnabled() {
-		traceGoEnd()
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.GoEnd()
+		traceRelease(trace)
 	}
 	mcall(goexit0)
 }
@@ -4065,6 +4128,7 @@ func save(pc, sp uintptr) {
 //
 //go:nosplit
 func reentersyscall(pc, sp uintptr) {
+	trace := traceAcquire()
 	gp := getg()
 
 	// Disable preemption because during this function g is in Gsyscall status,
@@ -4095,8 +4159,11 @@ func reentersyscall(pc, sp uintptr) {
 		})
 	}
 
-	if traceEnabled() {
-		systemstack(traceGoSysCall)
+	if trace.ok() {
+		systemstack(func() {
+			trace.GoSysCall()
+			traceRelease(trace)
+		})
 		// systemstack itself clobbers g.sched.{pc,sp} and we might
 		// need them later when the G is genuinely blocked in a
 		// syscall
@@ -4153,9 +4220,11 @@ func entersyscall_gcwait() {
 
 	lock(&sched.lock)
 	if sched.stopwait > 0 && atomic.Cas(&pp.status, _Psyscall, _Pgcstop) {
-		if traceEnabled() {
-			traceGoSysBlock(pp)
-			traceProcStop(pp)
+		trace := traceAcquire()
+		if trace.ok() {
+			trace.GoSysBlock(pp)
+			trace.ProcStop(pp)
+			traceRelease(trace)
 		}
 		pp.syscalltick++
 		if sched.stopwait--; sched.stopwait == 0 {
@@ -4209,9 +4278,11 @@ func entersyscallblock() {
 }
 
 func entersyscallblock_handoff() {
-	if traceEnabled() {
-		traceGoSysCall()
-		traceGoSysBlock(getg().m.p.ptr())
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.GoSysCall()
+		trace.GoSysBlock(getg().m.p.ptr())
+		traceRelease(trace)
 	}
 	handoffp(releasep())
 }
@@ -4250,15 +4321,21 @@ func exitsyscall() {
 			tryRecordGoroutineProfileWB(gp)
 		})
 	}
-	if traceEnabled() {
+	trace := traceAcquire()
+	if trace.ok() {
 		if oldp != gp.m.p.ptr() || gp.m.syscalltick != gp.m.p.ptr().syscalltick {
-			systemstack(traceGoStart)
+			systemstack(func() {
+				trace.GoStart()
+			})
 		}
 	}
 	// There's a cpu for us, so we can run.
 	gp.m.p.ptr().syscalltick++
 	// We need to cas the status and scan before resuming...
 	casgstatus(gp, _Gsyscall, _Grunning)
+	if trace.ok() {
+		traceRelease(trace)
+	}
 
 	// Garbage collector isn't running (since we are),
 	// so okay to clear syscallsp.
@@ -4281,7 +4358,8 @@ func exitsyscall() {
 		return
 	}
 
-	if traceEnabled() {
+	trace := traceAcquire()
+	if trace.ok() {
 		// Wait till traceGoSysBlock event is emitted.
 		// This ensures consistency of the trace (the goroutine is started after it is blocked).
 		for oldp != nil && oldp.syscalltick == gp.m.syscalltick {
@@ -4292,6 +4370,7 @@ func exitsyscall() {
 		// So instead we remember the syscall exit time and emit the event
 		// in execute when we have a P.
 		gp.trace.sysExitTime = traceClockNow()
+		traceRelease(trace)
 	}
 
 	gp.m.locks--
@@ -4332,15 +4411,19 @@ func exitsyscallfast(oldp *p) bool {
 	var ok bool
 	systemstack(func() {
 		ok = exitsyscallfast_pidle()
-		if ok && traceEnabled() {
-			if oldp != nil {
-				// Wait till traceGoSysBlock event is emitted.
-				// This ensures consistency of the trace (the goroutine is started after it is blocked).
-				for oldp.syscalltick == gp.m.syscalltick {
-					osyield()
+		if ok {
+			trace := traceAcquire()
+			if trace.ok() {
+				if oldp != nil {
+					// Wait till traceGoSysBlock event is emitted.
+					// This ensures consistency of the trace (the goroutine is started after it is blocked).
+					for oldp.syscalltick == gp.m.syscalltick {
+						osyield()
+					}
 				}
+				trace.GoSysExit()
+				traceRelease(trace)
 			}
-			traceGoSysExit()
 		}
 	})
 	if ok {
@@ -4358,15 +4441,17 @@ func exitsyscallfast(oldp *p) bool {
 func exitsyscallfast_reacquired() {
 	gp := getg()
 	if gp.m.syscalltick != gp.m.p.ptr().syscalltick {
-		if traceEnabled() {
+		trace := traceAcquire()
+		if trace.ok() {
 			// The p was retaken and then enter into syscall again (since gp.m.syscalltick has changed).
 			// traceGoSysBlock for this syscall was already emitted,
 			// but here we effectively retake the p from the new syscall running on the same p.
 			systemstack(func() {
 				// Denote blocking of the new syscall.
-				traceGoSysBlock(gp.m.p.ptr())
+				trace.GoSysBlock(gp.m.p.ptr())
 				// Denote completion of the current syscall.
-				traceGoSysExit()
+				trace.GoSysExit()
+				traceRelease(trace)
 			})
 		}
 		gp.m.p.ptr().syscalltick++
@@ -4631,9 +4716,11 @@ func newproc1(fn *funcval, callergp *g, callerpc uintptr) *g {
 	if newg.trackingSeq%gTrackingPeriod == 0 {
 		newg.tracking = true
 	}
-	casgstatus(newg, _Gdead, _Grunnable)
 	gcController.addScannableStack(pp, int64(newg.stack.hi-newg.stack.lo))
 
+	// Get a goid and switch to runnable. Make all this atomic to the tracer.
+	trace := traceAcquire()
+	casgstatus(newg, _Gdead, _Grunnable)
 	if pp.goidcache == pp.goidcacheend {
 		// Sched.goidgen is the last allocated id,
 		// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
@@ -4644,6 +4731,12 @@ func newproc1(fn *funcval, callergp *g, callerpc uintptr) *g {
 	}
 	newg.goid = pp.goidcache
 	pp.goidcache++
+	if trace.ok() {
+		trace.GoCreate(newg, newg.startpc)
+		traceRelease(trace)
+	}
+
+	// Set up race context.
 	if raceenabled {
 		newg.racectx = racegostart(callerpc)
 		newg.raceignore = 0
@@ -4653,9 +4746,6 @@ func newproc1(fn *funcval, callergp *g, callerpc uintptr) *g {
 			racereleasemergeg(newg, unsafe.Pointer(&labelSync))
 		}
 	}
-	if traceEnabled() {
-		traceGoCreate(newg, newg.startpc)
-	}
 	releasem(mp)
 
 	return newg
@@ -5264,8 +5354,10 @@ func procresize(nprocs int32) *p {
 	if old < 0 || nprocs <= 0 {
 		throw("procresize: invalid arg")
 	}
-	if traceEnabled() {
-		traceGomaxprocs(nprocs)
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.Gomaxprocs(nprocs)
+		traceRelease(trace)
 	}
 
 	// update statistics
@@ -5330,12 +5422,14 @@ func procresize(nprocs int32) *p {
 		// because p.destroy itself has write barriers, so we
 		// need to do that from a valid P.
 		if gp.m.p != 0 {
-			if traceEnabled() {
+			trace := traceAcquire()
+			if trace.ok() {
 				// Pretend that we were descheduled
 				// and then scheduled again to keep
 				// the trace sane.
-				traceGoSched()
-				traceProcStop(gp.m.p.ptr())
+				trace.GoSched()
+				trace.ProcStop(gp.m.p.ptr())
+				traceRelease(trace)
 			}
 			gp.m.p.ptr().m = 0
 		}
@@ -5344,8 +5438,10 @@ func procresize(nprocs int32) *p {
 		pp.m = 0
 		pp.status = _Pidle
 		acquirep(pp)
-		if traceEnabled() {
-			traceGoStart()
+		trace := traceAcquire()
+		if trace.ok() {
+			trace.GoStart()
+			traceRelease(trace)
 		}
 	}
 
@@ -5409,8 +5505,10 @@ func acquirep(pp *p) {
 	// Perform deferred mcache flush before this P can allocate
 	// from a potentially stale mcache.
 	pp.mcache.prepareForSweep()
 
-	if traceEnabled() {
-		traceProcStart()
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.ProcStart()
+		traceRelease(trace)
 	}
 }
@@ -5451,8 +5549,10 @@ func releasep() *p {
 		print("releasep: m=", gp.m, " m->p=", gp.m.p.ptr(), " p->m=", hex(pp.m), " p->status=", pp.status, "\n")
 		throw("releasep: invalid p state")
 	}
-	if traceEnabled() {
-		traceProcStop(gp.m.p.ptr())
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.ProcStop(gp.m.p.ptr())
+		traceRelease(trace)
 	}
 	gp.m.p = 0
 	pp.m = 0
@@ -5799,9 +5899,11 @@ func retake(now int64) uint32 {
 			// increment nmidle and report deadlock.
 			incidlelocked(-1)
 			if atomic.Cas(&pp.status, s, _Pidle) {
-				if traceEnabled() {
-					traceGoSysBlock(pp)
-					traceProcStop(pp)
+				trace := traceAcquire()
+				if trace.ok() {
+					trace.GoSysBlock(pp)
+					trace.ProcStop(pp)
+					traceRelease(trace)
 				}
 				n++
 				pp.syscalltick++
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index cfb1ae7a92..a3bcdee010 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -258,6 +258,8 @@ func traceBufPtrOf(b *traceBuf) traceBufPtr {
 
 // traceEnabled returns true if the trace is currently enabled.
 //
+// nosplit because it's called on the syscall path when stack movement is forbidden.
+//
 //go:nosplit
 func traceEnabled() bool {
 	return trace.enabled
@@ -270,6 +272,52 @@ func traceShuttingDown() bool {
 	return trace.shutdown
 }
 
+// traceLocker represents an M writing trace events. While a traceLocker value
+// is valid, the tracer observes all operations on the G/M/P or trace events being
+// written as happening atomically.
+//
+// This doesn't do much for the current tracer, because the current tracer doesn't
+// need atomicity around non-trace runtime operations. All the state it needs it
+// collects carefully during a STW.
+type traceLocker struct {
+	enabled bool
+}
+
+// traceAcquire prepares this M for writing one or more trace events.
+//
+// This exists for compatibility with the upcoming new tracer; it doesn't do much
+// in the current tracer.
+//
+// nosplit because it's called on the syscall path when stack movement is forbidden.
+//
+//go:nosplit
+func traceAcquire() traceLocker {
+	if !traceEnabled() {
+		return traceLocker{false}
+	}
+	return traceLocker{true}
+}
+
+// ok returns true if the traceLocker is valid (i.e. tracing is enabled).
+//
+// nosplit because it's called on the syscall path when stack movement is forbidden.
+//
+//go:nosplit
+func (tl traceLocker) ok() bool {
+	return tl.enabled
+}
+
+// traceRelease indicates that this M is done writing trace events.
+//
+// This exists for compatibility with the upcoming new tracer; it doesn't do anything
+// in the current tracer.
+//
+// nosplit because it's called on the syscall path when stack movement is forbidden.
+//
+//go:nosplit
+func traceRelease(tl traceLocker) {
+}
+
 // StartTrace enables tracing for the current process.
 // While tracing, the data will be buffered and available via ReadTrace.
 // StartTrace returns an error if tracing is already enabled.
@@ -367,8 +415,10 @@ func StartTrace() error {
 			gp.trace.tracedSyscallEnter = false
 		}
 	})
-	traceProcStart()
-	traceGoStart()
+	// Use a dummy traceLocker. The trace isn't enabled yet, but we can still write events.
+	tl := traceLocker{}
+	tl.ProcStart()
+	tl.GoStart()
 	// Note: startTicks needs to be set after we emit traceEvGoInSyscall events.
 	// If we do it the other way around, it is possible that exitsyscall will
 	// query sysExitTime after startTicks but before traceEvGoInSyscall timestamp.
@@ -401,7 +451,10 @@ func StartTrace() error {
 	unlock(&sched.sysmonlock)
 
 	// Record the current state of HeapGoal to avoid information loss in trace.
-	traceHeapGoal()
+	//
+	// Use the same dummy trace locker. The trace can't end until after we start
+	// the world, and we can safely trace from here.
+	tl.HeapGoal()
 
 	startTheWorldGC()
 	return nil
@@ -427,7 +480,10 @@ func StopTrace() {
 		return
 	}
 
-	traceGoSched()
+	// Trace GoSched for us, and use a dummy locker. The world is stopped
+	// and we control whether the trace is enabled, so this is safe.
+	tl := traceLocker{}
+	tl.GoSched()
 
 	atomicstorep(unsafe.Pointer(&trace.cpuLogWrite), nil)
 	trace.cpuLogRead.close()
@@ -847,7 +903,7 @@ func traceEventLocked(extraBytes int, mp *m, pid int32, bufp *traceBufPtr, ev by
 // profiling buffer. It is called from a signal handler, so is limited in what
 // it can do.
 func traceCPUSample(gp *g, pp *p, stk []uintptr) {
-	if !trace.enabled {
+	if !traceEnabled() {
 		// Tracing is usually turned off; don't spend time acquiring the signal
 		// lock unless it's active.
 		return
@@ -1475,15 +1531,15 @@ func (a *traceAlloc) drop() {
 
 // The following functions write specific events to trace.
 
-func traceGomaxprocs(procs int32) {
+func (_ traceLocker) Gomaxprocs(procs int32) {
 	traceEvent(traceEvGomaxprocs, 1, uint64(procs))
 }
 
-func traceProcStart() {
+func (_ traceLocker) ProcStart() {
 	traceEvent(traceEvProcStart, -1, uint64(getg().m.id))
 }
 
-func traceProcStop(pp *p) {
+func (_ traceLocker) ProcStop(pp *p) {
 	// Sysmon and stopTheWorld can stop Ps blocked in syscalls,
 	// to handle this we temporary employ the P.
 	mp := acquirem()
@@ -1494,16 +1550,16 @@ func traceProcStop(pp *p) {
 	releasem(mp)
 }
 
-func traceGCStart() {
+func (_ traceLocker) GCStart() {
 	traceEvent(traceEvGCStart, 3, trace.seqGC)
 	trace.seqGC++
 }
 
-func traceGCDone() {
+func (_ traceLocker) GCDone() {
 	traceEvent(traceEvGCDone, -1)
 }
 
-func traceSTWStart(reason stwReason) {
+func (_ traceLocker) STWStart(reason stwReason) {
 	// Don't trace if this STW is for trace start/stop, since traceEnabled
 	// switches during a STW.
 	if reason == stwStartTrace || reason == stwStopTrace {
@@ -1513,7 +1569,7 @@ func traceSTWStart(reason stwReason) {
 	traceEvent(traceEvSTWStart, -1, uint64(reason))
 }
 
-func traceSTWDone() {
+func (_ traceLocker) STWDone() {
 	mp := getg().m
 	if !mp.trace.tracedSTWStart {
 		return
@@ -1527,7 +1583,7 @@ func traceSTWDone() {
 //
 // traceGCSweepStart must be paired with traceGCSweepDone and there
 // must be no preemption points between these two calls.
-func traceGCSweepStart() {
+func (_ traceLocker) GCSweepStart() {
 	// Delay the actual GCSweepStart event until the first span
 	// sweep. If we don't sweep anything, don't emit any events.
 	pp := getg().m.p.ptr()
@@ -1541,7 +1597,7 @@ func traceGCSweepStart() {
 //
 // This may be called outside a traceGCSweepStart/traceGCSweepDone
 // pair; however, it will not emit any trace events in this case.
-func traceGCSweepSpan(bytesSwept uintptr) {
+func (_ traceLocker) GCSweepSpan(bytesSwept uintptr) {
 	pp := getg().m.p.ptr()
 	if pp.trace.inSweep {
 		if pp.trace.swept == 0 {
@@ -1551,7 +1607,7 @@ func traceGCSweepSpan(bytesSwept uintptr) {
 	}
 }
 
-func traceGCSweepDone() {
+func (_ traceLocker) GCSweepDone() {
 	pp := getg().m.p.ptr()
 	if !pp.trace.inSweep {
 		throw("missing traceGCSweepStart")
@@ -1562,15 +1618,15 @@ func traceGCSweepDone() {
 	pp.trace.inSweep = false
 }
 
-func traceGCMarkAssistStart() {
+func (_ traceLocker) GCMarkAssistStart() {
 	traceEvent(traceEvGCMarkAssistStart, 1)
 }
 
-func traceGCMarkAssistDone() {
+func (_ traceLocker) GCMarkAssistDone() {
 	traceEvent(traceEvGCMarkAssistDone, -1)
 }
 
-func traceGoCreate(newg *g, pc uintptr) {
+func (_ traceLocker) GoCreate(newg *g, pc uintptr) {
 	newg.trace.seq = 0
 	newg.trace.lastP = getg().m.p
 	// +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum.
@@ -1578,7 +1634,7 @@ func traceGoCreate(newg *g, pc uintptr) {
 	traceEvent(traceEvGoCreate, 2, newg.goid, uint64(id))
 }
 
-func traceGoStart() {
+func (_ traceLocker) GoStart() {
 	gp := getg().m.curg
 	pp := gp.m.p
 	gp.trace.seq++
@@ -1592,29 +1648,29 @@ func traceGoStart() {
 	}
 }
 
-func traceGoEnd() {
+func (_ traceLocker) GoEnd() {
 	traceEvent(traceEvGoEnd, -1)
 }
 
-func traceGoSched() {
+func (_ traceLocker) GoSched() {
 	gp := getg()
 	gp.trace.lastP = gp.m.p
 	traceEvent(traceEvGoSched, 1)
 }
 
-func traceGoPreempt() {
+func (_ traceLocker) GoPreempt() {
 	gp := getg()
 	gp.trace.lastP = gp.m.p
 	traceEvent(traceEvGoPreempt, 1)
 }
 
-func traceGoPark(reason traceBlockReason, skip int) {
+func (_ traceLocker) GoPark(reason traceBlockReason, skip int) {
 	// Convert the block reason directly to a trace event type.
 	// See traceBlockReason for more information.
 	traceEvent(byte(reason), skip)
 }
 
-func traceGoUnpark(gp *g, skip int) {
+func (_ traceLocker) GoUnpark(gp *g, skip int) {
 	pp := getg().m.p
 	gp.trace.seq++
 	if gp.trace.lastP == pp {
@@ -1625,7 +1681,7 @@ func traceGoUnpark(gp *g, skip int) {
 	}
 }
 
-func traceGoSysCall() {
+func (_ traceLocker) GoSysCall() {
 	var skip int
 	switch {
 	case tracefpunwindoff():
@@ -1646,7 +1702,7 @@ func traceGoSysCall() {
 	traceEvent(traceEvGoSysCall, skip)
 }
 
-func traceGoSysExit() {
+func (_ traceLocker) GoSysExit() {
 	gp := getg().m.curg
 	if !gp.trace.tracedSyscallEnter {
 		// There was no syscall entry traced for us at all, so there's definitely
@@ -1673,7 +1729,7 @@ func traceGoSysExit() {
 	traceEvent(traceEvGoSysExit, -1, gp.goid, gp.trace.seq, uint64(ts))
 }
 
-func traceGoSysBlock(pp *p) {
+func (_ traceLocker) GoSysBlock(pp *p) {
 	// Sysmon and stopTheWorld can declare syscalls running on remote Ps as blocked,
 	// to handle this we temporary employ the P.
 	mp := acquirem()
@@ -1684,11 +1740,11 @@ func traceGoSysBlock(pp *p) {
 	releasem(mp)
 }
 
-func traceHeapAlloc(live uint64) {
+func (_ traceLocker) HeapAlloc(live uint64) {
 	traceEvent(traceEvHeapAlloc, -1, live)
 }
 
-func traceHeapGoal() {
+func (_ traceLocker) HeapGoal() {
 	heapGoal := gcController.heapGoal()
 	if heapGoal == ^uint64(0) {
 		// Heap-based triggering is disabled.
@@ -1789,15 +1845,15 @@ func startPCforTrace(pc uintptr) uintptr {
 	return f.datap.textAddr(*(*uint32)(w))
 }
 
-// traceOneNewExtraM registers the fact that a new extra M was created with
+// OneNewExtraM registers the fact that a new extra M was created with
 // the tracer. This matters if the M (which has an attached G) is used while
 // the trace is still active because if it is, we need the fact that it exists
 // to show up in the final trace.
-func traceOneNewExtraM(gp *g) {
+func (tl traceLocker) OneNewExtraM(gp *g) {
 	// Trigger two trace events for the locked g in the extra m,
 	// since the next event of the g will be traceEvGoSysExit in exitsyscall,
 	// while calling from C thread to Go.
-	traceGoCreate(gp, 0) // no start pc
+	tl.GoCreate(gp, 0) // no start pc
 	gp.trace.seq++
 	traceEvent(traceEvGoInSyscall, -1, gp.goid)
}
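
Usage note (not part of the patch): every call site converted above follows the
same shape: traceAcquire, perform the G/P state transition, emit events only if
the locker is valid, then traceRelease. Below is a minimal, self-contained Go
sketch of that shape. It is an illustration only: the simplified traceLocker
here (an atomic "enabled" flag plus a printing GoPark method) stands in for the
runtime's real tracer state and event machinery.

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	// traceOn stands in for trace.enabled.
	var traceOn atomic.Bool

	// traceLocker is valid between traceAcquire and traceRelease. While it
	// is held, a state transition and the event emitted for it appear
	// atomic to the (hypothetical) tracer; the planned tracer would enforce
	// this seqlock-style.
	type traceLocker struct{ enabled bool }

	func traceAcquire() traceLocker { return traceLocker{enabled: traceOn.Load()} }

	func (tl traceLocker) ok() bool { return tl.enabled }

	// GoPark stands in for an event-writing method; events can only be
	// emitted through a valid traceLocker.
	func (tl traceLocker) GoPark(reason string) { fmt.Println("GoPark:", reason) }

	// traceRelease is a no-op here, as in the current tracer.
	func traceRelease(tl traceLocker) {}

	func main() {
		traceOn.Store(true)

		trace := traceAcquire()
		// The goroutine status change (e.g. casgstatus) would happen here,
		// between acquire and release, so the tracer cannot observe the
		// transition and its event inconsistently.
		if trace.ok() {
			trace.GoPark("example")
			traceRelease(trace)
		}
	}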