// list can arrive a few different ways, but it will always
// contain the init tasks computed by the linker for all the
// packages in the program (excluding those added at runtime
- // by package plugin).
- for _, m := range activeModules() {
+ // by package plugin). Run through the modules in dependency
+ // order (the order they are initialized by the dynamic loader,
+ // i.e. the order in which they are added to the moduledata linked
+ // list).
+ for m := &firstmoduledata; m != nil; m = m.next {
doInit(m.inittasks)
}
//go:nosplit
//go:nowritebarrierrec
func badmorestackg0() {
- writeErrStr("fatal: morestack on g0\n")
+ if !crashStackImplemented {
+ writeErrStr("fatal: morestack on g0\n")
+ return
+ }
+
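+ // Capture g before switching stacks: once on the crash stack, getg()
+ // returns gcrash, and the traceback below must describe the
+ // overflowing g0.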
+ g := getg()
+ switchToCrashStack(func() {
+ print("runtime: morestack on g0, stack [", hex(g.stack.lo), " ", hex(g.stack.hi), "], sp=", hex(g.sched.sp), ", called from\n")
+ g.m.traceback = 2 // include pc and sp in stack trace
+ traceback1(g.sched.pc, g.sched.sp, g.sched.lr, g, 0)
+ print("\n")
+
+ throw("morestack on g0")
+ })
}
//go:nosplit
throw("ctxt != 0")
}
+// gcrash is a fake g that can be used when crashing due to bad
+// stack conditions.
+var gcrash g
+
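+// crashingG holds the g that currently owns the crash stack, if any.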
+var crashingG atomic.Pointer[g]
+
+// Switch to crashstack and call fn, with special handling of
+// concurrent and recursive cases.
+//
+// Nosplit as it is called in a bad stack condition (we know
+// morestack would fail).
+//
+//go:nosplit
+//go:nowritebarrierrec
+func switchToCrashStack(fn func()) {
+ me := getg()
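+ // Only the first crashing g wins this CAS and gets to use the crash
+ // stack; later arrivals fall through to the cases below.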
+ if crashingG.CompareAndSwapNoWB(nil, me) {
+ switchToCrashStack0(fn) // should never return
+ abort()
+ }
+ if crashingG.Load() == me {
+  // Recursive crashing. Too bad.
+ writeErrStr("fatal: recursive switchToCrashStack\n")
+ abort()
+ }
+ // Another g is crashing. Give it some time; hopefully it will finish its traceback.
+ usleep_no_g(100)
+ writeErrStr("fatal: concurrent switchToCrashStack\n")
+ abort()
+}
+
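+// crashStackImplemented reports whether switchToCrashStack0 has an
+// assembly implementation for this architecture; on other GOARCHes
+// badmorestackg0 falls back to the plain error message.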
+const crashStackImplemented = GOARCH == "amd64" || GOARCH == "arm64" || GOARCH == "mips64" || GOARCH == "mips64le" || GOARCH == "riscv64"
+
+//go:noescape
+func switchToCrashStack0(fn func()) // in assembly
+
func lockedOSThread() bool {
gp := getg()
return gp.lockedm != 0 && gp.m.lockedg != 0
parsedebugvars()
gcinit()
+ // Allocate stack space that can be used when crashing due to bad stack
+ // conditions, e.g. morestack on g0.
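+ // The guards sit just 1000 bytes above stack.lo, leaving almost the
+ // entire 16 KB usable before a stack check trips.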
+ gcrash.stack = stackalloc(16384)
+ gcrash.stackguard0 = gcrash.stack.lo + 1000
+ gcrash.stackguard1 = gcrash.stack.lo + 1000
+
// if disableMemoryProfiling is set, update MemProfileRate to 0 to turn off memprofile.
// Note: parsedebugvars may update MemProfileRate, but when disableMemoryProfiling is
// set to true by the linker, it means that nothing is consuming the profile, it is
// Mark gp ready to run.
func ready(gp *g, traceskip int, next bool) {
- if traceEnabled() {
- traceGoUnpark(gp, traceskip)
- }
-
status := readgstatus(gp)
// Mark runnable.
}
// status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
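+ // Acquire the tracer before the status CAS so the transition and its
+ // GoUnpark event appear atomic to the tracer.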
+ trace := traceAcquire()
casgstatus(gp, _Gwaiting, _Grunnable)
+ if trace.ok() {
+ trace.GoUnpark(gp, traceskip)
+ traceRelease(trace)
+ }
runqput(mp.p.ptr(), gp, next)
wakep()
releasem(mp)
// Holding worldsema causes any other goroutines invoking
// stopTheWorld to block.
func stopTheWorldWithSema(reason stwReason) {
- if traceEnabled() {
- traceSTWStart(reason)
+ trace := traceAcquire()
+ if trace.ok() {
+ trace.STWStart(reason)
+ traceRelease(trace)
}
gp := getg()
gp.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic.
sched.stopwait--
// try to retake all P's in Psyscall status
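+ // Hold the tracer across the whole loop so each P's GoSysBlock and
+ // ProcStop events are emitted under a single acquisition.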
+ trace = traceAcquire()
for _, pp := range allp {
s := pp.status
if s == _Psyscall && atomic.Cas(&pp.status, s, _Pgcstop) {
- if traceEnabled() {
- traceGoSysBlock(pp)
- traceProcStop(pp)
+ if trace.ok() {
+ trace.GoSysBlock(pp)
+ trace.ProcStop(pp)
}
pp.syscalltick++
sched.stopwait--
}
}
+ if trace.ok() {
+ traceRelease(trace)
+ }
+
// stop idle P's
now := nanotime()
for {
// Capture start-the-world time before doing clean-up tasks.
startTime := nanotime()
- if traceEnabled() {
- traceSTWDone()
+ trace := traceAcquire()
+ if trace.ok() {
+ trace.STWDone()
+ traceRelease(trace)
}
// Wakeup an additional proc in case we have excessive runnable goroutines
// but is somewhat arbitrary.
size := gp.stack.hi
if size == 0 {
- size = 8192 * sys.StackGuardMultiplier
+ size = 16384 * sys.StackGuardMultiplier
}
gp.stack.hi = uintptr(noescape(unsafe.Pointer(&size)))
gp.stack.lo = gp.stack.hi - size + 1024
// Force Ps currently in _Psyscall into _Pidle and hand them
// off to induce safe point function execution.
+ trace := traceAcquire()
for _, p2 := range allp {
s := p2.status
if s == _Psyscall && p2.runSafePointFn == 1 && atomic.Cas(&p2.status, s, _Pidle) {
- if traceEnabled() {
- traceGoSysBlock(p2)
- traceProcStop(p2)
+ if trace.ok() {
+ trace.GoSysBlock(p2)
+ trace.ProcStop(p2)
}
p2.syscalltick++
handoffp(p2)
}
}
+ if trace.ok() {
+ traceRelease(trace)
+ }
// Wait for remaining Ps to run fn.
if wait {
if iscgo || mStackIsSystemAllocated() {
mp.g0 = malg(-1)
} else {
- mp.g0 = malg(8192 * sys.StackGuardMultiplier)
+ mp.g0 = malg(16384 * sys.StackGuardMultiplier)
}
mp.g0.m = mp
osSetupTLS(mp)
// Install g (= m->g0) and set the stack bounds
- // to match the current stack. If we don't actually know
- // how big the stack is, like we don't know how big any
- // scheduling stack is, but we assume there's at least 32 kB.
- // If we can get a more accurate stack bound from pthread,
- // use that.
+ // to match the current stack.
setg(mp.g0)
- gp := getg()
- gp.stack.hi = getcallersp() + 1024
- gp.stack.lo = getcallersp() - 32*1024
- if !signal && _cgo_getstackbound != nil {
- // Don't adjust if called from the signal handler.
- // We are on the signal stack, not the pthread stack.
- // (We could get the stack bounds from sigaltstack, but
- // we're getting out of the signal handler very soon
- // anyway. Not worth it.)
- var bounds [2]uintptr
- asmcgocall(_cgo_getstackbound, unsafe.Pointer(&bounds))
- // getstackbound is an unsupported no-op on Windows.
- if bounds[0] != 0 {
- gp.stack.lo = bounds[0]
- gp.stack.hi = bounds[1]
- }
- }
- gp.stackguard0 = gp.stack.lo + stackGuard
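+ // callbackUpdateSystemStack derives g0's stack bounds from the current
+ // SP, consulting _cgo_getstackbound where available, replacing the
+ // fixed 32 kB guess previously hard-coded here.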
+ sp := getcallersp()
+ callbackUpdateSystemStack(mp, sp, signal)
// Should mark we are already in Go now.
// Otherwise, we may call needm again when we get a signal, before cgocallbackg1,
if raceenabled {
gp.racectx = racegostart(abi.FuncPCABIInternal(newextram) + sys.PCQuantum)
}
- if traceEnabled() {
- traceOneNewExtraM(gp)
+ trace := traceAcquire()
+ if trace.ok() {
+ trace.OneNewExtraM(gp)
+ traceRelease(trace)
}
// put on allg for garbage collector
allgadd(gp)
// So that the destructor would invoke dropm while the non-Go thread is exiting.
// This is much faster since it avoids expensive signal-related syscalls.
//
-// NOTE: this always runs without a P, so, nowritebarrierrec required.
+// This always runs without a P, so //go:nowritebarrierrec is required.
+//
+// This may run with a different stack than was recorded in g0 (there is no
+// call to callbackUpdateSystemStack prior to dropm), so this must be
+// //go:nosplit to avoid the stack bounds check.
//
//go:nowritebarrierrec
+//go:nosplit
func dropm() {
// Clear m and g, and return m to the extra list.
// After the call to setg we can only call nosplit functions
setg(nil)
+ // Clear g0 stack bounds to ensure that needm always refreshes the
+ // bounds when reusing this M.
+ g0 := mp.g0
+ g0.stack.hi = 0
+ g0.stack.lo = 0
+ g0.stackguard0 = 0
+ g0.stackguard1 = 0
+
putExtraM(mp)
msigrestore(sigmask)
setThreadCPUProfiler(hz)
}
- if traceEnabled() {
+ trace := traceAcquire()
+ if trace.ok() {
// GoSysExit has to happen when we have a P, but before GoStart.
// So we emit it here.
if gp.syscallsp != 0 {
- traceGoSysExit()
+ trace.GoSysExit()
}
- traceGoStart()
+ trace.GoStart()
+ traceRelease(trace)
}
gogo(&gp.sched)
if traceEnabled() || traceShuttingDown() {
gp := traceReader()
if gp != nil {
+ trace := traceAcquire()
casgstatus(gp, _Gwaiting, _Grunnable)
- traceGoUnpark(gp, 0)
+ if trace.ok() {
+ trace.GoUnpark(gp, 0)
+ traceRelease(trace)
+ }
return gp, false, true
}
}
gp := list.pop()
injectglist(&list)
netpollAdjustWaiters(delta)
+ trace := traceAcquire()
casgstatus(gp, _Gwaiting, _Grunnable)
- if traceEnabled() {
- traceGoUnpark(gp, 0)
+ if trace.ok() {
+ trace.GoUnpark(gp, 0)
+ traceRelease(trace)
}
return gp, false, false
}
if node != nil {
pp.gcMarkWorkerMode = gcMarkWorkerIdleMode
gp := node.gp.ptr()
+
+ trace := traceAcquire()
casgstatus(gp, _Gwaiting, _Grunnable)
- if traceEnabled() {
- traceGoUnpark(gp, 0)
+ if trace.ok() {
+ trace.GoUnpark(gp, 0)
+ traceRelease(trace)
}
return gp, false, false
}
// until a callback was triggered.
gp, otherReady := beforeIdle(now, pollUntil)
if gp != nil {
+ trace := traceAcquire()
casgstatus(gp, _Gwaiting, _Grunnable)
- if traceEnabled() {
- traceGoUnpark(gp, 0)
+ if trace.ok() {
+ trace.GoUnpark(gp, 0)
+ traceRelease(trace)
}
return gp, false, false
}
// Run the idle worker.
pp.gcMarkWorkerMode = gcMarkWorkerIdleMode
+ trace := traceAcquire()
casgstatus(gp, _Gwaiting, _Grunnable)
- if traceEnabled() {
- traceGoUnpark(gp, 0)
+ if trace.ok() {
+ trace.GoUnpark(gp, 0)
+ traceRelease(trace)
}
return gp, false, false
}
gp := list.pop()
injectglist(&list)
netpollAdjustWaiters(delta)
+ trace := traceAcquire()
casgstatus(gp, _Gwaiting, _Grunnable)
- if traceEnabled() {
- traceGoUnpark(gp, 0)
+ if trace.ok() {
+ trace.GoUnpark(gp, 0)
+ traceRelease(trace)
}
return gp, false, false
}
if glist.empty() {
return
}
- if traceEnabled() {
+ trace := traceAcquire()
+ if trace.ok() {
for gp := glist.head.ptr(); gp != nil; gp = gp.schedlink.ptr() {
- traceGoUnpark(gp, 0)
+ trace.GoUnpark(gp, 0)
}
+ traceRelease(trace)
}
// Mark all the goroutines as runnable before we put them
func park_m(gp *g) {
mp := getg().m
- if traceEnabled() {
- traceGoPark(mp.waitTraceBlockReason, mp.waitTraceSkip)
- }
+ trace := traceAcquire()
// N.B. Not using casGToWaiting here because the waitreason is
// set by park_m's caller.
casgstatus(gp, _Grunning, _Gwaiting)
+ if trace.ok() {
+ trace.GoPark(mp.waitTraceBlockReason, mp.waitTraceSkip)
+ traceRelease(trace)
+ }
+
dropg()
if fn := mp.waitunlockf; fn != nil {
mp.waitunlockf = nil
mp.waitlock = nil
if !ok {
- if traceEnabled() {
- traceGoUnpark(gp, 2)
- }
+ trace := traceAcquire()
casgstatus(gp, _Gwaiting, _Grunnable)
+ if trace.ok() {
+ trace.GoUnpark(gp, 2)
+ traceRelease(trace)
+ }
execute(gp, true) // Schedule it back, never returns.
}
}
schedule()
}
-func goschedImpl(gp *g) {
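+// goschedImpl is the common yield path for Gosched and preemption;
+// preempted selects which trace event is emitted (GoPreempt or GoSched).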
+func goschedImpl(gp *g, preempted bool) {
+ trace := traceAcquire()
status := readgstatus(gp)
if status&^_Gscan != _Grunning {
dumpgstatus(gp)
throw("bad g status")
}
casgstatus(gp, _Grunning, _Grunnable)
+ if trace.ok() {
+ if preempted {
+ trace.GoPreempt()
+ } else {
+ trace.GoSched()
+ }
+ traceRelease(trace)
+ }
+
dropg()
lock(&sched.lock)
globrunqput(gp)
// Gosched continuation on g0.
func gosched_m(gp *g) {
- if traceEnabled() {
- traceGoSched()
- }
- goschedImpl(gp)
+ goschedImpl(gp, false)
}
// goschedguarded is a forbidden-states-avoided version of gosched_m.
func goschedguarded_m(gp *g) {
-
if !canPreemptM(gp.m) {
gogo(&gp.sched) // never return
}
-
- if traceEnabled() {
- traceGoSched()
- }
- goschedImpl(gp)
+ goschedImpl(gp, false)
}
func gopreempt_m(gp *g) {
- if traceEnabled() {
- traceGoPreempt()
- }
- goschedImpl(gp)
+ goschedImpl(gp, true)
}
// preemptPark parks gp and puts it in _Gpreempted.
//
//go:systemstack
func preemptPark(gp *g) {
- if traceEnabled() {
- traceGoPark(traceBlockPreempted, 0)
- }
status := readgstatus(gp)
if status&^_Gscan != _Grunning {
dumpgstatus(gp)
// transitions until we can dropg.
casGToPreemptScan(gp, _Grunning, _Gscan|_Gpreempted)
dropg()
+
+ // Be careful about how we trace this next event. The ordering
+ // is subtle.
+ //
+ // The moment we CAS into _Gpreempted, suspendG could CAS to
+ // _Gwaiting, do its work, and ready the goroutine. All of
+ // this could happen before we even get the chance to emit
+ // an event. The end result is that the events could appear
+ // out of order, and the tracer generally assumes the scheduler
+ // takes care of the ordering between GoPark and GoUnpark.
+ //
+ // The answer here is simple: emit the event while we still hold
+ // the _Gscan bit on the goroutine. We still need to traceAcquire
+ // and traceRelease across the CAS because the tracer could be
+ // what's calling suspendG in the first place, and we want the
+ // CAS and event emission to appear atomic to the tracer.
+ trace := traceAcquire()
+ if trace.ok() {
+ trace.GoPark(traceBlockPreempted, 0)
+ }
casfrom_Gscanstatus(gp, _Gscan|_Gpreempted, _Gpreempted)
+ if trace.ok() {
+ traceRelease(trace)
+ }
schedule()
}
}
func goyield_m(gp *g) {
- if traceEnabled() {
- traceGoPreempt()
- }
+ trace := traceAcquire()
pp := gp.m.p.ptr()
casgstatus(gp, _Grunning, _Grunnable)
+ if trace.ok() {
+ trace.GoPreempt()
+ traceRelease(trace)
+ }
dropg()
runqput(pp, gp, false)
schedule()
if raceenabled {
racegoend()
}
- if traceEnabled() {
- traceGoEnd()
+ trace := traceAcquire()
+ if trace.ok() {
+ trace.GoEnd()
+ traceRelease(trace)
}
mcall(goexit0)
}
//
//go:nosplit
func reentersyscall(pc, sp uintptr) {
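+ // Acquire the tracer up front; it is released below once GoSysCall has
+ // been emitted on the system stack.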
+ trace := traceAcquire()
gp := getg()
// Disable preemption because during this function g is in Gsyscall status,
})
}
- if traceEnabled() {
- systemstack(traceGoSysCall)
+ if trace.ok() {
+ systemstack(func() {
+ trace.GoSysCall()
+ traceRelease(trace)
+ })
// systemstack itself clobbers g.sched.{pc,sp} and we might
// need them later when the G is genuinely blocked in a
// syscall
lock(&sched.lock)
if sched.stopwait > 0 && atomic.Cas(&pp.status, _Psyscall, _Pgcstop) {
- if traceEnabled() {
- traceGoSysBlock(pp)
- traceProcStop(pp)
+ trace := traceAcquire()
+ if trace.ok() {
+ trace.GoSysBlock(pp)
+ trace.ProcStop(pp)
+ traceRelease(trace)
}
pp.syscalltick++
if sched.stopwait--; sched.stopwait == 0 {
}
func entersyscallblock_handoff() {
- if traceEnabled() {
- traceGoSysCall()
- traceGoSysBlock(getg().m.p.ptr())
+ trace := traceAcquire()
+ if trace.ok() {
+ trace.GoSysCall()
+ trace.GoSysBlock(getg().m.p.ptr())
+ traceRelease(trace)
}
handoffp(releasep())
}
tryRecordGoroutineProfileWB(gp)
})
}
- if traceEnabled() {
+ trace := traceAcquire()
+ if trace.ok() {
if oldp != gp.m.p.ptr() || gp.m.syscalltick != gp.m.p.ptr().syscalltick {
- systemstack(traceGoStart)
+ systemstack(func() {
+ trace.GoStart()
+ })
}
}
// There's a cpu for us, so we can run.
gp.m.p.ptr().syscalltick++
// We need to cas the status and scan before resuming...
casgstatus(gp, _Gsyscall, _Grunning)
+ if trace.ok() {
+ traceRelease(trace)
+ }
// Garbage collector isn't running (since we are),
// so okay to clear syscallsp.
return
}
- if traceEnabled() {
+ trace := traceAcquire()
+ if trace.ok() {
// Wait till traceGoSysBlock event is emitted.
// This ensures consistency of the trace (the goroutine is started after it is blocked).
for oldp != nil && oldp.syscalltick == gp.m.syscalltick {
// So instead we remember the syscall exit time and emit the event
// in execute when we have a P.
gp.trace.sysExitTime = traceClockNow()
+ traceRelease(trace)
}
gp.m.locks--
var ok bool
systemstack(func() {
ok = exitsyscallfast_pidle()
- if ok && traceEnabled() {
- if oldp != nil {
- // Wait till traceGoSysBlock event is emitted.
- // This ensures consistency of the trace (the goroutine is started after it is blocked).
- for oldp.syscalltick == gp.m.syscalltick {
- osyield()
+ if ok {
+ trace := traceAcquire()
+ if trace.ok() {
+ if oldp != nil {
+ // Wait till traceGoSysBlock event is emitted.
+ // This ensures consistency of the trace (the goroutine is started after it is blocked).
+ for oldp.syscalltick == gp.m.syscalltick {
+ osyield()
+ }
}
+ trace.GoSysExit()
+ traceRelease(trace)
}
- traceGoSysExit()
}
})
if ok {
func exitsyscallfast_reacquired() {
gp := getg()
if gp.m.syscalltick != gp.m.p.ptr().syscalltick {
- if traceEnabled() {
+ trace := traceAcquire()
+ if trace.ok() {
// The p was retaken and then entered a syscall again (since gp.m.syscalltick has changed).
// traceGoSysBlock for this syscall was already emitted,
// but here we effectively retake the p from the new syscall running on the same p.
systemstack(func() {
// Denote blocking of the new syscall.
- traceGoSysBlock(gp.m.p.ptr())
+ trace.GoSysBlock(gp.m.p.ptr())
// Denote completion of the current syscall.
- traceGoSysExit()
+ trace.GoSysExit()
+ traceRelease(trace)
})
}
gp.m.p.ptr().syscalltick++
totalSize := uintptr(4*goarch.PtrSize + sys.MinFrameSize) // extra space in case of reads slightly beyond frame
totalSize = alignUp(totalSize, sys.StackAlign)
sp := newg.stack.hi - totalSize
- spArg := sp
if usesLR {
// caller's LR
*(*uintptr)(unsafe.Pointer(sp)) = 0
prepGoExitFrame(sp)
- spArg += sys.MinFrameSize
+ }
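+ // On arm64 the frame pointer slot sits one word below SP; zero it so
+ // frame-pointer unwinding terminates cleanly at the goroutine's root
+ // frame.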
+ if GOARCH == "arm64" {
+ // caller's FP
+ *(*uintptr)(unsafe.Pointer(sp - goarch.PtrSize)) = 0
}
memclrNoHeapPointers(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
if newg.trackingSeq%gTrackingPeriod == 0 {
newg.tracking = true
}
- casgstatus(newg, _Gdead, _Grunnable)
gcController.addScannableStack(pp, int64(newg.stack.hi-newg.stack.lo))
+ // Get a goid and switch to runnable. Make all this atomic to the tracer.
+ trace := traceAcquire()
+ casgstatus(newg, _Gdead, _Grunnable)
if pp.goidcache == pp.goidcacheend {
// Sched.goidgen is the last allocated id,
// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
}
newg.goid = pp.goidcache
pp.goidcache++
+ if trace.ok() {
+ trace.GoCreate(newg, newg.startpc)
+ traceRelease(trace)
+ }
+
+ // Set up race context.
if raceenabled {
newg.racectx = racegostart(callerpc)
newg.raceignore = 0
racereleasemergeg(newg, unsafe.Pointer(&labelSync))
}
}
- if traceEnabled() {
- traceGoCreate(newg, newg.startpc)
- }
releasem(mp)
return newg
if old < 0 || nprocs <= 0 {
throw("procresize: invalid arg")
}
- if traceEnabled() {
- traceGomaxprocs(nprocs)
+ trace := traceAcquire()
+ if trace.ok() {
+ trace.Gomaxprocs(nprocs)
+ traceRelease(trace)
}
// update statistics
// because p.destroy itself has write barriers, so we
// need to do that from a valid P.
if gp.m.p != 0 {
- if traceEnabled() {
+ trace := traceAcquire()
+ if trace.ok() {
// Pretend that we were descheduled
// and then scheduled again to keep
// the trace sane.
- traceGoSched()
- traceProcStop(gp.m.p.ptr())
+ trace.GoSched()
+ trace.ProcStop(gp.m.p.ptr())
+ traceRelease(trace)
}
gp.m.p.ptr().m = 0
}
pp.m = 0
pp.status = _Pidle
acquirep(pp)
- if traceEnabled() {
- traceGoStart()
+ trace := traceAcquire()
+ if trace.ok() {
+ trace.GoStart()
+ traceRelease(trace)
}
}
// from a potentially stale mcache.
pp.mcache.prepareForSweep()
- if traceEnabled() {
- traceProcStart()
+ trace := traceAcquire()
+ if trace.ok() {
+ trace.ProcStart()
+ traceRelease(trace)
}
}
print("releasep: m=", gp.m, " m->p=", gp.m.p.ptr(), " p->m=", hex(pp.m), " p->status=", pp.status, "\n")
throw("releasep: invalid p state")
}
- if traceEnabled() {
- traceProcStop(gp.m.p.ptr())
+ trace := traceAcquire()
+ if trace.ok() {
+ trace.ProcStop(gp.m.p.ptr())
+ traceRelease(trace)
}
gp.m.p = 0
pp.m = 0
// increment nmidle and report deadlock.
incidlelocked(-1)
if atomic.Cas(&pp.status, s, _Pidle) {
- if traceEnabled() {
- traceGoSysBlock(pp)
- traceProcStop(pp)
+ trace := traceAcquire()
+ if trace.ok() {
+ trace.GoSysBlock(pp)
+ trace.ProcStop(pp)
+ traceRelease(trace)
}
n++
pp.syscalltick++