package runtime
import (
+ "internal/abi"
"internal/goarch"
"internal/goexperiment"
"runtime/internal/atomic"
userG := getg().m.curg
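// If this is a self-scan, put the user G in _Gwaiting to prevent
// self-deadlock. It may already be in _Gwaiting if this is a mark
// worker or we're in mark termination.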
selfScan := gp == userG && readgstatus(userG) == _Grunning
if selfScan {
- casgstatus(userG, _Grunning, _Gwaiting)
- userG.waitreason = waitReasonGarbageCollectionScan
+ casGToWaiting(userG, _Grunning, waitReasonGarbageCollectionScan)
}
// TODO: suspendG blocks (and spins) until gp
}
})
}
- if goexperiment.PacerRedesign {
- if workCounter != nil && workDone != 0 {
- workCounter.Add(workDone)
- if flushBgCredit {
- gcFlushBgCredit(workDone)
- }
+ if workCounter != nil && workDone != 0 {
+ workCounter.Add(workDone)
+ if flushBgCredit {
+ gcFlushBgCredit(workDone)
}
}
return workDone
// Mark everything that can be reached from
// the object (but *not* the object itself or
// we'll never collect it).
- scanobject(p, gcw)
+ if !s.spanclass.noscan() {
+ scanobject(p, gcw)
+ }
// The special itself is a root.
scanblock(uintptr(unsafe.Pointer(&spf.fn)), goarch.PtrSize, &oneptrmask[0], gcw, nil)
}
// gcAssistAlloc performs GC work to make gp's assist debt positive.
-// gp must be the calling user gorountine.
+// gp must be the calling user goroutine.
//
// This must be called with preemption enabled.
func gcAssistAlloc(gp *g) {
return
}
- traced := false
+ // This extremely verbose boolean indicates whether we've
+ // entered mark assist from the perspective of the tracer.
+ //
+ // In the old tracer, this is just before we call gcAssistAlloc1
+ // *and* tracing is enabled. Because the old tracer doesn't
+ // do any extra tracking, we need to be careful to not emit an
+ // "end" event if there was no corresponding "begin" for the
+ // mark assist.
+ //
+ // In the new tracer, this is just before we call gcAssistAlloc1
+ // *regardless* of whether tracing is enabled. This is because
+ // the new tracer allows for tracing to begin (and advance
+ // generations) in the middle of a GC mark phase, so we need to
+ // record some state so that the tracer can pick it up to ensure
+ // a consistent trace result.
+ //
+ // TODO(mknyszek): Hide the details of inMarkAssist in tracer
+ // functions and simplify all the state tracking. This is a lot.
+ enteredMarkAssistForTracing := false
retry:
+ if gcCPULimiter.limiting() {
+ // If the CPU limiter is enabled, intentionally don't
+ // assist to reduce the amount of CPU time spent in the GC.
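+ // The unpaid assist debt simply remains on the G; if the limiter is
+ // still engaged at the G's next assist attempt, it will bail out
+ // here again.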
+ if enteredMarkAssistForTracing {
+ trace := traceAcquire()
+ if trace.ok() {
+ trace.GCMarkAssistDone()
+ // Set this *after* we trace the end to make sure
+ // that we emit an in-progress event if this is
+ // the first event for the goroutine in the trace
+ // or trace generation. Also, do this between
+ // acquire/release because this is part of the
+ // goroutine's trace state, and it must be atomic
+ // with respect to the tracer.
+ gp.inMarkAssist = false
+ traceRelease(trace)
+ } else {
+ // This state is tracked even if tracing isn't enabled.
+ // It's only used by the new tracer.
+ // See the comment on enteredMarkAssistForTracing.
+ gp.inMarkAssist = false
+ }
+ }
+ return
+ }
// Compute the amount of scan work we need to do to make the
// balance positive. When the required amount of work is low,
// we over-assist to build up credit for future allocations
// and amortize the cost of assisting.

// Steal as much credit as we can from the background GC's
// scan credit. This is racy and may drop some background
// credit below 0 if two mutators steal at the same time. This
// will just cause steals to fail until credit is accumulated
// again, so in the long run it doesn't really matter, but we
// do have to handle the negative credit case.
- bgScanCredit := atomic.Loadint64(&gcController.bgScanCredit)
+ bgScanCredit := gcController.bgScanCredit.Load()
stolen := int64(0)
if bgScanCredit > 0 {
if bgScanCredit < scanWork {
stolen = bgScanCredit
gp.gcAssistBytes += 1 + int64(assistBytesPerWork*float64(stolen))
} else {
stolen = scanWork
gp.gcAssistBytes += debtBytes
}
- atomic.Xaddint64(&gcController.bgScanCredit, -stolen)
+ gcController.bgScanCredit.Add(-stolen)
scanWork -= stolen
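// Illustrative numbers: with scanWork = 4096 and bgScanCredit = 1024,
// we steal all 1024 units and must assist for the remaining 3072;
// with bgScanCredit = 8192, we steal exactly 4096 and can return
// without assisting at all.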
if scanWork == 0 {
// We were able to steal all of the credit we
// needed.
- if traced {
- traceGCMarkAssistDone()
+ if enteredMarkAssistForTracing {
+ trace := traceAcquire()
+ if trace.ok() {
+ trace.GCMarkAssistDone()
+ // Set this *after* we trace the end to make sure
+ // that we emit an in-progress event if this is
+ // the first event for the goroutine in the trace
+ // or trace generation. Also, do this between
+ // acquire/release because this is part of the
+ // goroutine's trace state, and it must be atomic
+ // with respect to the tracer.
+ gp.inMarkAssist = false
+ traceRelease(trace)
+ } else {
+ // This state is tracked even if tracing isn't enabled.
+ // It's only used by the new tracer.
+ // See the comment on enteredMarkAssistForTracing.
+ gp.inMarkAssist = false
+ }
}
return
}
}
-
- if trace.enabled && !traced {
- traced = true
- traceGCMarkAssistStart()
+ if !enteredMarkAssistForTracing {
+ trace := traceAcquire()
+ if trace.ok() {
+ if !goexperiment.ExecTracer2 {
+ // In the old tracer, enter mark assist tracing only
+ // if we actually traced an event. Otherwise a goroutine
+ // waking up from mark assist post-GC might end up
+ // writing a stray "end" event.
+ //
+ // This means inMarkAssist will not be meaningful
+ // in the old tracer; that's OK, it's unused.
+ //
+ // See the comment on enteredMarkAssistForTracing.
+ enteredMarkAssistForTracing = true
+ }
+ trace.GCMarkAssistStart()
+ // Set this *after* we trace the start, otherwise we may
+ // emit an in-progress event for an assist we're about to start.
+ gp.inMarkAssist = true
+ traceRelease(trace)
+ } else {
+ gp.inMarkAssist = true
+ }
+ if goexperiment.ExecTracer2 {
+ // In the new tracer, set enter mark assist tracing if we
+ // ever pass this point, because we must manage inMarkAssist
+ // correctly.
+ //
+ // See the comment on enteredMarkAssistForTracing.
+ enteredMarkAssistForTracing = true
+ }
}
// Perform assist work
// At this point either background GC has satisfied
// this G's assist debt, or the GC cycle is over.
}
- if traced {
- traceGCMarkAssistDone()
+ if enteredMarkAssistForTracing {
+ trace := traceAcquire()
+ if trace.ok() {
+ trace.GCMarkAssistDone()
+ // Set this *after* we trace the end to make sure
+ // that we emit an in-progress event if this is
+ // the first event for the goroutine in the trace
+ // or trace generation. Also, do this between
+ // acquire/release because this is part of the
+ // goroutine's trace state, and it must be atomic
+ // with respect to the tracer.
+ gp.inMarkAssist = false
+ traceRelease(trace)
+ } else {
+ // This state is tracked even if tracing isn't enabled.
+ // It's only used by the new tracer.
+ // See the comment on enteredMarkAssistForTracing.
+ gp.inMarkAssist = false
+ }
}
}
// The gcBlackenEnabled check in malloc races with the
// store that clears it but an atomic check in every malloc
// would be a performance hit.
- // Instead we recheck it here on the non-preemptable system
+ // Instead we recheck it here on the non-preemptible system
// stack to determine if we should perform an assist.
// GC is done, so ignore any remaining debt.
// Track time spent in this assist. Since we're on the
// system stack, this is non-preemptible, so we can
// just measure start and end time.
+ //
+ // Limiter event tracking might be disabled if we end up here
+ // while on a mark worker.
startTime := nanotime()
+ trackLimiterEvent := gp.m.p.ptr().limiterEvent.start(limiterEventMarkAssist, startTime)
decnwait := atomic.Xadd(&work.nwait, -1)
if decnwait == work.nproc {
}
// gcDrainN requires the caller to be preemptible.
- casgstatus(gp, _Grunning, _Gwaiting)
- gp.waitreason = waitReasonGCAssistMarking
+ casGToWaiting(gp, _Grunning, waitReasonGCAssistMarking)
// drain own cached work first in the hopes that it
// will be more cache friendly.
// This has reached a background completion point. Set
// gp.param to a non-nil value to indicate this. It
// doesn't matter what we set it to (it just has to be
// a valid pointer).
gp.param = unsafe.Pointer(gp)
}
- duration := nanotime() - startTime
- _p_ := gp.m.p.ptr()
- _p_.gcAssistTime += duration
- if _p_.gcAssistTime > gcAssistTimeSlack {
- atomic.Xaddint64(&gcController.assistTime, _p_.gcAssistTime)
- _p_.gcAssistTime = 0
+ now := nanotime()
+ duration := now - startTime
+ pp := gp.m.p.ptr()
+ pp.gcAssistTime += duration
+ if trackLimiterEvent {
+ pp.limiterEvent.stop(limiterEventMarkAssist, now)
+ }
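+ // Illustrative example: with a slack of, say, 5µs, three 2µs assists
+ // accumulate locally on the P and are flushed to the global assistTime
+ // (and the CPU limiter updated) only when the local total crosses the
+ // slack, bounding contention on the global counter.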
+ if pp.gcAssistTime > gcAssistTimeSlack {
+ gcController.assistTime.Add(pp.gcAssistTime)
+ gcCPULimiter.update(now)
+ pp.gcAssistTime = 0
}
}
// Recheck for background credit now that this G is in
// the queue, but can still back out. This avoids a
// race in case background marking has flushed more
// credit since we checked above.
- if atomic.Loadint64(&gcController.bgScanCredit) > 0 {
+ if gcController.bgScanCredit.Load() > 0 {
work.assistQueue.q = oldList
if oldList.tail != 0 {
oldList.tail.ptr().schedlink.set(nil)
return false
}
// Park.
- goparkunlock(&work.assistQueue.lock, waitReasonGCAssistWait, traceEvGoBlockGC, 2)
+ goparkunlock(&work.assistQueue.lock, waitReasonGCAssistWait, traceBlockGCMarkAssist, 2)
return true
}
// Fast path; there are no blocked assists. There's a
// small window here where an assist may add itself to
// the blocked queue and park. If that happens, we'll
// just get it on the next flush.
- atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
+ gcController.bgScanCredit.Add(scanWork)
return
}
// Convert from scan bytes back to work.
assistWorkPerByte := gcController.assistWorkPerByte.Load()
scanWork = int64(float64(scanBytes) * assistWorkPerByte)
- atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
+ gcController.bgScanCredit.Add(scanWork)
}
unlock(&work.assistQueue.lock)
}
// scanstack scans gp's stack, greying all pointers found on the stack.
//
-// For goexperiment.PacerRedesign:
// Returns the amount of scan work performed, but doesn't update
// gcController.stackScanWork or flush any credit. Any background credit produced
// by this function should be flushed by its caller. scanstack itself can't
// safely flush because it may result in trying to wake up a goroutine that
// was just scanned, resulting in a self-deadlock.
throw("can't scan our own stack")
}
- // stackSize is the amount of work we'll be reporting.
+ // scannedSize is the amount of work we'll be reporting.
//
- // We report the total stack size, more than we scan,
- // because this number needs to line up with gcControllerState's
- // stackScan and scannableStackSize fields.
- //
- // See the documentation on those fields for more information.
- stackSize := gp.stack.hi - gp.stack.lo
+ // It is less than the allocated size (which is hi-lo).
+ var sp uintptr
+ if gp.syscallsp != 0 {
+ sp = gp.syscallsp // If in a system call this is the stack pointer (gp.sched.sp can be 0 in this case on Windows).
+ } else {
+ sp = gp.sched.sp
+ }
+ scannedSize := gp.stack.hi - sp
+
+ // Keep statistics for initial stack size calculation.
+ // Note that this accumulates the scanned size, not the allocated size.
+ p := getg().m.p.ptr()
+ p.scannedStackSize += uint64(scannedSize)
+ p.scannedStacks++
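+ // Illustrative example (addresses hypothetical): a goroutine with
+ // stack [lo, hi) = [0xc000100000, 0xc000108000) and sp = 0xc000107e00
+ // reports scannedSize = hi-sp = 0x200 (512) bytes, even though the
+ // allocated size hi-lo is 32 KiB.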
if isShrinkStackSafe(gp) {
// Shrink the stack if not much of it is being used.
}
// Scan the stack. Accumulate a list of stack objects.
- scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
- scanframeworker(frame, &state, gcw)
- return true
+ var u unwinder
+ for u.init(gp, 0); u.valid(); u.next() {
+ scanframeworker(&u.frame, &state, gcw)
}
- gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
// Find additional pointers that point into the stack from the heap.
// Currently this includes defers and panics. See also function copystack.
if state.buf != nil || state.cbuf != nil || state.freeBuf != nil {
throw("remaining pointer buffers")
}
- return int64(stackSize)
+ return int64(scannedSize)
}
// Scan a stack frame: local variables and function arguments/results.
+//
//go:nowritebarrier
func scanframeworker(frame *stkframe, state *stackScanState, gcw *gcWork) {
if _DebugGC > 1 && frame.continpc != 0 {
print("scanframe ", funcname(frame.fn), "\n")
}
- isAsyncPreempt := frame.fn.valid() && frame.fn.funcID == funcID_asyncPreempt
- isDebugCall := frame.fn.valid() && frame.fn.funcID == funcID_debugCallV2
+ isAsyncPreempt := frame.fn.valid() && frame.fn.funcID == abi.FuncID_asyncPreempt
+ isDebugCall := frame.fn.valid() && frame.fn.funcID == abi.FuncID_debugCallV2
if state.conservative || isAsyncPreempt || isDebugCall {
if debugScanConservative {
println("conservatively scanning function", funcname(frame.fn), "at PC", hex(frame.continpc))
}
// Scan arguments to this frame.
- if frame.arglen != 0 {
+ if n := frame.argBytes(); n != 0 {
// TODO: We could pass the entry argument map
// to narrow this down further.
- scanConservative(frame.argp, frame.arglen, nil, gcw, state)
+ scanConservative(frame.argp, n, nil, gcw, state)
}
if isAsyncPreempt || isDebugCall {
return
}
- locals, args, objs := getStackMap(frame, &state.cache, false)
+ locals, args, objs := frame.getStackMap(false)
// Scan local variables if stack frame has been allocated.
if locals.n > 0 {
gcDrainFractional
)
+// gcDrainMarkWorkerIdle is a wrapper for gcDrain that exists to better account
+// mark time in profiles.
+func gcDrainMarkWorkerIdle(gcw *gcWork) {
+ gcDrain(gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit)
+}
+
+// gcDrainMarkWorkerDedicated is a wrapper for gcDrain that exists to better account
+// mark time in profiles.
+func gcDrainMarkWorkerDedicated(gcw *gcWork, untilPreempt bool) {
+ flags := gcDrainFlushBgCredit
+ if untilPreempt {
+ flags |= gcDrainUntilPreempt
+ }
+ gcDrain(gcw, flags)
+}
+
+// gcDrainMarkWorkerFractional is a wrapper for gcDrain that exists to better account
+// mark time in profiles.
+func gcDrainMarkWorkerFractional(gcw *gcWork) {
+ gcDrain(gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit)
+}
+
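+ // N.B. The wrappers above work because each worker mode now enters
+ // gcDrain through a distinct function symbol, so CPU profiles and
+ // stack traces can attribute mark time to the specific worker mode.
+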
// gcDrain scans roots and objects in work buffers, blackening grey
// objects until it is unable to get more work. It may return before
// GC is done; it's the caller's responsibility to balance work from
// credit to gcController.bgScanCredit every gcCreditSlack units of
// scan work.
//
-// gcDrain will always return if there is a pending STW.
+// gcDrain will always return if there is a pending STW or forEachP.
+//
+// Disabling write barriers is necessary to ensure that after we've
+// confirmed that we've drained gcw, that we don't accidentally end
+// up flipping that condition by immediately adding work in the form
+// of a write barrier buffer flush.
+//
+// Don't set nowritebarrierrec because it's safe for some callees to
+// have write barriers enabled.
//
//go:nowritebarrier
func gcDrain(gcw *gcWork, flags gcDrainFlags) {
- if !writeBarrier.needed {
+ if !writeBarrier.enabled {
throw("gcDrain phase incorrect")
}
+ // N.B. We must be running in a non-preemptible context, so it's
+ // safe to hold a reference to our P here.
gp := getg().m.curg
+ pp := gp.m.p.ptr()
preemptible := flags&gcDrainUntilPreempt != 0
flushBgCredit := flags&gcDrainFlushBgCredit != 0
idle := flags&gcDrainIdle != 0
// Drain root marking jobs.
if work.markrootNext < work.markrootJobs {
- // Stop if we're preemptible or if someone wants to STW.
- for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) {
+ // Stop if we're preemptible, if someone wants to STW, or if
+ // someone is calling forEachP.
+ for !(gp.preempt && (preemptible || sched.gcwaiting.Load() || pp.runSafePointFn != 0)) {
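// Claim a root job by atomically bumping markrootNext: each index in
// [0, markrootJobs) is handed out at most once across all workers.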
job := atomic.Xadd(&work.markrootNext, +1) - 1
if job >= work.markrootJobs {
break
}
// Drain heap marking jobs.
- // Stop if we're preemptible or if someone wants to STW.
- for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) {
+ //
+ // Stop if we're preemptible, if someone wants to STW, or if
+ // someone is calling forEachP.
+ //
+ // TODO(mknyszek): Consider always checking gp.preempt instead
+ // of having the preempt flag, and making an exception for certain
+ // mark workers in retake. That might be simpler than trying to
+ // enumerate all the reasons why we might want to preempt, even
+ // if we're supposed to be mostly non-preemptible.
+ for !(gp.preempt && (preemptible || sched.gcwaiting.Load() || pp.runSafePointFn != 0)) {
// Try to keep work available on the global queue. We used to
// check if there were waiting workers, but it's better to
// just keep work available than to make workers wait. In the
// worst case, we'll do O(log(_WorkbufSize)) unnecessary
// balances.
// Flush the write barrier
// buffer; this may create
// more work.
- wbBufFlush(nil, 0)
+ wbBufFlush()
b = gcw.tryGet()
}
}
//go:nowritebarrier
//go:systemstack
func gcDrainN(gcw *gcWork, scanWork int64) int64 {
- if !writeBarrier.needed {
+ if !writeBarrier.enabled {
throw("gcDrainN phase incorrect")
}
// There may already be scan work on the gcw, which we don't
// want to claim was done by this call.
workFlushed := -gcw.heapScanWork
+ // In addition to backing out because of a preemption, back out
+ // if the GC CPU limiter is enabled.
gp := getg().m.curg
- for !gp.preempt && workFlushed+gcw.heapScanWork < scanWork {
+ for !gp.preempt && !gcCPULimiter.limiting() && workFlushed+gcw.heapScanWork < scanWork {
// See gcDrain comment.
if work.full == 0 {
gcw.balance()
if b == 0 {
// Flush the write barrier buffer;
// this may create more work.
- wbBufFlush(nil, 0)
+ wbBufFlush()
b = gcw.tryGet()
}
}
if work.markrootNext < work.markrootJobs {
job := atomic.Xadd(&work.markrootNext, +1) - 1
if job < work.markrootJobs {
- work := markroot(gcw, job, false)
- if goexperiment.PacerRedesign {
- workFlushed += work
- }
+ workFlushed += markroot(gcw, job, false)
continue
}
}
// scanblock scans b as scanobject would, but using an explicit
// pointer bitmap instead of the heap bitmap.
//
// This is used to scan non-heap roots, so it does not update
// gcw.bytesMarked or gcw.heapScanWork.
//
// If stk != nil, possible stack pointers are also reported to stk.putPtr.
+//
//go:nowritebarrier
func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork, stk *stackScanState) {
// Use local copies of original parameters, so that a stack trace
// due to one of the throws below shows the original block base and extent.
// b is either the beginning of an object, in which case this
// is the size of the object to scan, or it points to an
// oblet, in which case we compute the size to scan below.
- hbits := heapBitsForAddr(b)
s := spanOfUnchecked(b)
n := s.elemsize
if n == 0 {
throw("scanobject n == 0")
}
+ if s.spanclass.noscan() {
+ // Correctness-wise this is ok, but it's inefficient
+ // if noscan objects reach here.
+ throw("scanobject of a noscan object")
+ }
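+ // Callers are expected to filter noscan objects out; see, e.g., the
+ // spanclass.noscan() check added before the scanobject call in the
+ // finalizer-special scan above.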
+ var tp typePointers
if n > maxObletBytes {
// Large object. Break into oblets for better
// parallelism and lower latency.
if b == s.base() {
- // It's possible this is a noscan object (not
- // from greyobject, but from other code
- // paths), in which case we must *not* enqueue
- // oblets since their bitmaps will be
- // uninitialized.
- if s.spanclass.noscan() {
- // Bypass the whole scan.
- gcw.bytesMarked += uint64(n)
- return
- }
-
// Enqueue the other oblets to scan later.
// Some oblets may be in b's scalar tail, but
// these will be marked as "no more pointers",
// so we'll drop out immediately when we go to
// scan those.
// Compute the size of the oblet. Since this object
// must be a large object, s.base() is the beginning
// of the object.
n = s.base() + s.elemsize - b
- if n > maxObletBytes {
- n = maxObletBytes
+ n = min(n, maxObletBytes)
+ if goexperiment.AllocHeaders {
+ tp = s.typePointersOfUnchecked(s.base())
+ tp = tp.fastForward(b-tp.addr, b+n)
+ }
+ } else {
+ if goexperiment.AllocHeaders {
+ tp = s.typePointersOfUnchecked(b)
}
}
- var i uintptr
- for i = 0; i < n; i, hbits = i+goarch.PtrSize, hbits.next() {
- // Load bits once. See CL 22712 and issue 16973 for discussion.
- bits := hbits.bits()
- if bits&bitScan == 0 {
- break // no more pointers in this object
- }
- if bits&bitPointer == 0 {
- continue // not a pointer
+ var hbits heapBits
+ if !goexperiment.AllocHeaders {
+ hbits = heapBitsForAddr(b, n)
+ }
+ var scanSize uintptr
+ for {
+ var addr uintptr
+ if goexperiment.AllocHeaders {
+ if tp, addr = tp.nextFast(); addr == 0 {
+ if tp, addr = tp.next(b + n); addr == 0 {
+ break
+ }
+ }
+ } else {
+ if hbits, addr = hbits.nextFast(); addr == 0 {
+ if hbits, addr = hbits.next(); addr == 0 {
+ break
+ }
+ }
}
+ // Keep track of farthest pointer we found, so we can
+ // update heapScanWork. TODO: is there a better metric,
+ // now that we can skip scalar portions pretty efficiently?
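+ // (Illustrative: with 8-byte pointers, a farthest pointer slot at
+ // offset 24 from b yields scanSize = 24+8 = 32.)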
+ scanSize = addr - b + goarch.PtrSize
+
// Work here is duplicated in scanblock and above.
// If you make changes here, make changes there too.
- obj := *(*uintptr)(unsafe.Pointer(b + i))
+ obj := *(*uintptr)(unsafe.Pointer(addr))
// At this point we have extracted the next potential pointer.
// Quickly filter out nil and pointers back to the current object.
// Note that it's possible for findObject to
// fail if obj points to a just-allocated heap
// object because of a race with growing the
// heap. In this case, we know the object was
// just allocated and hence will be marked by
// allocation itself.
- if obj, span, objIndex := findObject(obj, b, i); obj != 0 {
- greyobject(obj, b, i, span, gcw, objIndex)
+ if obj, span, objIndex := findObject(obj, b, addr-b); obj != 0 {
+ greyobject(obj, b, addr-b, span, gcw, objIndex)
}
}
}
gcw.bytesMarked += uint64(n)
- gcw.heapScanWork += int64(i)
+ gcw.heapScanWork += int64(scanSize)
}
// scanConservative scans block [b, b+n) conservatively, treating any
// pointer-like value in the block as a pointer.
// Shade the object if it isn't already.
// The object is not nil and known to be in the heap.
// Preemption must be disabled.
+//
//go:nowritebarrier
func shade(b uintptr) {
if obj, span, objIndex := findObject(b, 0, 0); obj != 0 {
//
//go:nowritebarrier
//go:nosplit
-func gcmarknewobject(span *mspan, obj, size, scanSize uintptr) {
+func gcmarknewobject(span *mspan, obj, size uintptr) {
if useCheckmark { // The world should be stopped so this should not happen.
throw("gcmarknewobject called while doing checkmark")
}
gcw := &getg().m.p.ptr().gcw
gcw.bytesMarked += uint64(size)
- if !goexperiment.PacerRedesign {
- // The old pacer counts newly allocated memory toward
- // heapScanWork because heapScan is continuously updated
- // throughout the GC cycle with newly allocated memory. However,
- // these objects are never actually scanned, so we need
- // to account for them in heapScanWork here, "faking" their work.
- // Otherwise the pacer will think it's always behind, potentially
- // by a large margin.
- //
- // The new pacer doesn't care about this because it ceases to updated
- // heapScan once a GC cycle starts, effectively snapshotting it.
- gcw.heapScanWork += int64(scanSize)
- }
}
// gcMarkTinyAllocs greys all active tiny alloc blocks.