package runtime
import (
+ "internal/abi"
"internal/goarch"
"runtime/internal/atomic"
"runtime/internal/sys"
userG := getg().m.curg
selfScan := gp == userG && readgstatus(userG) == _Grunning
if selfScan {
- casgstatus(userG, _Grunning, _Gwaiting)
- userG.waitreason = waitReasonGarbageCollectionScan
+ casGToWaiting(userG, _Grunning, waitReasonGarbageCollectionScan)
}
// TODO: suspendG blocks (and spins) until gp
// Mark everything that can be reached from
// the object (but *not* the object itself or
// we'll never collect it).
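// A noscan object holds no pointers to trace, and scanobject now
// throws if handed one (see the check added in scanobject below), so
// skip the scan for noscan spans and only walk objects that can
// contain pointers.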
- scanobject(p, gcw)
+ if !s.spanclass.noscan() {
+ scanobject(p, gcw)
+ }
// The special itself is a root.
scanblock(uintptr(unsafe.Pointer(&spf.fn)), goarch.PtrSize, &oneptrmask[0], gcw, nil)
traced := false
retry:
- if go119MemoryLimitSupport && gcCPULimiter.limiting() {
+ if gcCPULimiter.limiting() {
// If the CPU limiter is enabled, intentionally don't
// assist to reduce the amount of CPU time spent in the GC.
if traced {
// will just cause steals to fail until credit is accumulated
// again, so in the long run it doesn't really matter, but we
// do have to handle the negative credit case.
- bgScanCredit := atomic.Loadint64(&gcController.bgScanCredit)
+ bgScanCredit := gcController.bgScanCredit.Load()
stolen := int64(0)
if bgScanCredit > 0 {
if bgScanCredit < scanWork {
	stolen = bgScanCredit
	gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork.Load()*float64(stolen))
} else {
	stolen = scanWork
	gp.gcAssistBytes += debtBytes
}
- atomic.Xaddint64(&gcController.bgScanCredit, -stolen)
+ gcController.bgScanCredit.Add(-stolen)
scanWork -= stolen
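// Either way stolen is min(bgScanCredit, scanWork): an assist never
// claims more credit than background marking has banked, and the
// stolen work is credited back to gp.gcAssistBytes in byte terms.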
}
}
- if trace.enabled && !traced {
+ if traceEnabled() && !traced {
traced = true
traceGCMarkAssistStart()
}
// The gcBlackenEnabled check in malloc races with the
// store that clears it but an atomic check in every malloc
// would be a performance hit.
- // Instead we recheck it here on the non-preemptable system
+ // Instead we recheck it here on the non-preemptible system
// stack to determine if we should perform an assist.
// GC is done, so ignore any remaining debt.
// Track time spent in this assist. Since we're on the
// system stack, this is non-preemptible, so we can
// just measure start and end time.
+ //
+ // Limiter event tracking might be disabled if we end up here
+ // while on a mark worker.
startTime := nanotime()
+ trackLimiterEvent := gp.m.p.ptr().limiterEvent.start(limiterEventMarkAssist, startTime)
decnwait := atomic.Xadd(&work.nwait, -1)
if decnwait == work.nproc {
	println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc)
	throw("nwait > work.nprocs")
}
// gcDrainN requires the caller to be preemptible.
- casgstatus(gp, _Grunning, _Gwaiting)
- gp.waitreason = waitReasonGCAssistMarking
+ casGToWaiting(gp, _Grunning, waitReasonGCAssistMarking)
// drain own cached work first in the hopes that it
// will be more cache friendly.
}
now := nanotime()
duration := now - startTime
- _p_ := gp.m.p.ptr()
- _p_.gcAssistTime += duration
- if _p_.gcAssistTime > gcAssistTimeSlack {
- assistTime := gcController.assistTime.Add(_p_.gcAssistTime)
- _p_.gcAssistTime = 0
- gcCPULimiter.update(assistTime+mheap_.pages.scav.assistTime.Load(), now)
+ pp := gp.m.p.ptr()
+ pp.gcAssistTime += duration
+ if trackLimiterEvent {
+ pp.limiterEvent.stop(limiterEventMarkAssist, now)
+ }
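// Assist time accumulates on the P and is only flushed to the global
// gcController.assistTime (and reported to the CPU limiter) once it
// exceeds gcAssistTimeSlack, keeping contended atomics off the
// per-assist fast path.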
+ if pp.gcAssistTime > gcAssistTimeSlack {
+ gcController.assistTime.Add(pp.gcAssistTime)
+ gcCPULimiter.update(now)
+ pp.gcAssistTime = 0
}
}
// the queue, but can still back out. This avoids a
// race in case background marking has flushed more
// credit since we checked above.
- if atomic.Loadint64(&gcController.bgScanCredit) > 0 {
+ if gcController.bgScanCredit.Load() > 0 {
work.assistQueue.q = oldList
if oldList.tail != 0 {
	oldList.tail.ptr().schedlink.set(nil)
}
unlock(&work.assistQueue.lock)
return false
}
// Park.
- goparkunlock(&work.assistQueue.lock, waitReasonGCAssistWait, traceEvGoBlockGC, 2)
+ goparkunlock(&work.assistQueue.lock, waitReasonGCAssistWait, traceBlockGCMarkAssist, 2)
return true
}
// small window here where an assist may add itself to
// the blocked queue and park. If that happens, we'll
// just get it on the next flush.
- atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
+ gcController.bgScanCredit.Add(scanWork)
return
}
// Convert from scan bytes back to work.
assistWorkPerByte := gcController.assistWorkPerByte.Load()
scanWork = int64(float64(scanBytes) * assistWorkPerByte)
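// Illustrative numbers only: with assistWorkPerByte of 0.5, a leftover
// scanBytes of 4096 flows back as int64(4096 * 0.5) = 2048 units of
// background scan credit.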
- atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
+ gcController.bgScanCredit.Add(scanWork)
}
unlock(&work.assistQueue.lock)
}
}
// Scan the stack. Accumulate a list of stack objects.
- scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
- scanframeworker(frame, &state, gcw)
- return true
+ var u unwinder
+ for u.init(gp, 0); u.valid(); u.next() {
+ scanframeworker(&u.frame, &state, gcw)
}
- gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
// Find additional pointers that point into the stack from the heap.
// Currently this includes defers and panics. See also function copystack.
print("scanframe ", funcname(frame.fn), "\n")
}
- isAsyncPreempt := frame.fn.valid() && frame.fn.funcID == funcID_asyncPreempt
- isDebugCall := frame.fn.valid() && frame.fn.funcID == funcID_debugCallV2
+ isAsyncPreempt := frame.fn.valid() && frame.fn.funcID == abi.FuncID_asyncPreempt
+ isDebugCall := frame.fn.valid() && frame.fn.funcID == abi.FuncID_debugCallV2
if state.conservative || isAsyncPreempt || isDebugCall {
if debugScanConservative {
println("conservatively scanning function", funcname(frame.fn), "at PC", hex(frame.continpc))
}
// Scan arguments to this frame.
- if frame.arglen != 0 {
+ if n := frame.argBytes(); n != 0 {
// TODO: We could pass the entry argument map
// to narrow this down further.
- scanConservative(frame.argp, frame.arglen, nil, gcw, state)
+ scanConservative(frame.argp, n, nil, gcw, state)
}
if isAsyncPreempt || isDebugCall {
return
}
- locals, args, objs := getStackMap(frame, &state.cache, false)
+ locals, args, objs := frame.getStackMap(false)
// Scan local variables if stack frame has been allocated.
if locals.n > 0 {
gcDrainFractional
)
+// gcDrainMarkWorkerIdle is a wrapper for gcDrain that exists to better account
+// mark time in profiles.
+func gcDrainMarkWorkerIdle(gcw *gcWork) {
+ gcDrain(gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit)
+}
+
+// gcDrainMarkWorkerDedicated is a wrapper for gcDrain that exists to better account
+// mark time in profiles.
+func gcDrainMarkWorkerDedicated(gcw *gcWork, untilPreempt bool) {
+ flags := gcDrainFlushBgCredit
+ if untilPreempt {
+ flags |= gcDrainUntilPreempt
+ }
+ gcDrain(gcw, flags)
+}
+
+// gcDrainMarkWorkerFractional is a wrapper for gcDrain that exists to better account
+// mark time in profiles.
+func gcDrainMarkWorkerFractional(gcw *gcWork) {
+ gcDrain(gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit)
+}
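
// The three wrappers exist so dedicated, fractional, and idle mark time
// show up as distinct frames in CPU profiles. A background mark worker
// calls the one matching its mode, roughly (a sketch of the call sites,
// not a verbatim excerpt):
//
//	switch pp.gcMarkWorkerMode {
//	case gcMarkWorkerDedicatedMode:
//		gcDrainMarkWorkerDedicated(&pp.gcw, true)
//	case gcMarkWorkerFractionalMode:
//		gcDrainMarkWorkerFractional(&pp.gcw)
//	case gcMarkWorkerIdleMode:
//		gcDrainMarkWorkerIdle(&pp.gcw)
//	}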
+
// gcDrain scans roots and objects in work buffers, blackening grey
// objects until it is unable to get more work. It may return before
// GC is done; it's the caller's responsibility to balance work from
// other Ps.
//
// gcDrain will always return if there is a pending STW.
//
+// Disabling write barriers is necessary to ensure that after we've
+// confirmed that we've drained gcw, we don't accidentally end up
+// flipping that condition by immediately adding work in the form
+// of a write barrier buffer flush.
+//
+// Don't set nowritebarrierrec because it's safe for some callees to
+// have write barriers enabled.
+//
//go:nowritebarrier
func gcDrain(gcw *gcWork, flags gcDrainFlags) {
- if !writeBarrier.needed {
+ if !writeBarrier.enabled {
throw("gcDrain phase incorrect")
}
// Drain root marking jobs.
if work.markrootNext < work.markrootJobs {
// Stop if we're preemptible or if someone wants to STW.
- for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) {
+ for !(gp.preempt && (preemptible || sched.gcwaiting.Load())) {
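// Claim one root job at a time: the atomic add bumps work.markrootNext
// and the -1 yields this worker's own zero-based job index, so
// concurrent drainers never scan the same root twice.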
job := atomic.Xadd(&work.markrootNext, +1) - 1
if job >= work.markrootJobs {
break
// Drain heap marking jobs.
// Stop if we're preemptible or if someone wants to STW.
- for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) {
+ for !(gp.preempt && (preemptible || sched.gcwaiting.Load())) {
// Try to keep work available on the global queue. We used to
// check if there were waiting workers, but it's better to
// just keep work available than to make workers wait. In the
// Flush the write barrier
// buffer; this may create
// more work.
- wbBufFlush(nil, 0)
+ wbBufFlush()
b = gcw.tryGet()
}
}
//go:nowritebarrier
//go:systemstack
func gcDrainN(gcw *gcWork, scanWork int64) int64 {
- if !writeBarrier.needed {
+ if !writeBarrier.enabled {
throw("gcDrainN phase incorrect")
}
// want to claim was done by this call.
workFlushed := -gcw.heapScanWork
+ // In addition to backing out because of a preemption, back out
+ // if the GC CPU limiter is enabled.
gp := getg().m.curg
- for !gp.preempt && workFlushed+gcw.heapScanWork < scanWork {
+ for !gp.preempt && !gcCPULimiter.limiting() && workFlushed+gcw.heapScanWork < scanWork {
// See gcDrain comment.
if work.full == 0 {
gcw.balance()
if b == 0 {
// Flush the write barrier buffer;
// this may create more work.
- wbBufFlush(nil, 0)
+ wbBufFlush()
b = gcw.tryGet()
}
}
// b is either the beginning of an object, in which case this
// is the size of the object to scan, or it points to an
// oblet, in which case we compute the size to scan below.
- hbits := heapBitsForAddr(b)
s := spanOfUnchecked(b)
n := s.elemsize
if n == 0 {
throw("scanobject n == 0")
}
+ if s.spanclass.noscan() {
+ // Correctness-wise this is ok, but it's inefficient
+ // if noscan objects reach here.
+ throw("scanobject of a noscan object")
+ }
if n > maxObletBytes {
// Large object. Break into oblets for better
// parallelism and lower latency.
if b == s.base() {
- // It's possible this is a noscan object (not
- // from greyobject, but from other code
- // paths), in which case we must *not* enqueue
- // oblets since their bitmaps will be
- // uninitialized.
- if s.spanclass.noscan() {
- // Bypass the whole scan.
- gcw.bytesMarked += uint64(n)
- return
- }
-
// Enqueue the other oblets to scan later.
// Some oblets may be in b's scalar tail, but
// these will be marked as "no more pointers",
// must be a large object, s.base() is the beginning
// of the object.
n = s.base() + s.elemsize - b
- if n > maxObletBytes {
- n = maxObletBytes
- }
+ n = min(n, maxObletBytes)
}
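// For example, a 300 KiB object is scanned as oblets of 128 KiB,
// 128 KiB, and 44 KiB (maxObletBytes is 128 KiB), so several workers
// can share one large object and no single scan call runs too long.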
- var i uintptr
- for i = 0; i < n; i, hbits = i+goarch.PtrSize, hbits.next() {
- // Load bits once. See CL 22712 and issue 16973 for discussion.
- bits := hbits.bits()
- if bits&bitScan == 0 {
- break // no more pointers in this object
- }
- if bits&bitPointer == 0 {
- continue // not a pointer
+ hbits := heapBitsForAddr(b, n)
+ var scanSize uintptr
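// The iterator from heapBitsForAddr visits only the words of [b, b+n)
// that the heap bitmap marks as pointers: nextFast consumes bits already
// cached in hbits, while next refills from the bitmap and returns
// addr == 0 once no pointer words remain.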
+ for {
+ var addr uintptr
+ if hbits, addr = hbits.nextFast(); addr == 0 {
+ if hbits, addr = hbits.next(); addr == 0 {
+ break
+ }
}
+ // Keep track of farthest pointer we found, so we can
+ // update heapScanWork. TODO: is there a better metric,
+ // now that we can skip scalar portions pretty efficiently?
+ scanSize = addr - b + goarch.PtrSize
+
// Work here is duplicated in scanblock and above.
// If you make changes here, make changes there too.
- obj := *(*uintptr)(unsafe.Pointer(b + i))
+ obj := *(*uintptr)(unsafe.Pointer(addr))
// At this point we have extracted the next potential pointer.
// Quickly filter out nil and pointers back to the current object.
// heap. In this case, we know the object was
// just allocated and hence will be marked by
// allocation itself.
- if obj, span, objIndex := findObject(obj, b, i); obj != 0 {
- greyobject(obj, b, i, span, gcw, objIndex)
+ if obj, span, objIndex := findObject(obj, b, addr-b); obj != 0 {
+ greyobject(obj, b, addr-b, span, gcw, objIndex)
}
}
}
gcw.bytesMarked += uint64(n)
- gcw.heapScanWork += int64(i)
+ gcw.heapScanWork += int64(scanSize)
}
// scanConservative scans block [b, b+n) conservatively, treating any
//
//go:nowritebarrier
//go:nosplit
-func gcmarknewobject(span *mspan, obj, size, scanSize uintptr) {
+func gcmarknewobject(span *mspan, obj, size uintptr) {
if useCheckmark { // The world should be stopped so this should not happen.
throw("gcmarknewobject called while doing checkmark")
}