package runtime
import (
+ "internal/abi"
"internal/goarch"
"runtime/internal/atomic"
+ "runtime/internal/sys"
"unsafe"
)
// Gs may be created after this point, but it's okay that we
// ignore them because they begin life without any roots, so
// there's nothing to scan, and any roots they create during
// the concurrent phase will be caught by the write barrier.
- work.nStackRoots = int(atomic.Loaduintptr(&allglen))
+ work.stackRoots = allGsSnapshot()
+ work.nStackRoots = len(work.stackRoots)
work.markrootNext = 0
work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots)
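// The job index space handed to markroot is flat; a sketch of how an index
// decomposes into a root class, assuming the bases are laid out in the order
// used above (fixed roots, then data, BSS, span, and stack roots):
//
//	switch {
//	case i < work.baseData:
//		// fixed roots (finalizers, free G stacks, ...)
//	case i < work.baseBSS:
//		// data segment shard i - work.baseData
//	case i < work.baseSpans:
//		// BSS segment shard i - work.baseBSS
//	case i < work.baseStacks:
//		// span specials shard i - work.baseSpans
//	default:
//		// stack of work.stackRoots[i-work.baseStacks]
//	}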
//
// Preemption must be disabled (because this uses a gcWork).
//
+// Returns the amount of GC work credit produced by the operation.
+// If flushBgCredit is true, then that credit is also flushed
+// to the background credit pool.
+//
// nowritebarrier is only advisory here.
//
//go:nowritebarrier
-func markroot(gcw *gcWork, i uint32) {
+func markroot(gcw *gcWork, i uint32, flushBgCredit bool) int64 {
// Note: if you add a case here, please also update heapdump.go:dumproots.
+ var workDone int64
+ var workCounter *atomic.Int64
switch {
case work.baseData <= i && i < work.baseBSS:
+ workCounter = &gcController.globalsScanWork
for _, datap := range activeModules() {
- markrootBlock(datap.data, datap.edata-datap.data, datap.gcdatamask.bytedata, gcw, int(i-work.baseData))
+ workDone += markrootBlock(datap.data, datap.edata-datap.data, datap.gcdatamask.bytedata, gcw, int(i-work.baseData))
}
case work.baseBSS <= i && i < work.baseSpans:
+ workCounter = &gcController.globalsScanWork
for _, datap := range activeModules() {
- markrootBlock(datap.bss, datap.ebss-datap.bss, datap.gcbssmask.bytedata, gcw, int(i-work.baseBSS))
+ workDone += markrootBlock(datap.bss, datap.ebss-datap.bss, datap.gcbssmask.bytedata, gcw, int(i-work.baseBSS))
}
case i == fixedRootFinalizers:
default:
// the rest is scanning goroutine stacks
- var gp *g
- if work.baseStacks <= i && i < work.baseEnd {
- // N.B. Atomic read of allglen in gcMarkRootPrepare
- // acts as a barrier to ensure that allgs must be large
- // enough to contain all relevant Gs.
- gp = allgs[i-work.baseStacks]
- } else {
+ workCounter = &gcController.stackScanWork
+ if i < work.baseStacks || work.baseEnd <= i {
+ printlock()
+ print("runtime: markroot index ", i, " not in stack roots range [", work.baseStacks, ", ", work.baseEnd, ")\n")
throw("markroot: bad index")
}
+ gp := work.stackRoots[i-work.baseStacks]
// remember when we've first observed the G blocked
// needed only to output in traceback
userG := getg().m.curg
selfScan := gp == userG && readgstatus(userG) == _Grunning
if selfScan {
- casgstatus(userG, _Grunning, _Gwaiting)
- userG.waitreason = waitReasonGarbageCollectionScan
+ casGToWaiting(userG, _Grunning, waitReasonGarbageCollectionScan)
}
// TODO: suspendG blocks (and spins) until gp
// stops, which may take a while for running goroutines.
stopped := suspendG(gp)
if gp.gcscandone {
throw("g already scanned")
}
- scanstack(gp, gcw)
+ workDone += scanstack(gp, gcw)
gp.gcscandone = true
resumeG(stopped)
}
})
}
+ if workCounter != nil && workDone != 0 {
+ workCounter.Add(workDone)
+ if flushBgCredit {
+ gcFlushBgCredit(workDone)
+ }
+ }
+ return workDone
}
// markrootBlock scans the shard'th shard of the block of memory [b0,
// b0+n0), with the given pointer mask.
//
+// Returns the amount of work done.
+//
//go:nowritebarrier
-func markrootBlock(b0, n0 uintptr, ptrmask0 *uint8, gcw *gcWork, shard int) {
+func markrootBlock(b0, n0 uintptr, ptrmask0 *uint8, gcw *gcWork, shard int) int64 {
if rootBlockBytes%(8*goarch.PtrSize) != 0 {
// This is necessary to pick byte offsets in ptrmask0.
throw("rootBlockBytes must be a multiple of 8*ptrSize")
// These tests are written to avoid any possible overflow.
off := uintptr(shard) * rootBlockBytes
if off >= n0 {
- return
+ return 0
}
b := b0 + off
ptrmask := (*uint8)(add(unsafe.Pointer(ptrmask0), uintptr(shard)*(rootBlockBytes/(8*goarch.PtrSize))))
n := uintptr(rootBlockBytes)
if off+n > n0 {
n = n0 - off
}
// Scan this shard.
scanblock(b, n, ptrmask, gcw, nil)
+ return int64(n)
}
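// For context, the shard count per block follows from the same constant; a
// sketch mirroring how gcMarkRootPrepare sizes the data root jobs, where
// divRoundUp is the runtime's ceiling-division helper:
//
//	nDataRoots := int(divRoundUp(datap.edata-datap.data, rootBlockBytes))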
// markrootFreeGStacks frees stacks of dead Gs.
// Mark everything that can be reached from
// the object (but *not* the object itself or
// we'll never collect it).
- scanobject(p, gcw)
+ if !s.spanclass.noscan() {
+ scanobject(p, gcw)
+ }
// The special itself is a root.
scanblock(uintptr(unsafe.Pointer(&spf.fn)), goarch.PtrSize, &oneptrmask[0], gcw, nil)
}
// gcAssistAlloc performs GC work to make gp's assist debt positive.
-// gp must be the calling user gorountine.
+// gp must be the calling user goroutine.
//
// This must be called with preemption enabled.
func gcAssistAlloc(gp *g) {
traced := false
retry:
+ if gcCPULimiter.limiting() {
+ // If the CPU limiter is enabled, intentionally don't
+ // assist to reduce the amount of CPU time spent in the GC.
+ if traced {
+ traceGCMarkAssistDone()
+ }
+ return
+ }
// Compute the amount of scan work we need to do to make the
// balance positive. When the required amount of work is low,
// we over-assist to build up credit for future allocations
// and amortize the cost of assisting.
- assistWorkPerByte := float64frombits(atomic.Load64(&gcController.assistWorkPerByte))
- assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork))
+ assistWorkPerByte := gcController.assistWorkPerByte.Load()
+ assistBytesPerWork := gcController.assistBytesPerWork.Load()
debtBytes := -gp.gcAssistBytes
scanWork := int64(assistWorkPerByte * float64(debtBytes))
if scanWork < gcOverAssistWork {
// will just cause steals to fail until credit is accumulated
// again, so in the long run it doesn't really matter, but we
// do have to handle the negative credit case.
- bgScanCredit := atomic.Loadint64(&gcController.bgScanCredit)
+ bgScanCredit := gcController.bgScanCredit.Load()
stolen := int64(0)
if bgScanCredit > 0 {
if bgScanCredit < scanWork {
stolen = bgScanCredit
gp.gcAssistBytes += 1 + int64(assistBytesPerWork*float64(stolen))
} else {
stolen = scanWork
gp.gcAssistBytes += debtBytes
}
- atomic.Xaddint64(&gcController.bgScanCredit, -stolen)
+ gcController.bgScanCredit.Add(-stolen)
scanWork -= stolen
}
}
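// A worked sketch of the conversion above, with made-up numbers: suppose
// assistWorkPerByte = 0.5 (so assistBytesPerWork = 2) and the goroutine
// owes 4096 bytes (gp.gcAssistBytes = -4096). Then debtBytes = 4096 and
// scanWork = int64(0.5*4096) = 2048 units. Stealing 1024 units of
// background credit pays off int64(2*1024) = 2048 bytes of that debt
// (plus the 1-byte round-up in the partial-steal branch).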
- if trace.enabled && !traced {
+ if traceEnabled() && !traced {
traced = true
traceGCMarkAssistStart()
}
// The gcBlackenEnabled check in malloc races with the
// store that clears it but an atomic check in every malloc
// would be a performance hit.
- // Instead we recheck it here on the non-preemptable system
+ // Instead we recheck it here on the non-preemptible system
// stack to determine if we should perform an assist.
// GC is done, so ignore any remaining debt.
// Track time spent in this assist. Since we're on the
// system stack, this is non-preemptible, so we can
// just measure start and end time.
+ //
+ // Limiter event tracking might be disabled if we end up here
+ // while on a mark worker.
startTime := nanotime()
+ trackLimiterEvent := gp.m.p.ptr().limiterEvent.start(limiterEventMarkAssist, startTime)
decnwait := atomic.Xadd(&work.nwait, -1)
if decnwait == work.nproc {
println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc)
throw("nwait > work.nprocs")
}
// gcDrainN requires the caller to be preemptible.
- casgstatus(gp, _Grunning, _Gwaiting)
- gp.waitreason = waitReasonGCAssistMarking
+ casGToWaiting(gp, _Grunning, waitReasonGCAssistMarking)
// drain own cached work first in the hopes that it
// will be more cache friendly.
// this scan work counts for. The "1+" is a poor man's
// round-up, to ensure this adds credit even if
// assistBytesPerWork is very low.
- assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork))
+ assistBytesPerWork := gcController.assistBytesPerWork.Load()
gp.gcAssistBytes += 1 + int64(assistBytesPerWork*float64(workDone))
// If this is the last worker and we ran out of work,
// a valid pointer).
gp.param = unsafe.Pointer(gp)
}
- duration := nanotime() - startTime
- _p_ := gp.m.p.ptr()
- _p_.gcAssistTime += duration
- if _p_.gcAssistTime > gcAssistTimeSlack {
- atomic.Xaddint64(&gcController.assistTime, _p_.gcAssistTime)
- _p_.gcAssistTime = 0
+ now := nanotime()
+ duration := now - startTime
+ pp := gp.m.p.ptr()
+ pp.gcAssistTime += duration
+ if trackLimiterEvent {
+ pp.limiterEvent.stop(limiterEventMarkAssist, now)
+ }
+ if pp.gcAssistTime > gcAssistTimeSlack {
+ gcController.assistTime.Add(pp.gcAssistTime)
+ gcCPULimiter.update(now)
+ pp.gcAssistTime = 0
}
}
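// The per-P gcAssistTime accumulator above batches flushes to the global
// assistTime so the shared atomic isn't hit on every assist. A minimal
// sketch of the pattern, with hypothetical names (batcher, slack):
//
//	type batcher struct {
//		local  int64         // owned by one P; no synchronization
//		global *atomic.Int64 // shared; updated at most once per slack
//	}
//
//	func (b *batcher) add(v, slack int64) {
//		b.local += v
//		if b.local > slack {
//			b.global.Add(b.local)
//			b.local = 0
//		}
//	}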
//
// gcParkAssist reports whether the assist is now satisfied. If it
// returns false, the caller must retry the assist.
-//
-//go:nowritebarrier
func gcParkAssist() bool {
lock(&work.assistQueue.lock)
// If the GC cycle finished while we were getting the lock,
// the queue, but can still back out. This avoids a
// race in case background marking has flushed more
// credit since we checked above.
- if atomic.Loadint64(&gcController.bgScanCredit) > 0 {
+ if gcController.bgScanCredit.Load() > 0 {
work.assistQueue.q = oldList
if oldList.tail != 0 {
oldList.tail.ptr().schedlink.set(nil)
}
unlock(&work.assistQueue.lock)
return false
}
// Park.
- goparkunlock(&work.assistQueue.lock, waitReasonGCAssistWait, traceEvGoBlockGC, 2)
+ goparkunlock(&work.assistQueue.lock, waitReasonGCAssistWait, traceBlockGCMarkAssist, 2)
return true
}
// small window here where an assist may add itself to
// the blocked queue and park. If that happens, we'll
// just get it on the next flush.
- atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
+ gcController.bgScanCredit.Add(scanWork)
return
}
- assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork))
+ assistBytesPerWork := gcController.assistBytesPerWork.Load()
scanBytes := int64(float64(scanWork) * assistBytesPerWork)
lock(&work.assistQueue.lock)
if scanBytes > 0 {
// Convert from scan bytes back to work.
- assistWorkPerByte := float64frombits(atomic.Load64(&gcController.assistWorkPerByte))
+ assistWorkPerByte := gcController.assistWorkPerByte.Load()
scanWork = int64(float64(scanBytes) * assistWorkPerByte)
- atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
+ gcController.bgScanCredit.Add(scanWork)
}
unlock(&work.assistQueue.lock)
}
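// A worked sketch of gcFlushBgCredit's two paths, assuming
// assistBytesPerWork = 2: flushing scanWork = 1000 with no parked assists
// simply adds 1000 to bgScanCredit. If parked assists are owed 1200 bytes,
// the 2000-byte equivalent satisfies them first and the remaining 800 bytes
// convert back to int64(800*0.5) = 400 units of background credit.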
// scanstack scans gp's stack, greying all pointers found on the stack.
//
+// Returns the amount of scan work performed, but doesn't update
+// gcController.stackScanWork or flush any credit. Any background credit produced
+// by this function should be flushed by its caller. scanstack itself can't
+// safely flush because it may result in trying to wake up a goroutine that
+// was just scanned, resulting in a self-deadlock.
+//
// scanstack will also shrink the stack if it is safe to do so. If it
// is not, it schedules a stack shrink for the next synchronous safe
// point.
//
//go:nowritebarrier
//go:systemstack
-func scanstack(gp *g, gcw *gcWork) {
+func scanstack(gp *g, gcw *gcWork) int64 {
if readgstatus(gp)&_Gscan == 0 {
print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
throw("scanstack - bad status")
print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
throw("mark - bad status")
case _Gdead:
- return
+ return 0
case _Grunning:
print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
throw("scanstack: goroutine not stopped")
throw("can't scan our own stack")
}
+ // scannedSize is the amount of work we'll be reporting.
+ //
+ // It is less than the allocated size (which is hi-lo).
+ var sp uintptr
+ if gp.syscallsp != 0 {
+ sp = gp.syscallsp // If in a system call this is the stack pointer (gp.sched.sp can be 0 in this case on Windows).
+ } else {
+ sp = gp.sched.sp
+ }
+ scannedSize := gp.stack.hi - sp
+
+ // Keep statistics for initial stack size calculation.
+ // Note that this accumulates the scanned size, not the allocated size.
+ p := getg().m.p.ptr()
+ p.scannedStackSize += uint64(scannedSize)
+ p.scannedStacks++
+
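// A picture of the accounting above: stacks grow down, so the live
// region runs from sp up to stack.hi and scannedSize never exceeds the
// allocation.
//
//	stack.lo            sp                stack.hi
//	   |---- unused -----|------ live ------->|
//	   allocated = hi - lo; scanned = hi - sp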
if isShrinkStackSafe(gp) {
// Shrink the stack if not much of it is being used.
shrinkstack(gp)
}
// Scan the stack. Accumulate a list of stack objects.
- scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
- scanframeworker(frame, &state, gcw)
- return true
+ var u unwinder
+ for u.init(gp, 0); u.valid(); u.next() {
+ scanframeworker(&u.frame, &state, gcw)
}
- gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
// Find additional pointers that point into the stack from the heap.
// Currently this includes defers and panics. See also function copystack.
println()
printunlock()
}
- gcdata := r.gcdata
+ gcdata := r.gcdata()
var s *mspan
if r.useGCProg() {
// This path is pretty unlikely, an object large enough
if state.buf != nil || state.cbuf != nil || state.freeBuf != nil {
throw("remaining pointer buffers")
}
+ return int64(scannedSize)
}
// Scan a stack frame: local variables and function arguments/results.
+//
//go:nowritebarrier
func scanframeworker(frame *stkframe, state *stackScanState, gcw *gcWork) {
if _DebugGC > 1 && frame.continpc != 0 {
print("scanframe ", funcname(frame.fn), "\n")
}
- isAsyncPreempt := frame.fn.valid() && frame.fn.funcID == funcID_asyncPreempt
- isDebugCall := frame.fn.valid() && frame.fn.funcID == funcID_debugCallV2
+ isAsyncPreempt := frame.fn.valid() && frame.fn.funcID == abi.FuncID_asyncPreempt
+ isDebugCall := frame.fn.valid() && frame.fn.funcID == abi.FuncID_debugCallV2
if state.conservative || isAsyncPreempt || isDebugCall {
if debugScanConservative {
println("conservatively scanning function", funcname(frame.fn), "at PC", hex(frame.continpc))
}
// Scan arguments to this frame.
- if frame.arglen != 0 {
+ if n := frame.argBytes(); n != 0 {
// TODO: We could pass the entry argument map
// to narrow this down further.
- scanConservative(frame.argp, frame.arglen, nil, gcw, state)
+ scanConservative(frame.argp, n, nil, gcw, state)
}
if isAsyncPreempt || isDebugCall {
return
}
- locals, args, objs := getStackMap(frame, &state.cache, false)
+ locals, args, objs := frame.getStackMap(false)
// Scan local variables if stack frame has been allocated.
if locals.n > 0 {
// varp is 0 for defers, where there are no locals.
// In that case, there can't be a pointer to its args, either.
// (And all args would be scanned above anyway.)
- for i, obj := range objs {
+ for i := range objs {
+ obj := &objs[i]
off := obj.off
base := frame.varp // locals base pointer
if off >= 0 {
if stackTraceDebug {
println("stkobj at", hex(ptr), "of size", obj.size)
}
- state.addObject(ptr, &objs[i])
+ state.addObject(ptr, obj)
}
}
}
gcDrainFractional
)
+// gcDrainMarkWorkerIdle is a wrapper for gcDrain that exists to better account
+// mark time in profiles.
+func gcDrainMarkWorkerIdle(gcw *gcWork) {
+ gcDrain(gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit)
+}
+
+// gcDrainMarkWorkerDedicated is a wrapper for gcDrain that exists to better account
+// mark time in profiles.
+func gcDrainMarkWorkerDedicated(gcw *gcWork, untilPreempt bool) {
+ flags := gcDrainFlushBgCredit
+ if untilPreempt {
+ flags |= gcDrainUntilPreempt
+ }
+ gcDrain(gcw, flags)
+}
+
+// gcDrainMarkWorkerFractional is a wrapper for gcDrain that exists to better account
+// mark time in profiles.
+func gcDrainMarkWorkerFractional(gcw *gcWork) {
+ gcDrain(gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit)
+}
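// A sketch of the dispatch these wrappers serve, as gcBgMarkWorker might
// issue it (field and mode names per the scheduler's worker modes; the
// dedicated worker first drains preemptibly, then commits):
//
//	switch pp.gcMarkWorkerMode {
//	case gcMarkWorkerDedicatedMode:
//		gcDrainMarkWorkerDedicated(&pp.gcw, true)
//		// ...requeue any preempting Gs, then run to completion:
//		gcDrainMarkWorkerDedicated(&pp.gcw, false)
//	case gcMarkWorkerFractionalMode:
//		gcDrainMarkWorkerFractional(&pp.gcw)
//	case gcMarkWorkerIdleMode:
//		gcDrainMarkWorkerIdle(&pp.gcw)
//	}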
+
// gcDrain scans roots and objects in work buffers, blackening grey
// objects until it is unable to get more work. It may return before
// GC is done; it's the caller's responsibility to balance work from
//
// gcDrain will always return if there is a pending STW.
//
+// Disabling write barriers is necessary to ensure that after we've
+// confirmed that we've drained gcw, that we don't accidentally end
+// up flipping that condition by immediately adding work in the form
+// of a write barrier buffer flush.
+//
+// Don't set nowritebarrierrec because it's safe for some callees to
+// have write barriers enabled.
+//
//go:nowritebarrier
func gcDrain(gcw *gcWork, flags gcDrainFlags) {
- if !writeBarrier.needed {
+ if !writeBarrier.enabled {
throw("gcDrain phase incorrect")
}
flushBgCredit := flags&gcDrainFlushBgCredit != 0
idle := flags&gcDrainIdle != 0
- initScanWork := gcw.scanWork
+ initScanWork := gcw.heapScanWork
// checkWork is the scan work before performing the next
// self-preempt check.
// Drain root marking jobs.
if work.markrootNext < work.markrootJobs {
// Stop if we're preemptible or if someone wants to STW.
- for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) {
+ for !(gp.preempt && (preemptible || sched.gcwaiting.Load())) {
job := atomic.Xadd(&work.markrootNext, +1) - 1
if job >= work.markrootJobs {
break
}
- markroot(gcw, job)
+ markroot(gcw, job, flushBgCredit)
if check != nil && check() {
goto done
}
// Drain heap marking jobs.
// Stop if we're preemptible or if someone wants to STW.
- for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) {
+ for !(gp.preempt && (preemptible || sched.gcwaiting.Load())) {
// Try to keep work available on the global queue. We used to
// check if there were waiting workers, but it's better to
// just keep work available than to make workers wait. In the
// Flush the write barrier
// buffer; this may create
// more work.
- wbBufFlush(nil, 0)
+ wbBufFlush()
b = gcw.tryGet()
}
}
// Flush background scan work credit to the global
// account if we've accumulated enough locally so
// mutator assists can draw on it.
- if gcw.scanWork >= gcCreditSlack {
- atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
+ if gcw.heapScanWork >= gcCreditSlack {
+ gcController.heapScanWork.Add(gcw.heapScanWork)
if flushBgCredit {
- gcFlushBgCredit(gcw.scanWork - initScanWork)
+ gcFlushBgCredit(gcw.heapScanWork - initScanWork)
initScanWork = 0
}
- checkWork -= gcw.scanWork
- gcw.scanWork = 0
+ checkWork -= gcw.heapScanWork
+ gcw.heapScanWork = 0
if checkWork <= 0 {
checkWork += drainCheckThreshold
done:
// Flush remaining scan work credit.
- if gcw.scanWork > 0 {
- atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
+ if gcw.heapScanWork > 0 {
+ gcController.heapScanWork.Add(gcw.heapScanWork)
if flushBgCredit {
- gcFlushBgCredit(gcw.scanWork - initScanWork)
+ gcFlushBgCredit(gcw.heapScanWork - initScanWork)
}
- gcw.scanWork = 0
+ gcw.heapScanWork = 0
}
}
//go:nowritebarrier
//go:systemstack
func gcDrainN(gcw *gcWork, scanWork int64) int64 {
- if !writeBarrier.needed {
+ if !writeBarrier.enabled {
throw("gcDrainN phase incorrect")
}
// There may already be scan work on the gcw, which we don't
// want to claim was done by this call.
- workFlushed := -gcw.scanWork
+ workFlushed := -gcw.heapScanWork
+ // In addition to backing out because of a preemption, back out
+ // if the GC CPU limiter is enabled.
gp := getg().m.curg
- for !gp.preempt && workFlushed+gcw.scanWork < scanWork {
+ for !gp.preempt && !gcCPULimiter.limiting() && workFlushed+gcw.heapScanWork < scanWork {
// See gcDrain comment.
if work.full == 0 {
gcw.balance()
}
- // This might be a good place to add prefetch code...
- // if(wbuf.nobj > 4) {
- // PREFETCH(wbuf->obj[wbuf.nobj - 3];
- // }
- //
b := gcw.tryGetFast()
if b == 0 {
b = gcw.tryGet()
if b == 0 {
// Flush the write barrier buffer;
// this may create more work.
- wbBufFlush(nil, 0)
+ wbBufFlush()
b = gcw.tryGet()
}
}
if b == 0 {
// Try to do a root job.
- //
- // TODO: Assists should get credit for this
- // work.
if work.markrootNext < work.markrootJobs {
job := atomic.Xadd(&work.markrootNext, +1) - 1
if job < work.markrootJobs {
- markroot(gcw, job)
+ workFlushed += markroot(gcw, job, false)
continue
}
}
// No heap or root jobs.
break
}
+
scanobject(b, gcw)
// Flush background scan work credit.
- if gcw.scanWork >= gcCreditSlack {
- atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
- workFlushed += gcw.scanWork
- gcw.scanWork = 0
+ if gcw.heapScanWork >= gcCreditSlack {
+ gcController.heapScanWork.Add(gcw.heapScanWork)
+ workFlushed += gcw.heapScanWork
+ gcw.heapScanWork = 0
}
}
// Unlike gcDrain, there's no need to flush remaining work
// here because this never flushes to bgScanCredit and
// gcw.dispose will flush any remaining work to scanWork.
- return workFlushed + gcw.scanWork
+ return workFlushed + gcw.heapScanWork
}
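// Note the workFlushed = -gcw.heapScanWork initialization above: at every
// point in the loop, work done by this call equals
//
//	workFlushed + gcw.heapScanWork
//
// since pre-existing local work was subtracted up front and each flush
// moves gcw.heapScanWork into workFlushed. That is also why the final
// return adds the two together.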
// scanblock scans b as scanobject would, but using an explicit
// pointer bitmap instead of the heap bitmap.
//
// This is used to scan non-heap roots, so it does not update
-// gcw.bytesMarked or gcw.scanWork.
+// gcw.bytesMarked or gcw.heapScanWork.
//
// If stk != nil, possible stack pointers are also reported to stk.putPtr.
+//
//go:nowritebarrier
func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork, stk *stackScanState) {
// Use local copies of original parameters, so that a stack trace
//
//go:nowritebarrier
func scanobject(b uintptr, gcw *gcWork) {
+ // Prefetch object before we scan it.
+ //
+ // This will overlap fetching the beginning of the object with initial
+ // setup before we start scanning the object.
+ sys.Prefetch(b)
+
// Find the bits for b and the size of the object at b.
//
// b is either the beginning of an object, in which case this
// is the size of the object to scan, or it points to an
// oblet, in which case we compute the size to scan below.
- hbits := heapBitsForAddr(b)
s := spanOfUnchecked(b)
n := s.elemsize
if n == 0 {
throw("scanobject n == 0")
}
+ if s.spanclass.noscan() {
+ // Correctness-wise this is ok, but it's inefficient
+ // if noscan objects reach here.
+ throw("scanobject of a noscan object")
+ }
if n > maxObletBytes {
// Large object. Break into oblets for better
// parallelism and lower latency.
if b == s.base() {
- // It's possible this is a noscan object (not
- // from greyobject, but from other code
- // paths), in which case we must *not* enqueue
- // oblets since their bitmaps will be
- // uninitialized.
- if s.spanclass.noscan() {
- // Bypass the whole scan.
- gcw.bytesMarked += uint64(n)
- return
- }
-
// Enqueue the other oblets to scan later.
// Some oblets may be in b's scalar tail, but
// these will be marked as "no more pointers",
// must be a large object, s.base() is the beginning
// of the object.
n = s.base() + s.elemsize - b
- if n > maxObletBytes {
- n = maxObletBytes
- }
+ n = min(n, maxObletBytes)
}
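// A sketch of the oblet enqueue described in the comment above: step
// maxObletBytes at a time from b and queue each start address, letting the
// per-oblet size clamp handle the object's tail.
//
//	for oblet := b + maxObletBytes; oblet < s.base()+s.elemsize; oblet += maxObletBytes {
//		if !gcw.putFast(oblet) {
//			gcw.put(oblet)
//		}
//	}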
- var i uintptr
- for i = 0; i < n; i, hbits = i+goarch.PtrSize, hbits.next() {
- // Load bits once. See CL 22712 and issue 16973 for discussion.
- bits := hbits.bits()
- if bits&bitScan == 0 {
- break // no more pointers in this object
- }
- if bits&bitPointer == 0 {
- continue // not a pointer
+ hbits := heapBitsForAddr(b, n)
+ var scanSize uintptr
+ for {
+ var addr uintptr
+ if hbits, addr = hbits.nextFast(); addr == 0 {
+ if hbits, addr = hbits.next(); addr == 0 {
+ break
+ }
}
+ // Keep track of farthest pointer we found, so we can
+ // update heapScanWork. TODO: is there a better metric,
+ // now that we can skip scalar portions pretty efficiently?
+ scanSize = addr - b + goarch.PtrSize
+
// Work here is duplicated in scanblock and above.
// If you make changes here, make changes there too.
- obj := *(*uintptr)(unsafe.Pointer(b + i))
+ obj := *(*uintptr)(unsafe.Pointer(addr))
// At this point we have extracted the next potential pointer.
// Quickly filter out nil and pointers back to the current object.
// heap. In this case, we know the object was
// just allocated and hence will be marked by
// allocation itself.
- if obj, span, objIndex := findObject(obj, b, i); obj != 0 {
- greyobject(obj, b, i, span, gcw, objIndex)
+ if obj, span, objIndex := findObject(obj, b, addr-b); obj != 0 {
+ greyobject(obj, b, addr-b, span, gcw, objIndex)
}
}
}
gcw.bytesMarked += uint64(n)
- gcw.scanWork += int64(i)
+ gcw.heapScanWork += int64(scanSize)
}
// scanConservative scans block [b, b+n) conservatively, treating any
// Shade the object if it isn't already.
// The object is not nil and known to be in the heap.
// Preemption must be disabled.
+//
//go:nowritebarrier
func shade(b uintptr) {
if obj, span, objIndex := findObject(b, 0, 0); obj != 0 {
}
}
- // Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
- // seems like a nice optimization that can be added back in.
- // There needs to be time between the PREFETCH and the use.
- // Previously we put the obj in an 8 element buffer that is drained at a rate
- // to give the PREFETCH time to do its work.
- // Use of PREFETCHNTA might be more appropriate than PREFETCH
+ // We're adding obj to P's local workbuf, so it's likely
+ // this object will be processed soon by the same P.
+ // Even if the workbuf gets flushed, there will likely still be
+ // some benefit on platforms with inclusive shared caches.
+ sys.Prefetch(obj)
+ // Queue the obj for scanning.
if !gcw.putFast(obj) {
gcw.put(obj)
}
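// putFast touches only the P-local work buffer; put falls back to the
// slower path that may allocate a buffer or publish one to the global
// list. The consume side in gcDrain mirrors the same two-level pattern:
//
//	b := gcw.tryGetFast() // local buffers only
//	if b == 0 {
//		b = gcw.tryGet() // may steal from the global full list
//	}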
//
//go:nowritebarrier
//go:nosplit
-func gcmarknewobject(span *mspan, obj, size, scanSize uintptr) {
+func gcmarknewobject(span *mspan, obj, size uintptr) {
if useCheckmark { // The world should be stopped so this should not happen.
throw("gcmarknewobject called while doing checkmark")
}
gcw := &getg().m.p.ptr().gcw
gcw.bytesMarked += uint64(size)
- gcw.scanWork += int64(scanSize)
}
// gcMarkTinyAllocs greys all active tiny alloc blocks.