runtime: make all GC mark workers yield for forEachP
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 3e1a0b560ac8b84618b5b1ffe5627013ac10c00b..004dc88828a0a4efe7fc3b122583631bcad39a73 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -7,7 +7,9 @@
 package runtime
 
 import (
+       "internal/abi"
        "internal/goarch"
+       "internal/goexperiment"
        "runtime/internal/atomic"
        "runtime/internal/sys"
        "unsafe"
@@ -218,8 +220,7 @@ func markroot(gcw *gcWork, i uint32, flushBgCredit bool) int64 {
                        userG := getg().m.curg
                        selfScan := gp == userG && readgstatus(userG) == _Grunning
                        if selfScan {
-                               casgstatus(userG, _Grunning, _Gwaiting)
-                               userG.waitreason = waitReasonGarbageCollectionScan
+                               casGToWaiting(userG, _Grunning, waitReasonGarbageCollectionScan)
                        }
 
                        // TODO: suspendG blocks (and spins) until gp
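Note: the two-step casgstatus + waitreason sequence above is folded into casGToWaiting, so a goroutine is never observable in _Gwaiting without a wait reason attached. A rough standalone analogue of that publish-the-reason-with-the-state idea (toy types, not runtime code):

    package main

    import (
            "fmt"
            "sync/atomic"
    )

    // Illustrative analogue: store the wait reason before the atomic state flip
    // so any reader that observes "waiting" also sees a reason.
    type worker struct {
            state  atomic.Int32 // 0 = running, 1 = waiting
            reason atomic.Value // string
    }

    func (w *worker) toWaiting(reason string) {
            w.reason.Store(reason) // reason first ...
            w.state.Store(1)       // ... then the state, bundled in one helper
    }

    func main() {
            var w worker
            w.toWaiting("GC scan")
            fmt.Println(w.state.Load(), w.reason.Load()) // 1 GC scan
    }
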
@@ -387,7 +388,9 @@ func markrootSpans(gcw *gcWork, shard int) {
                                // Mark everything that can be reached from
                                // the object (but *not* the object itself or
                                // we'll never collect it).
-                               scanobject(p, gcw)
+                               if !s.spanclass.noscan() {
+                                       scanobject(p, gcw)
+                               }
 
                                // The special itself is a root.
                                scanblock(uintptr(unsafe.Pointer(&spf.fn)), goarch.PtrSize, &oneptrmask[0], gcw, nil)
@@ -413,6 +416,18 @@ func gcAssistAlloc(gp *g) {
 
        traced := false
 retry:
+       if gcCPULimiter.limiting() {
+               // If the CPU limiter is enabled, intentionally don't
+               // assist to reduce the amount of CPU time spent in the GC.
+               if traced {
+                       trace := traceAcquire()
+                       if trace.ok() {
+                               trace.GCMarkAssistDone()
+                               traceRelease(trace)
+                       }
+               }
+               return
+       }
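Note: the new early return consults the GC CPU limiter: when GC is already over its CPU budget, the assist is skipped outright rather than adding more mark work, and any mark-assist trace region that was already opened is closed first. A minimal sketch of that gating idea, using an invented limiter type rather than the runtime's gcCPULimiter:

    package main

    import "sync/atomic"

    // Sketch: optional work such as an assist is skipped entirely while the
    // limiter reports that GC is over its CPU budget.
    type cpuLimiter struct{ limiting atomic.Bool }

    func maybeAssist(l *cpuLimiter, assist func()) {
            if l.limiting.Load() {
                    return // don't spend more CPU on GC; the assist debt simply remains
            }
            assist()
    }

    func main() {
            var l cpuLimiter
            maybeAssist(&l, func() { /* scan work would go here */ })
    }
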
        // Compute the amount of scan work we need to do to make the
        // balance positive. When the required amount of work is low,
        // we over-assist to build up credit for future allocations
@@ -432,7 +447,7 @@ retry:
        // will just cause steals to fail until credit is accumulated
        // again, so in the long run it doesn't really matter, but we
        // do have to handle the negative credit case.
-       bgScanCredit := atomic.Loadint64(&gcController.bgScanCredit)
+       bgScanCredit := gcController.bgScanCredit.Load()
        stolen := int64(0)
        if bgScanCredit > 0 {
                if bgScanCredit < scanWork {
@@ -442,7 +457,7 @@ retry:
                        stolen = scanWork
                        gp.gcAssistBytes += debtBytes
                }
-               atomic.Xaddint64(&gcController.bgScanCredit, -stolen)
+               gcController.bgScanCredit.Add(-stolen)
 
                scanWork -= stolen
 
@@ -450,15 +465,22 @@ retry:
                        // We were able to steal all of the credit we
                        // needed.
                        if traced {
-                               traceGCMarkAssistDone()
+                               trace := traceAcquire()
+                               if trace.ok() {
+                                       trace.GCMarkAssistDone()
+                                       traceRelease(trace)
+                               }
                        }
                        return
                }
        }
-
-       if trace.enabled && !traced {
-               traced = true
-               traceGCMarkAssistStart()
+       if traceEnabled() && !traced {
+               trace := traceAcquire()
+               if trace.ok() {
+                       traced = true
+                       trace.GCMarkAssistStart()
+                       traceRelease(trace)
+               }
        }
 
        // Perform assist work
@@ -504,7 +526,11 @@ retry:
                // this G's assist debt, or the GC cycle is over.
        }
        if traced {
-               traceGCMarkAssistDone()
+               trace := traceAcquire()
+               if trace.ok() {
+                       trace.GCMarkAssistDone()
+                       traceRelease(trace)
+               }
        }
 }
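Note: all tracer calls in the assist path now follow an acquire/check/release sequence: grab a tracer handle, emit the event only if ok() reports the tracer is usable, then release the handle. A toy rendering of the idiom (the real traceAcquire/traceRelease are runtime-internal and also manage preemption; this only shows the call shape):

    package main

    import "fmt"

    var tracingOn = true

    type traceHandle struct{ enabled bool }

    func traceAcquire() traceHandle  { return traceHandle{enabled: tracingOn} }
    func (t traceHandle) ok() bool   { return t.enabled }
    func traceRelease(_ traceHandle) {}

    func markAssistDone() {
            trace := traceAcquire()
            if trace.ok() {
                    fmt.Println("event: GCMarkAssistDone") // stand-in for trace.GCMarkAssistDone()
                    traceRelease(trace)
            }
    }

    func main() { markAssistDone() }
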
 
@@ -527,7 +553,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) {
                // The gcBlackenEnabled check in malloc races with the
                // store that clears it but an atomic check in every malloc
                // would be a performance hit.
-               // Instead we recheck it here on the non-preemptable system
+               // Instead we recheck it here on the non-preemptible system
                // stack to determine if we should perform an assist.
 
                // GC is done, so ignore any remaining debt.
@@ -537,7 +563,11 @@ func gcAssistAlloc1(gp *g, scanWork int64) {
        // Track time spent in this assist. Since we're on the
        // system stack, this is non-preemptible, so we can
        // just measure start and end time.
+       //
+       // Limiter event tracking might be disabled if we end up here
+       // while on a mark worker.
        startTime := nanotime()
+       trackLimiterEvent := gp.m.p.ptr().limiterEvent.start(limiterEventMarkAssist, startTime)
 
        decnwait := atomic.Xadd(&work.nwait, -1)
        if decnwait == work.nproc {
@@ -546,8 +576,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) {
        }
 
        // gcDrainN requires the caller to be preemptible.
-       casgstatus(gp, _Grunning, _Gwaiting)
-       gp.waitreason = waitReasonGCAssistMarking
+       casGToWaiting(gp, _Grunning, waitReasonGCAssistMarking)
 
        // drain own cached work first in the hopes that it
        // will be more cache friendly.
@@ -581,12 +610,17 @@ func gcAssistAlloc1(gp *g, scanWork int64) {
                // a valid pointer).
                gp.param = unsafe.Pointer(gp)
        }
-       duration := nanotime() - startTime
-       _p_ := gp.m.p.ptr()
-       _p_.gcAssistTime += duration
-       if _p_.gcAssistTime > gcAssistTimeSlack {
-               atomic.Xaddint64(&gcController.assistTime, _p_.gcAssistTime)
-               _p_.gcAssistTime = 0
+       now := nanotime()
+       duration := now - startTime
+       pp := gp.m.p.ptr()
+       pp.gcAssistTime += duration
+       if trackLimiterEvent {
+               pp.limiterEvent.stop(limiterEventMarkAssist, now)
+       }
+       if pp.gcAssistTime > gcAssistTimeSlack {
+               gcController.assistTime.Add(pp.gcAssistTime)
+               gcCPULimiter.update(now)
+               pp.gcAssistTime = 0
        }
 }
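Note: gcAssistAlloc1 now brackets the assist with a per-P limiter event (start/stop) and still batches assist time locally, publishing it to gcController.assistTime and poking the CPU limiter only once the local total exceeds the slack threshold. A sketch of that slack-based flushing, with an assumed slack value and invented names:

    package main

    import (
            "fmt"
            "sync/atomic"
    )

    const timeSlack = 5000 // nanoseconds of slack before flushing (assumed value)

    var globalAssistTime atomic.Int64

    type pState struct{ localAssistTime int64 }

    // recordAssist accumulates per-worker time and only touches the shared
    // counter when the local total exceeds the slack, keeping atomics off the
    // fast path.
    func recordAssist(p *pState, duration int64) {
            p.localAssistTime += duration
            if p.localAssistTime > timeSlack {
                    globalAssistTime.Add(p.localAssistTime) // also where the CPU limiter would be updated
                    p.localAssistTime = 0
            }
    }

    func main() {
            var p pState
            recordAssist(&p, 3000)
            recordAssist(&p, 3000)
            fmt.Println(globalAssistTime.Load(), p.localAssistTime) // 6000 0
    }
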
 
@@ -622,7 +656,7 @@ func gcParkAssist() bool {
        // the queue, but can still back out. This avoids a
        // race in case background marking has flushed more
        // credit since we checked above.
-       if atomic.Loadint64(&gcController.bgScanCredit) > 0 {
+       if gcController.bgScanCredit.Load() > 0 {
                work.assistQueue.q = oldList
                if oldList.tail != 0 {
                        oldList.tail.ptr().schedlink.set(nil)
@@ -631,7 +665,7 @@ func gcParkAssist() bool {
                return false
        }
        // Park.
-       goparkunlock(&work.assistQueue.lock, waitReasonGCAssistWait, traceEvGoBlockGC, 2)
+       goparkunlock(&work.assistQueue.lock, waitReasonGCAssistWait, traceBlockGCMarkAssist, 2)
        return true
 }
 
@@ -651,7 +685,7 @@ func gcFlushBgCredit(scanWork int64) {
                // small window here where an assist may add itself to
                // the blocked queue and park. If that happens, we'll
                // just get it on the next flush.
-               atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
+               gcController.bgScanCredit.Add(scanWork)
                return
        }
 
@@ -691,7 +725,7 @@ func gcFlushBgCredit(scanWork int64) {
                // Convert from scan bytes back to work.
                assistWorkPerByte := gcController.assistWorkPerByte.Load()
                scanWork = int64(float64(scanBytes) * assistWorkPerByte)
-               atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
+               gcController.bgScanCredit.Add(scanWork)
        }
        unlock(&work.assistQueue.lock)
 }
@@ -736,14 +770,22 @@ func scanstack(gp *g, gcw *gcWork) int64 {
                throw("can't scan our own stack")
        }
 
-       // stackSize is the amount of work we'll be reporting.
-       //
-       // We report the total stack size, more than we scan,
-       // because this number needs to line up with gcControllerState's
-       // stackScan and scannableStackSize fields.
+       // scannedSize is the amount of work we'll be reporting.
        //
-       // See the documentation on those fields for more information.
-       stackSize := gp.stack.hi - gp.stack.lo
+       // It is less than the allocated size (which is hi-lo).
+       var sp uintptr
+       if gp.syscallsp != 0 {
+               sp = gp.syscallsp // If in a system call this is the stack pointer (gp.sched.sp can be 0 in this case on Windows).
+       } else {
+               sp = gp.sched.sp
+       }
+       scannedSize := gp.stack.hi - sp
+
+       // Keep statistics for initial stack size calculation.
+       // Note that this accumulates the scanned size, not the allocated size.
+       p := getg().m.p.ptr()
+       p.scannedStackSize += uint64(scannedSize)
+       p.scannedStacks++
 
        if isShrinkStackSafe(gp) {
                // Shrink the stack if not much of it is being used.
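Note: scanstack now reports the portion of the stack that is actually live (stack.hi - sp, falling back to syscallsp during a system call) rather than the full allocated stack, and accumulates it into per-P statistics used when sizing new goroutine stacks. A sketch of the size computation with made-up types:

    package main

    import "fmt"

    // Stacks grow down, so only [sp, hi) is live and counted as scan work;
    // during a syscall, syscallsp stands in for sp.
    type stackBounds struct{ lo, hi uintptr }

    type gLite struct {
            stack     stackBounds
            sp        uintptr // saved stack pointer
            syscallsp uintptr // non-zero while in a system call
    }

    func scannedSize(gp *gLite) uintptr {
            sp := gp.sp
            if gp.syscallsp != 0 {
                    sp = gp.syscallsp // gp.sp can be stale or zero here on some platforms
            }
            return gp.stack.hi - sp
    }

    func main() {
            g := gLite{stack: stackBounds{lo: 0x1000, hi: 0x3000}, sp: 0x2800}
            fmt.Printf("%#x\n", scannedSize(&g)) // 0x800 scanned of 0x2000 allocated
    }
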
@@ -772,11 +814,10 @@ func scanstack(gp *g, gcw *gcWork) int64 {
        }
 
        // Scan the stack. Accumulate a list of stack objects.
-       scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
-               scanframeworker(frame, &state, gcw)
-               return true
+       var u unwinder
+       for u.init(gp, 0); u.valid(); u.next() {
+               scanframeworker(&u.frame, &state, gcw)
        }
-       gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
 
        // Find additional pointers that point into the stack from the heap.
        // Currently this includes defers and panics. See also function copystack.
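Note: the callback passed to gentraceback is replaced by the unwinder, which exposes frame traversal as an ordinary init/valid/next loop. A toy iterator in the same style (the real runtime unwinder walks machine frames and is internal):

    package main

    import "fmt"

    type frameIter struct {
            frames []string
            i      int
    }

    func (u *frameIter) init(frames []string) { u.frames, u.i = frames, 0 }
    func (u *frameIter) valid() bool          { return u.i < len(u.frames) }
    func (u *frameIter) next()                { u.i++ }
    func (u *frameIter) frame() string        { return u.frames[u.i] }

    func main() {
            var u frameIter
            for u.init([]string{"main.main", "runtime.goexit"}); u.valid(); u.next() {
                    fmt.Println("scan frame:", u.frame()) // stand-in for scanframeworker(&u.frame, ...)
            }
    }
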
@@ -884,18 +925,19 @@ func scanstack(gp *g, gcw *gcWork) int64 {
        if state.buf != nil || state.cbuf != nil || state.freeBuf != nil {
                throw("remaining pointer buffers")
        }
-       return int64(stackSize)
+       return int64(scannedSize)
 }
 
 // Scan a stack frame: local variables and function arguments/results.
+//
 //go:nowritebarrier
 func scanframeworker(frame *stkframe, state *stackScanState, gcw *gcWork) {
        if _DebugGC > 1 && frame.continpc != 0 {
                print("scanframe ", funcname(frame.fn), "\n")
        }
 
-       isAsyncPreempt := frame.fn.valid() && frame.fn.funcID == funcID_asyncPreempt
-       isDebugCall := frame.fn.valid() && frame.fn.funcID == funcID_debugCallV2
+       isAsyncPreempt := frame.fn.valid() && frame.fn.funcID == abi.FuncID_asyncPreempt
+       isDebugCall := frame.fn.valid() && frame.fn.funcID == abi.FuncID_debugCallV2
        if state.conservative || isAsyncPreempt || isDebugCall {
                if debugScanConservative {
                        println("conservatively scanning function", funcname(frame.fn), "at PC", hex(frame.continpc))
@@ -917,10 +959,10 @@ func scanframeworker(frame *stkframe, state *stackScanState, gcw *gcWork) {
                }
 
                // Scan arguments to this frame.
-               if frame.arglen != 0 {
+               if n := frame.argBytes(); n != 0 {
                        // TODO: We could pass the entry argument map
                        // to narrow this down further.
-                       scanConservative(frame.argp, frame.arglen, nil, gcw, state)
+                       scanConservative(frame.argp, n, nil, gcw, state)
                }
 
                if isAsyncPreempt || isDebugCall {
@@ -938,7 +980,7 @@ func scanframeworker(frame *stkframe, state *stackScanState, gcw *gcWork) {
                return
        }
 
-       locals, args, objs := getStackMap(frame, &state.cache, false)
+       locals, args, objs := frame.getStackMap(false)
 
        // Scan local variables if stack frame has been allocated.
        if locals.n > 0 {
@@ -985,6 +1027,28 @@ const (
        gcDrainFractional
 )
 
+// gcDrainMarkWorkerIdle is a wrapper for gcDrain that exists to better account
+// mark time in profiles.
+func gcDrainMarkWorkerIdle(gcw *gcWork) {
+       gcDrain(gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit)
+}
+
+// gcDrainMarkWorkerDedicated is a wrapper for gcDrain that exists to better account
+// mark time in profiles.
+func gcDrainMarkWorkerDedicated(gcw *gcWork, untilPreempt bool) {
+       flags := gcDrainFlushBgCredit
+       if untilPreempt {
+               flags |= gcDrainUntilPreempt
+       }
+       gcDrain(gcw, flags)
+}
+
+// gcDrainMarkWorkerFractional is a wrapper for gcDrain that exists to better account
+// mark time in profiles.
+func gcDrainMarkWorkerFractional(gcw *gcWork) {
+       gcDrain(gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit)
+}
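Note: these wrappers add no behavior; they exist so CPU profiles attribute dedicated, fractional, and idle mark work to distinct function names before funnelling into gcDrain. The same trick in miniature (illustrative names):

    package main

    // Samples taken inside work() show up under different frames in a CPU
    // profile depending on which wrapper was the entry point.
    func work(flags int) {
            for i := 0; i < 1e6; i++ { // stand-in for the drain loop
                    _ = i * flags
            }
    }

    func workIdle()       { work(1) }
    func workDedicated()  { work(2) }
    func workFractional() { work(3) }

    func main() {
            workIdle()
            workDedicated()
            workFractional()
    }
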
+
 // gcDrain scans roots and objects in work buffers, blackening grey
 // objects until it is unable to get more work. It may return before
 // GC is done; it's the caller's responsibility to balance work from
@@ -1004,15 +1068,26 @@ const (
 // credit to gcController.bgScanCredit every gcCreditSlack units of
 // scan work.
 //
-// gcDrain will always return if there is a pending STW.
+// gcDrain will always return if there is a pending STW or forEachP.
+//
+// Disabling write barriers is necessary to ensure that after we've
+// confirmed that we've drained gcw, that we don't accidentally end
+// up flipping that condition by immediately adding work in the form
+// of a write barrier buffer flush.
+//
+// Don't set nowritebarrierrec because it's safe for some callees to
+// have write barriers enabled.
 //
 //go:nowritebarrier
 func gcDrain(gcw *gcWork, flags gcDrainFlags) {
-       if !writeBarrier.needed {
+       if !writeBarrier.enabled {
                throw("gcDrain phase incorrect")
        }
 
+       // N.B. We must be running in a non-preemptible context, so it's
+       // safe to hold a reference to our P here.
        gp := getg().m.curg
+       pp := gp.m.p.ptr()
        preemptible := flags&gcDrainUntilPreempt != 0
        flushBgCredit := flags&gcDrainFlushBgCredit != 0
        idle := flags&gcDrainIdle != 0
@@ -1034,8 +1109,9 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
 
        // Drain root marking jobs.
        if work.markrootNext < work.markrootJobs {
-               // Stop if we're preemptible or if someone wants to STW.
-               for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) {
+               // Stop if we're preemptible, if someone wants to STW, or if
+               // someone is calling forEachP.
+               for !(gp.preempt && (preemptible || sched.gcwaiting.Load() || pp.runSafePointFn != 0)) {
                        job := atomic.Xadd(&work.markrootNext, +1) - 1
                        if job >= work.markrootJobs {
                                break
@@ -1048,8 +1124,16 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
        }
 
        // Drain heap marking jobs.
-       // Stop if we're preemptible or if someone wants to STW.
-       for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) {
+       //
+       // Stop if we're preemptible, if someone wants to STW, or if
+       // someone is calling forEachP.
+       //
+       // TODO(mknyszek): Consider always checking gp.preempt instead
+       // of having the preempt flag, and making an exception for certain
+       // mark workers in retake. That might be simpler than trying to
+       // enumerate all the reasons why we might want to preempt, even
+       // if we're supposed to be mostly non-preemptible.
+       for !(gp.preempt && (preemptible || sched.gcwaiting.Load() || pp.runSafePointFn != 0)) {
                // Try to keep work available on the global queue. We used to
                // check if there were waiting workers, but it's better to
                // just keep work available than to make workers wait. In the
@@ -1066,7 +1150,7 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
                                // Flush the write barrier
                                // buffer; this may create
                                // more work.
-                               wbBufFlush(nil, 0)
+                               wbBufFlush()
                                b = gcw.tryGet()
                        }
                }
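Note: this is the change the commit title refers to: in addition to a pending stop-the-world, both drain loops now also yield when the P has a pending forEachP safe-point function, so forEachP is never stuck waiting on a long-running mark worker. A sketch of the combined stop condition with invented flags:

    package main

    import (
            "fmt"
            "sync/atomic"
    )

    // A worker yields when it has been asked to preempt AND either it runs in a
    // preemptible mode, a stop-the-world is pending, or a forEachP safe-point
    // function is pending on its P.
    type drainCtl struct {
            preempt        atomic.Bool
            gcWaiting      atomic.Bool // sched.gcwaiting analogue
            runSafePointFn atomic.Bool // pp.runSafePointFn analogue
            preemptible    bool
    }

    func (c *drainCtl) shouldStop() bool {
            return c.preempt.Load() &&
                    (c.preemptible || c.gcWaiting.Load() || c.runSafePointFn.Load())
    }

    func main() {
            var c drainCtl
            c.preempt.Store(true)
            c.runSafePointFn.Store(true) // forEachP wants this worker's P
            fmt.Println(c.shouldStop())  // true, even for a non-preemptible dedicated worker
    }
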
@@ -1122,7 +1206,7 @@ done:
 //go:nowritebarrier
 //go:systemstack
 func gcDrainN(gcw *gcWork, scanWork int64) int64 {
-       if !writeBarrier.needed {
+       if !writeBarrier.enabled {
                throw("gcDrainN phase incorrect")
        }
 
@@ -1130,8 +1214,10 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 {
        // want to claim was done by this call.
        workFlushed := -gcw.heapScanWork
 
+       // In addition to backing out because of a preemption, back out
+       // if the GC CPU limiter is enabled.
        gp := getg().m.curg
-       for !gp.preempt && workFlushed+gcw.heapScanWork < scanWork {
+       for !gp.preempt && !gcCPULimiter.limiting() && workFlushed+gcw.heapScanWork < scanWork {
                // See gcDrain comment.
                if work.full == 0 {
                        gcw.balance()
@@ -1143,7 +1229,7 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 {
                        if b == 0 {
                                // Flush the write barrier buffer;
                                // this may create more work.
-                               wbBufFlush(nil, 0)
+                               wbBufFlush()
                                b = gcw.tryGet()
                        }
                }
@@ -1185,6 +1271,7 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 {
 // gcw.bytesMarked or gcw.heapScanWork.
 //
 // If stk != nil, possible stack pointers are also reported to stk.putPtr.
+//
 //go:nowritebarrier
 func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork, stk *stackScanState) {
        // Use local copies of original parameters, so that a stack trace
@@ -1236,28 +1323,22 @@ func scanobject(b uintptr, gcw *gcWork) {
        // b is either the beginning of an object, in which case this
        // is the size of the object to scan, or it points to an
        // oblet, in which case we compute the size to scan below.
-       hbits := heapBitsForAddr(b)
        s := spanOfUnchecked(b)
        n := s.elemsize
        if n == 0 {
                throw("scanobject n == 0")
        }
+       if s.spanclass.noscan() {
+               // Correctness-wise this is ok, but it's inefficient
+               // if noscan objects reach here.
+               throw("scanobject of a noscan object")
+       }
 
+       var tp typePointers
        if n > maxObletBytes {
                // Large object. Break into oblets for better
                // parallelism and lower latency.
                if b == s.base() {
-                       // It's possible this is a noscan object (not
-                       // from greyobject, but from other code
-                       // paths), in which case we must *not* enqueue
-                       // oblets since their bitmaps will be
-                       // uninitialized.
-                       if s.spanclass.noscan() {
-                               // Bypass the whole scan.
-                               gcw.bytesMarked += uint64(n)
-                               return
-                       }
-
                        // Enqueue the other oblets to scan later.
                        // Some oblets may be in b's scalar tail, but
                        // these will be marked as "no more pointers",
@@ -1274,25 +1355,46 @@ func scanobject(b uintptr, gcw *gcWork) {
                // must be a large object, s.base() is the beginning
                // of the object.
                n = s.base() + s.elemsize - b
-               if n > maxObletBytes {
-                       n = maxObletBytes
+               n = min(n, maxObletBytes)
+               if goexperiment.AllocHeaders {
+                       tp = s.typePointersOfUnchecked(s.base())
+                       tp = tp.fastForward(b-tp.addr, b+n)
+               }
+       } else {
+               if goexperiment.AllocHeaders {
+                       tp = s.typePointersOfUnchecked(b)
                }
        }
 
-       var i uintptr
-       for i = 0; i < n; i, hbits = i+goarch.PtrSize, hbits.next() {
-               // Load bits once. See CL 22712 and issue 16973 for discussion.
-               bits := hbits.bits()
-               if bits&bitScan == 0 {
-                       break // no more pointers in this object
-               }
-               if bits&bitPointer == 0 {
-                       continue // not a pointer
+       var hbits heapBits
+       if !goexperiment.AllocHeaders {
+               hbits = heapBitsForAddr(b, n)
+       }
+       var scanSize uintptr
+       for {
+               var addr uintptr
+               if goexperiment.AllocHeaders {
+                       if tp, addr = tp.nextFast(); addr == 0 {
+                               if tp, addr = tp.next(b + n); addr == 0 {
+                                       break
+                               }
+                       }
+               } else {
+                       if hbits, addr = hbits.nextFast(); addr == 0 {
+                               if hbits, addr = hbits.next(); addr == 0 {
+                                       break
+                               }
+                       }
                }
 
+               // Keep track of farthest pointer we found, so we can
+               // update heapScanWork. TODO: is there a better metric,
+               // now that we can skip scalar portions pretty efficiently?
+               scanSize = addr - b + goarch.PtrSize
+
                // Work here is duplicated in scanblock and above.
                // If you make changes here, make changes there too.
-               obj := *(*uintptr)(unsafe.Pointer(b + i))
+               obj := *(*uintptr)(unsafe.Pointer(addr))
 
                // At this point we have extracted the next potential pointer.
                // Quickly filter out nil and pointers back to the current object.
@@ -1306,13 +1408,13 @@ func scanobject(b uintptr, gcw *gcWork) {
                        // heap. In this case, we know the object was
                        // just allocated and hence will be marked by
                        // allocation itself.
-                       if obj, span, objIndex := findObject(obj, b, i); obj != 0 {
-                               greyobject(obj, b, i, span, gcw, objIndex)
+                       if obj, span, objIndex := findObject(obj, b, addr-b); obj != 0 {
+                               greyobject(obj, b, addr-b, span, gcw, objIndex)
                        }
                }
        }
        gcw.bytesMarked += uint64(n)
-       gcw.heapScanWork += int64(i)
+       gcw.heapScanWork += int64(scanSize)
 }
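Note: with the bitmap walk replaced by an iterator (heapBits, or typePointers under goexperiment.AllocHeaders), scan work is charged up to the farthest pointer slot actually visited plus one pointer, rather than the loop index over the whole object. A sketch of that accounting with the iterator elided:

    package main

    import "fmt"

    const ptrSize = 8 // goarch.PtrSize on 64-bit; an assumption for this sketch

    // scanWork charges work only up to the last pointer slot visited, since
    // trailing scalar data is now skipped cheaply.
    func scanWork(base uintptr, ptrOffsets []uintptr) (scanSize uintptr) {
            for _, off := range ptrOffsets {
                    // A real scanner loads *(*uintptr)(unsafe.Pointer(base+off))
                    // here and greys the object it points to.
                    scanSize = off + ptrSize
            }
            return scanSize
    }

    func main() {
            // Object with pointers at offsets 0 and 24, then a long scalar tail.
            fmt.Println(scanWork(0x1000, []uintptr{0, 24})) // 32, not the full object size
    }
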
 
 // scanConservative scans block [b, b+n) conservatively, treating any
@@ -1413,6 +1515,7 @@ func scanConservative(b, n uintptr, ptrmask *uint8, gcw *gcWork, state *stackSca
 // Shade the object if it isn't already.
 // The object is not nil and known to be in the heap.
 // Preemption must be disabled.
+//
 //go:nowritebarrier
 func shade(b uintptr) {
        if obj, span, objIndex := findObject(b, 0, 0); obj != 0 {
@@ -1534,7 +1637,7 @@ func gcDumpObject(label string, obj, off uintptr) {
 //
 //go:nowritebarrier
 //go:nosplit
-func gcmarknewobject(span *mspan, obj, size, scanSize uintptr) {
+func gcmarknewobject(span *mspan, obj, size uintptr) {
        if useCheckmark { // The world should be stopped so this should not happen.
                throw("gcmarknewobject called while doing checkmark")
        }