)
const (
- _DebugGC = 0
- _ConcurrentSweep = true
- _FinBlockSize = 4 * 1024
+ _DebugGC = 0
+ _FinBlockSize = 4 * 1024
+
+ // concurrentSweep is a debug flag. Disabling this flag
+ // ensures all spans are swept while the world is stopped.
+ concurrentSweep = true
// debugScanConservative enables debug logging for stack
// frames that are scanned conservatively.
sweepMinHeapDistance = 1024 * 1024
)
+// heapObjectsCanMove always returns false in the current garbage collector.
+// It exists for go4.org/unsafe/assume-no-moving-gc, which is an
+// unfortunate idea that had an even more unfortunate implementation.
+// Every time a new Go release happened, the package stopped building,
+// and the authors had to add a new file with a new //go:build line, and
+// then the entire ecosystem of packages with that as a dependency had to
+// explicitly update to the new version. Many packages depend on
+// assume-no-moving-gc transitively, through paths like
+// inet.af/netaddr -> go4.org/intern -> assume-no-moving-gc.
+// This was causing a significant amount of friction around each new
+// release, so we added this bool for the package to //go:linkname
+// instead. The bool is still unfortunate, but it's not as bad as
+// breaking the ecosystem on every new release.
+//
+// If the Go garbage collector ever does move heap objects, we can set
+// this to true to break all the programs using assume-no-moving-gc.
+//
+//go:linkname heapObjectsCanMove
+func heapObjectsCanMove() bool {
+ return false
+}
+
func gcinit() {
if unsafe.Sizeof(workbuf{}) != _WorkbufSize {
throw("size of Workbuf is suboptimal")
var writeBarrier struct {
enabled bool // compiler emits a check of this before calling write barrier
pad [3]byte // compiler uses 32-bit load for "enabled" field
- needed bool // whether we need a write barrier for current GC phase
- cgo bool // whether we need a write barrier for a cgo check
alignme uint64 // guarantee alignment so that compiler can use a 32 or 64-bit load
}
//go:nosplit
func setGCPhase(x uint32) {
atomic.Store(&gcphase, x)
- writeBarrier.needed = gcphase == _GCmark || gcphase == _GCmarktermination
- writeBarrier.enabled = writeBarrier.needed || writeBarrier.cgo
+ writeBarrier.enabled = gcphase == _GCmark || gcphase == _GCmarktermination
}
// gcMarkWorkerMode represents the mode that a concurrent mark worker
type workType struct {
full lfstack // lock-free list of full blocks workbuf
+ _ cpu.CacheLinePad // prevents false-sharing between full and empty
empty lfstack // lock-free list of empty blocks workbuf
- pad0 cpu.CacheLinePad // prevents false-sharing between full/empty and nproc/nwait
+ _ cpu.CacheLinePad // prevents false-sharing between empty and nproc/nwait
wbufSpans struct {
lock mutex
// as part of tests and benchmarks to get the system into a
// relatively stable and isolated state.
for work.cycles.Load() == n+1 && sweepone() != ^uintptr(0) {
- sweep.nbgsweep++
Gosched()
}
// Wait until sweep termination, mark, and mark
// termination of cycle N complete.
work.sweepWaiters.list.push(getg())
- goparkunlock(&work.sweepWaiters.lock, waitReasonWaitForGCCycle, traceEvGoBlock, 1)
+ goparkunlock(&work.sweepWaiters.lock, waitReasonWaitForGCCycle, traceBlockUntilGCEnds, 1)
}
}
}
switch t.kind {
case gcTriggerHeap:
- // Non-atomic access to gcController.heapLive for performance. If
- // we are going to trigger on this, this thread just
- // atomically wrote gcController.heapLive anyway and we'll see our
- // own write.
trigger, _ := gcController.trigger()
return gcController.heapLive.Load() >= trigger
case gcTriggerTime:
// We check the transition condition continuously here in case
// this G gets delayed in to the next GC cycle.
for trigger.test() && sweepone() != ^uintptr(0) {
- sweep.nbgsweep++
}
// Perform GC initialization and the sweep termination
// Update it under gcsema to avoid gctrace getting wrong values.
work.userForced = trigger.kind == gcTriggerCycle
- if trace.enabled {
- traceGCStart()
+ trace := traceAcquire()
+ if trace.ok() {
+ trace.GCStart()
+ traceRelease(trace)
}
// Check that all Ps have finished deferred mcache flushes.
now := nanotime()
work.tSweepTerm = now
work.pauseStart = now
- if trace.enabled {
- traceGCSTWStart(1)
- }
- systemstack(stopTheWorldWithSema)
+ systemstack(func() { stopTheWorldWithSema(stwGCSweepTerm) })
// Finish sweep before we start concurrent scan.
systemstack(func() {
finishsweep_m()
})
- // clearpools before we start the GC. If we wait they memory will not be
+ // clearpools before we start the GC. If we wait the memory will not be
// reclaimed until the next GC cycle.
clearpools()
// enabled because they must be enabled before
// any non-leaf heap objects are marked. Since
// allocations are blocked until assists can
- // happen, we want enable assists as early as
+ // happen, we want to enable assists as early as
// possible.
setGCPhase(_GCmark)
- gcBgMarkPrepare() // Must happen before assist enable.
+ gcBgMarkPrepare() // Must happen before assists are enabled.
gcMarkRootPrepare()
// Mark all active tinyalloc blocks. Since we're
// Concurrent mark.
systemstack(func() {
- now = startTheWorldWithSema(trace.enabled)
+ now = startTheWorldWithSema()
work.pauseNS += now - work.pauseStart
work.tMark = now
memstats.gcPauseDist.record(now - work.pauseStart)
+ sweepTermCpu := int64(work.stwprocs) * (work.tMark - work.tSweepTerm)
+ work.cpuStats.gcPauseTime += sweepTermCpu
+ work.cpuStats.gcTotalTime += sweepTermCpu
+
// Release the CPU limiter.
gcCPULimiter.finishGCTransition(now)
})
work.tMarkTerm = now
work.pauseStart = now
getg().m.preemptoff = "gcing"
- if trace.enabled {
- traceGCSTWStart(0)
- }
- systemstack(stopTheWorldWithSema)
+ systemstack(func() { stopTheWorldWithSema(stwGCMarkTerm) })
// The gcphase is _GCmark, it will transition to _GCmarktermination
// below. The important thing is that the wb remains active until
// all marking is complete. This includes writes made by the GC.
if restart {
getg().m.preemptoff = ""
systemstack(func() {
- now := startTheWorldWithSema(trace.enabled)
+ now := startTheWorldWithSema()
work.pauseNS += now - work.pauseStart
memstats.gcPauseDist.record(now - work.pauseStart)
})
// before continuing.
})
+ var stwSwept bool
systemstack(func() {
work.heap2 = work.bytesMarked
if debug.gccheckmark > 0 {
// marking is complete so we can turn the write barrier off
setGCPhase(_GCoff)
- gcSweep(work.mode)
+ stwSwept = gcSweep(work.mode)
})
mp.traceback = 0
casgstatus(curgp, _Gwaiting, _Grunning)
- if trace.enabled {
- traceGCDone()
+ trace := traceAcquire()
+ if trace.ok() {
+ trace.GCDone()
+ traceRelease(trace)
}
// all done
memstats.pause_end[memstats.numgc%uint32(len(memstats.pause_end))] = uint64(unixNow)
memstats.pause_total_ns += uint64(work.pauseNS)
- sweepTermCpu := int64(work.stwprocs) * (work.tMark - work.tSweepTerm)
- // We report idle marking time below, but omit it from the
- // overall utilization here since it's "free".
- markAssistCpu := gcController.assistTime.Load()
- markDedicatedCpu := gcController.dedicatedMarkTime.Load()
- markFractionalCpu := gcController.fractionalMarkTime.Load()
- markIdleCpu := gcController.idleMarkTime.Load()
markTermCpu := int64(work.stwprocs) * (work.tEnd - work.tMarkTerm)
- scavAssistCpu := scavenge.assistTime.Load()
- scavBgCpu := scavenge.backgroundTime.Load()
-
- // Update cumulative GC CPU stats.
- work.cpuStats.gcAssistTime += markAssistCpu
- work.cpuStats.gcDedicatedTime += markDedicatedCpu + markFractionalCpu
- work.cpuStats.gcIdleTime += markIdleCpu
- work.cpuStats.gcPauseTime += sweepTermCpu + markTermCpu
- work.cpuStats.gcTotalTime += sweepTermCpu + markAssistCpu + markDedicatedCpu + markFractionalCpu + markIdleCpu + markTermCpu
-
- // Update cumulative scavenge CPU stats.
- work.cpuStats.scavengeAssistTime += scavAssistCpu
- work.cpuStats.scavengeBgTime += scavBgCpu
- work.cpuStats.scavengeTotalTime += scavAssistCpu + scavBgCpu
-
- // Update total CPU.
- work.cpuStats.totalTime = sched.totaltime + (now-sched.procresizetime)*int64(gomaxprocs)
- work.cpuStats.idleTime += sched.idleTime.Load()
-
- // Compute userTime. We compute this indirectly as everything that's not the above.
+ work.cpuStats.gcPauseTime += markTermCpu
+ work.cpuStats.gcTotalTime += markTermCpu
+
+ // Accumulate CPU stats.
//
- // Since time spent in _Pgcstop is covered by gcPauseTime, and time spent in _Pidle
- // is covered by idleTime, what we're left with is time spent in _Prunning and _Psyscall,
- // the latter of which is fine because the P will either go idle or get used for something
- // else via sysmon. Meanwhile if we subtract GC time from whatever's left, we get non-GC
- // _Prunning time. Note that this still leaves time spent in sweeping and in the scheduler,
- // but that's fine. The overwhelming majority of this time will be actual user time.
- work.cpuStats.userTime = work.cpuStats.totalTime - (work.cpuStats.gcTotalTime +
- work.cpuStats.scavengeTotalTime + work.cpuStats.idleTime)
+ // Pass gcMarkPhase=true so we can get all the latest GC CPU stats in there too.
+ work.cpuStats.accumulate(now, true)
// Compute overall GC CPU utilization.
// Omit idle marking time from the overall utilization here since it's "free".
// Reset idle time stat.
sched.idleTime.Store(0)
- // Reset sweep state.
- sweep.nbgsweep = 0
- sweep.npausesweep = 0
-
if work.userForced {
memstats.numforcedgc++
}
injectglist(&work.sweepWaiters.list)
unlock(&work.sweepWaiters.lock)
+ // Increment the scavenge generation now.
+ //
+ // This moment represents peak heap in use because we're
+ // about to start sweeping.
+ mheap_.pages.scav.index.nextGen()
+
// Release the CPU limiter.
gcCPULimiter.finishGCTransition(now)
// Those aren't tracked in any sweep lists, so we need to
// count them against sweep completion until we ensure all
// those spans have been forced out.
+ //
+ // If gcSweep fully swept the heap (for example if the sweep
+ // is not concurrent due to a GODEBUG setting), then we expect
+ // the sweepLocker to be invalid, since sweeping is done.
+ //
+ // N.B. Below we might duplicate some work from gcSweep; this is
+ // fine as all that work is idempotent within a GC cycle, and
+ // we're still holding worldsema so a new cycle can't start.
sl := sweep.active.begin()
- if !sl.valid {
+ if !stwSwept && !sl.valid {
throw("failed to set sweep barrier")
+ } else if stwSwept && sl.valid {
+ throw("non-concurrent sweep failed to drain all sweep queues")
}
- systemstack(func() { startTheWorldWithSema(trace.enabled) })
+ systemstack(func() { startTheWorldWithSema() })
// Flush the heap profile so we can start a new cycle next GC.
// This is relatively expensive, so we don't do it with the
// mcache before allocating, but idle Ps may not. Since this
// is necessary to sweep all spans, we need to ensure all
// mcaches are flushed before we start the next GC cycle.
+ //
+ // While we're here, flush the page cache for idle Ps to avoid
+ // having pages get stuck on them. These pages are hidden from
+ // the scavenger, so in small idle heaps a significant amount
+ // of additional memory might be held onto.
+ //
+ // Also, flush the pinner cache, to avoid leaking that memory
+ // indefinitely.
systemstack(func() {
forEachP(func(pp *p) {
pp.mcache.prepareForSweep()
+ if pp.status == _Pidle {
+ systemstack(func() {
+ lock(&mheap_.lock)
+ pp.pcache.flush(&mheap_.pages)
+ unlock(&mheap_.lock)
+ })
+ }
+ pp.pinnerCache = nil
})
})
- // Now that we've swept stale spans in mcaches, they don't
- // count against unswept spans.
- sweep.active.end(sl)
+ if sl.valid {
+ // Now that we've swept stale spans in mcaches, they don't
+ // count against unswept spans.
+ //
+ // Note: this sweepLocker may not be valid if sweeping had
+ // already completed during the STW. See the corresponding
+ // begin() call that produced sl.
+ sweep.active.end(sl)
+ }
// Print gctrace before dropping worldsema. As soon as we drop
// worldsema another cycle could start and smash the stats
}
print(" ms clock, ")
for i, ns := range []int64{
- sweepTermCpu,
+ int64(work.stwprocs) * (work.tMark - work.tSweepTerm),
gcController.assistTime.Load(),
gcController.dedicatedMarkTime.Load() + gcController.fractionalMarkTime.Load(),
gcController.idleMarkTime.Load(),
lc.mspan.setUserArenaChunkToFault()
}
+ // Enable huge pages on some metadata if we cross a heap threshold.
+ if gcController.heapGoal() > minHeapForMetadataHugePages {
+ mheap_.enableMetadataHugePages()
+ }
+
semrelease(&worldsema)
semrelease(&gcsema)
// Careful: another GC cycle may start now.
// Note that at this point, the G may immediately be
// rescheduled and may be running.
return true
- }, unsafe.Pointer(node), waitReasonGCWorkerIdle, traceEvGoBlock, 0)
+ }, unsafe.Pointer(node), waitReasonGCWorkerIdle, traceBlockSystemGoroutine, 0)
// Preemption must not occur here, or another G might see
// p.gcMarkWorkerMode.
default:
throw("gcBgMarkWorker: unexpected gcMarkWorkerMode")
case gcMarkWorkerDedicatedMode:
- gcDrain(&pp.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit)
+ gcDrainMarkWorkerDedicated(&pp.gcw, true)
if gp.preempt {
// We were preempted. This is
// a useful signal to kick
}
// Go back to draining, this time
// without preemption.
- gcDrain(&pp.gcw, gcDrainFlushBgCredit)
+ gcDrainMarkWorkerDedicated(&pp.gcw, false)
case gcMarkWorkerFractionalMode:
- gcDrain(&pp.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit)
+ gcDrainMarkWorkerFractional(&pp.gcw)
case gcMarkWorkerIdleMode:
- gcDrain(&pp.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit)
+ gcDrainMarkWorkerIdle(&pp.gcw)
}
casgstatus(gp, _Gwaiting, _Grunning)
})
// Gs, so only do it if checkmark is also enabled.
gcMarkRootCheck()
}
- if work.full != 0 {
- throw("work.full != 0")
- }
// Drop allg snapshot. allgs may have grown, in which case
// this is the only reference to the old backing store and
// gcSweep must be called on the system stack because it acquires the heap
// lock. See mheap for details.
//
+// Returns true if the heap was fully swept by this function.
+//
// The world must be stopped.
//
//go:systemstack
-func gcSweep(mode gcMode) {
+func gcSweep(mode gcMode) bool {
assertWorldStopped()
if gcphase != _GCoff {
sweep.centralIndex.clear()
- if !_ConcurrentSweep || mode == gcForceBlockMode {
+ if !concurrentSweep || mode == gcForceBlockMode {
// Special case synchronous sweep.
// Record that no proportional sweeping has to happen.
lock(&mheap_.lock)
mheap_.sweepPagesPerByte = 0
unlock(&mheap_.lock)
+ // Flush all mcaches.
+ for _, pp := range allp {
+ pp.mcache.prepareForSweep()
+ }
// Sweep all spans eagerly.
for sweepone() != ^uintptr(0) {
- sweep.npausesweep++
}
// Free workbufs eagerly.
prepareFreeWorkbufs()
// available immediately.
mProf_NextCycle()
mProf_Flush()
- return
+ return true
}
// Background sweep.
ready(sweep.g, 0, true)
}
unlock(&sweep.lock)
+ return false
}
// gcResetMarkState resets global state prior to marking (concurrent