const (
fixedRootFinalizers = iota
fixedRootFlushCaches
+ fixedRootFreeGStacks
fixedRootCount
// rootBlockBytes is the number of bytes to scan per data or
// BSS root.
//
// The caller must have called gcCopySpans().
//
+// The world must be stopped.
+//
//go:nowritebarrier
func gcMarkRootPrepare() {
// Compute how many data and BSS root blocks there are.
nBlocks := func(bytes uintptr) int {
return int((bytes + rootBlockBytes - 1) / rootBlockBytes)
}
work.nDataRoots = 0
- for datap := &firstmoduledata; datap != nil; datap = datap.next {
- nDataRoots := nBlocks(datap.edata - datap.data)
- if nDataRoots > work.nDataRoots {
- work.nDataRoots = nDataRoots
+ work.nBSSRoots = 0
+
+ // Only scan globals once per cycle; preferably concurrently.
+ if !work.markrootDone {
+ for datap := &firstmoduledata; datap != nil; datap = datap.next {
+ nDataRoots := nBlocks(datap.edata - datap.data)
+ if nDataRoots > work.nDataRoots {
+ work.nDataRoots = nDataRoots
+ }
}
- }
- work.nBSSRoots = 0
- for datap := &firstmoduledata; datap != nil; datap = datap.next {
- nBSSRoots := nBlocks(datap.ebss - datap.bss)
- if nBSSRoots > work.nBSSRoots {
- work.nBSSRoots = nBSSRoots
+ for datap := &firstmoduledata; datap != nil; datap = datap.next {
+ nBSSRoots := nBlocks(datap.ebss - datap.bss)
+ if nBSSRoots > work.nBSSRoots {
+ work.nBSSRoots = nBSSRoots
+ }
}
}
- // Compute number of span roots.
- work.nSpanRoots = (len(work.spans) + rootBlockSpans - 1) / rootBlockSpans
-
- // Snapshot of allglen. During concurrent scan, we just need
- // to be consistent about how many markroot jobs we create and
- // how many Gs we check. Gs may be created after this point,
- // but it's okay that we ignore them because they begin life
- // without any roots, so there's nothing to scan, and any
- // roots they create during the concurrent phase will be
- // scanned during mark termination. During mark termination,
- // allglen isn't changing, so we'll scan all Gs.
- work.nStackRoots = int(atomic.Loaduintptr(&allglen))
+ if !work.markrootDone {
+ // On the first markroot, we need to scan span roots.
+ // In concurrent GC, this happens during concurrent
+ // mark and we depend on addfinalizer to ensure the
+ // invariants described in markrootSpans for objects
+ // that get finalizers after concurrent mark. In STW
+ // GC, this will happen during mark termination.
+ work.nSpanRoots = (len(work.spans) + rootBlockSpans - 1) / rootBlockSpans
+
+ // On the first markroot, we need to scan all Gs. Gs
+ // may be created after this point, but it's okay that
+ // we ignore them because they begin life without any
+ // roots, so there's nothing to scan, and any roots
+ // they create during the concurrent phase will be
+ // scanned during mark termination. During mark
+ // termination, allglen isn't changing, so we'll scan
+ // all Gs.
+ work.nStackRoots = int(atomic.Loaduintptr(&allglen))
+ work.nRescanRoots = 0
+ } else {
+ // We've already scanned span roots and kept the scan
+ // up-to-date during concurrent mark.
+ work.nSpanRoots = 0
+
+ // On the second pass of markroot, we're just scanning
+ // dirty stacks. It's safe to access rescan since the
+ // world is stopped.
+ work.nStackRoots = 0
+ work.nRescanRoots = len(work.rescan.list)
+ }
work.markrootNext = 0
- work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots)
+ work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots + work.nRescanRoots)
}
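Taken together, these counts define one flat index space of markroot jobs. The arithmetic in isolation, as a runnable sketch; the rootBlockBytes value (256 KiB) and the segment and root counts below are illustrative, not taken from a real binary:

    package main

    import "fmt"

    // rootBlockBytes: bytes scanned per data/BSS root job (illustrative).
    const rootBlockBytes = 256 << 10

    // nBlocks rounds a segment size up to whole root blocks, mirroring
    // the closure in gcMarkRootPrepare.
    func nBlocks(bytes uintptr) int {
    	return int((bytes + rootBlockBytes - 1) / rootBlockBytes)
    }

    func main() {
    	const fixedRootCount = 3 // finalizers, flushCaches, freeGStacks

    	nDataRoots := nBlocks(1 << 20) // hypothetical 1 MiB data segment -> 4 jobs
    	nBSSRoots := nBlocks(3 << 20)  // hypothetical 3 MiB BSS segment -> 12 jobs

    	// Hypothetical counts for a first (concurrent) markroot pass.
    	nSpanRoots, nStackRoots, nRescanRoots := 16, 100, 0

    	jobs := fixedRootCount + nDataRoots + nBSSRoots +
    		nSpanRoots + nStackRoots + nRescanRoots
    	fmt.Println("markroot jobs this cycle:", jobs) // 135
    }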
// gcMarkRootCheck checks that all roots have been scanned.
}
lock(&allglock)
- // Check that gc work is done.
- for i := 0; i < work.nStackRoots; i++ {
- gp := allgs[i]
- if !gp.gcscandone {
- throw("scan missed a g")
+ // Check that stacks have been scanned.
+ if gcphase == _GCmarktermination {
+ for i := 0; i < len(allgs); i++ {
+ gp := allgs[i]
+ if !(gp.gcscandone && gp.gcscanvalid) && readgstatus(gp) != _Gdead {
+ println("gp", gp, "goid", gp.goid,
+ "status", readgstatus(gp),
+ "gcscandone", gp.gcscandone,
+ "gcscanvalid", gp.gcscanvalid)
+ throw("scan missed a g")
+ }
+ }
+ } else {
+ for i := 0; i < work.nStackRoots; i++ {
+ gp := allgs[i]
+ if !gp.gcscandone {
+ throw("scan missed a g")
+ }
}
}
unlock(&allglock)
//
// Preemption must be disabled (because this uses a gcWork).
//
+// nowritebarrier is only advisory here.
+//
//go:nowritebarrier
func markroot(gcw *gcWork, i uint32) {
+ // TODO(austin): This is a bit ridiculous. Compute and store
+ // the bases in gcMarkRootPrepare instead of the counts.
baseData := uint32(fixedRootCount)
baseBSS := baseData + uint32(work.nDataRoots)
baseSpans := baseBSS + uint32(work.nBSSRoots)
baseStacks := baseSpans + uint32(work.nSpanRoots)
+ baseRescan := baseStacks + uint32(work.nStackRoots)
+ end := baseRescan + uint32(work.nRescanRoots)
// Note: if you add a case here, please also update heapdump.go:dumproots.
switch {
case i == fixedRootFlushCaches:
if gcphase == _GCmarktermination { // Do not flush mcaches during concurrent phase.
flushallmcaches()
}
+ case i == fixedRootFreeGStacks:
+ // Only do this once per GC cycle; preferably
+ // concurrently.
+ if !work.markrootDone {
+ markrootFreeGStacks()
+ }
+
case baseSpans <= i && i < baseStacks:
// mark MSpan.specials
markrootSpans(gcw, int(i-baseSpans))
default:
// the rest is scanning goroutine stacks
- if uintptr(i-baseStacks) >= allglen {
+ var gp *g
+ if baseStacks <= i && i < baseRescan {
+ gp = allgs[i-baseStacks]
+ } else if baseRescan <= i && i < end {
+ gp = work.rescan.list[i-baseRescan].ptr()
+ } else {
throw("markroot: bad index")
}
- gp := allgs[i-baseStacks]
// remember when we've first observed the G blocked
// needed only to output in traceback
gp.waitsince = work.tstart
}
- if gcphase == _GCmarktermination && status == _Gdead {
- // Free gp's stack if necessary. Only do this
- // during mark termination because otherwise
- // _Gdead may be transient.
- shrinkstack(gp)
- }
-
- if gcphase != _GCmarktermination && gp.startpc == gcBgMarkWorkerPC {
+ if gcphase != _GCmarktermination && gp.startpc == gcBgMarkWorkerPC && readgstatus(gp) != _Gdead {
// GC background workers may be
// non-preemptible, so we may deadlock if we
// try to scan them during a concurrent phase.
// They also have tiny stacks, so just ignore
// them until mark termination.
gp.gcscandone = true
+ queueRescan(gp)
break
}
scanblock(b, n, ptrmask, gcw)
}
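markrootBlock, whose tail appears above, maps a shard index to a byte subrange of the data or BSS segment before handing it to scanblock. A reduced sketch of that arithmetic; the ptrmask offsetting is omitted and the names are mine:

    package main

    import "fmt"

    const rootBlockBytes = 256 << 10 // illustrative block size

    // blockBounds returns the [b, b+n) range of root [b0, b0+n0)
    // covered by shard; ok is false when shard is past the end.
    func blockBounds(b0, n0 uintptr, shard int) (b, n uintptr, ok bool) {
    	b = b0 + uintptr(shard)*rootBlockBytes
    	if b >= b0+n0 {
    		return 0, 0, false
    	}
    	n = rootBlockBytes
    	if b+n > b0+n0 {
    		n = b0 + n0 - b // the final block may be short
    	}
    	return b, n, true
    }

    func main() {
    	// Hypothetical 600 KiB segment at 0x400000; shard 2 gets the
    	// short tail block.
    	b, n, ok := blockBounds(0x400000, 600<<10, 2)
    	fmt.Printf("ok=%v range=[%#x,%#x) size=%dKiB\n", ok, b, b+n, n>>10)
    }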
+// markrootFreeGStacks frees stacks of dead Gs.
+//
+// This does not free stacks of dead Gs cached on Ps, but having a few
+// cached stacks around isn't a problem.
+//
+//TODO go:nowritebarrier
+func markrootFreeGStacks() {
+ // Take list of dead Gs with stacks.
+ lock(&sched.gflock)
+ list := sched.gfreeStack
+ sched.gfreeStack = nil
+ unlock(&sched.gflock)
+ if list == nil {
+ return
+ }
+
+ // Free stacks.
+ tail := list
+ for gp := list; gp != nil; gp = gp.schedlink.ptr() {
+ shrinkstack(gp)
+ tail = gp
+ }
+
+ // Put Gs back on the free list.
+ lock(&sched.gflock)
+ tail.schedlink.set(sched.gfreeNoStack)
+ sched.gfreeNoStack = list
+ unlock(&sched.gflock)
+}
+
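markrootFreeGStacks follows a detach/work/reattach shape: take the whole list while holding sched.gflock, do the expensive stack frees unlocked, then splice the processed batch back in one step. The same pattern in a self-contained sketch, with hypothetical node and list names standing in for g, schedlink, and the sched free lists:

    package main

    import (
    	"fmt"
    	"sync"
    )

    // node stands in for a dead G carrying a stack; next mirrors schedlink.
    type node struct {
    	next  *node
    	stack []byte
    }

    var (
    	mu       sync.Mutex // stands in for sched.gflock
    	withWork *node      // stands in for sched.gfreeStack
    	done     *node      // stands in for sched.gfreeNoStack
    )

    func processFreeList() {
    	// Take the whole list under the lock.
    	mu.Lock()
    	list := withWork
    	withWork = nil
    	mu.Unlock()
    	if list == nil {
    		return
    	}

    	// Do the expensive per-node work without holding the lock.
    	tail := list
    	for n := list; n != nil; n = n.next {
    		n.stack = nil // stand-in for shrinkstack freeing the stack
    		tail = n
    	}

    	// Splice the processed batch back in one step.
    	mu.Lock()
    	tail.next = done
    	done = list
    	mu.Unlock()
    }

    func main() {
    	withWork = &node{stack: make([]byte, 8), next: &node{stack: make([]byte, 8)}}
    	processFreeList()
    	for n := done; n != nil; n = n.next {
    		fmt.Println("freed:", n.stack == nil)
    	}
    }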
// markrootSpans marks roots for one shard of work.spans.
//
//go:nowritebarrier
// TODO(austin): There are several ideas for making this more
// efficient in issue #11485.
- // We process objects with finalizers only during the first
- // markroot pass. In concurrent GC, this happens during
- // concurrent mark and we depend on addfinalizer to ensure the
- // above invariants for objects that get finalizers after
- // concurrent mark. In STW GC, this will happen during mark
- // termination.
if work.markrootDone {
- return
+ throw("markrootSpans during second markroot")
}
sg := mheap_.sweepgen
//go:nowritebarrier
func scanstack(gp *g) {
if gp.gcscanvalid {
- if gcphase == _GCmarktermination {
- gcRemoveStackBarriers(gp)
- }
return
}
} else {
sp = gp.sched.sp
}
+ gcLockStackBarriers(gp) // Not necessary during mark term, but harmless.
switch gcphase {
case _GCmark:
// Install stack barriers during stack scan.
nextBarrier = ^uintptr(0)
}
- if gp.stkbarPos != 0 || len(gp.stkbar) != 0 {
- // If this happens, it's probably because we
- // scanned a stack twice in the same phase.
- print("stkbarPos=", gp.stkbarPos, " len(stkbar)=", len(gp.stkbar), " goid=", gp.goid, " gcphase=", gcphase, "\n")
- throw("g already has stack barriers")
- }
-
- gcLockStackBarriers(gp)
+ // Remove any existing stack barriers before we
+ // install new ones.
+ gcRemoveStackBarriers(gp)
case _GCmarktermination:
+ if !work.markrootDone {
+ // This is a STW GC. There may be stale stack
+ // barriers from an earlier cycle since we
+ // never passed through mark phase.
+ gcRemoveStackBarriers(gp)
+ }
+
if int(gp.stkbarPos) == len(gp.stkbar) {
// gp hit all of the stack barriers (or there
// were none). Re-scan the whole stack.
}
}
- gcRemoveStackBarriers(gp)
-
default:
throw("scanstack in wrong phase")
}
if gcphase == _GCmarktermination {
gcw.dispose()
}
+ gcUnlockStackBarriers(gp)
if gcphase == _GCmark {
- gcUnlockStackBarriers(gp)
+ // gp may have added itself to the rescan list between
+ // when GC started and now. It's clean now, so remove
+ // it. This isn't safe during mark termination because
+ // mark termination is consuming this list, but it's
+ // also not necessary.
+ dequeueRescan(gp)
}
gp.gcscanvalid = true
}
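In the mark-termination branch above, stkbarPos versus len(stkbar) decides how far the re-scan must go: frames beyond the lowest barrier the G has not yet hit have not executed since the concurrent scan, so only the stack below that point needs re-scanning. A sketch of that bound, with a hypothetical barrier record standing in for the runtime's stkbar entries:

    package main

    import "fmt"

    // barrier is a stand-in for the runtime's stkbar record; savedLRPtr
    // is the stack location where the barrier was installed.
    type barrier struct {
    	savedLRPtr uintptr
    }

    // rescanBound returns how far a mark-termination re-scan must go for
    // a stack whose barriers are stkbar, of which the first pos were hit.
    func rescanBound(stkbar []barrier, pos int) uintptr {
    	if pos == len(stkbar) {
    		// The G returned past every barrier (or none were
    		// installed): re-scan the whole stack.
    		return ^uintptr(0)
    	}
    	// Frames beyond the lowest un-hit barrier have not executed
    	// since the concurrent scan, so stop there.
    	return stkbar[pos].savedLRPtr
    }

    func main() {
    	barriers := []barrier{{savedLRPtr: 0x1000}, {savedLRPtr: 0x2000}}
    	fmt.Printf("re-scan below %#x\n", rescanBound(barriers, 1))
    	fmt.Printf("re-scan below %#x\n", rescanBound(barriers, 2))
    }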
// Scan local variables if stack frame has been allocated.
size := frame.varp - frame.sp
var minsize uintptr
- switch sys.TheChar {
- case '7':
+ switch sys.ArchFamily {
+ case sys.ARM64:
minsize = sys.SpAlign
default:
minsize = sys.MinFrameSize
}
}
+// queueRescan adds gp to the stack rescan list and clears
+// gp.gcscanvalid. The caller must own gp and ensure that gp isn't
+// already on the rescan list.
+func queueRescan(gp *g) {
+ if gcphase == _GCoff {
+ gp.gcscanvalid = false
+ return
+ }
+ if gp.gcRescan != -1 {
+ throw("g already on rescan list")
+ }
+
+ lock(&work.rescan.lock)
+ gp.gcscanvalid = false
+
+ // Recheck gcphase under the lock in case there was a phase change.
+ if gcphase == _GCoff {
+ unlock(&work.rescan.lock)
+ return
+ }
+ if len(work.rescan.list) == cap(work.rescan.list) {
+ throw("rescan list overflow")
+ }
+ n := len(work.rescan.list)
+ gp.gcRescan = int32(n)
+ work.rescan.list = work.rescan.list[:n+1]
+ work.rescan.list[n].set(gp)
+ unlock(&work.rescan.lock)
+}
+
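The double test of gcphase in queueRescan is the usual check/lock/recheck idiom: the unlocked read is only a fast path and may be stale, so the decision to append is re-made under work.rescan.lock. The same shape in miniature, with hypothetical names:

    package main

    import (
    	"fmt"
    	"sync"
    )

    var (
    	mu      sync.Mutex
    	gcOn    bool     // stands in for gcphase != _GCoff; protected by mu
    	pending []string // stands in for work.rescan.list
    )

    func maybeQueue(name string) {
    	if !gcOn { // fast path: racy read, may be stale
    		return
    	}
    	mu.Lock()
    	defer mu.Unlock()
    	if !gcOn { // authoritative recheck under the lock
    		return
    	}
    	pending = append(pending, name)
    }

    func main() {
    	gcOn = true
    	maybeQueue("g17")
    	fmt.Println("queued:", pending)
    }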
+// dequeueRescan removes gp from the stack rescan list, if gp is on
+// the rescan list. The caller must own gp.
+func dequeueRescan(gp *g) {
+ if gp.gcRescan == -1 {
+ return
+ }
+ if gcphase == _GCoff {
+ gp.gcRescan = -1
+ return
+ }
+
+ lock(&work.rescan.lock)
+ if work.rescan.list[gp.gcRescan].ptr() != gp {
+ throw("bad dequeueRescan")
+ }
+ // Careful: gp may itself be the last G on the list.
+ last := work.rescan.list[len(work.rescan.list)-1]
+ work.rescan.list[gp.gcRescan] = last
+ last.ptr().gcRescan = gp.gcRescan
+ gp.gcRescan = -1
+ work.rescan.list = work.rescan.list[:len(work.rescan.list)-1]
+ unlock(&work.rescan.lock)
+}
+
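The body of dequeueRescan is the standard O(1) unordered slice delete: overwrite the victim's slot with the last element and fix that element's stored index. Isolated below with a hypothetical item type; the assignment order is what makes the victim-is-last case work, just as in dequeueRescan:

    package main

    import "fmt"

    // item stands in for a G; idx mirrors gcRescan: the item's position
    // in list, or -1 when not queued.
    type item struct {
    	name string
    	idx  int
    }

    var list []*item

    func enqueue(it *item) {
    	it.idx = len(list)
    	list = append(list, it)
    }

    func dequeue(it *item) {
    	if it.idx == -1 {
    		return
    	}
    	last := list[len(list)-1]
    	list[it.idx] = last
    	last.idx = it.idx // safe even when it == last
    	it.idx = -1       // must come after the line above
    	list = list[:len(list)-1]
    }

    func main() {
    	a, b, c := &item{name: "a", idx: -1}, &item{name: "b", idx: -1}, &item{name: "c", idx: -1}
    	enqueue(a)
    	enqueue(b)
    	enqueue(c)
    	dequeue(a) // "c" moves into slot 0
    	for _, it := range list {
    		fmt.Println(it.name, it.idx)
    	}
    }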
type gcDrainFlags int
const (
}
}
-// If gcBlackenPromptly is true we are in the second mark phase phase so we allocate black.
+// gcmarknewobject marks a newly allocated object black. obj must
+// not contain any non-nil pointers.
+//
+// This is nosplit so it can manipulate a gcWork without preemption.
+//
//go:nowritebarrier
-func gcmarknewobject_m(obj, size uintptr) {
+//go:nosplit
+func gcmarknewobject(obj, size, scanSize uintptr) {
if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen.
throw("gcmarknewobject called while doing checkmark")
}
markBitsForAddr(obj).setMarked()
- atomic.Xadd64(&work.bytesMarked, int64(size))
+ gcw := &getg().m.p.ptr().gcw
+ gcw.bytesMarked += uint64(size)
+ gcw.scanWork += int64(scanSize)
}
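Replacing the global atomic.Xadd64 with per-P gcWork fields takes the contended atomic off the allocation path: counts accumulate in P-local state and are folded into the globals only when the gcWork is disposed. A reduced sketch of that buffering, with hypothetical names:

    package main

    import (
    	"fmt"
    	"sync/atomic"
    )

    // Global totals, touched only when a local buffer is disposed.
    var bytesMarked, scanWork int64

    // workBuf stands in for the per-P gcWork accounting fields.
    type workBuf struct {
    	bytesMarked int64
    	scanWork    int64
    }

    // markNewObject is the hot path: plain adds, no atomics, because
    // the buffer is owned by one P and preemption is disabled.
    func (w *workBuf) markNewObject(size, scanSize int64) {
    	w.bytesMarked += size
    	w.scanWork += scanSize
    }

    // dispose flushes the buffered counts with one atomic add each.
    func (w *workBuf) dispose() {
    	atomic.AddInt64(&bytesMarked, w.bytesMarked)
    	atomic.AddInt64(&scanWork, w.scanWork)
    	w.bytesMarked, w.scanWork = 0, 0
    }

    func main() {
    	var w workBuf
    	w.markNewObject(64, 64) // hypothetical sizes
    	w.markNewObject(128, 128)
    	w.dispose()
    	fmt.Println("bytesMarked:", bytesMarked, "scanWork:", scanWork)
    }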
// Checkmarking