const (
fixedRootFinalizers = iota
fixedRootFlushCaches
+ fixedRootFreeGStacks
fixedRootCount
// rootBlockBytes is the number of bytes to scan per data or
// BSS root.
//
// The caller must have called gcCopySpans().
//
+// The world must be stopped.
+//
//go:nowritebarrier
func gcMarkRootPrepare() {
// Compute how many data and BSS root blocks there are.
nBlocks := func(bytes uintptr) int {
return int((bytes + rootBlockBytes - 1) / rootBlockBytes)
}
work.nDataRoots = 0
- for datap := &firstmoduledata; datap != nil; datap = datap.next {
- nDataRoots := nBlocks(datap.edata - datap.data)
- if nDataRoots > work.nDataRoots {
- work.nDataRoots = nDataRoots
+ work.nBSSRoots = 0
+
+ // Only scan globals once per cycle; preferably concurrently.
+ if !work.markrootDone {
+ for datap := &firstmoduledata; datap != nil; datap = datap.next {
+ nDataRoots := nBlocks(datap.edata - datap.data)
+ if nDataRoots > work.nDataRoots {
+ work.nDataRoots = nDataRoots
+ }
}
- }
- work.nBSSRoots = 0
- for datap := &firstmoduledata; datap != nil; datap = datap.next {
- nBSSRoots := nBlocks(datap.ebss - datap.bss)
- if nBSSRoots > work.nBSSRoots {
- work.nBSSRoots = nBSSRoots
+ for datap := &firstmoduledata; datap != nil; datap = datap.next {
+ nBSSRoots := nBlocks(datap.ebss - datap.bss)
+ if nBSSRoots > work.nBSSRoots {
+ work.nBSSRoots = nBSSRoots
+ }
}
}
- // Compute number of span roots.
- work.nSpanRoots = (len(work.spans) + rootBlockSpans - 1) / rootBlockSpans
-
- // Snapshot of allglen. During concurrent scan, we just need
- // to be consistent about how many markroot jobs we create and
- // how many Gs we check. Gs may be created after this point,
- // but it's okay that we ignore them because they begin life
- // without any roots, so there's nothing to scan, and any
- // roots they create during the concurrent phase will be
- // scanned during mark termination. During mark termination,
- // allglen isn't changing, so we'll scan all Gs.
- work.nStackRoots = int(atomic.Loaduintptr(&allglen))
+ if !work.markrootDone {
+ // On the first markroot, we need to scan span roots.
+ // In concurrent GC, this happens during concurrent
+ // mark and we depend on addfinalizer to ensure the
+ // invariants described in markrootSpans for objects
+ // that get finalizers after concurrent mark. In STW
+ // GC, this will happen during mark termination.
+ work.nSpanRoots = (len(work.spans) + rootBlockSpans - 1) / rootBlockSpans
+
+ // On the first markroot, we need to scan all Gs. Gs
+ // may be created after this point, but it's okay that
+ // we ignore them because they begin life without any
+ // roots, so there's nothing to scan, and any roots
+ // they create during the concurrent phase will be
+ // scanned during mark termination. During mark
+ // termination, allglen isn't changing, so we'll scan
+ // all Gs.
+ work.nStackRoots = int(atomic.Loaduintptr(&allglen))
+ work.nRescanRoots = 0
+ } else {
+ // We've already scanned span roots and kept the scan
+ // up-to-date during concurrent mark.
+ work.nSpanRoots = 0
+
+ // On the second pass of markroot, we're just scanning
+ // dirty stacks. It's safe to access rescan since the
+ // world is stopped.
+ work.nStackRoots = 0
+ work.nRescanRoots = len(work.rescan.list)
+ }
work.markrootNext = 0
- work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots)
+ work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots + work.nRescanRoots)
}
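Taken together, these counts define one flat index space of markroot jobs. The arithmetic in isolation, as a runnable sketch; the rootBlockBytes value (256 KiB) and the segment and root counts below are illustrative, not taken from a real binary:

    package main

    import "fmt"

    // rootBlockBytes: bytes scanned per data/BSS root job (illustrative).
    const rootBlockBytes = 256 << 10

    // nBlocks rounds a segment size up to whole root blocks, mirroring
    // the closure in gcMarkRootPrepare.
    func nBlocks(bytes uintptr) int {
    	return int((bytes + rootBlockBytes - 1) / rootBlockBytes)
    }

    func main() {
    	const fixedRootCount = 3 // finalizers, flushCaches, freeGStacks

    	nDataRoots := nBlocks(1 << 20) // hypothetical 1 MiB data segment -> 4 jobs
    	nBSSRoots := nBlocks(3 << 20)  // hypothetical 3 MiB BSS segment -> 12 jobs

    	// Hypothetical counts for a first (concurrent) markroot pass.
    	nSpanRoots, nStackRoots, nRescanRoots := 16, 100, 0

    	jobs := fixedRootCount + nDataRoots + nBSSRoots +
    		nSpanRoots + nStackRoots + nRescanRoots
    	fmt.Println("markroot jobs this cycle:", jobs) // 135
    }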
// gcMarkRootCheck checks that all roots have been scanned.
}
lock(&allglock)
- // Check that gc work is done.
- for i := 0; i < work.nStackRoots; i++ {
- gp := allgs[i]
- if !gp.gcscandone {
- throw("scan missed a g")
+ // Check that stacks have been scanned.
+ if gcphase == _GCmarktermination {
+ for i := 0; i < len(allgs); i++ {
+ gp := allgs[i]
+ if !(gp.gcscandone && gp.gcscanvalid) && readgstatus(gp) != _Gdead {
+ println("gp", gp, "goid", gp.goid,
+ "status", readgstatus(gp),
+ "gcscandone", gp.gcscandone,
+ "gcscanvalid", gp.gcscanvalid)
+ throw("scan missed a g")
+ }
+ }
+ } else {
+ for i := 0; i < work.nStackRoots; i++ {
+ gp := allgs[i]
+ if !gp.gcscandone {
+ throw("scan missed a g")
+ }
}
}
unlock(&allglock)
//
// Preemption must be disabled (because this uses a gcWork).
//
+// nowritebarrier is only advisory here.
+//
//go:nowritebarrier
func markroot(gcw *gcWork, i uint32) {
+ // TODO(austin): This is a bit ridiculous. Compute and store
+ // the bases in gcMarkRootPrepare instead of the counts.
baseData := uint32(fixedRootCount)
baseBSS := baseData + uint32(work.nDataRoots)
baseSpans := baseBSS + uint32(work.nBSSRoots)
baseStacks := baseSpans + uint32(work.nSpanRoots)
+ baseRescan := baseStacks + uint32(work.nStackRoots)
+ end := baseRescan + uint32(work.nRescanRoots)
// Note: if you add a case here, please also update heapdump.go:dumproots.
switch {
case i == fixedRootFlushCaches:
if gcphase == _GCmarktermination { // Do not flush mcaches during concurrent phase.
flushallmcaches()
}
+ case i == fixedRootFreeGStacks:
+ // Only do this once per GC cycle; preferably
+ // concurrently.
+ if !work.markrootDone {
+ markrootFreeGStacks()
+ }
+
case baseSpans <= i && i < baseStacks:
// mark MSpan.specials
markrootSpans(gcw, int(i-baseSpans))
default:
// the rest is scanning goroutine stacks
- if uintptr(i-baseStacks) >= allglen {
+ var gp *g
+ if baseStacks <= i && i < baseRescan {
+ gp = allgs[i-baseStacks]
+ } else if baseRescan <= i && i < end {
+ gp = work.rescan.list[i-baseRescan].ptr()
+ } else {
throw("markroot: bad index")
}
- gp := allgs[i-baseStacks]
// remember when we've first observed the G blocked
// needed only to output in traceback
gp.waitsince = work.tstart
}
- if gcphase == _GCmarktermination && status == _Gdead {
- // Free gp's stack if necessary. Only do this
- // during mark termination because otherwise
- // _Gdead may be transient.
- shrinkstack(gp)
- }
-
- if gcphase != _GCmarktermination && gp.startpc == gcBgMarkWorkerPC {
+ if gcphase != _GCmarktermination && gp.startpc == gcBgMarkWorkerPC && readgstatus(gp) != _Gdead {
// GC background workers may be
// non-preemptible, so we may deadlock if we
// try to scan them during a concurrent phase.
// They also have tiny stacks, so just ignore
// them until mark termination.
gp.gcscandone = true
+ queueRescan(gp)
break
}
scanblock(b, n, ptrmask, gcw)
}
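markrootBlock, whose tail appears above, maps a shard index to a byte subrange of the data or BSS segment before handing it to scanblock. A reduced sketch of that arithmetic; the ptrmask offsetting is omitted and the names are mine:

    package main

    import "fmt"

    const rootBlockBytes = 256 << 10 // illustrative block size

    // blockBounds returns the [b, b+n) range of root [b0, b0+n0)
    // covered by shard; ok is false when shard is past the end.
    func blockBounds(b0, n0 uintptr, shard int) (b, n uintptr, ok bool) {
    	b = b0 + uintptr(shard)*rootBlockBytes
    	if b >= b0+n0 {
    		return 0, 0, false
    	}
    	n = rootBlockBytes
    	if b+n > b0+n0 {
    		n = b0 + n0 - b // the final block may be short
    	}
    	return b, n, true
    }

    func main() {
    	// Hypothetical 600 KiB segment at 0x400000; shard 2 gets the
    	// short tail block.
    	b, n, ok := blockBounds(0x400000, 600<<10, 2)
    	fmt.Printf("ok=%v range=[%#x,%#x) size=%dKiB\n", ok, b, b+n, n>>10)
    }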
+// markrootFreeGStacks frees stacks of dead Gs.
+//
+// This does not free stacks of dead Gs cached on Ps, but having a few
+// cached stacks around isn't a problem.
+//
+//TODO go:nowritebarrier
+func markrootFreeGStacks() {
+ // Take list of dead Gs with stacks.
+ lock(&sched.gflock)
+ list := sched.gfreeStack
+ sched.gfreeStack = nil
+ unlock(&sched.gflock)
+ if list == nil {
+ return
+ }
+
+ // Free stacks.
+ tail := list
+ for gp := list; gp != nil; gp = gp.schedlink.ptr() {
+ shrinkstack(gp)
+ tail = gp
+ }
+
+ // Put Gs back on the free list.
+ lock(&sched.gflock)
+ tail.schedlink.set(sched.gfreeNoStack)
+ sched.gfreeNoStack = list
+ unlock(&sched.gflock)
+}
+
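markrootFreeGStacks follows a detach/work/reattach shape: take the whole list while holding sched.gflock, do the expensive stack frees unlocked, then splice the processed batch back in one step. The same pattern in a self-contained sketch, with hypothetical node and list names standing in for g, schedlink, and the sched free lists:

    package main

    import (
    	"fmt"
    	"sync"
    )

    // node stands in for a dead G carrying a stack; next mirrors schedlink.
    type node struct {
    	next  *node
    	stack []byte
    }

    var (
    	mu       sync.Mutex // stands in for sched.gflock
    	withWork *node      // stands in for sched.gfreeStack
    	done     *node      // stands in for sched.gfreeNoStack
    )

    func processFreeList() {
    	// Take the whole list under the lock.
    	mu.Lock()
    	list := withWork
    	withWork = nil
    	mu.Unlock()
    	if list == nil {
    		return
    	}

    	// Do the expensive per-node work without holding the lock.
    	tail := list
    	for n := list; n != nil; n = n.next {
    		n.stack = nil // stand-in for shrinkstack freeing the stack
    		tail = n
    	}

    	// Splice the processed batch back in one step.
    	mu.Lock()
    	tail.next = done
    	done = list
    	mu.Unlock()
    }

    func main() {
    	withWork = &node{stack: make([]byte, 8), next: &node{stack: make([]byte, 8)}}
    	processFreeList()
    	for n := done; n != nil; n = n.next {
    		fmt.Println("freed:", n.stack == nil)
    	}
    }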
// markrootSpans marks roots for one shard of work.spans.
//
//go:nowritebarrier
// TODO(austin): There are several ideas for making this more
// efficient in issue #11485.
- // We process objects with finalizers only during the first
- // markroot pass. In concurrent GC, this happens during
- // concurrent mark and we depend on addfinalizer to ensure the
- // above invariants for objects that get finalizers after
- // concurrent mark. In STW GC, this will happen during mark
- // termination.
if work.markrootDone {
- return
+ throw("markrootSpans during second markroot")
}
sg := mheap_.sweepgen
//go:nowritebarrier
func scanstack(gp *g) {
if gp.gcscanvalid {
- if gcphase == _GCmarktermination {
- gcRemoveStackBarriers(gp)
- }
return
}
} else {
sp = gp.sched.sp
}
+ gcLockStackBarriers(gp) // Not necessary during mark term, but harmless.
switch gcphase {
case _GCmark:
// Install stack barriers during stack scan.
nextBarrier = ^uintptr(0)
}
- if gp.stkbarPos != 0 || len(gp.stkbar) != 0 {
- // If this happens, it's probably because we
- // scanned a stack twice in the same phase.
- print("stkbarPos=", gp.stkbarPos, " len(stkbar)=", len(gp.stkbar), " goid=", gp.goid, " gcphase=", gcphase, "\n")
- throw("g already has stack barriers")
- }
-
- gcLockStackBarriers(gp)
+ // Remove any existing stack barriers before we
+ // install new ones.
+ gcRemoveStackBarriers(gp)
case _GCmarktermination:
+ if !work.markrootDone {
+ // This is a STW GC. There may be stale stack
+ // barriers from an earlier cycle since we
+ // never passed through mark phase.
+ gcRemoveStackBarriers(gp)
+ }
+
if int(gp.stkbarPos) == len(gp.stkbar) {
// gp hit all of the stack barriers (or there
// were none). Re-scan the whole stack.
}
}
- gcRemoveStackBarriers(gp)
-
default:
throw("scanstack in wrong phase")
}
if gcphase == _GCmarktermination {
gcw.dispose()
}
+ gcUnlockStackBarriers(gp)
if gcphase == _GCmark {
- gcUnlockStackBarriers(gp)
+ // gp may have added itself to the rescan list between
+ // when GC started and now. It's clean now, so remove
+ // it. This isn't safe during mark termination because
+ // mark termination is consuming this list, but it's
+ // also not necessary.
+ dequeueRescan(gp)
}
gp.gcscanvalid = true
}
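In the mark-termination branch above, stkbarPos versus len(stkbar) decides how far the re-scan must go: frames beyond the lowest barrier the G has not yet hit have not executed since the concurrent scan, so only the stack below that point needs re-scanning. A sketch of that bound, with a hypothetical barrier record standing in for the runtime's stkbar entries:

    package main

    import "fmt"

    // barrier is a stand-in for the runtime's stkbar record; savedLRPtr
    // is the stack location where the barrier was installed.
    type barrier struct {
    	savedLRPtr uintptr
    }

    // rescanBound returns how far a mark-termination re-scan must go for
    // a stack whose barriers are stkbar, of which the first pos were hit.
    func rescanBound(stkbar []barrier, pos int) uintptr {
    	if pos == len(stkbar) {
    		// The G returned past every barrier (or none were
    		// installed): re-scan the whole stack.
    		return ^uintptr(0)
    	}
    	// Frames beyond the lowest un-hit barrier have not executed
    	// since the concurrent scan, so stop there.
    	return stkbar[pos].savedLRPtr
    }

    func main() {
    	barriers := []barrier{{savedLRPtr: 0x1000}, {savedLRPtr: 0x2000}}
    	fmt.Printf("re-scan below %#x\n", rescanBound(barriers, 1))
    	fmt.Printf("re-scan below %#x\n", rescanBound(barriers, 2))
    }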
// Scan local variables if stack frame has been allocated.
size := frame.varp - frame.sp
var minsize uintptr
- switch sys.TheChar {
- case '7':
+ switch sys.ArchFamily {
+ case sys.ARM64:
minsize = sys.SpAlign
default:
minsize = sys.MinFrameSize
}
}
+// queueRescan adds gp to the stack rescan list and clears
+// gp.gcscanvalid. The caller must own gp and ensure that gp isn't
+// already on the rescan list.
+func queueRescan(gp *g) {
+ if gcphase == _GCoff {
+ gp.gcscanvalid = false
+ return
+ }
+ if gp.gcRescan != -1 {
+ throw("g already on rescan list")
+ }
+
+ lock(&work.rescan.lock)
+ gp.gcscanvalid = false
+
+ // Recheck gcphase under the lock in case there was a phase change.
+ if gcphase == _GCoff {
+ unlock(&work.rescan.lock)
+ return
+ }
+ if len(work.rescan.list) == cap(work.rescan.list) {
+ throw("rescan list overflow")
+ }
+ n := len(work.rescan.list)
+ gp.gcRescan = int32(n)
+ work.rescan.list = work.rescan.list[:n+1]
+ work.rescan.list[n].set(gp)
+ unlock(&work.rescan.lock)
+}
+
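The double test of gcphase in queueRescan is the usual check/lock/recheck idiom: the unlocked read is only a fast path and may be stale, so the decision to append is re-made under work.rescan.lock. The same shape in miniature, with hypothetical names:

    package main

    import (
    	"fmt"
    	"sync"
    )

    var (
    	mu      sync.Mutex
    	gcOn    bool     // stands in for gcphase != _GCoff; protected by mu
    	pending []string // stands in for work.rescan.list
    )

    func maybeQueue(name string) {
    	if !gcOn { // fast path: racy read, may be stale
    		return
    	}
    	mu.Lock()
    	defer mu.Unlock()
    	if !gcOn { // authoritative recheck under the lock
    		return
    	}
    	pending = append(pending, name)
    }

    func main() {
    	gcOn = true
    	maybeQueue("g17")
    	fmt.Println("queued:", pending)
    }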
+// dequeueRescan removes gp from the stack rescan list, if gp is on
+// the rescan list. The caller must own gp.
+func dequeueRescan(gp *g) {
+ if gp.gcRescan == -1 {
+ return
+ }
+ if gcphase == _GCoff {
+ gp.gcRescan = -1
+ return
+ }
+
+ lock(&work.rescan.lock)
+ if work.rescan.list[gp.gcRescan].ptr() != gp {
+ throw("bad dequeueRescan")
+ }
+ // Careful: gp may itself be the last G on the list.
+ last := work.rescan.list[len(work.rescan.list)-1]
+ work.rescan.list[gp.gcRescan] = last
+ last.ptr().gcRescan = gp.gcRescan
+ gp.gcRescan = -1
+ work.rescan.list = work.rescan.list[:len(work.rescan.list)-1]
+ unlock(&work.rescan.lock)
+}
+
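The body of dequeueRescan is the standard O(1) unordered slice delete: overwrite the victim's slot with the last element and fix that element's stored index. Isolated below with a hypothetical item type; the assignment order is what makes the victim-is-last case work, just as in dequeueRescan:

    package main

    import "fmt"

    // item stands in for a G; idx mirrors gcRescan: the item's position
    // in list, or -1 when not queued.
    type item struct {
    	name string
    	idx  int
    }

    var list []*item

    func enqueue(it *item) {
    	it.idx = len(list)
    	list = append(list, it)
    }

    func dequeue(it *item) {
    	if it.idx == -1 {
    		return
    	}
    	last := list[len(list)-1]
    	list[it.idx] = last
    	last.idx = it.idx // safe even when it == last
    	it.idx = -1       // must come after the line above
    	list = list[:len(list)-1]
    }

    func main() {
    	a, b, c := &item{name: "a", idx: -1}, &item{name: "b", idx: -1}, &item{name: "c", idx: -1}
    	enqueue(a)
    	enqueue(b)
    	enqueue(c)
    	dequeue(a) // "c" moves into slot 0
    	for _, it := range list {
    		fmt.Println(it.name, it.idx)
    	}
    }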
type gcDrainFlags int
const (
}
}
-// If gcBlackenPromptly is true we are in the second mark phase phase so we allocate black.
+// gcmarknewobject marks a newly allocated object black. obj must
+// not contain any non-nil pointers.
+//
+// This is nosplit so it can manipulate a gcWork without preemption.
+//
//go:nowritebarrier
-func gcmarknewobject_m(obj, size uintptr) {
+//go:nosplit
+func gcmarknewobject(obj, size, scanSize uintptr) {
if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen.
throw("gcmarknewobject called while doing checkmark")
}
markBitsForAddr(obj).setMarked()
- atomic.Xadd64(&work.bytesMarked, int64(size))
+ gcw := &getg().m.p.ptr().gcw
+ gcw.bytesMarked += uint64(size)
+ gcw.scanWork += int64(scanSize)
}
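Replacing the global atomic.Xadd64 with per-P gcWork fields takes the contended atomic off the allocation path: counts accumulate in P-local state and are folded into the globals only when the gcWork is disposed. A reduced sketch of that buffering, with hypothetical names:

    package main

    import (
    	"fmt"
    	"sync/atomic"
    )

    // Global totals, touched only when a local buffer is disposed.
    var bytesMarked, scanWork int64

    // workBuf stands in for the per-P gcWork accounting fields.
    type workBuf struct {
    	bytesMarked int64
    	scanWork    int64
    }

    // markNewObject is the hot path: plain adds, no atomics, because
    // the buffer is owned by one P and preemption is disabled.
    func (w *workBuf) markNewObject(size, scanSize int64) {
    	w.bytesMarked += size
    	w.scanWork += scanSize
    }

    // dispose flushes the buffered counts with one atomic add each.
    func (w *workBuf) dispose() {
    	atomic.AddInt64(&bytesMarked, w.bytesMarked)
    	atomic.AddInt64(&scanWork, w.scanWork)
    	w.bytesMarked, w.scanWork = 0, 0
    }

    func main() {
    	var w workBuf
    	w.markNewObject(64, 64) // hypothetical sizes
    	w.markNewObject(128, 128)
    	w.dispose()
    	fmt.Println("bytesMarked:", bytesMarked, "scanWork:", scanWork)
    }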
// Checkmarking