runtime: make it harder to introduce deadlocks with forEachP

author Michael Anthony Knyszek <mknyszek@google.com>

Fri, 6 Oct 2023 15:07:28 +0000 (15:07 +0000)

committer Gopher Robot <gobot@golang.org>

Thu, 9 Nov 2023 22:35:07 +0000 (22:35 +0000)
author Michael Anthony Knyszek <mknyszek@google.com>
Fri, 6 Oct 2023 15:07:28 +0000 (15:07 +0000)
committer Gopher Robot <gobot@golang.org>
Thu, 9 Nov 2023 22:35:07 +0000 (22:35 +0000)
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go

index 30d2f1d3852b4430c6ac2039e5a0f3d1dcf43ba3..d015d6dbabaa8035f28b5debad991c9da3840195 100644 (file)
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -824,31 +824,22 @@ top:
  
         // Flush all local buffers and collect flushedWork flags.
         gcMarkDoneFlushed = 0
-       systemstack(func() {
-               gp := getg().m.curg
-               // Mark the user stack as preemptible so that it may be scanned.
-               // Otherwise, our attempt to force all P's to a safepoint could
-               // result in a deadlock as we attempt to preempt a worker that's
-               // trying to preempt us (e.g. for a stack scan).
-               casGToWaiting(gp, _Grunning, waitReasonGCMarkTermination)
-               forEachP(func(pp *p) {
-                       // Flush the write barrier buffer, since this may add
-                       // work to the gcWork.
-                       wbBufFlush1(pp)
-
-                       // Flush the gcWork, since this may create global work
-                       // and set the flushedWork flag.
-                       //
-                       // TODO(austin): Break up these workbufs to
-                       // better distribute work.
-                       pp.gcw.dispose()
-                       // Collect the flushedWork flag.
-                       if pp.gcw.flushedWork {
-                               atomic.Xadd(&gcMarkDoneFlushed, 1)
-                               pp.gcw.flushedWork = false
-                       }
-               })
-               casgstatus(gp, _Gwaiting, _Grunning)
+       forEachP(waitReasonGCMarkTermination, func(pp *p) {
+               // Flush the write barrier buffer, since this may add
+               // work to the gcWork.
+               wbBufFlush1(pp)
+
+               // Flush the gcWork, since this may create global work
+               // and set the flushedWork flag.
+               //
+               // TODO(austin): Break up these workbufs to
+               // better distribute work.
+               pp.gcw.dispose()
+               // Collect the flushedWork flag.
+               if pp.gcw.flushedWork {
+                       atomic.Xadd(&gcMarkDoneFlushed, 1)
+                       pp.gcw.flushedWork = false
+               }
         })
  
         if gcMarkDoneFlushed != 0 {
@@ -1116,18 +1107,16 @@ func gcMarkTermination() {
         //
         // Also, flush the pinner cache, to avoid leaking that memory
         // indefinitely.
-       systemstack(func() {
-               forEachP(func(pp *p) {
-                       pp.mcache.prepareForSweep()
-                       if pp.status == _Pidle {
-                               systemstack(func() {
-                                       lock(&mheap_.lock)
-                                       pp.pcache.flush(&mheap_.pages)
-                                       unlock(&mheap_.lock)
-                               })
-                       }
-                       pp.pinnerCache = nil
-               })
+       forEachP(waitReasonFlushProcCaches, func(pp *p) {
+               pp.mcache.prepareForSweep()
+               if pp.status == _Pidle {
+                       systemstack(func() {
+                               lock(&mheap_.lock)
+                               pp.pcache.flush(&mheap_.pages)
+                               unlock(&mheap_.lock)
+                       })
+               }
+               pp.pinnerCache = nil
         })
         if sl.valid {
                 // Now that we've swept stale spans in mcaches, they don't
diff --git a/src/runtime/proc.go b/src/runtime/proc.go

index ae2562a5b76caa70e5a6ca3b71eb53d1909f3c4b..159c19caf35dcacd8b81c659121d648b542daf06 100644 (file)
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -1820,10 +1820,35 @@ found:
  // fn will run on every CPU executing Go code, but it acts as a global
  // memory barrier. GC uses this as a "ragged barrier."
  //
-// The caller must hold worldsema.
+// The caller must hold worldsema. fn must not refer to any
+// part of the current goroutine's stack, since the GC may move it.
+func forEachP(reason waitReason, fn func(*p)) {
+       systemstack(func() {
+               gp := getg().m.curg
+               // Mark the user stack as preemptible so that it may be scanned.
+               // Otherwise, our attempt to force all P's to a safepoint could
+               // result in a deadlock as we attempt to preempt a worker that's
+               // trying to preempt us (e.g. for a stack scan).
+               //
+               // N.B. The execution tracer is not aware of this status
+               // transition and handles it specially based on the
+               // wait reason.
+               casGToWaiting(gp, _Grunning, reason)
+               forEachPInternal(fn)
+               casgstatus(gp, _Gwaiting, _Grunning)
+       })
+}
+
+// forEachPInternal calls fn(p) for every P p when p reaches a GC safe point.
+// It is the internal implementation of forEachP.
+//
+// The caller must hold worldsema and either must ensure that a GC is not
+// running (otherwise this may deadlock with the GC trying to preempt this P)
+// or it must leave its goroutine in a preemptible state before it switches
+// to the systemstack. Due to these restrictions, prefer forEachP when possible.
  //
  //go:systemstack
-func forEachP(fn func(*p)) {
+func forEachPInternal(fn func(*p)) {
         mp := acquirem()
         pp := getg().m.p.ptr()
  
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go

index 8bda2f733787de05add206db819de2ff1360bd32..e7a3d4ed1ba2e4857bcc43fca70e3b33f9e01582 100644 (file)
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -1112,6 +1112,7 @@ const (
         waitReasonDebugCall                               // "debug call"
         waitReasonGCMarkTermination                       // "GC mark termination"
         waitReasonStoppingTheWorld                        // "stopping the world"
+       waitReasonFlushProcCaches                         // "flushing proc caches"
  )
  
  var waitReasonStrings = [...]string{
@@ -1147,6 +1148,7 @@ var waitReasonStrings = [...]string{
         waitReasonDebugCall:             "debug call",
         waitReasonGCMarkTermination:     "GC mark termination",
         waitReasonStoppingTheWorld:      "stopping the world",
+       waitReasonFlushProcCaches:       "flushing proc caches",
  }
  
  func (w waitReason) String() string {
author	Michael Anthony Knyszek <mknyszek@google.com>
	Fri, 6 Oct 2023 15:07:28 +0000 (15:07 +0000)
committer	Gopher Robot <gobot@golang.org>
	Thu, 9 Nov 2023 22:35:07 +0000 (22:35 +0000)
src/runtime/mgc.go		patch | blob | history
src/runtime/proc.go		patch | blob | history
src/runtime/runtime2.go		patch | blob | history