]> Cypherpunks.ru repositories - gostls13.git/commitdiff
[release-branch.go1.22] runtime: traceAcquire and traceRelease across all P steals
authorMichael Anthony Knyszek <mknyszek@google.com>
Thu, 1 Feb 2024 05:32:03 +0000 (05:32 +0000)
committerGopher Robot <gobot@golang.org>
Thu, 1 Feb 2024 22:54:02 +0000 (22:54 +0000)
Currently there are a few places where a P can get stolen where the
runtime doesn't traceAcquire and traceRelease across the steal itself.
What can happen then is the following scenario:
- Thread 1 enters a syscall and writes an event about it.
- Thread 2 steals Thread 1's P.
- Thread 1 exits the syscall and writes one or more events about it.
- Tracing ends (trace.gen is set to 0).
- Thread 2 checks to see if it should write an event for the P it just
  stole, sees that tracing is disabled, and doesn't.

This results in broken traces, because there's a missing ProcSteal
event. The parser always waits for a ProcSteal to advance a
GoSyscallEndBlocked event, and in this case, it never comes.

Fixes #65181.

Change-Id: I437629499bb7669bf7fe2fc6fc4f64c53002916b
Reviewed-on: https://go-review.googlesource.com/c/go/+/560235
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
(cherry picked from commit c9d88ea2aa628cae224335c49f256e13adfce337)
Reviewed-on: https://go-review.googlesource.com/c/go/+/559958
Auto-Submit: Michael Knyszek <mknyszek@google.com>

src/runtime/proc.go

index c2676c43b2c447653dd16f84218b39ec31dac65d..061673150f5d876935fc7e1db5973f18c2e66e5a 100644 (file)
@@ -4404,8 +4404,8 @@ func entersyscall_gcwait() {
        pp := gp.m.oldp.ptr()
 
        lock(&sched.lock)
+       trace := traceAcquire()
        if sched.stopwait > 0 && atomic.Cas(&pp.status, _Psyscall, _Pgcstop) {
-               trace := traceAcquire()
                if trace.ok() {
                        if goexperiment.ExecTracer2 {
                                // This is a steal in the new tracer. While it's very likely
@@ -4428,6 +4428,8 @@ func entersyscall_gcwait() {
                if sched.stopwait--; sched.stopwait == 0 {
                        notewakeup(&sched.stopnote)
                }
+       } else if trace.ok() {
+               traceRelease(trace)
        }
        unlock(&sched.lock)
 }
@@ -4605,12 +4607,19 @@ func exitsyscallfast(oldp *p) bool {
        }
 
        // Try to re-acquire the last P.
+       trace := traceAcquire()
        if oldp != nil && oldp.status == _Psyscall && atomic.Cas(&oldp.status, _Psyscall, _Pidle) {
                // There's a cpu for us, so we can run.
                wirep(oldp)
-               exitsyscallfast_reacquired()
+               exitsyscallfast_reacquired(trace)
+               if trace.ok() {
+                       traceRelease(trace)
+               }
                return true
        }
+       if trace.ok() {
+               traceRelease(trace)
+       }
 
        // Try to get any other idle P.
        if sched.pidle != 0 {
@@ -4646,10 +4655,9 @@ func exitsyscallfast(oldp *p) bool {
 // syscall.
 //
 //go:nosplit
-func exitsyscallfast_reacquired() {
+func exitsyscallfast_reacquired(trace traceLocker) {
        gp := getg()
        if gp.m.syscalltick != gp.m.p.ptr().syscalltick {
-               trace := traceAcquire()
                if trace.ok() {
                        // The p was retaken and then enter into syscall again (since gp.m.syscalltick has changed).
                        // traceGoSysBlock for this syscall was already emitted,
@@ -4666,7 +4674,6 @@ func exitsyscallfast_reacquired() {
                                        // Denote completion of the current syscall.
                                        trace.GoSysExit(true)
                                }
-                               traceRelease(trace)
                        })
                }
                gp.m.p.ptr().syscalltick++
@@ -6146,8 +6153,8 @@ func retake(now int64) uint32 {
                        // Otherwise the M from which we retake can exit the syscall,
                        // increment nmidle and report deadlock.
                        incidlelocked(-1)
+                       trace := traceAcquire()
                        if atomic.Cas(&pp.status, s, _Pidle) {
-                               trace := traceAcquire()
                                if trace.ok() {
                                        trace.GoSysBlock(pp)
                                        trace.ProcSteal(pp, false)
@@ -6156,6 +6163,8 @@ func retake(now int64) uint32 {
                                n++
                                pp.syscalltick++
                                handoffp(pp)
+                       } else if trace.ok() {
+                               traceRelease(trace)
                        }
                        incidlelocked(1)
                        lock(&allpLock)