1 // Copyright 2023 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
11 "internal/trace/v2/event"
12 "internal/trace/v2/event/go122"
13 "internal/trace/v2/version"
16 // ordering emulates Go scheduler state for both validation and
17 // for putting events in the right order.
18 type ordering struct {
19 gStates map[GoID]*gState
20 pStates map[ProcID]*pState // TODO: The keys are dense, so this can be a slice.
21 mStates map[ThreadID]*mState
22 activeTasks map[TaskID]taskState
28 // advance checks if it's valid to proceed with ev which came from thread m.
30 // Returns the schedCtx at the point of the event, whether it's OK to advance
31 // with this event, and any error encountered in validation.
33 // It assumes the gen value passed to it is monotonically increasing across calls.
35 // If any error is returned, then the trace is broken and trace parsing must cease.
36 // If it's not valid to advance with ev, but no error was encountered, the caller
37 // should attempt to advance with other candidate events from other threads. If the
38 // caller runs out of candidates, the trace is invalid.
39 func (o *ordering) advance(ev *baseEvent, evt *evTable, m ThreadID, gen uint64) (schedCtx, bool, error) {
40 if o.initialGen == 0 {
41 // Set the initial gen if necessary.
45 var curCtx, newCtx schedCtx
51 curCtx.G = NoGoroutine
54 // Pull out or create the mState for this event.
55 ms, ok := o.mStates[m]
67 // Update the mState for this event.
73 switch typ := ev.typ; typ {
75 case go122.EvProcStatus:
76 pid := ProcID(ev.args[0])
77 status := go122.ProcStatus(ev.args[1])
78 oldState := go122ProcStatus2ProcState[status]
79 if s, ok := o.pStates[pid]; ok {
80 if status == go122.ProcSyscallAbandoned && s.status == go122.ProcSyscall {
81 // ProcSyscallAbandoned is a special case of ProcSyscall. It indicates a
82 // potential loss of information, but if we're already in ProcSyscall,
83 // we haven't lost the relevant information. Promote the status and advance.
84 oldState = ProcRunning
85 ev.args[1] = uint64(go122.ProcSyscall)
86 } else if s.status != status {
87 return curCtx, false, fmt.Errorf("inconsistent status for proc %d: old %v vs. new %v", pid, s.status, status)
89 s.seq = makeSeq(gen, 0) // Reset seq.
91 o.pStates[pid] = &pState{id: pid, status: status, seq: makeSeq(gen, 0)}
92 if gen == o.initialGen {
93 oldState = ProcUndetermined
95 oldState = ProcNotExist
98 ev.extra(version.Go122)[0] = uint64(oldState) // Smuggle in the old state for StateTransition.
100 // Bind the proc to the new context, if it's running.
101 if status == go122.ProcRunning || status == go122.ProcSyscall {
104 // Set the current context to the state of the M currently running this G. Otherwise
105 // we'll emit a Running -> Running event that doesn't correspond to the right M.
106 if status == go122.ProcSyscallAbandoned && oldState != ProcUndetermined {
107 // N.B. This is slow but it should be fairly rare.
109 for mid, ms := range o.mStates {
118 return curCtx, false, fmt.Errorf("failed to find sched context for proc %d that's about to be stolen", pid)
121 return curCtx, true, nil
122 case go122.EvProcStart:
123 pid := ProcID(ev.args[0])
124 seq := makeSeq(gen, ev.args[1])
126 // Try to advance. We might fail here due to sequencing, because the P hasn't
127 // had a status emitted, or because we already have a P and we're in a syscall,
128 // and we haven't observed that it was stolen from us yet.
129 state, ok := o.pStates[pid]
130 if !ok || state.status != go122.ProcIdle || !seq.succeeds(state.seq) || curCtx.P != NoProc {
131 // We can't make an inference as to whether this is bad. We could just be seeing
132 // a ProcStart on a different M before the proc's state was emitted, or before we
133 // got to the right point in the trace.
135 // Note that we also don't advance here if we have a P and we're in a syscall.
136 return curCtx, false, nil
138 // We can advance this P. Check some invariants.
140 // We might have a goroutine if a goroutine is exiting a syscall.
141 reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustNotHave, Goroutine: event.MayHave}
142 if err := validateCtx(curCtx, reqs); err != nil {
143 return curCtx, false, err
145 state.status = go122.ProcRunning
148 return curCtx, true, nil
149 case go122.EvProcStop:
150 // We must be able to advance this P.
152 // There are 2 ways a P can stop: ProcStop and ProcSteal. ProcStop is used when the P
153 // is stopped by the same M that started it, while ProcSteal is used when another M
154 // steals the P by stopping it from a distance.
156 // Since a P is bound to an M, and we're stopping on the same M we started, it must
157 // always be possible to advance the current M's P from a ProcStop. This is also why
158 // ProcStop doesn't need a sequence number.
159 state, ok := o.pStates[curCtx.P]
161 return curCtx, false, fmt.Errorf("event %s for proc (%v) that doesn't exist", go122.EventString(typ), curCtx.P)
163 if state.status != go122.ProcRunning && state.status != go122.ProcSyscall {
164 return curCtx, false, fmt.Errorf("%s event for proc that's not %s or %s", go122.EventString(typ), go122.ProcRunning, go122.ProcSyscall)
166 reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}
167 if err := validateCtx(curCtx, reqs); err != nil {
168 return curCtx, false, err
170 state.status = go122.ProcIdle
172 return curCtx, true, nil
173 case go122.EvProcSteal:
174 pid := ProcID(ev.args[0])
175 seq := makeSeq(gen, ev.args[1])
176 state, ok := o.pStates[pid]
177 if !ok || (state.status != go122.ProcSyscall && state.status != go122.ProcSyscallAbandoned) || !seq.succeeds(state.seq) {
178 // We can't make an inference as to whether this is bad. We could just be seeing
179 // a ProcSteal on a different M before the proc's state was emitted, or before we
180 // got to the right point in the trace.
181 return curCtx, false, nil
183 // We can advance this P. Check some invariants.
184 reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MayHave}
185 if err := validateCtx(curCtx, reqs); err != nil {
186 return curCtx, false, err
188 // Smuggle in the P state that let us advance so we can surface information to the event.
189 // Specifically, we need to make sure that the event is interpreted not as a transition of
190 // ProcRunning -> ProcIdle but ProcIdle -> ProcIdle instead.
192 // ProcRunning is binding, but we may be running with a P on the current M and we can't
193 // bind another P. This P is about to go ProcIdle anyway.
194 oldStatus := state.status
195 ev.extra(version.Go122)[0] = uint64(oldStatus)
197 // Update the P's status and sequence number.
198 state.status = go122.ProcIdle
201 // If we've lost information then don't try to do anything with the M.
202 // It may have moved on and we can't be sure.
203 if oldStatus == go122.ProcSyscallAbandoned {
204 return curCtx, true, nil
207 // Validate that the M we're stealing from is what we expect.
208 mid := ThreadID(ev.args[2]) // The M we're stealing from.
209 mState, ok := o.mStates[mid]
211 return curCtx, false, fmt.Errorf("stole proc from non-existent thread %d", mid)
214 // Make sure we're actually stealing the right P.
216 return curCtx, false, fmt.Errorf("tried to steal proc %d from thread %d, but got proc %d instead", pid, mid, mState.p)
219 // Tell the M it has no P so it can proceed.
221 // This is safe because we know the P was in a syscall and
222 // the other M must be trying to get out of the syscall.
223 // GoSyscallEndBlocked cannot advance until the corresponding
226 return curCtx, true, nil
228 // Handle goroutines.
229 case go122.EvGoStatus:
230 gid := GoID(ev.args[0])
231 mid := ThreadID(ev.args[1])
232 status := go122.GoStatus(ev.args[2])
233 oldState := go122GoStatus2GoState[status]
234 if s, ok := o.gStates[gid]; ok {
235 if s.status != status {
236 return curCtx, false, fmt.Errorf("inconsistent status for goroutine %d: old %v vs. new %v", gid, s.status, status)
238 s.seq = makeSeq(gen, 0) // Reset seq.
239 } else if gen == o.initialGen {
241 o.gStates[gid] = &gState{id: gid, status: status, seq: makeSeq(gen, 0)}
242 oldState = GoUndetermined
244 return curCtx, false, fmt.Errorf("found goroutine status for new goroutine after the first generation: id=%v status=%v", gid, status)
246 ev.extra(version.Go122)[0] = uint64(oldState) // Smuggle in the old state for StateTransition.
249 case go122.GoRunning:
250 // Bind the goroutine to the new context, since it's running.
252 case go122.GoSyscall:
254 return curCtx, false, fmt.Errorf("found goroutine %d in syscall without a thread", gid)
256 // Is the syscall on this thread? If so, bind it to the context.
257 // Otherwise, we're talking about a G sitting in a syscall on an M.
258 // Validate the named M.
263 // Now we're talking about a thread and goroutine that have been
264 // blocked on a syscall for the entire generation. This case must
265 // not have a P; the runtime makes sure that all Ps are traced at
266 // the beginning of a generation, which involves taking a P back
267 // from every thread.
268 ms, ok := o.mStates[mid]
270 // This M has been seen. That means we must have seen this
271 // goroutine go into a syscall on this thread at some point.
273 // But the G on the M doesn't match. Something's wrong.
274 return curCtx, false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, ms.g)
276 // This case is just a Syscall->Syscall event, which needs to
277 // appear as having the G currently bound to this M.
280 // The M hasn't been seen yet. That means this goroutine
281 // has just been sitting in a syscall on this M. Create
283 o.mStates[mid] = &mState{g: gid, p: NoProc}
284 // Don't set curCtx.G in this case because this event is the
285 // binding event (and curCtx represents the "before" state).
287 // Update the current context to the M we're talking about.
290 return curCtx, true, nil
291 case go122.EvGoCreate:
292 // Goroutines must be created on a running P, but may or may not be created
293 // by a running goroutine.
294 reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}
295 if err := validateCtx(curCtx, reqs); err != nil {
296 return curCtx, false, err
298 // If we have a goroutine, it must be running.
299 if state, ok := o.gStates[curCtx.G]; ok && state.status != go122.GoRunning {
300 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
302 // This goroutine created another. Add a state for it.
303 newgid := GoID(ev.args[0])
304 if _, ok := o.gStates[newgid]; ok {
305 return curCtx, false, fmt.Errorf("tried to create goroutine (%v) that already exists", newgid)
307 o.gStates[newgid] = &gState{id: newgid, status: go122.GoRunnable, seq: makeSeq(gen, 0)}
308 return curCtx, true, nil
309 case go122.EvGoDestroy, go122.EvGoStop, go122.EvGoBlock:
310 // These are goroutine events that all require an active running
311 // goroutine on some thread. They must *always* be advance-able,
312 // since running goroutines are bound to their M.
313 if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
314 return curCtx, false, err
316 state, ok := o.gStates[curCtx.G]
318 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
320 if state.status != go122.GoRunning {
321 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
323 // Handle each case slightly differently; we just group them together
324 // because they have shared preconditions.
326 case go122.EvGoDestroy:
327 // This goroutine is exiting itself.
328 delete(o.gStates, curCtx.G)
329 newCtx.G = NoGoroutine
331 // Goroutine stopped (yielded). It's runnable but not running on this M.
332 state.status = go122.GoRunnable
333 newCtx.G = NoGoroutine
334 case go122.EvGoBlock:
335 // Goroutine blocked. It's waiting now and not running on this M.
336 state.status = go122.GoWaiting
337 newCtx.G = NoGoroutine
339 return curCtx, true, nil
340 case go122.EvGoStart:
341 gid := GoID(ev.args[0])
342 seq := makeSeq(gen, ev.args[1])
343 state, ok := o.gStates[gid]
344 if !ok || state.status != go122.GoRunnable || !seq.succeeds(state.seq) {
345 // We can't make an inference as to whether this is bad. We could just be seeing
346 // a GoStart on a different M before the goroutine was created, before it had its
347 // state emitted, or before we got to the right point in the trace yet.
348 return curCtx, false, nil
350 // We can advance this goroutine. Check some invariants.
351 reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MustNotHave}
352 if err := validateCtx(curCtx, reqs); err != nil {
353 return curCtx, false, err
355 state.status = go122.GoRunning
358 return curCtx, true, nil
359 case go122.EvGoUnblock:
360 // N.B. These both reference the goroutine to unblock, not the current goroutine.
361 gid := GoID(ev.args[0])
362 seq := makeSeq(gen, ev.args[1])
363 state, ok := o.gStates[gid]
364 if !ok || state.status != go122.GoWaiting || !seq.succeeds(state.seq) {
365 // We can't make an inference as to whether this is bad. We could just be seeing
366 // a GoUnblock on a different M before the goroutine was created and blocked itself,
367 // before it had its state emitted, or before we got to the right point in the trace yet.
368 return curCtx, false, nil
370 state.status = go122.GoRunnable
372 // N.B. No context to validate. Basically anything can unblock
373 // a goroutine (e.g. sysmon).
374 return curCtx, true, nil
375 case go122.EvGoSyscallBegin:
376 // Entering a syscall requires an active running goroutine with a
377 // proc on some thread. It is always advanceable.
378 if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
379 return curCtx, false, err
381 state, ok := o.gStates[curCtx.G]
383 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
385 if state.status != go122.GoRunning {
386 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
388 // Goroutine entered a syscall. It's still running on this P and M.
389 state.status = go122.GoSyscall
390 pState, ok := o.pStates[curCtx.P]
392 return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(typ))
394 pState.status = go122.ProcSyscall
395 // Validate the P sequence number on the event and advance it.
397 // We have a P sequence number for what is supposed to be a goroutine event
398 // so that we can correctly model P stealing. Without this sequence number here,
399 // the syscall from which a ProcSteal event is stealing can be ambiguous in the
400 // face of broken timestamps. See the go122-syscall-steal-proc-ambiguous test for
403 // Note that because this sequence number only exists as a tool for disambiguation,
404 // we can enforce that we have the right sequence number at this point; we don't need
405 // to back off and see if any other events will advance. This is a running P.
406 pSeq := makeSeq(gen, ev.args[0])
407 if !pSeq.succeeds(pState.seq) {
408 return curCtx, false, fmt.Errorf("failed to advance %s: can't make sequence: %s -> %s", go122.EventString(typ), pState.seq, pSeq)
411 return curCtx, true, nil
412 case go122.EvGoSyscallEnd:
413 // This event is always advance-able because it happens on the same
414 // thread on which EvGoSyscallBegin happened, and the goroutine can't leave
415 // that thread until it's done.
416 if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
417 return curCtx, false, err
419 state, ok := o.gStates[curCtx.G]
421 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
423 if state.status != go122.GoSyscall {
424 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
426 state.status = go122.GoRunning
428 // Transfer the P back to running from syscall.
429 pState, ok := o.pStates[curCtx.P]
431 return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(typ))
433 if pState.status != go122.ProcSyscall {
434 return curCtx, false, fmt.Errorf("expected proc %d in state %v, but got %v instead", curCtx.P, go122.ProcSyscall, pState.status)
436 pState.status = go122.ProcRunning
437 return curCtx, true, nil
438 case go122.EvGoSyscallEndBlocked:
439 // This event becomes advanceable when its P is not in a syscall state
440 // (lack of a P altogether is also acceptable for advancing).
441 // The transfer out of ProcSyscall can happen either voluntarily via
442 // ProcStop or involuntarily via ProcSteal. We may also acquire a new P
443 // before we get here (after the transfer out) but that's OK: that new
444 // P won't be in the ProcSyscall state anymore.
446 // Basically: while we have a preemptible P, don't advance, because we
447 // *know* from the event that we're going to lose it at some point during
448 // the syscall. We shouldn't advance until that happens.
449 if curCtx.P != NoProc {
450 pState, ok := o.pStates[curCtx.P]
452 return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(typ))
454 if pState.status == go122.ProcSyscall {
455 return curCtx, false, nil
458 // As mentioned above, we may have a P here if we ProcStart
459 // before this event.
460 if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MustHave}); err != nil {
461 return curCtx, false, err
463 state, ok := o.gStates[curCtx.G]
465 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
467 if state.status != go122.GoSyscall {
468 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
470 newCtx.G = NoGoroutine
471 state.status = go122.GoRunnable
472 return curCtx, true, nil
473 case go122.EvGoCreateSyscall:
474 // This event indicates that a goroutine is effectively
475 // being created out of a cgo callback. Such a goroutine
476 // is 'created' in the syscall state.
477 if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustNotHave, Goroutine: event.MustNotHave}); err != nil {
478 return curCtx, false, err
480 // This goroutine is effectively being created. Add a state for it.
481 newgid := GoID(ev.args[0])
482 if _, ok := o.gStates[newgid]; ok {
483 return curCtx, false, fmt.Errorf("tried to create goroutine (%v) in syscall that already exists", newgid)
485 o.gStates[newgid] = &gState{id: newgid, status: go122.GoSyscall, seq: makeSeq(gen, 0)}
486 // Goroutine is executing. Bind it to the context.
488 return curCtx, true, nil
489 case go122.EvGoDestroySyscall:
490 // This event indicates that a goroutine created for a
491 // cgo callback is disappearing, either because the callback
492 // is ending or the C thread that called it is being destroyed.
494 // Note: we might have a P here. The P might not be released
495 // eagerly by the runtime, and it might get stolen back later
496 // (or never again, if the program is going to exit).
497 if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MustHave}); err != nil {
498 return curCtx, false, err
500 // Check to make sure the goroutine exists in the right state.
501 state, ok := o.gStates[curCtx.G]
503 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
505 if state.status != go122.GoSyscall {
506 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %v", go122.EventString(typ), GoSyscall)
508 // This goroutine is exiting itself.
509 delete(o.gStates, curCtx.G)
510 newCtx.G = NoGoroutine
511 return curCtx, true, nil
513 // Handle tasks. Tasks are interesting because:
514 // - There's no Begin event required to reference a task.
515 // - End for a particular task ID can appear multiple times.
516 // As a result, there's very little to validate. The only
517 // thing we have to be sure of is that a task didn't begin
518 // after it had already begun. Task IDs are allowed to be
519 // reused, so we don't care about a Begin after an End.
520 case go122.EvUserTaskBegin:
521 id := TaskID(ev.args[0])
522 if _, ok := o.activeTasks[id]; ok {
523 return curCtx, false, fmt.Errorf("task ID conflict: %d", id)
525 // Get the parent ID, but don't validate it. There's no guarantee
526 // we actually have information on whether it's active.
527 parentID := TaskID(ev.args[1])
529 // Validate the name and record it. We'll need to pass it through to
531 nameID := stringID(ev.args[2])
532 name, ok := evt.strings.get(nameID)
534 return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, typ)
536 o.activeTasks[id] = taskState{name: name, parentID: parentID}
537 return curCtx, true, validateCtx(curCtx, event.UserGoReqs)
538 case go122.EvUserTaskEnd:
539 id := TaskID(ev.args[0])
540 if ts, ok := o.activeTasks[id]; ok {
541 // Smuggle the task info. This may happen in a different generation,
542 // which may not have the name in its string table. Add it to the extra
543 // strings table so we can look it up later.
544 ev.extra(version.Go122)[0] = uint64(ts.parentID)
545 ev.extra(version.Go122)[1] = uint64(evt.addExtraString(ts.name))
546 delete(o.activeTasks, id)
548 // Explicitly clear the task info.
549 ev.extra(version.Go122)[0] = uint64(NoTask)
550 ev.extra(version.Go122)[1] = uint64(evt.addExtraString(""))
552 return curCtx, true, validateCtx(curCtx, event.UserGoReqs)
554 // Handle user regions.
555 case go122.EvUserRegionBegin:
556 if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
557 return curCtx, false, err
559 tid := TaskID(ev.args[0])
560 nameID := stringID(ev.args[1])
561 name, ok := evt.strings.get(nameID)
563 return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, typ)
565 if err := o.gStates[curCtx.G].beginRegion(userRegion{tid, name}); err != nil {
566 return curCtx, false, err
568 return curCtx, true, nil
569 case go122.EvUserRegionEnd:
570 if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
571 return curCtx, false, err
573 tid := TaskID(ev.args[0])
574 nameID := stringID(ev.args[1])
575 name, ok := evt.strings.get(nameID)
577 return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, typ)
579 if err := o.gStates[curCtx.G].endRegion(userRegion{tid, name}); err != nil {
580 return curCtx, false, err
582 return curCtx, true, nil
584 // Handle the GC mark phase.
586 // We have sequence numbers for both start and end because they
587 // can happen on completely different threads. We want an explicit
588 // partial order edge between start and end here, otherwise we're
589 // relying entirely on timestamps to make sure we don't advance a
590 // GCEnd for a _different_ GC cycle if timestamps are wildly broken.
591 case go122.EvGCActive:
593 if gen == o.initialGen {
594 if o.gcState != gcUndetermined {
595 return curCtx, false, fmt.Errorf("GCActive in the first generation isn't first GC event")
598 o.gcState = gcRunning
599 return curCtx, true, nil
601 if seq != o.gcSeq+1 {
602 // This is not the right GC cycle.
603 return curCtx, false, nil
605 if o.gcState != gcRunning {
606 return curCtx, false, fmt.Errorf("encountered GCActive while GC was not in progress")
609 if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
610 return curCtx, false, err
612 return curCtx, true, nil
613 case go122.EvGCBegin:
615 if o.gcState == gcUndetermined {
617 o.gcState = gcRunning
618 return curCtx, true, nil
620 if seq != o.gcSeq+1 {
621 // This is not the right GC cycle.
622 return curCtx, false, nil
624 if o.gcState == gcRunning {
625 return curCtx, false, fmt.Errorf("encountered GCBegin while GC was already in progress")
628 o.gcState = gcRunning
629 if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
630 return curCtx, false, err
632 return curCtx, true, nil
635 if seq != o.gcSeq+1 {
636 // This is not the right GC cycle.
637 return curCtx, false, nil
639 if o.gcState == gcNotRunning {
640 return curCtx, false, fmt.Errorf("encountered GCEnd when GC was not in progress")
642 if o.gcState == gcUndetermined {
643 return curCtx, false, fmt.Errorf("encountered GCEnd when GC was in an undetermined state")
646 o.gcState = gcNotRunning
647 if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
648 return curCtx, false, err
650 return curCtx, true, nil
652 // Handle simple instantaneous events that require a G.
653 case go122.EvGoLabel, go122.EvProcsChange, go122.EvUserLog:
654 if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
655 return curCtx, false, err
657 return curCtx, true, nil
659 // Handle allocation states, which don't require a G.
660 case go122.EvHeapAlloc, go122.EvHeapGoal:
661 if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil {
662 return curCtx, false, err
664 return curCtx, true, nil
666 // Handle sweep, which is bound to a P and doesn't require a G.
667 case go122.EvGCSweepBegin:
668 if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil {
669 return curCtx, false, err
671 if err := o.pStates[curCtx.P].beginRange(makeRangeType(typ, 0)); err != nil {
672 return curCtx, false, err
674 return curCtx, true, nil
675 case go122.EvGCSweepActive:
676 pid := ProcID(ev.args[0])
677 // N.B. In practice Ps can't block while they're sweeping, so this can only
678 // ever reference curCtx.P. However, be lenient about this like we are with
679 // GCMarkAssistActive; there's no reason the runtime couldn't change to block
680 // in the middle of a sweep.
681 if err := o.pStates[pid].activeRange(makeRangeType(typ, 0), gen == o.initialGen); err != nil {
682 return curCtx, false, err
684 return curCtx, true, nil
685 case go122.EvGCSweepEnd:
686 if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil {
687 return curCtx, false, err
689 _, err := o.pStates[curCtx.P].endRange(typ)
691 return curCtx, false, err
693 return curCtx, true, nil
695 // Handle special goroutine-bound event ranges.
696 case go122.EvSTWBegin, go122.EvGCMarkAssistBegin:
697 if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
698 return curCtx, false, err
701 if typ == go122.EvSTWBegin {
702 desc = stringID(ev.args[0])
704 if err := o.gStates[curCtx.G].beginRange(makeRangeType(typ, desc)); err != nil {
705 return curCtx, false, err
707 return curCtx, true, nil
708 case go122.EvGCMarkAssistActive:
709 gid := GoID(ev.args[0])
710 // N.B. Like GoStatus, this can happen at any time, because it can
711 // reference a non-running goroutine. Don't check anything about the
712 // current scheduler context.
713 if err := o.gStates[gid].activeRange(makeRangeType(typ, 0), gen == o.initialGen); err != nil {
714 return curCtx, false, err
716 return curCtx, true, nil
717 case go122.EvSTWEnd, go122.EvGCMarkAssistEnd:
718 if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
719 return curCtx, false, err
721 desc, err := o.gStates[curCtx.G].endRange(typ)
723 return curCtx, false, err
725 if typ == go122.EvSTWEnd {
726 // Smuggle the kind into the event.
727 // Don't use ev.extra here so we have symmetry with STWBegin.
728 ev.args[0] = uint64(desc)
730 return curCtx, true, nil
732 return curCtx, false, fmt.Errorf("bad event type found while ordering: %v", ev.typ)
735 // schedCtx represents the scheduling resources associated with an event.
736 type schedCtx struct {
742 // validateCtx ensures that ctx conforms to some reqs, returning an error if
744 func validateCtx(ctx schedCtx, reqs event.SchedReqs) error {
745 // Check thread requirements.
746 if reqs.Thread == event.MustHave && ctx.M == NoThread {
747 return fmt.Errorf("expected a thread but didn't have one")
748 } else if reqs.Thread == event.MustNotHave && ctx.M != NoThread {
749 return fmt.Errorf("expected no thread but had one")
752 // Check proc requirements.
753 if reqs.Proc == event.MustHave && ctx.P == NoProc {
754 return fmt.Errorf("expected a proc but didn't have one")
755 } else if reqs.Proc == event.MustNotHave && ctx.P != NoProc {
756 return fmt.Errorf("expected no proc but had one")
759 // Check goroutine requirements.
760 if reqs.Goroutine == event.MustHave && ctx.G == NoGoroutine {
761 return fmt.Errorf("expected a goroutine but didn't have one")
762 } else if reqs.Goroutine == event.MustNotHave && ctx.G != NoGoroutine {
763 return fmt.Errorf("expected no goroutine but had one")
768 // gcState is a trinary variable for the current state of the GC.
770 // The third state besides "running" and "not running" is "undetermined."
774 gcUndetermined gcState = iota
779 // String returns a human-readable string for the GC state.
780 func (s gcState) String() string {
783 return "Undetermined"
792 // userRegion represents a unique user region when attached to some gState.
793 type userRegion struct {
794 // name must be a resolved string because the string ID for the same
795 // string may change across generations, but we care about checking
801 // rangeType is a way to classify special ranges of time.
803 // These typically correspond 1:1 with "Begin" events, but
804 // they may have an optional subtype that describes the range
806 type rangeType struct {
807 typ event.Type // "Begin" event.
808 desc stringID // Optional subtype.
811 // makeRangeType constructs a new rangeType.
812 func makeRangeType(typ event.Type, desc stringID) rangeType {
813 if styp := go122.Specs()[typ].StartEv; styp != go122.EvNone {
816 return rangeType{typ, desc}
819 // gState is the state of a goroutine at a point in the trace.
822 status go122.GoStatus
825 // regions are the active user regions for this goroutine.
828 // rangeState is the state of special time ranges bound to this goroutine.
832 // beginRegion starts a user region on the goroutine.
833 func (s *gState) beginRegion(r userRegion) error {
834 s.regions = append(s.regions, r)
838 // endRegion ends a user region on the goroutine.
839 func (s *gState) endRegion(r userRegion) error {
840 if next := s.regions[len(s.regions)-1]; next != r {
841 return fmt.Errorf("misuse of region in goroutine %v: region end %v when the inner-most active region start event is %v", s.id, r, next)
843 s.regions = s.regions[:len(s.regions)-1]
847 // pState is the state of a proc at a point in the trace.
850 status go122.ProcStatus
853 // rangeState is the state of special time ranges bound to this proc.
857 // mState is the state of a thread at a point in the trace.
859 g GoID // Goroutine bound to this M. (The goroutine's state is Executing.)
860 p ProcID // Proc bound to this M. (The proc's state is Executing.)
863 // rangeState represents the state of special time ranges.
864 type rangeState struct {
865 // inFlight contains the rangeTypes of any ranges bound to a resource.
869 // beginRange begins a special range in time on the goroutine.
871 // Returns an error if the range is already in progress.
872 func (s *rangeState) beginRange(typ rangeType) error {
874 return fmt.Errorf("discovered event already in-flight for when starting event %v", go122.Specs()[typ.typ].Name)
876 s.inFlight = append(s.inFlight, typ)
880 // activeRange marks a special range in time on the goroutine as active in the
881 // initial generation, or confirms that it is indeed active in later generations.
882 func (s *rangeState) activeRange(typ rangeType, isInitialGen bool) error {
885 return fmt.Errorf("found named active range already in first gen: %v", typ)
887 s.inFlight = append(s.inFlight, typ)
888 } else if !s.hasRange(typ) {
889 return fmt.Errorf("resource is missing active range: %v %v", go122.Specs()[typ.typ].Name, s.inFlight)
894 // hasRange returns true if a special time range on the goroutine is in progress.
895 func (s *rangeState) hasRange(typ rangeType) bool {
896 for _, ftyp := range s.inFlight {
904 // endRange ends a special range in time on the goroutine.
906 // This must line up with the start event type of the range the goroutine is currently in.
907 func (s *rangeState) endRange(typ event.Type) (stringID, error) {
908 st := go122.Specs()[typ].StartEv
910 for i, r := range s.inFlight {
917 return 0, fmt.Errorf("tried to end event %v, but not in-flight", go122.Specs()[st].Name)
920 desc := s.inFlight[idx].desc
921 s.inFlight[idx], s.inFlight[len(s.inFlight)-1] = s.inFlight[len(s.inFlight)-1], s.inFlight[idx]
922 s.inFlight = s.inFlight[:len(s.inFlight)-1]
// seqCounter is a position in the global sequence of events for a resource.
//
// Sequence numbers are local to a generation, so a position is the pair of a
// generation and a sequence number within that generation.
type seqCounter struct {
	// gen is the generation that seq is local to.
	gen uint64
	// seq is the sequence number within gen.
	seq uint64
}
932 // makeSeq creates a new seqCounter.
933 func makeSeq(gen, seq uint64) seqCounter {
934 return seqCounter{gen: gen, seq: seq}
937 // succeeds returns true if a is the immediate successor of b.
938 func (a seqCounter) succeeds(b seqCounter) bool {
939 return a.gen == b.gen && a.seq == b.seq+1
942 // String returns a debug string representation of the seqCounter.
943 func (c seqCounter) String() string {
944 return fmt.Sprintf("%d (gen=%d)", c.seq, c.gen)
947 func dumpOrdering(order *ordering) string {
948 var sb strings.Builder
949 for id, state := range order.gStates {
950 fmt.Fprintf(&sb, "G %d [status=%s seq=%s]\n", id, state.status, state.seq)
953 for id, state := range order.pStates {
954 fmt.Fprintf(&sb, "P %d [status=%s seq=%s]\n", id, state.status, state.seq)
957 for id, state := range order.mStates {
958 fmt.Fprintf(&sb, "M %d [g=%d p=%d]\n", id, state.g, state.p)
961 fmt.Fprintf(&sb, "GC %d %s\n", order.gcSeq, order.gcState)
965 // taskState represents an active task.
966 type taskState struct {
967 // name is the type of the active task.
970 // parentID is the parent ID of the active task.