src/runtime/trace.go

   1 // Copyright 2014 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // Go execution tracer.
   6 // The tracer captures a wide range of execution events like goroutine
   7 // creation/blocking/unblocking, syscall enter/exit/block, GC-related events,
   8 // changes of heap size, processor start/stop, etc and writes them to a buffer
   9 // in a compact form. A precise nanosecond-precision timestamp and a stack
  10 // trace is captured for most events.
  11 // See https://golang.org/s/go15trace for more info.
  12
  13 package runtime
  14
  15 import (
  16         "internal/abi"
  17         "internal/goarch"
  18         "internal/goos"
  19         "runtime/internal/atomic"
  20         "runtime/internal/sys"
  21         "unsafe"
  22 )
  23
  24 // Event types in the trace, args are given in square brackets.
  25 const (
  26         traceEvNone              = 0  // unused
  27         traceEvBatch             = 1  // start of per-P batch of events [pid, timestamp]
  28         traceEvFrequency         = 2  // contains tracer timer frequency [frequency (ticks per second)]
  29         traceEvStack             = 3  // stack [stack id, number of PCs, array of {PC, func string ID, file string ID, line}]
  30         traceEvGomaxprocs        = 4  // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id]
  31         traceEvProcStart         = 5  // start of P [timestamp, thread id]
  32         traceEvProcStop          = 6  // stop of P [timestamp]
  33         traceEvGCStart           = 7  // GC start [timestamp, seq, stack id]
  34         traceEvGCDone            = 8  // GC done [timestamp]
  35         traceEvSTWStart          = 9  // STW start [timestamp, kind]
  36         traceEvSTWDone           = 10 // STW done [timestamp]
  37         traceEvGCSweepStart      = 11 // GC sweep start [timestamp, stack id]
  38         traceEvGCSweepDone       = 12 // GC sweep done [timestamp, swept, reclaimed]
  39         traceEvGoCreate          = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
  40         traceEvGoStart           = 14 // goroutine starts running [timestamp, goroutine id, seq]
  41         traceEvGoEnd             = 15 // goroutine ends [timestamp]
  42         traceEvGoStop            = 16 // goroutine stops (like in select{}) [timestamp, stack]
  43         traceEvGoSched           = 17 // goroutine calls Gosched [timestamp, stack]
  44         traceEvGoPreempt         = 18 // goroutine is preempted [timestamp, stack]
  45         traceEvGoSleep           = 19 // goroutine calls Sleep [timestamp, stack]
  46         traceEvGoBlock           = 20 // goroutine blocks [timestamp, stack]
  47         traceEvGoUnblock         = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack]
  48         traceEvGoBlockSend       = 22 // goroutine blocks on chan send [timestamp, stack]
  49         traceEvGoBlockRecv       = 23 // goroutine blocks on chan recv [timestamp, stack]
  50         traceEvGoBlockSelect     = 24 // goroutine blocks on select [timestamp, stack]
  51         traceEvGoBlockSync       = 25 // goroutine blocks on Mutex/RWMutex [timestamp, stack]
  52         traceEvGoBlockCond       = 26 // goroutine blocks on Cond [timestamp, stack]
  53         traceEvGoBlockNet        = 27 // goroutine blocks on network [timestamp, stack]
  54         traceEvGoSysCall         = 28 // syscall enter [timestamp, stack]
  55         traceEvGoSysExit         = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp]
  56         traceEvGoSysBlock        = 30 // syscall blocks [timestamp]
  57         traceEvGoWaiting         = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id]
  58         traceEvGoInSyscall       = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
  59         traceEvHeapAlloc         = 33 // gcController.heapLive change [timestamp, heap_alloc]
  60         traceEvHeapGoal          = 34 // gcController.heapGoal() (formerly next_gc) change [timestamp, heap goal in bytes]
  61         traceEvTimerGoroutine    = 35 // not currently used; previously denoted timer goroutine [timer goroutine id]
  62         traceEvFutileWakeup      = 36 // not currently used; denotes that the previous wakeup of this goroutine was futile [timestamp]
  63         traceEvString            = 37 // string dictionary entry [ID, length, string]
  64         traceEvGoStartLocal      = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id]
  65         traceEvGoUnblockLocal    = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack]
  66         traceEvGoSysExitLocal    = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp]
  67         traceEvGoStartLabel      = 41 // goroutine starts running with label [timestamp, goroutine id, seq, label string id]
  68         traceEvGoBlockGC         = 42 // goroutine blocks on GC assist [timestamp, stack]
  69         traceEvGCMarkAssistStart = 43 // GC mark assist start [timestamp, stack]
  70         traceEvGCMarkAssistDone  = 44 // GC mark assist done [timestamp]
  71         traceEvUserTaskCreate    = 45 // trace.NewTask [timestamp, internal task id, internal parent task id, name string, stack]
  72         traceEvUserTaskEnd       = 46 // end of a task [timestamp, internal task id, stack]
  73         traceEvUserRegion        = 47 // trace.WithRegion [timestamp, internal task id, mode(0:start, 1:end), name string, stack]
  74         traceEvUserLog           = 48 // trace.Log [timestamp, internal task id, key string id, stack, value string]
  75         traceEvCPUSample         = 49 // CPU profiling sample [timestamp, real timestamp, real P id (-1 when absent), goroutine id, stack]
  76         traceEvCount             = 50
  77         // Byte is used but only 6 bits are available for event type.
  78         // The remaining 2 bits are used to specify the number of arguments.
  79         // That means, the max event type value is 63.
  80 )
  81
  82 // traceBlockReason is an enumeration of reasons a goroutine might block.
  83 // This is the interface the rest of the runtime uses to tell the
  84 // tracer why a goroutine blocked. The tracer then propagates this information
  85 // into the trace however it sees fit.
  86 //
  87 // Note that traceBlockReasons should not be compared, since reasons that are
  88 // distinct by name may *not* be distinct by value.
  89 type traceBlockReason uint8
  90
  91 // For maximal efficiency, just map the trace block reason directly to a trace
  92 // event.
  93 const (
  94         traceBlockGeneric         traceBlockReason = traceEvGoBlock
  95         traceBlockForever                          = traceEvGoStop
  96         traceBlockNet                              = traceEvGoBlockNet
  97         traceBlockSelect                           = traceEvGoBlockSelect
  98         traceBlockCondWait                         = traceEvGoBlockCond
  99         traceBlockSync                             = traceEvGoBlockSync
 100         traceBlockChanSend                         = traceEvGoBlockSend
 101         traceBlockChanRecv                         = traceEvGoBlockRecv
 102         traceBlockGCMarkAssist                     = traceEvGoBlockGC
 103         traceBlockGCSweep                          = traceEvGoBlock
 104         traceBlockSystemGoroutine                  = traceEvGoBlock
 105         traceBlockPreempted                        = traceEvGoBlock
 106         traceBlockDebugCall                        = traceEvGoBlock
 107         traceBlockUntilGCEnds                      = traceEvGoBlock
 108         traceBlockSleep                            = traceEvGoSleep
 109 )
 110
 111 const (
 112         // Timestamps in trace are cputicks/traceTickDiv.
 113         // This makes absolute values of timestamp diffs smaller,
 114         // and so they are encoded in less number of bytes.
 115         // 64 on x86 is somewhat arbitrary (one tick is ~20ns on a 3GHz machine).
 116         // The suggested increment frequency for PowerPC's time base register is
 117         // 512 MHz according to Power ISA v2.07 section 6.2, so we use 16 on ppc64
 118         // and ppc64le.
 119         traceTimeDiv = 16 + 48*(goarch.Is386|goarch.IsAmd64)
 120         // Maximum number of PCs in a single stack trace.
 121         // Since events contain only stack id rather than whole stack trace,
 122         // we can allow quite large values here.
 123         traceStackSize = 128
 124         // Identifier of a fake P that is used when we trace without a real P.
 125         traceGlobProc = -1
 126         // Maximum number of bytes to encode uint64 in base-128.
 127         traceBytesPerNumber = 10
 128         // Shift of the number of arguments in the first event byte.
 129         traceArgCountShift = 6
 130 )
 131
 132 // trace is global tracing context.
 133 var trace struct {
 134         // trace.lock must only be acquired on the system stack where
 135         // stack splits cannot happen while it is held.
 136         lock          mutex       // protects the following members
 137         enabled       bool        // when set runtime traces events
 138         shutdown      bool        // set when we are waiting for trace reader to finish after setting enabled to false
 139         headerWritten bool        // whether ReadTrace has emitted trace header
 140         footerWritten bool        // whether ReadTrace has emitted trace footer
 141         shutdownSema  uint32      // used to wait for ReadTrace completion
 142         seqStart      uint64      // sequence number when tracing was started
 143         startTicks    int64       // cputicks when tracing was started
 144         endTicks      int64       // cputicks when tracing was stopped
 145         startNanotime int64       // nanotime when tracing was started
 146         endNanotime   int64       // nanotime when tracing was stopped
 147         startTime     traceTime   // traceClockNow when tracing started
 148         endTime       traceTime   // traceClockNow when tracing stopped
 149         seqGC         uint64      // GC start/done sequencer
 150         reading       traceBufPtr // buffer currently handed off to user
 151         empty         traceBufPtr // stack of empty buffers
 152         fullHead      traceBufPtr // queue of full buffers
 153         fullTail      traceBufPtr
 154         stackTab      traceStackTable // maps stack traces to unique ids
 155         // cpuLogRead accepts CPU profile samples from the signal handler where
 156         // they're generated. It uses a two-word header to hold the IDs of the P and
 157         // G (respectively) that were active at the time of the sample. Because
 158         // profBuf uses a record with all zeros in its header to indicate overflow,
 159         // we make sure to make the P field always non-zero: The ID of a real P will
 160         // start at bit 1, and bit 0 will be set. Samples that arrive while no P is
 161         // running (such as near syscalls) will set the first header field to 0b10.
 162         // This careful handling of the first header field allows us to store ID of
 163         // the active G directly in the second field, even though that will be 0
 164         // when sampling g0.
 165         cpuLogRead *profBuf
 166         // cpuLogBuf is a trace buffer to hold events corresponding to CPU profile
 167         // samples, which arrive out of band and not directly connected to a
 168         // specific P.
 169         cpuLogBuf traceBufPtr
 170
 171         reader atomic.Pointer[g] // goroutine that called ReadTrace, or nil
 172
 173         signalLock  atomic.Uint32 // protects use of the following member, only usable in signal handlers
 174         cpuLogWrite *profBuf      // copy of cpuLogRead for use in signal handlers, set without signalLock
 175
 176         // Dictionary for traceEvString.
 177         //
 178         // TODO: central lock to access the map is not ideal.
 179         //   option: pre-assign ids to all user annotation region names and tags
 180         //   option: per-P cache
 181         //   option: sync.Map like data structure
 182         stringsLock mutex
 183         strings     map[string]uint64
 184         stringSeq   uint64
 185
 186         // markWorkerLabels maps gcMarkWorkerMode to string ID.
 187         markWorkerLabels [len(gcMarkWorkerModeStrings)]uint64
 188
 189         bufLock mutex       // protects buf
 190         buf     traceBufPtr // global trace buffer, used when running without a p
 191 }
 192
 193 // gTraceState is per-G state for the tracer.
 194 type gTraceState struct {
 195         sysExitTime        traceTime // timestamp when syscall has returned
 196         tracedSyscallEnter bool      // syscall or cgo was entered while trace was enabled or StartTrace has emitted EvGoInSyscall about this goroutine
 197         seq                uint64    // trace event sequencer
 198         lastP              puintptr  // last P emitted an event for this goroutine
 199 }
 200
 201 // mTraceState is per-M state for the tracer.
 202 type mTraceState struct {
 203         startingTrace  bool // this M is in TraceStart, potentially before traceEnabled is true
 204         tracedSTWStart bool // this M traced a STW start, so it should trace an end
 205 }
 206
 207 // pTraceState is per-P state for the tracer.
 208 type pTraceState struct {
 209         buf traceBufPtr
 210
 211         // inSweep indicates the sweep events should be traced.
 212         // This is used to defer the sweep start event until a span
 213         // has actually been swept.
 214         inSweep bool
 215
 216         // swept and reclaimed track the number of bytes swept and reclaimed
 217         // by sweeping in the current sweep loop (while inSweep was true).
 218         swept, reclaimed uintptr
 219 }
 220
 221 // traceLockInit initializes global trace locks.
 222 func traceLockInit() {
 223         lockInit(&trace.bufLock, lockRankTraceBuf)
 224         lockInit(&trace.stringsLock, lockRankTraceStrings)
 225         lockInit(&trace.lock, lockRankTrace)
 226         lockInit(&trace.stackTab.lock, lockRankTraceStackTab)
 227 }
 228
 229 // traceBufHeader is per-P tracing buffer.
 230 type traceBufHeader struct {
 231         link     traceBufPtr             // in trace.empty/full
 232         lastTime traceTime               // when we wrote the last event
 233         pos      int                     // next write offset in arr
 234         stk      [traceStackSize]uintptr // scratch buffer for traceback
 235 }
 236
 237 // traceBuf is per-P tracing buffer.
 238 type traceBuf struct {
 239         _ sys.NotInHeap
 240         traceBufHeader
 241         arr [64<<10 - unsafe.Sizeof(traceBufHeader{})]byte // underlying buffer for traceBufHeader.buf
 242 }
 243
 244 // traceBufPtr is a *traceBuf that is not traced by the garbage
 245 // collector and doesn't have write barriers. traceBufs are not
 246 // allocated from the GC'd heap, so this is safe, and are often
 247 // manipulated in contexts where write barriers are not allowed, so
 248 // this is necessary.
 249 //
 250 // TODO: Since traceBuf is now embedded runtime/internal/sys.NotInHeap, this isn't necessary.
 251 type traceBufPtr uintptr
 252
 253 func (tp traceBufPtr) ptr() *traceBuf   { return (*traceBuf)(unsafe.Pointer(tp)) }
 254 func (tp *traceBufPtr) set(b *traceBuf) { *tp = traceBufPtr(unsafe.Pointer(b)) }
 255 func traceBufPtrOf(b *traceBuf) traceBufPtr {
 256         return traceBufPtr(unsafe.Pointer(b))
 257 }
 258
 259 // traceEnabled returns true if the trace is currently enabled.
 260 //
 261 //go:nosplit
 262 func traceEnabled() bool {
 263         return trace.enabled
 264 }
 265
 266 // traceShuttingDown returns true if the trace is currently shutting down.
 267 //
 268 //go:nosplit
 269 func traceShuttingDown() bool {
 270         return trace.shutdown
 271 }
 272
 273 // StartTrace enables tracing for the current process.
 274 // While tracing, the data will be buffered and available via ReadTrace.
 275 // StartTrace returns an error if tracing is already enabled.
 276 // Most clients should use the runtime/trace package or the testing package's
 277 // -test.trace flag instead of calling StartTrace directly.
 278 func StartTrace() error {
 279         // Stop the world so that we can take a consistent snapshot
 280         // of all goroutines at the beginning of the trace.
 281         // Do not stop the world during GC so we ensure we always see
 282         // a consistent view of GC-related events (e.g. a start is always
 283         // paired with an end).
 284         stopTheWorldGC(stwStartTrace)
 285
 286         // Prevent sysmon from running any code that could generate events.
 287         lock(&sched.sysmonlock)
 288
 289         // We are in stop-the-world, but syscalls can finish and write to trace concurrently.
 290         // Exitsyscall could check trace.enabled long before and then suddenly wake up
 291         // and decide to write to trace at a random point in time.
 292         // However, such syscall will use the global trace.buf buffer, because we've
 293         // acquired all p's by doing stop-the-world. So this protects us from such races.
 294         lock(&trace.bufLock)
 295
 296         if trace.enabled || trace.shutdown {
 297                 unlock(&trace.bufLock)
 298                 unlock(&sched.sysmonlock)
 299                 startTheWorldGC()
 300                 return errorString("tracing is already enabled")
 301         }
 302
 303         // Can't set trace.enabled yet. While the world is stopped, exitsyscall could
 304         // already emit a delayed event (see exitTicks in exitsyscall) if we set trace.enabled here.
 305         // That would lead to an inconsistent trace:
 306         // - either GoSysExit appears before EvGoInSyscall,
 307         // - or GoSysExit appears for a goroutine for which we don't emit EvGoInSyscall below.
 308         // To instruct traceEvent that it must not ignore events below, we set trace.startingTrace.
 309         // trace.enabled is set afterwards once we have emitted all preliminary events.
 310         mp := getg().m
 311         mp.trace.startingTrace = true
 312
 313         // Obtain current stack ID to use in all traceEvGoCreate events below.
 314         stkBuf := make([]uintptr, traceStackSize)
 315         stackID := traceStackID(mp, stkBuf, 2)
 316
 317         profBuf := newProfBuf(2, profBufWordCount, profBufTagCount) // after the timestamp, header is [pp.id, gp.goid]
 318         trace.cpuLogRead = profBuf
 319
 320         // We must not acquire trace.signalLock outside of a signal handler: a
 321         // profiling signal may arrive at any time and try to acquire it, leading to
 322         // deadlock. Because we can't use that lock to protect updates to
 323         // trace.cpuLogWrite (only use of the structure it references), reads and
 324         // writes of the pointer must be atomic. (And although this field is never
 325         // the sole pointer to the profBuf value, it's best to allow a write barrier
 326         // here.)
 327         atomicstorep(unsafe.Pointer(&trace.cpuLogWrite), unsafe.Pointer(profBuf))
 328
 329         // World is stopped, no need to lock.
 330         forEachGRace(func(gp *g) {
 331                 status := readgstatus(gp)
 332                 if status != _Gdead {
 333                         gp.trace.seq = 0
 334                         gp.trace.lastP = getg().m.p
 335                         // +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum.
 336                         id := trace.stackTab.put([]uintptr{logicalStackSentinel, startPCforTrace(gp.startpc) + sys.PCQuantum})
 337                         traceEvent(traceEvGoCreate, -1, gp.goid, uint64(id), stackID)
 338                 }
 339                 if status == _Gwaiting {
 340                         // traceEvGoWaiting is implied to have seq=1.
 341                         gp.trace.seq++
 342                         traceEvent(traceEvGoWaiting, -1, gp.goid)
 343                 }
 344                 if status == _Gsyscall {
 345                         gp.trace.seq++
 346                         gp.trace.tracedSyscallEnter = true
 347                         traceEvent(traceEvGoInSyscall, -1, gp.goid)
 348                 } else if status == _Gdead && gp.m != nil && gp.m.isextra {
 349                         // Trigger two trace events for the dead g in the extra m,
 350                         // since the next event of the g will be traceEvGoSysExit in exitsyscall,
 351                         // while calling from C thread to Go.
 352                         gp.trace.seq = 0
 353                         gp.trace.lastP = getg().m.p
 354                         // +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum.
 355                         id := trace.stackTab.put([]uintptr{logicalStackSentinel, startPCforTrace(0) + sys.PCQuantum}) // no start pc
 356                         traceEvent(traceEvGoCreate, -1, gp.goid, uint64(id), stackID)
 357                         gp.trace.seq++
 358                         gp.trace.tracedSyscallEnter = true
 359                         traceEvent(traceEvGoInSyscall, -1, gp.goid)
 360                 } else {
 361                         // We need to explicitly clear the flag. A previous trace might have ended with a goroutine
 362                         // not emitting a GoSysExit and clearing the flag, leaving it in a stale state. Clearing
 363                         // it here makes it unambiguous to any goroutine exiting a syscall racing with us that
 364                         // no EvGoInSyscall event was emitted for it. (It's not racy to set this flag here, because
 365                         // it'll only get checked when the goroutine runs again, which will be after the world starts
 366                         // again.)
 367                         gp.trace.tracedSyscallEnter = false
 368                 }
 369         })
 370         traceProcStart()
 371         traceGoStart()
 372         // Note: startTicks needs to be set after we emit traceEvGoInSyscall events.
 373         // If we do it the other way around, it is possible that exitsyscall will
 374         // query sysExitTime after startTicks but before traceEvGoInSyscall timestamp.
 375         // It will lead to a false conclusion that cputicks is broken.
 376         trace.startTime = traceClockNow()
 377         trace.startTicks = cputicks()
 378         trace.startNanotime = nanotime()
 379         trace.headerWritten = false
 380         trace.footerWritten = false
 381
 382         // string to id mapping
 383         //  0 : reserved for an empty string
 384         //  remaining: other strings registered by traceString
 385         trace.stringSeq = 0
 386         trace.strings = make(map[string]uint64)
 387
 388         trace.seqGC = 0
 389         mp.trace.startingTrace = false
 390         trace.enabled = true
 391
 392         // Register runtime goroutine labels.
 393         _, pid, bufp := traceAcquireBuffer()
 394         for i, label := range gcMarkWorkerModeStrings[:] {
 395                 trace.markWorkerLabels[i], bufp = traceString(bufp, pid, label)
 396         }
 397         traceReleaseBuffer(mp, pid)
 398
 399         unlock(&trace.bufLock)
 400
 401         unlock(&sched.sysmonlock)
 402
 403         // Record the current state of HeapGoal to avoid information loss in trace.
 404         traceHeapGoal()
 405
 406         startTheWorldGC()
 407         return nil
 408 }
 409
 410 // StopTrace stops tracing, if it was previously enabled.
 411 // StopTrace only returns after all the reads for the trace have completed.
 412 func StopTrace() {
 413         // Stop the world so that we can collect the trace buffers from all p's below,
 414         // and also to avoid races with traceEvent.
 415         stopTheWorldGC(stwStopTrace)
 416
 417         // See the comment in StartTrace.
 418         lock(&sched.sysmonlock)
 419
 420         // See the comment in StartTrace.
 421         lock(&trace.bufLock)
 422
 423         if !trace.enabled {
 424                 unlock(&trace.bufLock)
 425                 unlock(&sched.sysmonlock)
 426                 startTheWorldGC()
 427                 return
 428         }
 429
 430         traceGoSched()
 431
 432         atomicstorep(unsafe.Pointer(&trace.cpuLogWrite), nil)
 433         trace.cpuLogRead.close()
 434         traceReadCPU()
 435
 436         // Loop over all allocated Ps because dead Ps may still have
 437         // trace buffers.
 438         for _, p := range allp[:cap(allp)] {
 439                 buf := p.trace.buf
 440                 if buf != 0 {
 441                         traceFullQueue(buf)
 442                         p.trace.buf = 0
 443                 }
 444         }
 445         if trace.buf != 0 {
 446                 buf := trace.buf
 447                 trace.buf = 0
 448                 if buf.ptr().pos != 0 {
 449                         traceFullQueue(buf)
 450                 }
 451         }
 452         if trace.cpuLogBuf != 0 {
 453                 buf := trace.cpuLogBuf
 454                 trace.cpuLogBuf = 0
 455                 if buf.ptr().pos != 0 {
 456                         traceFullQueue(buf)
 457                 }
 458         }
 459
 460         // Wait for startNanotime != endNanotime. On Windows the default interval between
 461         // system clock ticks is typically between 1 and 15 milliseconds, which may not
 462         // have passed since the trace started. Without nanotime moving forward, trace
 463         // tooling has no way of identifying how much real time each cputicks time deltas
 464         // represent.
 465         for {
 466                 trace.endTime = traceClockNow()
 467                 trace.endTicks = cputicks()
 468                 trace.endNanotime = nanotime()
 469
 470                 if trace.endNanotime != trace.startNanotime || faketime != 0 {
 471                         break
 472                 }
 473                 osyield()
 474         }
 475
 476         trace.enabled = false
 477         trace.shutdown = true
 478         unlock(&trace.bufLock)
 479
 480         unlock(&sched.sysmonlock)
 481
 482         startTheWorldGC()
 483
 484         // The world is started but we've set trace.shutdown, so new tracing can't start.
 485         // Wait for the trace reader to flush pending buffers and stop.
 486         semacquire(&trace.shutdownSema)
 487         if raceenabled {
 488                 raceacquire(unsafe.Pointer(&trace.shutdownSema))
 489         }
 490
 491         systemstack(func() {
 492                 // The lock protects us from races with StartTrace/StopTrace because they do stop-the-world.
 493                 lock(&trace.lock)
 494                 for _, p := range allp[:cap(allp)] {
 495                         if p.trace.buf != 0 {
 496                                 throw("trace: non-empty trace buffer in proc")
 497                         }
 498                 }
 499                 if trace.buf != 0 {
 500                         throw("trace: non-empty global trace buffer")
 501                 }
 502                 if trace.fullHead != 0 || trace.fullTail != 0 {
 503                         throw("trace: non-empty full trace buffer")
 504                 }
 505                 if trace.reading != 0 || trace.reader.Load() != nil {
 506                         throw("trace: reading after shutdown")
 507                 }
 508                 for trace.empty != 0 {
 509                         buf := trace.empty
 510                         trace.empty = buf.ptr().link
 511                         sysFree(unsafe.Pointer(buf), unsafe.Sizeof(*buf.ptr()), &memstats.other_sys)
 512                 }
 513                 trace.strings = nil
 514                 trace.shutdown = false
 515                 trace.cpuLogRead = nil
 516                 unlock(&trace.lock)
 517         })
 518 }
 519
 520 // ReadTrace returns the next chunk of binary tracing data, blocking until data
 521 // is available. If tracing is turned off and all the data accumulated while it
 522 // was on has been returned, ReadTrace returns nil. The caller must copy the
 523 // returned data before calling ReadTrace again.
 524 // ReadTrace must be called from one goroutine at a time.
 525 func ReadTrace() []byte {
 526 top:
 527         var buf []byte
 528         var park bool
 529         systemstack(func() {
 530                 buf, park = readTrace0()
 531         })
 532         if park {
 533                 gopark(func(gp *g, _ unsafe.Pointer) bool {
 534                         if !trace.reader.CompareAndSwapNoWB(nil, gp) {
 535                                 // We're racing with another reader.
 536                                 // Wake up and handle this case.
 537                                 return false
 538                         }
 539
 540                         if g2 := traceReader(); gp == g2 {
 541                                 // New data arrived between unlocking
 542                                 // and the CAS and we won the wake-up
 543                                 // race, so wake up directly.
 544                                 return false
 545                         } else if g2 != nil {
 546                                 printlock()
 547                                 println("runtime: got trace reader", g2, g2.goid)
 548                                 throw("unexpected trace reader")
 549                         }
 550
 551                         return true
 552                 }, nil, waitReasonTraceReaderBlocked, traceBlockSystemGoroutine, 2)
 553                 goto top
 554         }
 555
 556         return buf
 557 }
 558
 559 // readTrace0 is ReadTrace's continuation on g0. This must run on the
 560 // system stack because it acquires trace.lock.
 561 //
 562 //go:systemstack
 563 func readTrace0() (buf []byte, park bool) {
 564         if raceenabled {
 565                 // g0 doesn't have a race context. Borrow the user G's.
 566                 if getg().racectx != 0 {
 567                         throw("expected racectx == 0")
 568                 }
 569                 getg().racectx = getg().m.curg.racectx
 570                 // (This defer should get open-coded, which is safe on
 571                 // the system stack.)
 572                 defer func() { getg().racectx = 0 }()
 573         }
 574
 575         // Optimistically look for CPU profile samples. This may write new stack
 576         // records, and may write new tracing buffers. This must be done with the
 577         // trace lock not held. footerWritten and shutdown are safe to access
 578         // here. They are only mutated by this goroutine or during a STW.
 579         if !trace.footerWritten && !trace.shutdown {
 580                 traceReadCPU()
 581         }
 582
 583         // This function must not allocate while holding trace.lock:
 584         // allocation can call heap allocate, which will try to emit a trace
 585         // event while holding heap lock.
 586         lock(&trace.lock)
 587
 588         if trace.reader.Load() != nil {
 589                 // More than one goroutine reads trace. This is bad.
 590                 // But we rather do not crash the program because of tracing,
 591                 // because tracing can be enabled at runtime on prod servers.
 592                 unlock(&trace.lock)
 593                 println("runtime: ReadTrace called from multiple goroutines simultaneously")
 594                 return nil, false
 595         }
 596         // Recycle the old buffer.
 597         if buf := trace.reading; buf != 0 {
 598                 buf.ptr().link = trace.empty
 599                 trace.empty = buf
 600                 trace.reading = 0
 601         }
 602         // Write trace header.
 603         if !trace.headerWritten {
 604                 trace.headerWritten = true
 605                 unlock(&trace.lock)
 606                 return []byte("go 1.21 trace\x00\x00\x00"), false
 607         }
 608         // Wait for new data.
 609         if trace.fullHead == 0 && !trace.shutdown {
 610                 // We don't simply use a note because the scheduler
 611                 // executes this goroutine directly when it wakes up
 612                 // (also a note would consume an M).
 613                 unlock(&trace.lock)
 614                 return nil, true
 615         }
 616 newFull:
 617         assertLockHeld(&trace.lock)
 618         // Write a buffer.
 619         if trace.fullHead != 0 {
 620                 buf := traceFullDequeue()
 621                 trace.reading = buf
 622                 unlock(&trace.lock)
 623                 return buf.ptr().arr[:buf.ptr().pos], false
 624         }
 625
 626         // Write footer with timer frequency.
 627         if !trace.footerWritten {
 628                 trace.footerWritten = true
 629                 freq := (float64(trace.endTicks-trace.startTicks) / traceTimeDiv) / (float64(trace.endNanotime-trace.startNanotime) / 1e9)
 630                 if freq <= 0 {
 631                         throw("trace: ReadTrace got invalid frequency")
 632                 }
 633                 unlock(&trace.lock)
 634
 635                 // Write frequency event.
 636                 bufp := traceFlush(0, 0)
 637                 buf := bufp.ptr()
 638                 buf.byte(traceEvFrequency | 0<<traceArgCountShift)
 639                 buf.varint(uint64(freq))
 640
 641                 // Dump stack table.
 642                 // This will emit a bunch of full buffers, we will pick them up
 643                 // on the next iteration.
 644                 bufp = trace.stackTab.dump(bufp)
 645
 646                 // Flush final buffer.
 647                 lock(&trace.lock)
 648                 traceFullQueue(bufp)
 649                 goto newFull // trace.lock should be held at newFull
 650         }
 651         // Done.
 652         if trace.shutdown {
 653                 unlock(&trace.lock)
 654                 if raceenabled {
 655                         // Model synchronization on trace.shutdownSema, which race
 656                         // detector does not see. This is required to avoid false
 657                         // race reports on writer passed to trace.Start.
 658                         racerelease(unsafe.Pointer(&trace.shutdownSema))
 659                 }
 660                 // trace.enabled is already reset, so can call traceable functions.
 661                 semrelease(&trace.shutdownSema)
 662                 return nil, false
 663         }
 664         // Also bad, but see the comment above.
 665         unlock(&trace.lock)
 666         println("runtime: spurious wakeup of trace reader")
 667         return nil, false
 668 }
 669
 670 // traceReader returns the trace reader that should be woken up, if any.
 671 // Callers should first check that trace.enabled or trace.shutdown is set.
 672 //
 673 // This must run on the system stack because it acquires trace.lock.
 674 //
 675 //go:systemstack
 676 func traceReader() *g {
 677         // Optimistic check first
 678         if traceReaderAvailable() == nil {
 679                 return nil
 680         }
 681         lock(&trace.lock)
 682         gp := traceReaderAvailable()
 683         if gp == nil || !trace.reader.CompareAndSwapNoWB(gp, nil) {
 684                 unlock(&trace.lock)
 685                 return nil
 686         }
 687         unlock(&trace.lock)
 688         return gp
 689 }
 690
 691 // traceReaderAvailable returns the trace reader if it is not currently
 692 // scheduled and should be. Callers should first check that trace.enabled
 693 // or trace.shutdown is set.
 694 func traceReaderAvailable() *g {
 695         if trace.fullHead != 0 || trace.shutdown {
 696                 return trace.reader.Load()
 697         }
 698         return nil
 699 }
 700
 701 // traceProcFree frees trace buffer associated with pp.
 702 //
 703 // This must run on the system stack because it acquires trace.lock.
 704 //
 705 //go:systemstack
 706 func traceProcFree(pp *p) {
 707         buf := pp.trace.buf
 708         pp.trace.buf = 0
 709         if buf == 0 {
 710                 return
 711         }
 712         lock(&trace.lock)
 713         traceFullQueue(buf)
 714         unlock(&trace.lock)
 715 }
 716
 717 // traceFullQueue queues buf into queue of full buffers.
 718 func traceFullQueue(buf traceBufPtr) {
 719         buf.ptr().link = 0
 720         if trace.fullHead == 0 {
 721                 trace.fullHead = buf
 722         } else {
 723                 trace.fullTail.ptr().link = buf
 724         }
 725         trace.fullTail = buf
 726 }
 727
 728 // traceFullDequeue dequeues from queue of full buffers.
 729 func traceFullDequeue() traceBufPtr {
 730         buf := trace.fullHead
 731         if buf == 0 {
 732                 return 0
 733         }
 734         trace.fullHead = buf.ptr().link
 735         if trace.fullHead == 0 {
 736                 trace.fullTail = 0
 737         }
 738         buf.ptr().link = 0
 739         return buf
 740 }
 741
 742 // traceEvent writes a single event to trace buffer, flushing the buffer if necessary.
 743 // ev is event type.
 744 // If skip > 0, write current stack id as the last argument (skipping skip top frames).
 745 // If skip = 0, this event type should contain a stack, but we don't want
 746 // to collect and remember it for this particular call.
 747 func traceEvent(ev byte, skip int, args ...uint64) {
 748         mp, pid, bufp := traceAcquireBuffer()
 749         // Double-check trace.enabled now that we've done m.locks++ and acquired bufLock.
 750         // This protects from races between traceEvent and StartTrace/StopTrace.
 751
 752         // The caller checked that trace.enabled == true, but trace.enabled might have been
 753         // turned off between the check and now. Check again. traceLockBuffer did mp.locks++,
 754         // StopTrace does stopTheWorld, and stopTheWorld waits for mp.locks to go back to zero,
 755         // so if we see trace.enabled == true now, we know it's true for the rest of the function.
 756         // Exitsyscall can run even during stopTheWorld. The race with StartTrace/StopTrace
 757         // during tracing in exitsyscall is resolved by locking trace.bufLock in traceLockBuffer.
 758         //
 759         // Note trace_userTaskCreate runs the same check.
 760         if !trace.enabled && !mp.trace.startingTrace {
 761                 traceReleaseBuffer(mp, pid)
 762                 return
 763         }
 764
 765         if skip > 0 {
 766                 if getg() == mp.curg {
 767                         skip++ // +1 because stack is captured in traceEventLocked.
 768                 }
 769         }
 770         traceEventLocked(0, mp, pid, bufp, ev, 0, skip, args...)
 771         traceReleaseBuffer(mp, pid)
 772 }
 773
 774 // traceEventLocked writes a single event of type ev to the trace buffer bufp,
 775 // flushing the buffer if necessary. pid is the id of the current P, or
 776 // traceGlobProc if we're tracing without a real P.
 777 //
 778 // Preemption is disabled, and if running without a real P the global tracing
 779 // buffer is locked.
 780 //
 781 // Events types that do not include a stack set skip to -1. Event types that
 782 // include a stack may explicitly reference a stackID from the trace.stackTab
 783 // (obtained by an earlier call to traceStackID). Without an explicit stackID,
 784 // this function will automatically capture the stack of the goroutine currently
 785 // running on mp, skipping skip top frames or, if skip is 0, writing out an
 786 // empty stack record.
 787 //
 788 // It records the event's args to the traceBuf, and also makes an effort to
 789 // reserve extraBytes bytes of additional space immediately following the event,
 790 // in the same traceBuf.
 791 func traceEventLocked(extraBytes int, mp *m, pid int32, bufp *traceBufPtr, ev byte, stackID uint32, skip int, args ...uint64) {
 792         buf := bufp.ptr()
 793         // TODO: test on non-zero extraBytes param.
 794         maxSize := 2 + 5*traceBytesPerNumber + extraBytes // event type, length, sequence, timestamp, stack id and two add params
 795         if buf == nil || len(buf.arr)-buf.pos < maxSize {
 796                 systemstack(func() {
 797                         buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
 798                 })
 799                 bufp.set(buf)
 800         }
 801
 802         ts := traceClockNow()
 803         if ts <= buf.lastTime {
 804                 ts = buf.lastTime + 1
 805         }
 806         tsDiff := uint64(ts - buf.lastTime)
 807         buf.lastTime = ts
 808         narg := byte(len(args))
 809         if stackID != 0 || skip >= 0 {
 810                 narg++
 811         }
 812         // We have only 2 bits for number of arguments.
 813         // If number is >= 3, then the event type is followed by event length in bytes.
 814         if narg > 3 {
 815                 narg = 3
 816         }
 817         startPos := buf.pos
 818         buf.byte(ev | narg<<traceArgCountShift)
 819         var lenp *byte
 820         if narg == 3 {
 821                 // Reserve the byte for length assuming that length < 128.
 822                 buf.varint(0)
 823                 lenp = &buf.arr[buf.pos-1]
 824         }
 825         buf.varint(tsDiff)
 826         for _, a := range args {
 827                 buf.varint(a)
 828         }
 829         if stackID != 0 {
 830                 buf.varint(uint64(stackID))
 831         } else if skip == 0 {
 832                 buf.varint(0)
 833         } else if skip > 0 {
 834                 buf.varint(traceStackID(mp, buf.stk[:], skip))
 835         }
 836         evSize := buf.pos - startPos
 837         if evSize > maxSize {
 838                 throw("invalid length of trace event")
 839         }
 840         if lenp != nil {
 841                 // Fill in actual length.
 842                 *lenp = byte(evSize - 2)
 843         }
 844 }
 845
 846 // traceCPUSample writes a CPU profile sample stack to the execution tracer's
 847 // profiling buffer. It is called from a signal handler, so is limited in what
 848 // it can do.
 849 func traceCPUSample(gp *g, pp *p, stk []uintptr) {
 850         if !trace.enabled {
 851                 // Tracing is usually turned off; don't spend time acquiring the signal
 852                 // lock unless it's active.
 853                 return
 854         }
 855
 856         // Match the clock used in traceEventLocked
 857         now := traceClockNow()
 858         // The "header" here is the ID of the P that was running the profiled code,
 859         // followed by the ID of the goroutine. (For normal CPU profiling, it's
 860         // usually the number of samples with the given stack.) Near syscalls, pp
 861         // may be nil. Reporting goid of 0 is fine for either g0 or a nil gp.
 862         var hdr [2]uint64
 863         if pp != nil {
 864                 // Overflow records in profBuf have all header values set to zero. Make
 865                 // sure that real headers have at least one bit set.
 866                 hdr[0] = uint64(pp.id)<<1 | 0b1
 867         } else {
 868                 hdr[0] = 0b10
 869         }
 870         if gp != nil {
 871                 hdr[1] = gp.goid
 872         }
 873
 874         // Allow only one writer at a time
 875         for !trace.signalLock.CompareAndSwap(0, 1) {
 876                 // TODO: Is it safe to osyield here? https://go.dev/issue/52672
 877                 osyield()
 878         }
 879
 880         if log := (*profBuf)(atomic.Loadp(unsafe.Pointer(&trace.cpuLogWrite))); log != nil {
 881                 // Note: we don't pass a tag pointer here (how should profiling tags
 882                 // interact with the execution tracer?), but if we did we'd need to be
 883                 // careful about write barriers. See the long comment in profBuf.write.
 884                 log.write(nil, int64(now), hdr[:], stk)
 885         }
 886
 887         trace.signalLock.Store(0)
 888 }
 889
 890 func traceReadCPU() {
 891         bufp := &trace.cpuLogBuf
 892
 893         for {
 894                 data, tags, _ := trace.cpuLogRead.read(profBufNonBlocking)
 895                 if len(data) == 0 {
 896                         break
 897                 }
 898                 for len(data) > 0 {
 899                         if len(data) < 4 || data[0] > uint64(len(data)) {
 900                                 break // truncated profile
 901                         }
 902                         if data[0] < 4 || tags != nil && len(tags) < 1 {
 903                                 break // malformed profile
 904                         }
 905                         if len(tags) < 1 {
 906                                 break // mismatched profile records and tags
 907                         }
 908                         timestamp := data[1]
 909                         ppid := data[2] >> 1
 910                         if hasP := (data[2] & 0b1) != 0; !hasP {
 911                                 ppid = ^uint64(0)
 912                         }
 913                         goid := data[3]
 914                         stk := data[4:data[0]]
 915                         empty := len(stk) == 1 && data[2] == 0 && data[3] == 0
 916                         data = data[data[0]:]
 917                         // No support here for reporting goroutine tags at the moment; if
 918                         // that information is to be part of the execution trace, we'd
 919                         // probably want to see when the tags are applied and when they
 920                         // change, instead of only seeing them when we get a CPU sample.
 921                         tags = tags[1:]
 922
 923                         if empty {
 924                                 // Looks like an overflow record from the profBuf. Not much to
 925                                 // do here, we only want to report full records.
 926                                 //
 927                                 // TODO: should we start a goroutine to drain the profBuf,
 928                                 // rather than relying on a high-enough volume of tracing events
 929                                 // to keep ReadTrace busy? https://go.dev/issue/52674
 930                                 continue
 931                         }
 932
 933                         buf := bufp.ptr()
 934                         if buf == nil {
 935                                 systemstack(func() {
 936                                         *bufp = traceFlush(*bufp, 0)
 937                                 })
 938                                 buf = bufp.ptr()
 939                         }
 940                         nstk := 1
 941                         buf.stk[0] = logicalStackSentinel
 942                         for ; nstk < len(buf.stk) && nstk-1 < len(stk); nstk++ {
 943                                 buf.stk[nstk] = uintptr(stk[nstk-1])
 944                         }
 945                         stackID := trace.stackTab.put(buf.stk[:nstk])
 946
 947                         traceEventLocked(0, nil, 0, bufp, traceEvCPUSample, stackID, 1, timestamp, ppid, goid)
 948                 }
 949         }
 950 }
 951
 952 // logicalStackSentinel is a sentinel value at pcBuf[0] signifying that
 953 // pcBuf[1:] holds a logical stack requiring no further processing. Any other
 954 // value at pcBuf[0] represents a skip value to apply to the physical stack in
 955 // pcBuf[1:] after inline expansion.
 956 const logicalStackSentinel = ^uintptr(0)
 957
 958 // traceStackID captures a stack trace into pcBuf, registers it in the trace
 959 // stack table, and returns its unique ID. pcBuf should have a length equal to
 960 // traceStackSize. skip controls the number of leaf frames to omit in order to
 961 // hide tracer internals from stack traces, see CL 5523.
 962 func traceStackID(mp *m, pcBuf []uintptr, skip int) uint64 {
 963         gp := getg()
 964         curgp := mp.curg
 965         nstk := 1
 966         if tracefpunwindoff() || mp.hasCgoOnStack() {
 967                 // Slow path: Unwind using default unwinder. Used when frame pointer
 968                 // unwinding is unavailable or disabled (tracefpunwindoff), or might
 969                 // produce incomplete results or crashes (hasCgoOnStack). Note that no
 970                 // cgo callback related crashes have been observed yet. The main
 971                 // motivation is to take advantage of a potentially registered cgo
 972                 // symbolizer.
 973                 pcBuf[0] = logicalStackSentinel
 974                 if curgp == gp {
 975                         nstk += callers(skip+1, pcBuf[1:])
 976                 } else if curgp != nil {
 977                         nstk += gcallers(curgp, skip, pcBuf[1:])
 978                 }
 979         } else {
 980                 // Fast path: Unwind using frame pointers.
 981                 pcBuf[0] = uintptr(skip)
 982                 if curgp == gp {
 983                         nstk += fpTracebackPCs(unsafe.Pointer(getfp()), pcBuf[1:])
 984                 } else if curgp != nil {
 985                         // We're called on the g0 stack through mcall(fn) or systemstack(fn). To
 986                         // behave like gcallers above, we start unwinding from sched.bp, which
 987                         // points to the caller frame of the leaf frame on g's stack. The return
 988                         // address of the leaf frame is stored in sched.pc, which we manually
 989                         // capture here.
 990                         pcBuf[1] = curgp.sched.pc
 991                         nstk += 1 + fpTracebackPCs(unsafe.Pointer(curgp.sched.bp), pcBuf[2:])
 992                 }
 993         }
 994         if nstk > 0 {
 995                 nstk-- // skip runtime.goexit
 996         }
 997         if nstk > 0 && curgp.goid == 1 {
 998                 nstk-- // skip runtime.main
 999         }
1000         id := trace.stackTab.put(pcBuf[:nstk])
1001         return uint64(id)
1002 }
1003
1004 // tracefpunwindoff returns true if frame pointer unwinding for the tracer is
1005 // disabled via GODEBUG or not supported by the architecture.
1006 // TODO(#60254): support frame pointer unwinding on plan9/amd64.
1007 func tracefpunwindoff() bool {
1008         return debug.tracefpunwindoff != 0 || (goarch.ArchFamily != goarch.AMD64 && goarch.ArchFamily != goarch.ARM64) || goos.IsPlan9 == 1
1009 }
1010
1011 // fpTracebackPCs populates pcBuf with the return addresses for each frame and
1012 // returns the number of PCs written to pcBuf. The returned PCs correspond to
1013 // "physical frames" rather than "logical frames"; that is if A is inlined into
1014 // B, this will return a PC for only B.
1015 func fpTracebackPCs(fp unsafe.Pointer, pcBuf []uintptr) (i int) {
1016         for i = 0; i < len(pcBuf) && fp != nil; i++ {
1017                 // return addr sits one word above the frame pointer
1018                 pcBuf[i] = *(*uintptr)(unsafe.Pointer(uintptr(fp) + goarch.PtrSize))
1019                 // follow the frame pointer to the next one
1020                 fp = unsafe.Pointer(*(*uintptr)(fp))
1021         }
1022         return i
1023 }
1024
1025 // traceAcquireBuffer returns trace buffer to use and, if necessary, locks it.
1026 func traceAcquireBuffer() (mp *m, pid int32, bufp *traceBufPtr) {
1027         // Any time we acquire a buffer, we may end up flushing it,
1028         // but flushes are rare. Record the lock edge even if it
1029         // doesn't happen this time.
1030         lockRankMayTraceFlush()
1031
1032         mp = acquirem()
1033         if p := mp.p.ptr(); p != nil {
1034                 return mp, p.id, &p.trace.buf
1035         }
1036         lock(&trace.bufLock)
1037         return mp, traceGlobProc, &trace.buf
1038 }
1039
1040 // traceReleaseBuffer releases a buffer previously acquired with traceAcquireBuffer.
1041 func traceReleaseBuffer(mp *m, pid int32) {
1042         if pid == traceGlobProc {
1043                 unlock(&trace.bufLock)
1044         }
1045         releasem(mp)
1046 }
1047
1048 // lockRankMayTraceFlush records the lock ranking effects of a
1049 // potential call to traceFlush.
1050 func lockRankMayTraceFlush() {
1051         lockWithRankMayAcquire(&trace.lock, getLockRank(&trace.lock))
1052 }
1053
1054 // traceFlush puts buf onto stack of full buffers and returns an empty buffer.
1055 //
1056 // This must run on the system stack because it acquires trace.lock.
1057 //
1058 //go:systemstack
1059 func traceFlush(buf traceBufPtr, pid int32) traceBufPtr {
1060         lock(&trace.lock)
1061         if buf != 0 {
1062                 traceFullQueue(buf)
1063         }
1064         if trace.empty != 0 {
1065                 buf = trace.empty
1066                 trace.empty = buf.ptr().link
1067         } else {
1068                 buf = traceBufPtr(sysAlloc(unsafe.Sizeof(traceBuf{}), &memstats.other_sys))
1069                 if buf == 0 {
1070                         throw("trace: out of memory")
1071                 }
1072         }
1073         bufp := buf.ptr()
1074         bufp.link.set(nil)
1075         bufp.pos = 0
1076
1077         // initialize the buffer for a new batch
1078         ts := traceClockNow()
1079         if ts <= bufp.lastTime {
1080                 ts = bufp.lastTime + 1
1081         }
1082         bufp.lastTime = ts
1083         bufp.byte(traceEvBatch | 1<<traceArgCountShift)
1084         bufp.varint(uint64(pid))
1085         bufp.varint(uint64(ts))
1086
1087         unlock(&trace.lock)
1088         return buf
1089 }
1090
1091 // traceString adds a string to the trace.strings and returns the id.
1092 func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr) {
1093         if s == "" {
1094                 return 0, bufp
1095         }
1096
1097         lock(&trace.stringsLock)
1098         if raceenabled {
1099                 // raceacquire is necessary because the map access
1100                 // below is race annotated.
1101                 raceacquire(unsafe.Pointer(&trace.stringsLock))
1102         }
1103
1104         if id, ok := trace.strings[s]; ok {
1105                 if raceenabled {
1106                         racerelease(unsafe.Pointer(&trace.stringsLock))
1107                 }
1108                 unlock(&trace.stringsLock)
1109
1110                 return id, bufp
1111         }
1112
1113         trace.stringSeq++
1114         id := trace.stringSeq
1115         trace.strings[s] = id
1116
1117         if raceenabled {
1118                 racerelease(unsafe.Pointer(&trace.stringsLock))
1119         }
1120         unlock(&trace.stringsLock)
1121
1122         // memory allocation in above may trigger tracing and
1123         // cause *bufp changes. Following code now works with *bufp,
1124         // so there must be no memory allocation or any activities
1125         // that causes tracing after this point.
1126
1127         buf := bufp.ptr()
1128         size := 1 + 2*traceBytesPerNumber + len(s)
1129         if buf == nil || len(buf.arr)-buf.pos < size {
1130                 systemstack(func() {
1131                         buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
1132                         bufp.set(buf)
1133                 })
1134         }
1135         buf.byte(traceEvString)
1136         buf.varint(id)
1137
1138         // double-check the string and the length can fit.
1139         // Otherwise, truncate the string.
1140         slen := len(s)
1141         if room := len(buf.arr) - buf.pos; room < slen+traceBytesPerNumber {
1142                 slen = room
1143         }
1144
1145         buf.varint(uint64(slen))
1146         buf.pos += copy(buf.arr[buf.pos:], s[:slen])
1147
1148         bufp.set(buf)
1149         return id, bufp
1150 }
1151
1152 // varint appends v to buf in little-endian-base-128 encoding.
1153 func (buf *traceBuf) varint(v uint64) {
1154         pos := buf.pos
1155         for ; v >= 0x80; v >>= 7 {
1156                 buf.arr[pos] = 0x80 | byte(v)
1157                 pos++
1158         }
1159         buf.arr[pos] = byte(v)
1160         pos++
1161         buf.pos = pos
1162 }
1163
1164 // varintAt writes varint v at byte position pos in buf. This always
1165 // consumes traceBytesPerNumber bytes. This is intended for when the
1166 // caller needs to reserve space for a varint but can't populate it
1167 // until later.
1168 func (buf *traceBuf) varintAt(pos int, v uint64) {
1169         for i := 0; i < traceBytesPerNumber; i++ {
1170                 if i < traceBytesPerNumber-1 {
1171                         buf.arr[pos] = 0x80 | byte(v)
1172                 } else {
1173                         buf.arr[pos] = byte(v)
1174                 }
1175                 v >>= 7
1176                 pos++
1177         }
1178 }
1179
1180 // byte appends v to buf.
1181 func (buf *traceBuf) byte(v byte) {
1182         buf.arr[buf.pos] = v
1183         buf.pos++
1184 }
1185
1186 // traceStackTable maps stack traces (arrays of PC's) to unique uint32 ids.
1187 // It is lock-free for reading.
1188 type traceStackTable struct {
1189         lock mutex // Must be acquired on the system stack
1190         seq  uint32
1191         mem  traceAlloc
1192         tab  [1 << 13]traceStackPtr
1193 }
1194
1195 // traceStack is a single stack in traceStackTable.
1196 type traceStack struct {
1197         link traceStackPtr
1198         hash uintptr
1199         id   uint32
1200         n    int
1201         stk  [0]uintptr // real type [n]uintptr
1202 }
1203
1204 type traceStackPtr uintptr
1205
1206 func (tp traceStackPtr) ptr() *traceStack { return (*traceStack)(unsafe.Pointer(tp)) }
1207
1208 // stack returns slice of PCs.
1209 func (ts *traceStack) stack() []uintptr {
1210         return (*[traceStackSize]uintptr)(unsafe.Pointer(&ts.stk))[:ts.n]
1211 }
1212
1213 // put returns a unique id for the stack trace pcs and caches it in the table,
1214 // if it sees the trace for the first time.
1215 func (tab *traceStackTable) put(pcs []uintptr) uint32 {
1216         if len(pcs) == 0 {
1217                 return 0
1218         }
1219         hash := memhash(unsafe.Pointer(&pcs[0]), 0, uintptr(len(pcs))*unsafe.Sizeof(pcs[0]))
1220         // First, search the hashtable w/o the mutex.
1221         if id := tab.find(pcs, hash); id != 0 {
1222                 return id
1223         }
1224         // Now, double check under the mutex.
1225         // Switch to the system stack so we can acquire tab.lock
1226         var id uint32
1227         systemstack(func() {
1228                 lock(&tab.lock)
1229                 if id = tab.find(pcs, hash); id != 0 {
1230                         unlock(&tab.lock)
1231                         return
1232                 }
1233                 // Create new record.
1234                 tab.seq++
1235                 stk := tab.newStack(len(pcs))
1236                 stk.hash = hash
1237                 stk.id = tab.seq
1238                 id = stk.id
1239                 stk.n = len(pcs)
1240                 stkpc := stk.stack()
1241                 copy(stkpc, pcs)
1242                 part := int(hash % uintptr(len(tab.tab)))
1243                 stk.link = tab.tab[part]
1244                 atomicstorep(unsafe.Pointer(&tab.tab[part]), unsafe.Pointer(stk))
1245                 unlock(&tab.lock)
1246         })
1247         return id
1248 }
1249
1250 // find checks if the stack trace pcs is already present in the table.
1251 func (tab *traceStackTable) find(pcs []uintptr, hash uintptr) uint32 {
1252         part := int(hash % uintptr(len(tab.tab)))
1253 Search:
1254         for stk := tab.tab[part].ptr(); stk != nil; stk = stk.link.ptr() {
1255                 if stk.hash == hash && stk.n == len(pcs) {
1256                         for i, stkpc := range stk.stack() {
1257                                 if stkpc != pcs[i] {
1258                                         continue Search
1259                                 }
1260                         }
1261                         return stk.id
1262                 }
1263         }
1264         return 0
1265 }
1266
1267 // newStack allocates a new stack of size n.
1268 func (tab *traceStackTable) newStack(n int) *traceStack {
1269         return (*traceStack)(tab.mem.alloc(unsafe.Sizeof(traceStack{}) + uintptr(n)*goarch.PtrSize))
1270 }
1271
1272 // traceFrames returns the frames corresponding to pcs. It may
1273 // allocate and may emit trace events.
1274 func traceFrames(bufp traceBufPtr, pcs []uintptr) ([]traceFrame, traceBufPtr) {
1275         frames := make([]traceFrame, 0, len(pcs))
1276         ci := CallersFrames(pcs)
1277         for {
1278                 var frame traceFrame
1279                 f, more := ci.Next()
1280                 frame, bufp = traceFrameForPC(bufp, 0, f)
1281                 frames = append(frames, frame)
1282                 if !more {
1283                         return frames, bufp
1284                 }
1285         }
1286 }
1287
1288 // dump writes all previously cached stacks to trace buffers,
1289 // releases all memory and resets state.
1290 //
1291 // This must run on the system stack because it calls traceFlush.
1292 //
1293 //go:systemstack
1294 func (tab *traceStackTable) dump(bufp traceBufPtr) traceBufPtr {
1295         for i := range tab.tab {
1296                 stk := tab.tab[i].ptr()
1297                 for ; stk != nil; stk = stk.link.ptr() {
1298                         var frames []traceFrame
1299                         frames, bufp = traceFrames(bufp, fpunwindExpand(stk.stack()))
1300
1301                         // Estimate the size of this record. This
1302                         // bound is pretty loose, but avoids counting
1303                         // lots of varint sizes.
1304                         maxSize := 1 + traceBytesPerNumber + (2+4*len(frames))*traceBytesPerNumber
1305                         // Make sure we have enough buffer space.
1306                         if buf := bufp.ptr(); len(buf.arr)-buf.pos < maxSize {
1307                                 bufp = traceFlush(bufp, 0)
1308                         }
1309
1310                         // Emit header, with space reserved for length.
1311                         buf := bufp.ptr()
1312                         buf.byte(traceEvStack | 3<<traceArgCountShift)
1313                         lenPos := buf.pos
1314                         buf.pos += traceBytesPerNumber
1315
1316                         // Emit body.
1317                         recPos := buf.pos
1318                         buf.varint(uint64(stk.id))
1319                         buf.varint(uint64(len(frames)))
1320                         for _, frame := range frames {
1321                                 buf.varint(uint64(frame.PC))
1322                                 buf.varint(frame.funcID)
1323                                 buf.varint(frame.fileID)
1324                                 buf.varint(frame.line)
1325                         }
1326
1327                         // Fill in size header.
1328                         buf.varintAt(lenPos, uint64(buf.pos-recPos))
1329                 }
1330         }
1331
1332         tab.mem.drop()
1333         *tab = traceStackTable{}
1334         lockInit(&((*tab).lock), lockRankTraceStackTab)
1335
1336         return bufp
1337 }
1338
1339 // fpunwindExpand checks if pcBuf contains logical frames (which include inlined
1340 // frames) or physical frames (produced by frame pointer unwinding) using a
1341 // sentinel value in pcBuf[0]. Logical frames are simply returned without the
1342 // sentinel. Physical frames are turned into logical frames via inline unwinding
1343 // and by applying the skip value that's stored in pcBuf[0].
1344 func fpunwindExpand(pcBuf []uintptr) []uintptr {
1345         if len(pcBuf) > 0 && pcBuf[0] == logicalStackSentinel {
1346                 // pcBuf contains logical rather than inlined frames, skip has already been
1347                 // applied, just return it without the sentinel value in pcBuf[0].
1348                 return pcBuf[1:]
1349         }
1350
1351         var (
1352                 lastFuncID = abi.FuncIDNormal
1353                 newPCBuf   = make([]uintptr, 0, traceStackSize)
1354                 skip       = pcBuf[0]
1355                 // skipOrAdd skips or appends retPC to newPCBuf and returns true if more
1356                 // pcs can be added.
1357                 skipOrAdd = func(retPC uintptr) bool {
1358                         if skip > 0 {
1359                                 skip--
1360                         } else {
1361                                 newPCBuf = append(newPCBuf, retPC)
1362                         }
1363                         return len(newPCBuf) < cap(newPCBuf)
1364                 }
1365         )
1366
1367 outer:
1368         for _, retPC := range pcBuf[1:] {
1369                 callPC := retPC - 1
1370                 fi := findfunc(callPC)
1371                 if !fi.valid() {
1372                         // There is no funcInfo if callPC belongs to a C function. In this case
1373                         // we still keep the pc, but don't attempt to expand inlined frames.
1374                         if more := skipOrAdd(retPC); !more {
1375                                 break outer
1376                         }
1377                         continue
1378                 }
1379
1380                 u, uf := newInlineUnwinder(fi, callPC)
1381                 for ; uf.valid(); uf = u.next(uf) {
1382                         sf := u.srcFunc(uf)
1383                         if sf.funcID == abi.FuncIDWrapper && elideWrapperCalling(lastFuncID) {
1384                                 // ignore wrappers
1385                         } else if more := skipOrAdd(uf.pc + 1); !more {
1386                                 break outer
1387                         }
1388                         lastFuncID = sf.funcID
1389                 }
1390         }
1391         return newPCBuf
1392 }
1393
1394 type traceFrame struct {
1395         PC     uintptr
1396         funcID uint64
1397         fileID uint64
1398         line   uint64
1399 }
1400
1401 // traceFrameForPC records the frame information.
1402 // It may allocate memory.
1403 func traceFrameForPC(buf traceBufPtr, pid int32, f Frame) (traceFrame, traceBufPtr) {
1404         bufp := &buf
1405         var frame traceFrame
1406         frame.PC = f.PC
1407
1408         fn := f.Function
1409         const maxLen = 1 << 10
1410         if len(fn) > maxLen {
1411                 fn = fn[len(fn)-maxLen:]
1412         }
1413         frame.funcID, bufp = traceString(bufp, pid, fn)
1414         frame.line = uint64(f.Line)
1415         file := f.File
1416         if len(file) > maxLen {
1417                 file = file[len(file)-maxLen:]
1418         }
1419         frame.fileID, bufp = traceString(bufp, pid, file)
1420         return frame, (*bufp)
1421 }
1422
1423 // traceAlloc is a non-thread-safe region allocator.
1424 // It holds a linked list of traceAllocBlock.
1425 type traceAlloc struct {
1426         head traceAllocBlockPtr
1427         off  uintptr
1428 }
1429
1430 // traceAllocBlock is a block in traceAlloc.
1431 //
1432 // traceAllocBlock is allocated from non-GC'd memory, so it must not
1433 // contain heap pointers. Writes to pointers to traceAllocBlocks do
1434 // not need write barriers.
1435 type traceAllocBlock struct {
1436         _    sys.NotInHeap
1437         next traceAllocBlockPtr
1438         data [64<<10 - goarch.PtrSize]byte
1439 }
1440
1441 // TODO: Since traceAllocBlock is now embedded runtime/internal/sys.NotInHeap, this isn't necessary.
1442 type traceAllocBlockPtr uintptr
1443
1444 func (p traceAllocBlockPtr) ptr() *traceAllocBlock   { return (*traceAllocBlock)(unsafe.Pointer(p)) }
1445 func (p *traceAllocBlockPtr) set(x *traceAllocBlock) { *p = traceAllocBlockPtr(unsafe.Pointer(x)) }
1446
1447 // alloc allocates n-byte block.
1448 func (a *traceAlloc) alloc(n uintptr) unsafe.Pointer {
1449         n = alignUp(n, goarch.PtrSize)
1450         if a.head == 0 || a.off+n > uintptr(len(a.head.ptr().data)) {
1451                 if n > uintptr(len(a.head.ptr().data)) {
1452                         throw("trace: alloc too large")
1453                 }
1454                 block := (*traceAllocBlock)(sysAlloc(unsafe.Sizeof(traceAllocBlock{}), &memstats.other_sys))
1455                 if block == nil {
1456                         throw("trace: out of memory")
1457                 }
1458                 block.next.set(a.head.ptr())
1459                 a.head.set(block)
1460                 a.off = 0
1461         }
1462         p := &a.head.ptr().data[a.off]
1463         a.off += n
1464         return unsafe.Pointer(p)
1465 }
1466
1467 // drop frees all previously allocated memory and resets the allocator.
1468 func (a *traceAlloc) drop() {
1469         for a.head != 0 {
1470                 block := a.head.ptr()
1471                 a.head.set(block.next.ptr())
1472                 sysFree(unsafe.Pointer(block), unsafe.Sizeof(traceAllocBlock{}), &memstats.other_sys)
1473         }
1474 }
1475
1476 // The following functions write specific events to trace.
1477
1478 func traceGomaxprocs(procs int32) {
1479         traceEvent(traceEvGomaxprocs, 1, uint64(procs))
1480 }
1481
1482 func traceProcStart() {
1483         traceEvent(traceEvProcStart, -1, uint64(getg().m.id))
1484 }
1485
1486 func traceProcStop(pp *p) {
1487         // Sysmon and stopTheWorld can stop Ps blocked in syscalls,
1488         // to handle this we temporary employ the P.
1489         mp := acquirem()
1490         oldp := mp.p
1491         mp.p.set(pp)
1492         traceEvent(traceEvProcStop, -1)
1493         mp.p = oldp
1494         releasem(mp)
1495 }
1496
1497 func traceGCStart() {
1498         traceEvent(traceEvGCStart, 3, trace.seqGC)
1499         trace.seqGC++
1500 }
1501
1502 func traceGCDone() {
1503         traceEvent(traceEvGCDone, -1)
1504 }
1505
1506 func traceSTWStart(reason stwReason) {
1507         // Don't trace if this STW is for trace start/stop, since traceEnabled
1508         // switches during a STW.
1509         if reason == stwStartTrace || reason == stwStopTrace {
1510                 return
1511         }
1512         getg().m.trace.tracedSTWStart = true
1513         traceEvent(traceEvSTWStart, -1, uint64(reason))
1514 }
1515
1516 func traceSTWDone() {
1517         mp := getg().m
1518         if !mp.trace.tracedSTWStart {
1519                 return
1520         }
1521         mp.trace.tracedSTWStart = false
1522         traceEvent(traceEvSTWDone, -1)
1523 }
1524
1525 // traceGCSweepStart prepares to trace a sweep loop. This does not
1526 // emit any events until traceGCSweepSpan is called.
1527 //
1528 // traceGCSweepStart must be paired with traceGCSweepDone and there
1529 // must be no preemption points between these two calls.
1530 func traceGCSweepStart() {
1531         // Delay the actual GCSweepStart event until the first span
1532         // sweep. If we don't sweep anything, don't emit any events.
1533         pp := getg().m.p.ptr()
1534         if pp.trace.inSweep {
1535                 throw("double traceGCSweepStart")
1536         }
1537         pp.trace.inSweep, pp.trace.swept, pp.trace.reclaimed = true, 0, 0
1538 }
1539
1540 // traceGCSweepSpan traces the sweep of a single page.
1541 //
1542 // This may be called outside a traceGCSweepStart/traceGCSweepDone
1543 // pair; however, it will not emit any trace events in this case.
1544 func traceGCSweepSpan(bytesSwept uintptr) {
1545         pp := getg().m.p.ptr()
1546         if pp.trace.inSweep {
1547                 if pp.trace.swept == 0 {
1548                         traceEvent(traceEvGCSweepStart, 1)
1549                 }
1550                 pp.trace.swept += bytesSwept
1551         }
1552 }
1553
1554 func traceGCSweepDone() {
1555         pp := getg().m.p.ptr()
1556         if !pp.trace.inSweep {
1557                 throw("missing traceGCSweepStart")
1558         }
1559         if pp.trace.swept != 0 {
1560                 traceEvent(traceEvGCSweepDone, -1, uint64(pp.trace.swept), uint64(pp.trace.reclaimed))
1561         }
1562         pp.trace.inSweep = false
1563 }
1564
1565 func traceGCMarkAssistStart() {
1566         traceEvent(traceEvGCMarkAssistStart, 1)
1567 }
1568
1569 func traceGCMarkAssistDone() {
1570         traceEvent(traceEvGCMarkAssistDone, -1)
1571 }
1572
1573 func traceGoCreate(newg *g, pc uintptr) {
1574         newg.trace.seq = 0
1575         newg.trace.lastP = getg().m.p
1576         // +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum.
1577         id := trace.stackTab.put([]uintptr{logicalStackSentinel, startPCforTrace(pc) + sys.PCQuantum})
1578         traceEvent(traceEvGoCreate, 2, newg.goid, uint64(id))
1579 }
1580
1581 func traceGoStart() {
1582         gp := getg().m.curg
1583         pp := gp.m.p
1584         gp.trace.seq++
1585         if pp.ptr().gcMarkWorkerMode != gcMarkWorkerNotWorker {
1586                 traceEvent(traceEvGoStartLabel, -1, gp.goid, gp.trace.seq, trace.markWorkerLabels[pp.ptr().gcMarkWorkerMode])
1587         } else if gp.trace.lastP == pp {
1588                 traceEvent(traceEvGoStartLocal, -1, gp.goid)
1589         } else {
1590                 gp.trace.lastP = pp
1591                 traceEvent(traceEvGoStart, -1, gp.goid, gp.trace.seq)
1592         }
1593 }
1594
1595 func traceGoEnd() {
1596         traceEvent(traceEvGoEnd, -1)
1597 }
1598
1599 func traceGoSched() {
1600         gp := getg()
1601         gp.trace.lastP = gp.m.p
1602         traceEvent(traceEvGoSched, 1)
1603 }
1604
1605 func traceGoPreempt() {
1606         gp := getg()
1607         gp.trace.lastP = gp.m.p
1608         traceEvent(traceEvGoPreempt, 1)
1609 }
1610
1611 func traceGoPark(reason traceBlockReason, skip int) {
1612         // Convert the block reason directly to a trace event type.
1613         // See traceBlockReason for more information.
1614         traceEvent(byte(reason), skip)
1615 }
1616
1617 func traceGoUnpark(gp *g, skip int) {
1618         pp := getg().m.p
1619         gp.trace.seq++
1620         if gp.trace.lastP == pp {
1621                 traceEvent(traceEvGoUnblockLocal, skip, gp.goid)
1622         } else {
1623                 gp.trace.lastP = pp
1624                 traceEvent(traceEvGoUnblock, skip, gp.goid, gp.trace.seq)
1625         }
1626 }
1627
1628 func traceGoSysCall() {
1629         var skip int
1630         switch {
1631         case tracefpunwindoff():
1632                 // Unwind by skipping 1 frame relative to gp.syscallsp which is captured 3
1633                 // frames above this frame. For frame pointer unwinding we produce the same
1634                 // results by hard coding the number of frames in between our caller and the
1635                 // actual syscall, see cases below.
1636                 // TODO(felixge): Implement gp.syscallbp to avoid this workaround?
1637                 skip = 1
1638         case GOOS == "solaris" || GOOS == "illumos":
1639                 // These platforms don't use a libc_read_trampoline.
1640                 skip = 3
1641         default:
1642                 // Skip the extra trampoline frame used on most systems.
1643                 skip = 4
1644         }
1645         getg().m.curg.trace.tracedSyscallEnter = true
1646         traceEvent(traceEvGoSysCall, skip)
1647 }
1648
1649 func traceGoSysExit() {
1650         gp := getg().m.curg
1651         if !gp.trace.tracedSyscallEnter {
1652                 // There was no syscall entry traced for us at all, so there's definitely
1653                 // no EvGoSysBlock or EvGoInSyscall before us, which EvGoSysExit requires.
1654                 return
1655         }
1656         gp.trace.tracedSyscallEnter = false
1657         ts := gp.trace.sysExitTime
1658         if ts != 0 && ts < trace.startTime {
1659                 // There is a race between the code that initializes sysExitTimes
1660                 // (in exitsyscall, which runs without a P, and therefore is not
1661                 // stopped with the rest of the world) and the code that initializes
1662                 // a new trace. The recorded sysExitTime must therefore be treated
1663                 // as "best effort". If they are valid for this trace, then great,
1664                 // use them for greater accuracy. But if they're not valid for this
1665                 // trace, assume that the trace was started after the actual syscall
1666                 // exit (but before we actually managed to start the goroutine,
1667                 // aka right now), and assign a fresh time stamp to keep the log consistent.
1668                 ts = 0
1669         }
1670         gp.trace.sysExitTime = 0
1671         gp.trace.seq++
1672         gp.trace.lastP = gp.m.p
1673         traceEvent(traceEvGoSysExit, -1, gp.goid, gp.trace.seq, uint64(ts))
1674 }
1675
1676 func traceGoSysBlock(pp *p) {
1677         // Sysmon and stopTheWorld can declare syscalls running on remote Ps as blocked,
1678         // to handle this we temporary employ the P.
1679         mp := acquirem()
1680         oldp := mp.p
1681         mp.p.set(pp)
1682         traceEvent(traceEvGoSysBlock, -1)
1683         mp.p = oldp
1684         releasem(mp)
1685 }
1686
1687 func traceHeapAlloc(live uint64) {
1688         traceEvent(traceEvHeapAlloc, -1, live)
1689 }
1690
1691 func traceHeapGoal() {
1692         heapGoal := gcController.heapGoal()
1693         if heapGoal == ^uint64(0) {
1694                 // Heap-based triggering is disabled.
1695                 traceEvent(traceEvHeapGoal, -1, 0)
1696         } else {
1697                 traceEvent(traceEvHeapGoal, -1, heapGoal)
1698         }
1699 }
1700
1701 // To access runtime functions from runtime/trace.
1702 // See runtime/trace/annotation.go
1703
1704 //go:linkname trace_userTaskCreate runtime/trace.userTaskCreate
1705 func trace_userTaskCreate(id, parentID uint64, taskType string) {
1706         if !trace.enabled {
1707                 return
1708         }
1709
1710         // Same as in traceEvent.
1711         mp, pid, bufp := traceAcquireBuffer()
1712         if !trace.enabled && !mp.trace.startingTrace {
1713                 traceReleaseBuffer(mp, pid)
1714                 return
1715         }
1716
1717         typeStringID, bufp := traceString(bufp, pid, taskType)
1718         traceEventLocked(0, mp, pid, bufp, traceEvUserTaskCreate, 0, 3, id, parentID, typeStringID)
1719         traceReleaseBuffer(mp, pid)
1720 }
1721
1722 //go:linkname trace_userTaskEnd runtime/trace.userTaskEnd
1723 func trace_userTaskEnd(id uint64) {
1724         traceEvent(traceEvUserTaskEnd, 2, id)
1725 }
1726
1727 //go:linkname trace_userRegion runtime/trace.userRegion
1728 func trace_userRegion(id, mode uint64, name string) {
1729         if !trace.enabled {
1730                 return
1731         }
1732
1733         mp, pid, bufp := traceAcquireBuffer()
1734         if !trace.enabled && !mp.trace.startingTrace {
1735                 traceReleaseBuffer(mp, pid)
1736                 return
1737         }
1738
1739         nameStringID, bufp := traceString(bufp, pid, name)
1740         traceEventLocked(0, mp, pid, bufp, traceEvUserRegion, 0, 3, id, mode, nameStringID)
1741         traceReleaseBuffer(mp, pid)
1742 }
1743
1744 //go:linkname trace_userLog runtime/trace.userLog
1745 func trace_userLog(id uint64, category, message string) {
1746         if !trace.enabled {
1747                 return
1748         }
1749
1750         mp, pid, bufp := traceAcquireBuffer()
1751         if !trace.enabled && !mp.trace.startingTrace {
1752                 traceReleaseBuffer(mp, pid)
1753                 return
1754         }
1755
1756         categoryID, bufp := traceString(bufp, pid, category)
1757
1758         // The log message is recorded after all of the normal trace event
1759         // arguments, including the task, category, and stack IDs. We must ask
1760         // traceEventLocked to reserve extra space for the length of the message
1761         // and the message itself.
1762         extraSpace := traceBytesPerNumber + len(message)
1763         traceEventLocked(extraSpace, mp, pid, bufp, traceEvUserLog, 0, 3, id, categoryID)
1764         buf := bufp.ptr()
1765
1766         // double-check the message and its length can fit.
1767         // Otherwise, truncate the message.
1768         slen := len(message)
1769         if room := len(buf.arr) - buf.pos; room < slen+traceBytesPerNumber {
1770                 slen = room
1771         }
1772         buf.varint(uint64(slen))
1773         buf.pos += copy(buf.arr[buf.pos:], message[:slen])
1774
1775         traceReleaseBuffer(mp, pid)
1776 }
1777
1778 // the start PC of a goroutine for tracing purposes. If pc is a wrapper,
1779 // it returns the PC of the wrapped function. Otherwise it returns pc.
1780 func startPCforTrace(pc uintptr) uintptr {
1781         f := findfunc(pc)
1782         if !f.valid() {
1783                 return pc // may happen for locked g in extra M since its pc is 0.
1784         }
1785         w := funcdata(f, abi.FUNCDATA_WrapInfo)
1786         if w == nil {
1787                 return pc // not a wrapper
1788         }
1789         return f.datap.textAddr(*(*uint32)(w))
1790 }
1791
1792 // traceOneNewExtraM registers the fact that a new extra M was created with
1793 // the tracer. This matters if the M (which has an attached G) is used while
1794 // the trace is still active because if it is, we need the fact that it exists
1795 // to show up in the final trace.
1796 func traceOneNewExtraM(gp *g) {
1797         // Trigger two trace events for the locked g in the extra m,
1798         // since the next event of the g will be traceEvGoSysExit in exitsyscall,
1799         // while calling from C thread to Go.
1800         traceGoCreate(gp, 0) // no start pc
1801         gp.trace.seq++
1802         traceEvent(traceEvGoInSyscall, -1, gp.goid)
1803 }
1804
1805 // traceTime represents a timestamp for the trace.
1806 type traceTime uint64
1807
1808 // traceClockNow returns a monotonic timestamp. The clock this function gets
1809 // the timestamp from is specific to tracing, and shouldn't be mixed with other
1810 // clock sources.
1811 //
1812 // nosplit because it's called from exitsyscall, which is nosplit.
1813 //
1814 //go:nosplit
1815 func traceClockNow() traceTime {
1816         return traceTime(cputicks() / traceTimeDiv)
1817 }