1 // Copyright 2023 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 //go:build goexperiment.exectracer2
7 // CPU profile -> trace
11 // traceInitReadCPU initializes CPU profile -> tracer state for tracing.
13 // The new profBuf is not returned; it is stored in trace.cpuLogRead and
// (atomically) in trace.cpuLogWrite for the reader and writer to use.
14 func traceInitReadCPU() {
16 throw("traceInitReadCPU called with trace enabled")
18 // Create new profBuf for CPU samples that will be emitted as events.
19 profBuf := newProfBuf(3, profBufWordCount, profBufTagCount) // after the timestamp, header is [pp.id, gp.goid, mp.procid]
20 trace.cpuLogRead = profBuf
21 // We must not acquire trace.signalLock outside of a signal handler: a
22 // profiling signal may arrive at any time and try to acquire it, leading to
23 // deadlock. Because we can't use that lock to protect updates to
24 // trace.cpuLogWrite (only use of the structure it references), reads and
25 // writes of the pointer must be atomic. (And although this field is never
26 // the sole pointer to the profBuf value, it's best to allow a write barrier
28 trace.cpuLogWrite.Store(profBuf)
31 // traceStartReadCPU creates a goroutine to start reading CPU profile
32 // data into an active trace.
34 // traceAdvanceSema must be held.
35 func traceStartReadCPU() {
37 throw("traceStartReadCPU called with trace disabled")
39 // Spin up the logger goroutine.
40 trace.cpuSleep = newWakeableSleep()
41 done := make(chan struct{}, 1)
44 // Sleep here because traceReadCPU is non-blocking. This mirrors
45 // how the runtime/pprof package obtains CPU profile data.
47 // We can't do a blocking read here because Darwin can't do a
48 // wakeup from a signal handler, so all CPU profiling is just
49 // non-blocking. See #61768 for more details.
51 // Like the runtime/pprof package, even if that bug didn't exist
52 // we would still want to do a goroutine-level sleep in between
53 // reads to avoid frequent wakeups.
54 trace.cpuSleep.sleep(100_000_000)
55 if !traceReadCPU(trace.cpuLogRead) {
61 trace.cpuLogDone = done
64 // traceStopReadCPU blocks until the trace CPU reading goroutine exits.
66 // traceAdvanceSema must be held, and tracing must be disabled.
67 func traceStopReadCPU() {
69 throw("traceStopReadCPU called with trace enabled")
72 // Once we close the profbuf, we'll be in one of two situations:
73 // - The logger goroutine has already exited because it observed
74 // that the trace is disabled.
75 // - The logger goroutine is asleep.
77 // Wake the goroutine so it can observe that the buffer is
79 trace.cpuLogWrite.Store(nil)
80 trace.cpuLogRead.close()
83 // Wait until the logger goroutine exits.
86 // Clear state for the next trace.
87 trace.cpuLogDone = nil
88 trace.cpuLogRead = nil
89 trace.cpuSleep.close()
92 // traceReadCPU attempts to read from the provided profBuf and write
93 // into the trace. Returns true if there might be more to read or false
94 // if the profBuf is closed or the caller should otherwise stop reading.
96 // No more than one goroutine may be in traceReadCPU for the same profBuf at a time.
98 func traceReadCPU(pb *profBuf) bool {
99 var pcBuf [traceStackSize]uintptr
101 data, tags, eof := pb.read(profBufNonBlocking)
103 if len(data) < 4 || data[0] > uint64(len(data)) {
104 break // truncated profile
106 if data[0] < 4 || tags != nil && len(tags) < 1 {
107 break // malformed profile
110 break // mismatched profile records and tags
113 // Deserialize the data in the profile buffer.
117 if hasP := (data[2] & 0b1) != 0; !hasP {
122 stk := data[5:recordLen]
124 // Overflow records always have all-zero header words and a single-entry stack,
126 isOverflowRecord := len(stk) == 1 && data[2] == 0 && data[3] == 0 && data[4] == 0
128 // Move the data iterator forward.
129 data = data[recordLen:]
130 // No support here for reporting goroutine tags at the moment; if
131 // that information is to be part of the execution trace, we'd
132 // probably want to see when the tags are applied and when they
133 // change, instead of only seeing them when we get a CPU sample.
136 if isOverflowRecord {
137 // Looks like an overflow record from the profBuf. Not much to
138 // do here, we only want to report full records.
142 // Construct the stack for insertion to the stack table.
144 pcBuf[0] = logicalStackSentinel
145 for ; nstk < len(pcBuf) && nstk-1 < len(stk); nstk++ {
146 pcBuf[nstk] = uintptr(stk[nstk-1])
149 // Write out a trace event.
152 // Tracing disabled, exit without continuing.
155 w := unsafeTraceWriter(tl.gen, trace.cpuBuf[tl.gen%2])
157 // Ensure we have a place to write to.
159 w, flushed = w.ensure(2 + 5*traceBytesPerNumber /* traceEvCPUSamples + traceEvCPUSample + timestamp + g + m + p + stack ID */)
161 // Annotate the batch as containing CPU samples.
162 w.byte(byte(traceEvCPUSamples))
165 // Add the stack to the table.
166 stackID := trace.stackTab[tl.gen%2].put(pcBuf[:nstk])
168 // Write out the CPU sample.
169 w.byte(byte(traceEvCPUSample))
176 trace.cpuBuf[tl.gen%2] = w.traceBuf
182 // traceCPUFlush flushes trace.cpuBuf[gen%2]. The caller must be certain that gen
183 // has completed and that there are no more writers to it.
185 // Must run on the systemstack because it flushes buffers and acquires trace.lock
189 func traceCPUFlush(gen uintptr) {
190 if buf := trace.cpuBuf[gen%2]; buf != nil {
192 traceBufFlush(buf, gen)
194 trace.cpuBuf[gen%2] = nil
198 // traceCPUSample writes a CPU profile sample stack to the execution tracer's
199 // profiling buffer. It is called from a signal handler, so is limited in what
201 func traceCPUSample(gp *g, mp *m, pp *p, stk []uintptr) {
203 // Tracing is usually turned off; don't spend time acquiring the signal
204 // lock unless it's active.
208 now := traceClockNow()
209 // The "header" here is the ID of the M that was running the profiled code,
210 // followed by the IDs of the P and goroutine. (For normal CPU profiling, it's
211 // usually the number of samples with the given stack.) Near syscalls, pp
212 // may be nil. Reporting goid of 0 is fine for either g0 or a nil gp.
215 // Overflow records in profBuf have all header values set to zero. Make
216 // sure that real headers have at least one bit set.
217 hdr[0] = uint64(pp.id)<<1 | 0b1
225 hdr[2] = uint64(mp.procid)
228 // Allow only one writer at a time
229 for !trace.signalLock.CompareAndSwap(0, 1) {
230 // TODO: Is it safe to osyield here? https://go.dev/issue/52672
234 if log := trace.cpuLogWrite.Load(); log != nil {
235 // Note: we don't pass a tag pointer here (how should profiling tags
236 // interact with the execution tracer?), but if we did we'd need to be
237 // careful about write barriers. See the long comment in profBuf.write.
238 log.write(nil, int64(now), hdr[:], stk)
241 trace.signalLock.Store(0)