package runtime
import (
- "runtime/internal/atomic"
+ "internal/abi"
"runtime/internal/sys"
"unsafe"
)
-const maxCPUProfStack = 64
+const (
+ maxCPUProfStack = 64
+
+ // profBufWordCount is the size of the CPU profile buffer's storage for the
+ // header and stack of each sample, measured in 64-bit words. Every sample
+ // has a required header of two words. With a small additional header (a
+ // word or two) and stacks at the profiler's maximum length of 64 frames,
+ // that capacity can support 1900 samples or 19 thread-seconds at a 100 Hz
+ // sample rate, at a cost of 1 MiB.
+ profBufWordCount = 1 << 17
+ // profBufTagCount is the size of the CPU profile buffer's storage for the
+ // goroutine tags associated with each sample. A capacity of 1<<14 means
+ // room for 16k samples, or 160 thread-seconds at a 100 Hz sample rate.
+ profBufTagCount = 1 << 14
+)
type cpuProfile struct {
lock mutex
// 300 words per second.
// Hopefully a normal Go thread will get the profiling
// signal at least once every few seconds.
- extra [1000]uintptr
- numExtra int
- lostExtra uint64 // count of frames lost because extra is full
+ extra [1000]uintptr
+ numExtra int
+ lostExtra uint64 // count of frames lost because extra is full
+ lostAtomic uint64 // count of frames lost because of being in atomic64 on mips/arm; updated racily
}
var cpuprof cpuProfile
}
cpuprof.on = true
- cpuprof.log = newProfBuf(1, 1<<17, 1<<14)
+ cpuprof.log = newProfBuf(1, profBufWordCount, profBufTagCount)
hdr := [1]uint64{uint64(hz)}
cpuprof.log.write(nil, nanotime(), hdr[:], nil)
setcpuprofilerate(int32(hz))
// and cannot allocate memory or acquire locks that might be
// held at the time of the signal, nor can it use substantial amounts
// of stack.
+//
//go:nowritebarrierrec
-func (p *cpuProfile) add(gp *g, stk []uintptr) {
+func (p *cpuProfile) add(tagPtr *unsafe.Pointer, stk []uintptr) {
// Simple cas-lock to coordinate with setcpuprofilerate.
- for !atomic.Cas(&prof.signalLock, 0, 1) {
+ for !prof.signalLock.CompareAndSwap(0, 1) {
+ // TODO: Is it safe to osyield here? https://go.dev/issue/52672
osyield()
}
- if prof.hz != 0 { // implies cpuprof.log != nil
- if p.numExtra > 0 || p.lostExtra > 0 {
+ if prof.hz.Load() != 0 { // implies cpuprof.log != nil
+ if p.numExtra > 0 || p.lostExtra > 0 || p.lostAtomic > 0 {
p.addExtra()
}
hdr := [1]uint64{1}
// because otherwise its write barrier behavior may not
// be correct. See the long comment there before
// changing the argument here.
- cpuprof.log.write(&gp.labels, nanotime(), hdr[:], stk)
+ cpuprof.log.write(tagPtr, nanotime(), hdr[:], stk)
}
- atomic.Store(&prof.signalLock, 0)
+ prof.signalLock.Store(0)
}
// addNonGo adds the non-Go stack trace to the profile.
// Instead, we copy the stack into cpuprof.extra,
// which will be drained the next time a Go thread
// gets the signal handling event.
+//
//go:nosplit
//go:nowritebarrierrec
func (p *cpuProfile) addNonGo(stk []uintptr) {
// Simple cas-lock to coordinate with SetCPUProfileRate.
// (Other calls to add or addNonGo should be blocked out
// by the fact that only one SIGPROF can be handled by the
- // process at a time. If not, this lock will serialize those too.)
- for !atomic.Cas(&prof.signalLock, 0, 1) {
+ // process at a time. If not, this lock will serialize those too.
+ // The use of timer_create(2) on Linux to request process-targeted
+ // signals may have changed this.)
+ for !prof.signalLock.CompareAndSwap(0, 1) {
+ // TODO: Is it safe to osyield here? https://go.dev/issue/52672
osyield()
}
cpuprof.lostExtra++
}
- atomic.Store(&prof.signalLock, 0)
+ prof.signalLock.Store(0)
}
// addExtra adds the "extra" profiling events,
if p.lostExtra > 0 {
hdr := [1]uint64{p.lostExtra}
lostStk := [2]uintptr{
- funcPC(_LostExternalCode) + sys.PCQuantum,
- funcPC(_ExternalCode) + sys.PCQuantum,
+ abi.FuncPCABIInternal(_LostExternalCode) + sys.PCQuantum,
+ abi.FuncPCABIInternal(_ExternalCode) + sys.PCQuantum,
}
- cpuprof.log.write(nil, 0, hdr[:], lostStk[:])
+ p.log.write(nil, 0, hdr[:], lostStk[:])
+ p.lostExtra = 0
}
+
+ if p.lostAtomic > 0 {
+ hdr := [1]uint64{p.lostAtomic}
+ lostStk := [2]uintptr{
+ abi.FuncPCABIInternal(_LostSIGPROFDuringAtomic64) + sys.PCQuantum,
+ abi.FuncPCABIInternal(_System) + sys.PCQuantum,
+ }
+ p.log.write(nil, 0, hdr[:], lostStk[:])
+ p.lostAtomic = 0
+ }
+
}
// CPUProfile panics.
// The details of generating that format have changed,
// so this functionality has been removed.
//
-// Deprecated: use the runtime/pprof package,
-// or the handlers in the net/http/pprof package,
+// Deprecated: Use the [runtime/pprof] package,
+// or the handlers in the [net/http/pprof] package,
// or the testing package's -test.cpuprofile flag instead.
func CPUProfile() []byte {
panic("CPUProfile no longer available")
//go:linkname runtime_pprof_runtime_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond
func runtime_pprof_runtime_cyclesPerSecond() int64 {
- return tickspersecond()
+ return ticksPerSecond()
}
// readProfile, provided to runtime/pprof, returns the next chunk of
// If profiling is turned off and all the profile data accumulated while it was
// on has been returned, readProfile returns eof=true.
// The caller must save the returned data and tags before calling readProfile again.
+// The returned data contains a whole number of records, and tags contains
+// exactly one entry per record.
//
//go:linkname runtime_pprof_readProfile runtime/pprof.readProfile
func runtime_pprof_readProfile() ([]uint64, []unsafe.Pointer, bool) {
lock(&cpuprof.lock)
log := cpuprof.log
unlock(&cpuprof.lock)
- data, tags, eof := log.read(profBufBlocking)
+ readMode := profBufBlocking
+ if GOOS == "darwin" || GOOS == "ios" {
+ readMode = profBufNonBlocking // For #61768; on Darwin notes are not async-signal-safe. See sigNoteSetup in os_darwin.go.
+ }
+ data, tags, eof := log.read(readMode)
if len(data) == 0 && eof {
lock(&cpuprof.lock)
cpuprof.log = nil