import (
"internal/goarch"
+ "internal/goexperiment"
"internal/goos"
"runtime/internal/atomic"
"runtime/internal/math"
)
const (
- debugMalloc = false
-
maxTinySize = _TinySize
tinySizeClass = _TinySizeClass
maxSmallSize = _MaxSmallSize
pageShift = _PageShift
pageSize = _PageSize
- pageMask = _PageMask
- // By construction, single page spans of the smallest object class
- // have the most objects per span.
- maxObjsPerSpan = pageSize / 8
-
- concurrentSweep = _ConcurrentSweep
_PageSize = 1 << _PageShift
_PageMask = _PageSize - 1
// memory.
heapArenaBytes = 1 << logHeapArenaBytes
+ heapArenaWords = heapArenaBytes / goarch.PtrSize
+
// logHeapArenaBytes is log_2 of heapArenaBytes. For clarity,
// prefer using heapArenaBytes where possible (we need the
// constant to compute some other constants).
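// On most 64-bit platforms this works out to 26 (64 MiB arenas); on
// Windows, wasm, iOS/arm64, and all 32-bit platforms it is 22 (4 MiB arenas).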
logHeapArenaBytes = (6+20)*(_64bit*(1-goos.IsWindows)*(1-goarch.IsWasm)*(1-goos.IsIos*goarch.IsArm64)) + (2+20)*(_64bit*goos.IsWindows) + (2+20)*(1-_64bit) + (2+20)*goarch.IsWasm + (2+20)*goos.IsIos*goarch.IsArm64
- // heapArenaBitmapBytes is the size of each heap arena's bitmap.
- heapArenaBitmapBytes = heapArenaBytes / (goarch.PtrSize * 8 / 2)
+ // heapArenaBitmapWords is the size of each heap arena's bitmap in uintptrs.
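+ // There is one bitmap bit per pointer-sized word of arena memory, and the bits
+ // are packed into uintptr-sized words, hence heapArenaWords / (8 * PtrSize).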
+ heapArenaBitmapWords = heapArenaWords / (8 * goarch.PtrSize)
pagesPerArena = heapArenaBytes / pageSize
//
// This should agree with minZeroPage in the compiler.
minLegalPointer uintptr = 4096
+
+ // minHeapForMetadataHugePages sets a threshold on when certain kinds of
+ // heap metadata, currently the arenas map L2 entries and page alloc bitmap
+ // mappings, are allowed to be backed by huge pages. If the heap goal ever
+ // exceeds this threshold, then huge pages are enabled.
+ //
+ // These numbers are chosen with the assumption that huge pages are on the
+ // order of a few MiB in size.
+ //
+ // The kinds of metadata this applies to have a very low overhead relative to
+ // the address space used, but their constant overheads for small heaps would
+ // be very high if they were backed by huge pages (e.g. a few MiB makes a huge
+ // difference for an 8 MiB heap, but barely any difference for a 1 GiB heap).
+ // The benefit of huge pages is also not worth it for small heaps, because only
+ // a very small part of the metadata is actually used for small heaps.
+ //
+ // N.B. If the heap goal exceeds the threshold and then shrinks back to a very
+ // small size, huge pages will still be enabled for these mappings. The reason is
+ // that there's little point in disabling them unless we're also returning the
+ // physical memory for these metadata mappings to the OS, and that would be quite
+ // complex to do in general, as the heap is likely fragmented after a reduction in heap size.
+ minHeapForMetadataHugePages = 1 << 30
)
// physPageSize is the size in bytes of the OS's physical pages.
throw("bad TinySizeClass")
}
- if heapArenaBitmapBytes&(heapArenaBitmapBytes-1) != 0 {
+ if heapArenaBitmapWords&(heapArenaBitmapWords-1) != 0 {
// heapBits expects modular arithmetic on bitmap
// addresses to work.
- throw("heapArenaBitmapBytes not a power of 2")
+ throw("heapArenaBitmapWords not a power of 2")
}
// Check physPageSize.
print("pagesPerArena (", pagesPerArena, ") is not divisible by pagesPerReclaimerChunk (", pagesPerReclaimerChunk, ")\n")
throw("bad pagesPerReclaimerChunk")
}
+ if goexperiment.AllocHeaders {
+ // Check that the minimum size (exclusive) for a malloc header is also
+ // a size class boundary. This is important for making sure checks align
+ // across different parts of the runtime.
+ minSizeForMallocHeaderIsSizeClass := false
+ for i := 0; i < len(class_to_size); i++ {
+ if minSizeForMallocHeader == uintptr(class_to_size[i]) {
+ minSizeForMallocHeaderIsSizeClass = true
+ break
+ }
+ }
+ if !minSizeForMallocHeaderIsSizeClass {
+ throw("min size of malloc header is not a size class boundary")
+ }
+ // Check that the pointer bitmap for all small sizes without a malloc header
+ // fits in a word.
+ if minSizeForMallocHeader/goarch.PtrSize > 8*goarch.PtrSize {
+ throw("max pointer/scan bitmap size for headerless objects is too large")
+ }
+ }
+
+ if minTagBits > taggedPointerBits {
+ throw("taggedPointerbits too small")
+ }
// Initialize the heap.
mheap_.init()
mcache0 = allocmcache()
lockInit(&gcBitsArenas.lock, lockRankGcBitsArenas)
- lockInit(&proflock, lockRankProf)
+ lockInit(&profInsertLock, lockRankProfInsert)
+ lockInit(&profBlockLock, lockRankProfBlock)
+ lockInit(&profMemActiveLock, lockRankProfMemActive)
+ for i := range profMemFutureLock {
+ lockInit(&profMemFutureLock[i], lockRankProfMemFuture)
+ }
lockInit(&globalAlloc.mutex, lockRankGlobalAlloc)
// Create initial arena growth hints.
//
// On AIX, mmaps starts at 0x0A00000000000000 for 64-bit.
// processes.
+ //
+ // Space mapped for user arenas comes immediately after the range
+ // originally reserved for the regular heap when race mode is not
+ // enabled because user arena chunks can never be used for regular heap
+ // allocations and we want to avoid fragmenting the address space.
+ //
+ // In race mode we have no choice but to just use the same hints because
+ // the race detector requires that the heap be mapped contiguously.
for i := 0x7f; i >= 0; i-- {
var p uintptr
switch {
default:
p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
}
+ // Switch to generating hints for user arenas if we've gone
+ // through about half the hints. In race mode, take only about
+ // a quarter; we don't have very much space to work with.
+ hintList := &mheap_.arenaHints
+ if (!raceenabled && i > 0x3f) || (raceenabled && i > 0x5f) {
+ hintList = &mheap_.userArena.arenaHints
+ }
hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc())
hint.addr = p
- hint.next, mheap_.arenaHints = mheap_.arenaHints, hint
+ hint.next, *hintList = *hintList, hint
}
} else {
// On a 32-bit machine, we're much more concerned
hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc())
hint.addr = p
hint.next, mheap_.arenaHints = mheap_.arenaHints, hint
- }
+
+ // Place the hint for user arenas just after the large reservation.
+ //
+ // While this potentially competes with the hint above, in practice we probably
+ // aren't going to be getting this far anyway on 32-bit platforms.
+ userArenaHint := (*arenaHint)(mheap_.arenaHintAlloc.alloc())
+ userArenaHint.addr = p
+ userArenaHint.next, mheap_.userArena.arenaHints = mheap_.userArena.arenaHints, userArenaHint
+ }
+ // Initialize the memory limit here because the allocator is going to look at it,
+ // but we haven't called gcinit yet and we're definitely going to allocate memory before then.
+ gcController.memoryLimit.Store(maxInt64)
}
// sysAlloc allocates heap arena space for at least n bytes. The
// heapArenaBytes. sysAlloc returns nil on failure.
// There is no corresponding free function.
//
+// hintList is a list of hint addresses for where to allocate new
+// heap arenas. It must be non-nil.
+//
+// register indicates whether the heap arena should be registered
+// in allArenas.
+//
// sysAlloc returns a memory region in the Reserved state. This region must
// be transitioned to Prepared and then Ready before use.
//
// h must be locked.
-func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) {
+func (h *mheap) sysAlloc(n uintptr, hintList **arenaHint, register bool) (v unsafe.Pointer, size uintptr) {
assertLockHeld(&h.lock)
n = alignUp(n, heapArenaBytes)
- // First, try the arena pre-reservation.
- // Newly-used mappings are considered released.
- v = h.arena.alloc(n, heapArenaBytes, &gcController.heapReleased)
- if v != nil {
- size = n
- goto mapped
+ if hintList == &h.arenaHints {
+ // First, try the arena pre-reservation.
+ // Newly-used mappings are considered released.
+ //
+ // Only do this if we're using the regular heap arena hints.
+ // This behavior is only for the heap.
+ v = h.arena.alloc(n, heapArenaBytes, &gcController.heapReleased)
+ if v != nil {
+ size = n
+ goto mapped
+ }
}
// Try to grow the heap at a hint address.
- for h.arenaHints != nil {
- hint := h.arenaHints
+ for *hintList != nil {
+ hint := *hintList
p := hint.addr
if hint.down {
p -= n
if v != nil {
sysFreeOS(v, n)
}
- h.arenaHints = hint.next
+ *hintList = hint.next
h.arenaHintAlloc.free(unsafe.Pointer(hint))
}
if l2 == nil {
throw("out of memory allocating heap arena map")
}
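+ // Mark the newly allocated L2 entry as eligible (or not) for huge pages to
+ // match the current heap-wide setting; enableMetadataHugePages takes care of
+ // entries that already existed when the setting changed.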
+ if h.arenasHugePages {
+ sysHugePage(unsafe.Pointer(l2), unsafe.Sizeof(*l2))
+ } else {
+ sysNoHugePage(unsafe.Pointer(l2), unsafe.Sizeof(*l2))
+ }
atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri.l1()]), unsafe.Pointer(l2))
}
}
}
- // Add the arena to the arenas list.
- if len(h.allArenas) == cap(h.allArenas) {
- size := 2 * uintptr(cap(h.allArenas)) * goarch.PtrSize
- if size == 0 {
- size = physPageSize
- }
- newArray := (*notInHeap)(persistentalloc(size, goarch.PtrSize, &memstats.gcMiscSys))
- if newArray == nil {
- throw("out of memory allocating allArenas")
+ // Register the arena in allArenas if requested.
+ if register {
+ if len(h.allArenas) == cap(h.allArenas) {
+ size := 2 * uintptr(cap(h.allArenas)) * goarch.PtrSize
+ if size == 0 {
+ size = physPageSize
+ }
+ newArray := (*notInHeap)(persistentalloc(size, goarch.PtrSize, &memstats.gcMiscSys))
+ if newArray == nil {
+ throw("out of memory allocating allArenas")
+ }
+ oldSlice := h.allArenas
+ *(*notInHeapSlice)(unsafe.Pointer(&h.allArenas)) = notInHeapSlice{newArray, len(h.allArenas), int(size / goarch.PtrSize)}
+ copy(h.allArenas, oldSlice)
+ // Do not free the old backing array because
+ // there may be concurrent readers. Since we
+ // double the array each time, this can lead
+ // to at most 2x waste.
}
- oldSlice := h.allArenas
- *(*notInHeapSlice)(unsafe.Pointer(&h.allArenas)) = notInHeapSlice{newArray, len(h.allArenas), int(size / goarch.PtrSize)}
- copy(h.allArenas, oldSlice)
- // Do not free the old backing array because
- // there may be concurrent readers. Since we
- // double the array each time, this can lead
- // to at most 2x waste.
+ h.allArenas = h.allArenas[:len(h.allArenas)+1]
+ h.allArenas[len(h.allArenas)-1] = ri
}
- h.allArenas = h.allArenas[:len(h.allArenas)+1]
- h.allArenas[len(h.allArenas)-1] = ri
// Store atomically just in case an object from the
// new heap arena becomes visible before the heap lock
case p == 0:
return nil, 0
case p&(align-1) == 0:
- // We got lucky and got an aligned region, so we can
- // use the whole thing.
return unsafe.Pointer(p), size + align
case GOOS == "windows":
// On Windows we can't release pieces of a
}
}
+// enableMetadataHugePages enables huge pages for various sources of heap metadata.
+//
+// A note on latency: for sufficiently small heaps (<10s of GiB) this function will take constant
+// time, but may take time proportional to the size of the mapped heap beyond that.
+//
+// This function is idempotent.
+//
+// The heap lock must not be held over this operation, since it will briefly acquire
+// the heap lock.
+func (h *mheap) enableMetadataHugePages() {
+ // Enable huge pages for page structure.
+ h.pages.enableChunkHugePages()
+
+ // Grab the lock and set arenasHugePages if it's not already set.
+ //
+ // Once arenasHugePages is set, all new L2 entries will be eligible for
+ // huge pages. We'll set all the old entries after we release the lock.
+ lock(&h.lock)
+ if h.arenasHugePages {
+ unlock(&h.lock)
+ return
+ }
+ h.arenasHugePages = true
+ unlock(&h.lock)
+
+ // N.B. The arenas L1 map is quite small on all platforms, so it's fine to
+ // just iterate over the whole thing.
+ for i := range h.arenas {
+ l2 := (*[1 << arenaL2Bits]*heapArena)(atomic.Loadp(unsafe.Pointer(&h.arenas[i])))
+ if l2 == nil {
+ continue
+ }
+ sysHugePage(unsafe.Pointer(l2), unsafe.Sizeof(*l2))
+ }
+}
+
// base address for all 0-byte allocations
var zerobase uintptr
// nextFreeFast returns the next free object if one is quickly available.
// Otherwise it returns 0.
func nextFreeFast(s *mspan) gclinkptr {
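// s.allocCache caches the span's allocation bitmap starting at s.freeindex,
// with the bits inverted so that 1 means free; the lowest set bit therefore
// gives the offset of the next free object, if any.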
- theBit := sys.Ctz64(s.allocCache) // Is there a free object in the allocCache?
+ theBit := sys.TrailingZeros64(s.allocCache) // Is there a free object in the allocCache?
if theBit < 64 {
- result := s.freeindex + uintptr(theBit)
+ result := s.freeindex + uint16(theBit)
if result < s.nelems {
freeidx := result + 1
if freeidx%64 == 0 && freeidx != s.nelems {
s.allocCache >>= uint(theBit + 1)
s.freeindex = freeidx
s.allocCount++
- return gclinkptr(result*s.elemsize + s.base())
+ return gclinkptr(uintptr(result)*s.elemsize + s.base())
}
}
return 0
freeIndex := s.nextFreeIndex()
if freeIndex == s.nelems {
// The span is full.
- if uintptr(s.allocCount) != s.nelems {
+ if s.allocCount != s.nelems {
println("runtime: s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
throw("s.allocCount != s.nelems && freeIndex == s.nelems")
}
throw("freeIndex is not valid")
}
- v = gclinkptr(freeIndex*s.elemsize + s.base())
+ v = gclinkptr(uintptr(freeIndex)*s.elemsize + s.base())
s.allocCount++
- if uintptr(s.allocCount) > s.nelems {
+ if s.allocCount > s.nelems {
println("s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
throw("s.allocCount > s.nelems")
}
if size == 0 {
return unsafe.Pointer(&zerobase)
}
+
+ // It's possible for any malloc to trigger sweeping, which may in
+ // turn queue finalizers. Record this dynamic lock edge.
+ lockRankMayQueueFinalizer()
+
userSize := size
if asanenabled {
// Refer to ASAN runtime library, the malloc() function allocates extra memory,
// assistG is the G to charge for this allocation, or nil if
// GC is not currently active.
- var assistG *g
- if gcBlackenEnabled != 0 {
- // Charge the current user G for this allocation.
- assistG = getg()
- if assistG.m.curg != nil {
- assistG = assistG.m.curg
- }
- // Charge the allocation against the G. We'll account
- // for internal fragmentation at the end of mallocgc.
- assistG.gcAssistBytes -= int64(size)
-
- if assistG.gcAssistBytes < 0 {
- // This G is in debt. Assist the GC to correct
- // this before allocating. This must happen
- // before disabling preemption.
- gcAssistAlloc(assistG)
- }
- }
+ assistG := deductAssistCredit(size)
// Set mp.mallocing to keep from being preempted by GC.
mp := acquirem()
throw("mallocgc called without a P or outside bootstrapping")
}
var span *mspan
+ var header **_type
var x unsafe.Pointer
- noscan := typ == nil || typ.ptrdata == 0
+ noscan := typ == nil || typ.PtrBytes == 0
// In some cases block zeroing can profitably (for latency reduction purposes)
// be delayed till preemption is possible; delayedZeroing tracks that state.
delayedZeroing := false
- if size <= maxSmallSize {
+ // Determine if it's a 'small' object that goes into a size-classed span.
+ //
+ // Note: This comparison looks a little strange, but it exists to smooth out
+ // the crossover between the largest size class and large objects that have
+ // their own spans. The small window of object sizes between maxSmallSize-mallocHeaderSize
+ // and maxSmallSize will be considered large, even though they might fit in
+ // a size class. In practice this is completely fine, since the largest small
+ // size class has a single object in it already, precisely to make the transition
+ // to large objects smooth.
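+ // For example, with a 32 KiB maxSmallSize and an 8-byte malloc header, sizes
+ // in (32760, 32768] take the large-object path even though they would fit in
+ // the largest (32 KiB) size class.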
+ if size <= maxSmallSize-mallocHeaderSize {
if noscan && size < maxTinySize {
// Tiny allocator.
//
}
size = maxTinySize
} else {
+ hasHeader := !noscan && !heapBitsInSpan(size)
+ if goexperiment.AllocHeaders && hasHeader {
+ size += mallocHeaderSize
+ }
var sizeclass uint8
if size <= smallSizeMax-8 {
sizeclass = size_to_class8[divRoundUp(size, smallSizeDiv)]
}
x = unsafe.Pointer(v)
if needzero && span.needzero != 0 {
- memclrNoHeapPointers(unsafe.Pointer(v), size)
+ memclrNoHeapPointers(x, size)
+ }
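+ // The malloc header, a pointer to the object's type, sits at the start of
+ // the allocation: remember where it lives so the type can be stored there
+ // later, then advance x past it so the caller only sees the object's own memory.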
+ if goexperiment.AllocHeaders && hasHeader {
+ header = (**_type)(x)
+ x = add(x, mallocHeaderSize)
+ size -= mallocHeaderSize
}
}
} else {
delayedZeroing = true
} else {
memclrNoHeapPointers(x, size)
- // We've in theory cleared almost the whole span here,
- // and could take the extra step of actually clearing
- // the whole thing. However, don't. Any GC bits for the
- // uncleared parts will be zero, and it's just going to
- // be needzero = 1 once freed anyway.
}
}
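+ // Large objects don't use an inline malloc header; their type is recorded on
+ // the span itself (span.largeType), filled in through the header pointer below.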
+ if goexperiment.AllocHeaders && !noscan {
+ header = &span.largeType
+ }
}
-
- var scanSize uintptr
if !noscan {
- heapBitsSetType(uintptr(x), size, dataSize, typ)
- if dataSize > typ.size {
- // Array allocation. If there are any
- // pointers, GC has to scan to the last
- // element.
- if typ.ptrdata != 0 {
- scanSize = dataSize - typ.size + typ.ptrdata
- }
+ if goexperiment.AllocHeaders {
+ c.scanAlloc += heapSetType(uintptr(x), dataSize, typ, header, span)
} else {
- scanSize = typ.ptrdata
+ var scanSize uintptr
+ heapBitsSetType(uintptr(x), size, dataSize, typ)
+ if dataSize > typ.Size_ {
+ // Array allocation. If there are any
+ // pointers, GC has to scan to the last
+ // element.
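+ // For an n-element array this is (n-1)*typ.Size_ + typ.PtrBytes: every
+ // element but the last is scanned in full, plus the pointer-bearing
+ // prefix of the last element.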
+ if typ.PtrBytes != 0 {
+ scanSize = dataSize - typ.Size_ + typ.PtrBytes
+ }
+ } else {
+ scanSize = typ.PtrBytes
+ }
+ c.scanAlloc += scanSize
}
- c.scanAlloc += scanSize
}
// Ensure that the stores above that initialize x to
// the garbage collector could follow a pointer to x,
// but see uninitialized memory or stale heap bits.
publicationBarrier()
+ // As x and the heap bits are initialized, update
+ // freeIndexForScan now so x is seen by the GC
+ // (including conservative scan) as an allocated object.
+ // While this pointer can't escape into user code as a
+ // _live_ pointer until we return, conservative scanning
+ // may find a dead pointer that happens to point into this
+ // object. Delaying this update until now ensures that
+ // conservative scanning considers this pointer dead until
+ // this point.
+ span.freeIndexForScan = span.freeindex
// Allocate black during GC.
// All slots hold nil so no scanning is needed.
// This may be racing with GC so do it atomically if there can be
// a race marking the bit.
if gcphase != _GCoff {
- gcmarknewobject(span, uintptr(x), size, scanSize)
+ // Report the full size of the allocation as the number of bytes
+ // marked.
+ //
+ // When goexperiment.AllocHeaders is enabled, "size" no longer includes
+ // the allocation header, so use span.elemsize unconditionally.
+ gcmarknewobject(span, uintptr(x), span.elemsize)
}
if raceenabled {
if !noscan {
throw("delayed zeroing on data that may contain pointers")
}
+ if goexperiment.AllocHeaders && header != nil {
+ throw("unexpected malloc header in delayed zeroing of large object")
+ }
memclrNoHeapPointersChunked(size, x) // This is a possible preemption point: see #47302
}
return x
}
+// deductAssistCredit reduces the current G's assist credit
+// by size bytes, and assists the GC if necessary.
+//
+// Caller must be preemptible.
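+// (The assist may park the goroutine to wait for background GC credit.)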
+//
+// Returns the G for which the assist credit was accounted.
+func deductAssistCredit(size uintptr) *g {
+ var assistG *g
+ if gcBlackenEnabled != 0 {
+ // Charge the current user G for this allocation.
+ assistG = getg()
+ if assistG.m.curg != nil {
+ assistG = assistG.m.curg
+ }
+ // Charge the allocation against the G. We'll account
+ // for internal fragmentation at the end of mallocgc.
+ assistG.gcAssistBytes -= int64(size)
+
+ if assistG.gcAssistBytes < 0 {
+ // This G is in debt. Assist the GC to correct
+ // this before allocating. This must happen
+ // before disabling preemption.
+ gcAssistAlloc(assistG)
+ }
+ }
+ return assistG
+}
+
// memclrNoHeapPointersChunked repeatedly calls memclrNoHeapPointers
// on chunks of the buffer to be zeroed, with opportunities for preemption
// along the way. memclrNoHeapPointers contains no safepoints and also
// implementation of new builtin
// compiler (both frontend and SSA backend) knows the signature
-// of this function
+// of this function.
func newobject(typ *_type) unsafe.Pointer {
- return mallocgc(typ.size, typ, true)
+ return mallocgc(typ.Size_, typ, true)
}
//go:linkname reflect_unsafe_New reflect.unsafe_New
func reflect_unsafe_New(typ *_type) unsafe.Pointer {
- return mallocgc(typ.size, typ, true)
+ return mallocgc(typ.Size_, typ, true)
}
//go:linkname reflectlite_unsafe_New internal/reflectlite.unsafe_New
func reflectlite_unsafe_New(typ *_type) unsafe.Pointer {
- return mallocgc(typ.size, typ, true)
+ return mallocgc(typ.Size_, typ, true)
}
// newarray allocates an array of n elements of type typ.
func newarray(typ *_type, n int) unsafe.Pointer {
if n == 1 {
- return mallocgc(typ.size, typ, true)
+ return mallocgc(typ.Size_, typ, true)
}
- mem, overflow := math.MulUintptr(typ.size, uintptr(n))
+ mem, overflow := math.MulUintptr(typ.Size_, uintptr(n))
if overflow || mem > maxAlloc || n < 0 {
panic(plainError("runtime: allocation size out of range"))
}
}
if GOOS == "plan9" {
// Plan 9 doesn't support floating point in note handler.
- if g := getg(); g == g.m.gsignal {
+ if gp := getg(); gp == gp.m.gsignal {
return nextSampleNoFP()
}
}
// The returned memory will be zeroed.
// sysStat must be non-nil.
//
-// Consider marking persistentalloc'd types go:notinheap.
+// Consider marking persistentalloc'd types not in heap by embedding
+// runtime/internal/sys.NotInHeap.
func persistentalloc(size, align uintptr, sysStat *sysMemStat) unsafe.Pointer {
var p *notInHeap
systemstack(func() {
// notInHeap is off-heap memory allocated by a lower-level allocator
// like sysAlloc or persistentAlloc.
//
-// In general, it's better to use real types marked as go:notinheap,
-// but this serves as a generic type for situations where that isn't
-// possible (like in the allocators).
+// In general, it's better to use real types which embed
+// runtime/internal/sys.NotInHeap, but this serves as a generic type
+// for situations where that isn't possible (like in the allocators).
//
// TODO: Use this as the return type of sysAlloc, persistentAlloc, etc?
-//
-//go:notinheap
-type notInHeap struct{}
+type notInHeap struct{ _ sys.NotInHeap }
func (p *notInHeap) add(bytes uintptr) *notInHeap {
return (*notInHeap)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + bytes))