package runtime
import (
+ "internal/goarch"
+ "internal/goexperiment"
+ "internal/goos"
"runtime/internal/atomic"
"runtime/internal/math"
"runtime/internal/sys"
)
const (
- debugMalloc = false
-
maxTinySize = _TinySize
tinySizeClass = _TinySizeClass
maxSmallSize = _MaxSmallSize
pageShift = _PageShift
pageSize = _PageSize
- pageMask = _PageMask
- // By construction, single page spans of the smallest object class
- // have the most objects per span.
- maxObjsPerSpan = pageSize / 8
-
- concurrentSweep = _ConcurrentSweep
_PageSize = 1 << _PageShift
_PageMask = _PageSize - 1
// windows/32 | 4KB | 3
// windows/64 | 8KB | 2
// plan9 | 4KB | 3
- _NumStackOrders = 4 - sys.PtrSize/4*sys.GoosWindows - 1*sys.GoosPlan9
+ _NumStackOrders = 4 - goarch.PtrSize/4*goos.IsWindows - 1*goos.IsPlan9
// heapAddrBits is the number of bits in a heap address. On
// amd64, addresses are sign-extended beyond heapAddrBits. On
// we further limit it to 31 bits.
//
// On ios/arm64, although 64-bit pointers are presumably
- // available, pointers are truncated to 33 bits. Furthermore,
- // only the top 4 GiB of the address space are actually available
- // to the application, but we allow the whole 33 bits anyway for
- // simplicity.
- // TODO(mknyszek): Consider limiting it to 32 bits and using
- // arenaBaseOffset to offset into the top 4 GiB.
+ // available, pointers are truncated to 33 bits in iOS <14.
+ // Furthermore, only the top 4 GiB of the address space are
+ // actually available to the application. In iOS >=14, more
+ // of the address space is available, and the OS can now
+ // provide addresses outside of those 33 bits. Pick 40 bits
+ // as a reasonable balance between address space usage by the
+ // page allocator, and flexibility for what mmap'd regions
+ // we'll accept for the heap. We can't just move to the full
+ // 48 bits because this uses too much address space for older
+ // iOS versions.
+ // TODO(mknyszek): Once iOS <14 is deprecated, promote ios/arm64
+ // to a 48-bit address space like every other arm64 platform.
//
// WebAssembly currently has a limit of 4GB linear memory.
- heapAddrBits = (_64bit*(1-sys.GoarchWasm)*(1-sys.GoosIos*sys.GoarchArm64))*48 + (1-_64bit+sys.GoarchWasm)*(32-(sys.GoarchMips+sys.GoarchMipsle)) + 33*sys.GoosIos*sys.GoarchArm64
+ heapAddrBits = (_64bit*(1-goarch.IsWasm)*(1-goos.IsIos*goarch.IsArm64))*48 + (1-_64bit+goarch.IsWasm)*(32-(goarch.IsMips+goarch.IsMipsle)) + 40*goos.IsIos*goarch.IsArm64
// maxAlloc is the maximum size of an allocation. On 64-bit,
// it's theoretically possible to allocate 1<<heapAddrBits bytes. On
// memory.
heapArenaBytes = 1 << logHeapArenaBytes
+ heapArenaWords = heapArenaBytes / goarch.PtrSize
+
// logHeapArenaBytes is log_2 of heapArenaBytes. For clarity,
// prefer using heapArenaBytes where possible (we need the
// constant to compute some other constants).
- logHeapArenaBytes = (6+20)*(_64bit*(1-sys.GoosWindows)*(1-sys.GoarchWasm)*(1-sys.GoosIos*sys.GoarchArm64)) + (2+20)*(_64bit*sys.GoosWindows) + (2+20)*(1-_64bit) + (2+20)*sys.GoarchWasm + (2+20)*sys.GoosIos*sys.GoarchArm64
+ logHeapArenaBytes = (6+20)*(_64bit*(1-goos.IsWindows)*(1-goarch.IsWasm)*(1-goos.IsIos*goarch.IsArm64)) + (2+20)*(_64bit*goos.IsWindows) + (2+20)*(1-_64bit) + (2+20)*goarch.IsWasm + (2+20)*goos.IsIos*goarch.IsArm64
- // heapArenaBitmapBytes is the size of each heap arena's bitmap.
- heapArenaBitmapBytes = heapArenaBytes / (sys.PtrSize * 8 / 2)
+ // heapArenaBitmapWords is the size of each heap arena's bitmap in uintptrs.
+ heapArenaBitmapWords = heapArenaWords / (8 * goarch.PtrSize)
pagesPerArena = heapArenaBytes / pageSize
// We use the L1 map on 64-bit Windows because the arena size
// is small, but the address space is still 48 bits, and
// there's a high cost to having a large L2.
- arenaL1Bits = 6 * (_64bit * sys.GoosWindows)
+ arenaL1Bits = 6 * (_64bit * goos.IsWindows)
// arenaL2Bits is the number of bits of the arena number
// covered by the second level arena index.
//
// On other platforms, the user address space is contiguous
// and starts at 0, so no offset is necessary.
- arenaBaseOffset = 0xffff800000000000*sys.GoarchAmd64 + 0x0a00000000000000*sys.GoosAix
+ arenaBaseOffset = 0xffff800000000000*goarch.IsAmd64 + 0x0a00000000000000*goos.IsAix
// A typed version of this constant that will make it into DWARF (for viewcore).
arenaBaseOffsetUintptr = uintptr(arenaBaseOffset)
//
// This should agree with minZeroPage in the compiler.
minLegalPointer uintptr = 4096
+
+ // minHeapForMetadataHugePages sets a threshold on when certain kinds of
+ // heap metadata, currently the arenas map L2 entries and page alloc bitmap
+ // mappings, are allowed to be backed by huge pages. If the heap goal ever
+ // exceeds this threshold, then huge pages are enabled.
+ //
+ // These numbers are chosen with the assumption that huge pages are on the
+ // order of a few MiB in size.
+ //
+ // The kind of metadata this applies to has a very low overhead when compared
+ // to address space used, but their constant overheads for small heaps would
+ // be very high if they were to be backed by huge pages (e.g. a few MiB makes
+ // a huge difference for an 8 MiB heap, but barely any difference for a 1 GiB
+ // heap). The benefit of huge pages is also not worth it for small heaps,
+ // because only a very, very small part of the metadata is used for small heaps.
+ //
+ // N.B. If the heap goal exceeds the threshold then shrinks to a very small size
+ // again, then huge pages will still be enabled for this mapping. The reason is that
+ // there's no point unless we're also returning the physical memory for these
+ // metadata mappings back to the OS. That would be quite complex to do in general
+ // as the heap is likely fragmented after a reduction in heap size.
+ minHeapForMetadataHugePages = 1 << 30
)
// physPageSize is the size in bytes of the OS's physical pages.
physHugePageShift uint
)
-// OS memory management abstraction layer
-//
-// Regions of the address space managed by the runtime may be in one of four
-// states at any given time:
-// 1) None - Unreserved and unmapped, the default state of any region.
-// 2) Reserved - Owned by the runtime, but accessing it would cause a fault.
-// Does not count against the process' memory footprint.
-// 3) Prepared - Reserved, intended not to be backed by physical memory (though
-// an OS may implement this lazily). Can transition efficiently to
-// Ready. Accessing memory in such a region is undefined (may
-// fault, may give back unexpected zeroes, etc.).
-// 4) Ready - may be accessed safely.
-//
-// This set of states is more than is strictly necessary to support all the
-// currently supported platforms. One could get by with just None, Reserved, and
-// Ready. However, the Prepared state gives us flexibility for performance
-// purposes. For example, on POSIX-y operating systems, Reserved is usually a
-// private anonymous mmap'd region with PROT_NONE set, and to transition
-// to Ready would require setting PROT_READ|PROT_WRITE. However the
-// underspecification of Prepared lets us use just MADV_FREE to transition from
-// Ready to Prepared. Thus with the Prepared state we can set the permission
-// bits just once early on, we can efficiently tell the OS that it's free to
-// take pages away from us when we don't strictly need them.
-//
-// For each OS there is a common set of helpers defined that transition
-// memory regions between these states. The helpers are as follows:
-//
-// sysAlloc transitions an OS-chosen region of memory from None to Ready.
-// More specifically, it obtains a large chunk of zeroed memory from the
-// operating system, typically on the order of a hundred kilobytes
-// or a megabyte. This memory is always immediately available for use.
-//
-// sysFree transitions a memory region from any state to None. Therefore, it
-// returns memory unconditionally. It is used if an out-of-memory error has been
-// detected midway through an allocation or to carve out an aligned section of
-// the address space. It is okay if sysFree is a no-op only if sysReserve always
-// returns a memory region aligned to the heap allocator's alignment
-// restrictions.
-//
-// sysReserve transitions a memory region from None to Reserved. It reserves
-// address space in such a way that it would cause a fatal fault upon access
-// (either via permissions or not committing the memory). Such a reservation is
-// thus never backed by physical memory.
-// If the pointer passed to it is non-nil, the caller wants the
-// reservation there, but sysReserve can still choose another
-// location if that one is unavailable.
-// NOTE: sysReserve returns OS-aligned memory, but the heap allocator
-// may use larger alignment, so the caller must be careful to realign the
-// memory obtained by sysReserve.
-//
-// sysMap transitions a memory region from Reserved to Prepared. It ensures the
-// memory region can be efficiently transitioned to Ready.
-//
-// sysUsed transitions a memory region from Prepared to Ready. It notifies the
-// operating system that the memory region is needed and ensures that the region
-// may be safely accessed. This is typically a no-op on systems that don't have
-// an explicit commit step and hard over-commit limits, but is critical on
-// Windows, for example.
-//
-// sysUnused transitions a memory region from Ready to Prepared. It notifies the
-// operating system that the physical pages backing this memory region are no
-// longer needed and can be reused for other purposes. The contents of a
-// sysUnused memory region are considered forfeit and the region must not be
-// accessed again until sysUsed is called.
-//
-// sysFault transitions a memory region from Ready or Prepared to Reserved. It
-// marks a region such that it will always fault if accessed. Used only for
-// debugging the runtime.
-
func mallocinit() {
if class_to_size[_TinySizeClass] != _TinySize {
throw("bad TinySizeClass")
}
- if heapArenaBitmapBytes&(heapArenaBitmapBytes-1) != 0 {
+ if heapArenaBitmapWords&(heapArenaBitmapWords-1) != 0 {
// heapBits expects modular arithmetic on bitmap
// addresses to work.
- throw("heapArenaBitmapBytes not a power of 2")
- }
-
- // Copy class sizes out for statistics table.
- for i := range class_to_size {
- memstats.by_size[i].size = uint32(class_to_size[i])
+ throw("heapArenaBitmapWords not a power of 2")
}
// Check physPageSize.
print("pagesPerArena (", pagesPerArena, ") is not divisible by pagesPerReclaimerChunk (", pagesPerReclaimerChunk, ")\n")
throw("bad pagesPerReclaimerChunk")
}
+ if goexperiment.AllocHeaders {
+ // Check that the minimum size (exclusive) for a malloc header is also
+ // a size class boundary. This is important to making sure checks align
+ // across different parts of the runtime.
+ minSizeForMallocHeaderIsSizeClass := false
+ for i := 0; i < len(class_to_size); i++ {
+ if minSizeForMallocHeader == uintptr(class_to_size[i]) {
+ minSizeForMallocHeaderIsSizeClass = true
+ break
+ }
+ }
+ if !minSizeForMallocHeaderIsSizeClass {
+ throw("min size of malloc header is not a size class boundary")
+ }
+ // Check that the pointer bitmap for all small sizes without a malloc header
+ // fits in a word.
+ if minSizeForMallocHeader/goarch.PtrSize > 8*goarch.PtrSize {
+ throw("max pointer/scan bitmap size for headerless objects is too large")
+ }
+ }
+
+ if minTagBits > taggedPointerBits {
+ throw("taggedPointerbits too small")
+ }
// Initialize the heap.
mheap_.init()
mcache0 = allocmcache()
lockInit(&gcBitsArenas.lock, lockRankGcBitsArenas)
- lockInit(&proflock, lockRankProf)
+ lockInit(&profInsertLock, lockRankProfInsert)
+ lockInit(&profBlockLock, lockRankProfBlock)
+ lockInit(&profMemActiveLock, lockRankProfMemActive)
+ for i := range profMemFutureLock {
+ lockInit(&profMemFutureLock[i], lockRankProfMemFuture)
+ }
lockInit(&globalAlloc.mutex, lockRankGlobalAlloc)
// Create initial arena growth hints.
- if sys.PtrSize == 8 {
+ if goarch.PtrSize == 8 {
// On a 64-bit machine, we pick the following hints
// because:
//
//
// On AIX, mmaps starts at 0x0A00000000000000 for 64-bit.
// processes.
+ //
+ // Space mapped for user arenas comes immediately after the range
+ // originally reserved for the regular heap when race mode is not
+ // enabled because user arena chunks can never be used for regular heap
+ // allocations and we want to avoid fragmenting the address space.
+ //
+ // In race mode we have no choice but to just use the same hints because
+ // the race detector requires that the heap be mapped contiguously.
for i := 0x7f; i >= 0; i-- {
var p uintptr
switch {
default:
p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
}
+ // Switch to generating hints for user arenas if we've gone
+ // through about half the hints. In race mode, take only about
+ // a quarter; we don't have very much space to work with.
+ hintList := &mheap_.arenaHints
+ if (!raceenabled && i > 0x3f) || (raceenabled && i > 0x5f) {
+ hintList = &mheap_.userArena.arenaHints
+ }
hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc())
hint.addr = p
- hint.next, mheap_.arenaHints = mheap_.arenaHints, hint
+ hint.next, *hintList = *hintList, hint
}
} else {
// On a 32-bit machine, we're much more concerned
hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc())
hint.addr = p
hint.next, mheap_.arenaHints = mheap_.arenaHints, hint
- }
+
+ // Place the hint for user arenas just after the large reservation.
+ //
+ // While this potentially competes with the hint above, in practice we probably
+ // aren't going to be getting this far anyway on 32-bit platforms.
+ userArenaHint := (*arenaHint)(mheap_.arenaHintAlloc.alloc())
+ userArenaHint.addr = p
+ userArenaHint.next, mheap_.userArena.arenaHints = mheap_.userArena.arenaHints, userArenaHint
+ }
+ // Initialize the memory limit here because the allocator is going to look at it
+ // but we haven't called gcinit yet and we're definitely going to allocate memory before then.
+ gcController.memoryLimit.Store(maxInt64)
}
// sysAlloc allocates heap arena space for at least n bytes. The
// heapArenaBytes. sysAlloc returns nil on failure.
// There is no corresponding free function.
//
+// hintList is a list of hint addresses for where to allocate new
+// heap arenas. It must be non-nil.
+//
+// register indicates whether the heap arena should be registered
+// in allArenas.
+//
// sysAlloc returns a memory region in the Reserved state. This region must
// be transitioned to Prepared and then Ready before use.
//
// h must be locked.
-func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) {
+func (h *mheap) sysAlloc(n uintptr, hintList **arenaHint, register bool) (v unsafe.Pointer, size uintptr) {
assertLockHeld(&h.lock)
n = alignUp(n, heapArenaBytes)
- // First, try the arena pre-reservation.
- v = h.arena.alloc(n, heapArenaBytes, &memstats.heap_sys)
- if v != nil {
- size = n
- goto mapped
+ if hintList == &h.arenaHints {
+ // First, try the arena pre-reservation.
+ // Newly-used mappings are considered released.
+ //
+ // Only do this if we're using the regular heap arena hints.
+ // This behavior is only for the heap.
+ v = h.arena.alloc(n, heapArenaBytes, &gcController.heapReleased)
+ if v != nil {
+ size = n
+ goto mapped
+ }
}
// Try to grow the heap at a hint address.
- for h.arenaHints != nil {
- hint := h.arenaHints
+ for *hintList != nil {
+ hint := *hintList
p := hint.addr
if hint.down {
p -= n
// particular, this is already how Windows behaves, so
// it would simplify things there.
if v != nil {
- sysFree(v, n, nil)
+ sysFreeOS(v, n)
}
- h.arenaHints = hint.next
+ *hintList = hint.next
h.arenaHintAlloc.free(unsafe.Pointer(hint))
}
l2 := h.arenas[ri.l1()]
if l2 == nil {
// Allocate an L2 arena map.
- l2 = (*[1 << arenaL2Bits]*heapArena)(persistentalloc(unsafe.Sizeof(*l2), sys.PtrSize, nil))
+ //
+ // Use sysAllocOS instead of sysAlloc or persistentalloc because there's no
+ // statistic we can comfortably account for this space in. With this structure,
+ // we rely on demand paging to avoid large overheads, but tracking which memory
+ // is paged in is too expensive. Trying to account for the whole region means
+ // that it will appear like an enormous memory overhead in statistics, even though
+ // it is not.
+ l2 = (*[1 << arenaL2Bits]*heapArena)(sysAllocOS(unsafe.Sizeof(*l2)))
if l2 == nil {
throw("out of memory allocating heap arena map")
}
+ if h.arenasHugePages {
+ sysHugePage(unsafe.Pointer(l2), unsafe.Sizeof(*l2))
+ } else {
+ sysNoHugePage(unsafe.Pointer(l2), unsafe.Sizeof(*l2))
+ }
atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri.l1()]), unsafe.Pointer(l2))
}
throw("arena already initialized")
}
var r *heapArena
- r = (*heapArena)(h.heapArenaAlloc.alloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gcMiscSys))
+ r = (*heapArena)(h.heapArenaAlloc.alloc(unsafe.Sizeof(*r), goarch.PtrSize, &memstats.gcMiscSys))
if r == nil {
- r = (*heapArena)(persistentalloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gcMiscSys))
+ r = (*heapArena)(persistentalloc(unsafe.Sizeof(*r), goarch.PtrSize, &memstats.gcMiscSys))
if r == nil {
throw("out of memory allocating heap arena metadata")
}
}
- // Add the arena to the arenas list.
- if len(h.allArenas) == cap(h.allArenas) {
- size := 2 * uintptr(cap(h.allArenas)) * sys.PtrSize
- if size == 0 {
- size = physPageSize
- }
- newArray := (*notInHeap)(persistentalloc(size, sys.PtrSize, &memstats.gcMiscSys))
- if newArray == nil {
- throw("out of memory allocating allArenas")
+ // Register the arena in allArenas if requested.
+ if register {
+ if len(h.allArenas) == cap(h.allArenas) {
+ size := 2 * uintptr(cap(h.allArenas)) * goarch.PtrSize
+ if size == 0 {
+ size = physPageSize
+ }
+ newArray := (*notInHeap)(persistentalloc(size, goarch.PtrSize, &memstats.gcMiscSys))
+ if newArray == nil {
+ throw("out of memory allocating allArenas")
+ }
+ oldSlice := h.allArenas
+ *(*notInHeapSlice)(unsafe.Pointer(&h.allArenas)) = notInHeapSlice{newArray, len(h.allArenas), int(size / goarch.PtrSize)}
+ copy(h.allArenas, oldSlice)
+ // Do not free the old backing array because
+ // there may be concurrent readers. Since we
+ // double the array each time, this can lead
+ // to at most 2x waste.
}
- oldSlice := h.allArenas
- *(*notInHeapSlice)(unsafe.Pointer(&h.allArenas)) = notInHeapSlice{newArray, len(h.allArenas), int(size / sys.PtrSize)}
- copy(h.allArenas, oldSlice)
- // Do not free the old backing array because
- // there may be concurrent readers. Since we
- // double the array each time, this can lead
- // to at most 2x waste.
+ h.allArenas = h.allArenas[:len(h.allArenas)+1]
+ h.allArenas[len(h.allArenas)-1] = ri
}
- h.allArenas = h.allArenas[:len(h.allArenas)+1]
- h.allArenas[len(h.allArenas)-1] = ri
// Store atomically just in case an object from the
// new heap arena becomes visible before the heap lock
case p == 0:
return nil, 0
case p&(align-1) == 0:
- // We got lucky and got an aligned region, so we can
- // use the whole thing.
return unsafe.Pointer(p), size + align
case GOOS == "windows":
// On Windows we can't release pieces of a
// reservation, so we release the whole thing and
// re-reserve the aligned sub-region. This may race,
// so we may have to try again.
- sysFree(unsafe.Pointer(p), size+align, nil)
+ sysFreeOS(unsafe.Pointer(p), size+align)
p = alignUp(p, align)
p2 := sysReserve(unsafe.Pointer(p), size)
if p != uintptr(p2) {
// Must have raced. Try again.
- sysFree(p2, size, nil)
+ sysFreeOS(p2, size)
if retries++; retries == 100 {
throw("failed to allocate aligned heap memory; too many retries")
}
default:
// Trim off the unaligned parts.
pAligned := alignUp(p, align)
- sysFree(unsafe.Pointer(p), pAligned-p, nil)
+ sysFreeOS(unsafe.Pointer(p), pAligned-p)
end := pAligned + size
endLen := (p + size + align) - end
if endLen > 0 {
- sysFree(unsafe.Pointer(end), endLen, nil)
+ sysFreeOS(unsafe.Pointer(end), endLen)
}
return unsafe.Pointer(pAligned), size
}
}
+// enableMetadataHugePages enables huge pages for various sources of heap metadata.
+//
+// A note on latency: for sufficiently small heaps (<10s of GiB) this function will take constant
+// time, but may take time proportional to the size of the mapped heap beyond that.
+//
+// This function is idempotent.
+//
+// The heap lock must not be held over this operation, since it will briefly acquire
+// the heap lock.
+func (h *mheap) enableMetadataHugePages() {
+ // Enable huge pages for page structure.
+ h.pages.enableChunkHugePages()
+
+ // Grab the lock and set arenasHugePages if it's not.
+ //
+ // Once arenasHugePages is set, all new L2 entries will be eligible for
+ // huge pages. We'll set all the old entries after we release the lock.
+ lock(&h.lock)
+ if h.arenasHugePages {
+ unlock(&h.lock)
+ return
+ }
+ h.arenasHugePages = true
+ unlock(&h.lock)
+
+ // N.B. The arenas L1 map is quite small on all platforms, so it's fine to
+ // just iterate over the whole thing.
+ for i := range h.arenas {
+ l2 := (*[1 << arenaL2Bits]*heapArena)(atomic.Loadp(unsafe.Pointer(&h.arenas[i])))
+ if l2 == nil {
+ continue
+ }
+ sysHugePage(unsafe.Pointer(l2), unsafe.Sizeof(*l2))
+ }
+}
+
// base address for all 0-byte allocations
var zerobase uintptr
// nextFreeFast returns the next free object if one is quickly available.
// Otherwise it returns 0.
func nextFreeFast(s *mspan) gclinkptr {
- theBit := sys.Ctz64(s.allocCache) // Is there a free object in the allocCache?
+ theBit := sys.TrailingZeros64(s.allocCache) // Is there a free object in the allocCache?
if theBit < 64 {
- result := s.freeindex + uintptr(theBit)
+ result := s.freeindex + uint16(theBit)
if result < s.nelems {
freeidx := result + 1
if freeidx%64 == 0 && freeidx != s.nelems {
s.allocCache >>= uint(theBit + 1)
s.freeindex = freeidx
s.allocCount++
- return gclinkptr(result*s.elemsize + s.base())
+ return gclinkptr(uintptr(result)*s.elemsize + s.base())
}
}
return 0
freeIndex := s.nextFreeIndex()
if freeIndex == s.nelems {
// The span is full.
- if uintptr(s.allocCount) != s.nelems {
+ if s.allocCount != s.nelems {
println("runtime: s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
throw("s.allocCount != s.nelems && freeIndex == s.nelems")
}
throw("freeIndex is not valid")
}
- v = gclinkptr(freeIndex*s.elemsize + s.base())
+ v = gclinkptr(uintptr(freeIndex)*s.elemsize + s.base())
s.allocCount++
- if uintptr(s.allocCount) > s.nelems {
+ if s.allocCount > s.nelems {
println("s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
throw("s.allocCount > s.nelems")
}
return unsafe.Pointer(&zerobase)
}
+ // It's possible for any malloc to trigger sweeping, which may in
+ // turn queue finalizers. Record this dynamic lock edge.
+ lockRankMayQueueFinalizer()
+
+ userSize := size
+ if asanenabled {
+ // Refer to ASAN runtime library, the malloc() function allocates extra memory,
+ // the redzone, around the user requested memory region. And the redzones are marked
+ // as unaddressable. We perform the same operations in Go to detect the overflows or
+ // underflows.
+ size += computeRZlog(size)
+ }
+
if debug.malloc {
if debug.sbrk != 0 {
align := uintptr(16)
// assistG is the G to charge for this allocation, or nil if
// GC is not currently active.
- var assistG *g
- if gcBlackenEnabled != 0 {
- // Charge the current user G for this allocation.
- assistG = getg()
- if assistG.m.curg != nil {
- assistG = assistG.m.curg
- }
- // Charge the allocation against the G. We'll account
- // for internal fragmentation at the end of mallocgc.
- assistG.gcAssistBytes -= int64(size)
-
- if assistG.gcAssistBytes < 0 {
- // This G is in debt. Assist the GC to correct
- // this before allocating. This must happen
- // before disabling preemption.
- gcAssistAlloc(assistG)
- }
- }
+ assistG := deductAssistCredit(size)
// Set mp.mallocing to keep from being preempted by GC.
mp := acquirem()
mp.mallocing = 1
shouldhelpgc := false
- dataSize := size
- c := getMCache()
+ dataSize := userSize
+ c := getMCache(mp)
if c == nil {
throw("mallocgc called without a P or outside bootstrapping")
}
var span *mspan
+ var header **_type
var x unsafe.Pointer
- noscan := typ == nil || typ.ptrdata == 0
+ noscan := typ == nil || typ.PtrBytes == 0
// In some cases block zeroing can profitably (for latency reduction purposes)
- // be delayed till preemption is possible; isZeroed tracks that state.
- isZeroed := true
- if size <= maxSmallSize {
+ // be delayed till preemption is possible; delayedZeroing tracks that state.
+ delayedZeroing := false
+ // Determine if it's a 'small' object that goes into a size-classed span.
+ //
+ // Note: This comparison looks a little strange, but it exists to smooth out
+ // the crossover between the largest size class and large objects that have
+ // their own spans. The small window of object sizes between maxSmallSize-mallocHeaderSize
+ // and maxSmallSize will be considered large, even though they might fit in
+ // a size class. In practice this is completely fine, since the largest small
+ // size class has a single object in it already, precisely to make the transition
+ // to large objects smooth.
+ if size <= maxSmallSize-mallocHeaderSize {
if noscan && size < maxTinySize {
// Tiny allocator.
//
// Align tiny pointer for required (conservative) alignment.
if size&7 == 0 {
off = alignUp(off, 8)
- } else if sys.PtrSize == 4 && size == 12 {
+ } else if goarch.PtrSize == 4 && size == 12 {
// Conservatively align 12-byte objects to 8 bytes on 32-bit
// systems so that objects whose first field is a 64-bit
// value is aligned to 8 bytes and does not cause a fault on
}
size = maxTinySize
} else {
+ hasHeader := !noscan && !heapBitsInSpan(size)
+ if goexperiment.AllocHeaders && hasHeader {
+ size += mallocHeaderSize
+ }
var sizeclass uint8
if size <= smallSizeMax-8 {
sizeclass = size_to_class8[divRoundUp(size, smallSizeDiv)]
}
x = unsafe.Pointer(v)
if needzero && span.needzero != 0 {
- memclrNoHeapPointers(unsafe.Pointer(v), size)
+ memclrNoHeapPointers(x, size)
+ }
+ if goexperiment.AllocHeaders && hasHeader {
+ header = (**_type)(x)
+ x = add(x, mallocHeaderSize)
+ size -= mallocHeaderSize
}
}
} else {
shouldhelpgc = true
// For large allocations, keep track of zeroed state so that
// bulk zeroing can be happen later in a preemptible context.
- span, isZeroed = c.allocLarge(size, needzero && !noscan, noscan)
+ span = c.allocLarge(size, noscan)
span.freeindex = 1
span.allocCount = 1
- x = unsafe.Pointer(span.base())
size = span.elemsize
+ x = unsafe.Pointer(span.base())
+ if needzero && span.needzero != 0 {
+ if noscan {
+ delayedZeroing = true
+ } else {
+ memclrNoHeapPointers(x, size)
+ }
+ }
+ if goexperiment.AllocHeaders && !noscan {
+ header = &span.largeType
+ }
}
-
- var scanSize uintptr
if !noscan {
- heapBitsSetType(uintptr(x), size, dataSize, typ)
- if dataSize > typ.size {
- // Array allocation. If there are any
- // pointers, GC has to scan to the last
- // element.
- if typ.ptrdata != 0 {
- scanSize = dataSize - typ.size + typ.ptrdata
- }
+ if goexperiment.AllocHeaders {
+ c.scanAlloc += heapSetType(uintptr(x), dataSize, typ, header, span)
} else {
- scanSize = typ.ptrdata
+ var scanSize uintptr
+ heapBitsSetType(uintptr(x), size, dataSize, typ)
+ if dataSize > typ.Size_ {
+ // Array allocation. If there are any
+ // pointers, GC has to scan to the last
+ // element.
+ if typ.PtrBytes != 0 {
+ scanSize = dataSize - typ.Size_ + typ.PtrBytes
+ }
+ } else {
+ scanSize = typ.PtrBytes
+ }
+ c.scanAlloc += scanSize
}
- c.scanAlloc += scanSize
}
// Ensure that the stores above that initialize x to
// the garbage collector could follow a pointer to x,
// but see uninitialized memory or stale heap bits.
publicationBarrier()
+ // As x and the heap bits are initialized, update
+ // freeIndexForScan now so x is seen by the GC
+ // (including conservative scan) as an allocated object.
+ // While this pointer can't escape into user code as a
+ // _live_ pointer until we return, conservative scanning
+ // may find a dead pointer that happens to point into this
+ // object. Delaying this update until now ensures that
+ // conservative scanning considers this pointer dead until
+ // this point.
+ span.freeIndexForScan = span.freeindex
// Allocate black during GC.
// All slots hold nil so no scanning is needed.
// This may be racing with GC so do it atomically if there can be
// a race marking the bit.
if gcphase != _GCoff {
- gcmarknewobject(span, uintptr(x), size, scanSize)
+ // Pass the full size of the allocation to the number of bytes
+ // marked.
+ //
+ // If !goexperiment.AllocHeaders, "size" doesn't include the
+ // allocation header, so use span.elemsize unconditionally.
+ gcmarknewobject(span, uintptr(x), span.elemsize)
}
if raceenabled {
msanmalloc(x, size)
}
+ if asanenabled {
+ // We should only read/write the memory with the size asked by the user.
+ // The rest of the allocated memory should be poisoned, so that we can report
+ // errors when accessing poisoned memory.
+ // The allocated memory is larger than required userSize, it will also include
+ // redzone and some other padding bytes.
+ rzBeg := unsafe.Add(x, userSize)
+ asanpoison(rzBeg, size-userSize)
+ asanunpoison(x, userSize)
+ }
+
+ if rate := MemProfileRate; rate > 0 {
+ // Note cache c only valid while m acquired; see #47302
+ if rate != 1 && size < c.nextSample {
+ c.nextSample -= size
+ } else {
+ profilealloc(mp, x, size)
+ }
+ }
mp.mallocing = 0
releasem(mp)
// Pointerfree data can be zeroed late in a context where preemption can occur.
// x will keep the memory alive.
- if !isZeroed && needzero {
- memclrNoHeapPointersChunked(size, x)
+ if delayedZeroing {
+ if !noscan {
+ throw("delayed zeroing on data that may contain pointers")
+ }
+ if goexperiment.AllocHeaders && header != nil {
+ throw("unexpected malloc header in delayed zeroing of large object")
+ }
+ memclrNoHeapPointersChunked(size, x) // This is a possible preemption point: see #47302
}
if debug.malloc {
}
}
- if rate := MemProfileRate; rate > 0 {
- if rate != 1 && size < c.nextSample {
- c.nextSample -= size
- } else {
- mp := acquirem()
- profilealloc(mp, x, size)
- releasem(mp)
- }
- }
-
if assistG != nil {
// Account for internal fragmentation in the assist
// debt now that we know it.
return x
}
+// deductAssistCredit reduces the current G's assist credit
+// by size bytes, and assists the GC if necessary.
+//
+// Caller must be preemptible.
+//
+// Returns the G for which the assist credit was accounted.
+func deductAssistCredit(size uintptr) *g {
+ var assistG *g
+ if gcBlackenEnabled != 0 {
+ // Charge the current user G for this allocation.
+ assistG = getg()
+ if assistG.m.curg != nil {
+ assistG = assistG.m.curg
+ }
+ // Charge the allocation against the G. We'll account
+ // for internal fragmentation at the end of mallocgc.
+ assistG.gcAssistBytes -= int64(size)
+
+ if assistG.gcAssistBytes < 0 {
+ // This G is in debt. Assist the GC to correct
+ // this before allocating. This must happen
+ // before disabling preemption.
+ gcAssistAlloc(assistG)
+ }
+ }
+ return assistG
+}
+
// memclrNoHeapPointersChunked repeatedly calls memclrNoHeapPointers
// on chunks of the buffer to be zeroed, with opportunities for preemption
// along the way. memclrNoHeapPointers contains no safepoints and also
// implementation of new builtin
// compiler (both frontend and SSA backend) knows the signature
-// of this function
+// of this function.
func newobject(typ *_type) unsafe.Pointer {
- return mallocgc(typ.size, typ, true)
+ return mallocgc(typ.Size_, typ, true)
}
//go:linkname reflect_unsafe_New reflect.unsafe_New
func reflect_unsafe_New(typ *_type) unsafe.Pointer {
- return mallocgc(typ.size, typ, true)
+ return mallocgc(typ.Size_, typ, true)
}
//go:linkname reflectlite_unsafe_New internal/reflectlite.unsafe_New
func reflectlite_unsafe_New(typ *_type) unsafe.Pointer {
- return mallocgc(typ.size, typ, true)
+ return mallocgc(typ.Size_, typ, true)
}
// newarray allocates an array of n elements of type typ.
func newarray(typ *_type, n int) unsafe.Pointer {
if n == 1 {
- return mallocgc(typ.size, typ, true)
+ return mallocgc(typ.Size_, typ, true)
}
- mem, overflow := math.MulUintptr(typ.size, uintptr(n))
+ mem, overflow := math.MulUintptr(typ.Size_, uintptr(n))
if overflow || mem > maxAlloc || n < 0 {
panic(plainError("runtime: allocation size out of range"))
}
}
func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
- c := getMCache()
+ c := getMCache(mp)
if c == nil {
throw("profilealloc called without a P or outside bootstrapping")
}
}
if GOOS == "plan9" {
// Plan 9 doesn't support floating point in note handler.
- if g := getg(); g == g.m.gsignal {
+ if gp := getg(); gp == gp.m.gsignal {
return nextSampleNoFP()
}
}
// x = -log_e(q) * mean
// x = log_2(q) * (-log_e(2)) * mean ; Using log_2 for efficiency
const randomBitCount = 26
- q := fastrand()%(1<<randomBitCount) + 1
+ q := fastrandn(1<<randomBitCount) + 1
qlog := fastlog2(float64(q)) - randomBitCount
if qlog > 0 {
qlog = 0
rate = 0x3fffffff
}
if rate != 0 {
- return uintptr(fastrand() % uint32(2*rate))
+ return uintptr(fastrandn(uint32(2 * rate)))
}
return 0
}
// Intended for things like function/type/debug-related persistent data.
// If align is 0, uses default align (currently 8).
// The returned memory will be zeroed.
+// sysStat must be non-nil.
//
-// Consider marking persistentalloc'd types go:notinheap.
+// Consider marking persistentalloc'd types not in heap by embedding
+// runtime/internal/sys.NotInHeap.
func persistentalloc(size, align uintptr, sysStat *sysMemStat) unsafe.Pointer {
var p *notInHeap
systemstack(func() {
// Must run on system stack because stack growth can (re)invoke it.
// See issue 9174.
+//
//go:systemstack
func persistentalloc1(size, align uintptr, sysStat *sysMemStat) *notInHeap {
const (
break
}
}
- persistent.off = alignUp(sys.PtrSize, align)
+ persistent.off = alignUp(goarch.PtrSize, align)
}
p := persistent.base.add(persistent.off)
persistent.off += size
// inPersistentAlloc reports whether p points to memory allocated by
// persistentalloc. This must be nosplit because it is called by the
// cgo checker code, which is called by the write barrier code.
+//
//go:nosplit
func inPersistentAlloc(p uintptr) bool {
chunk := atomic.Loaduintptr((*uintptr)(unsafe.Pointer(&persistentChunks)))
if pEnd := alignUp(l.next-1, physPageSize); pEnd > l.mapped {
if l.mapMemory {
// Transition from Reserved to Prepared to Ready.
- sysMap(unsafe.Pointer(l.mapped), pEnd-l.mapped, sysStat)
- sysUsed(unsafe.Pointer(l.mapped), pEnd-l.mapped)
+ n := pEnd - l.mapped
+ sysMap(unsafe.Pointer(l.mapped), n, sysStat)
+ sysUsed(unsafe.Pointer(l.mapped), n, n)
}
l.mapped = pEnd
}
// notInHeap is off-heap memory allocated by a lower-level allocator
// like sysAlloc or persistentAlloc.
//
-// In general, it's better to use real types marked as go:notinheap,
-// but this serves as a generic type for situations where that isn't
-// possible (like in the allocators).
+// In general, it's better to use real types which embed
+// runtime/internal/sys.NotInHeap, but this serves as a generic type
+// for situations where that isn't possible (like in the allocators).
//
// TODO: Use this as the return type of sysAlloc, persistentAlloc, etc?
-//
-//go:notinheap
-type notInHeap struct{}
+type notInHeap struct{ _ sys.NotInHeap }
func (p *notInHeap) add(bytes uintptr) *notInHeap {
return (*notInHeap)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + bytes))
}
+
+// computeRZlog computes the size of the redzone.
+// Refer to the implementation of the compiler-rt.
+func computeRZlog(userSize uintptr) uintptr {
+ switch {
+ case userSize <= (64 - 16):
+ return 16 << 0
+ case userSize <= (128 - 32):
+ return 16 << 1
+ case userSize <= (512 - 64):
+ return 16 << 2
+ case userSize <= (4096 - 128):
+ return 16 << 3
+ case userSize <= (1<<14)-256:
+ return 16 << 4
+ case userSize <= (1<<15)-512:
+ return 16 << 5
+ case userSize <= (1<<16)-1024:
+ return 16 << 6
+ default:
+ return 16 << 7
+ }
+}