return
}
if after.HeapReleased-before.HeapReleased < bigBytes-slack {
- t.Fatalf("less than %d released: %d -> %d", bigBytes, before.HeapReleased, after.HeapReleased)
+ t.Fatalf("less than %d released: %d -> %d", bigBytes-slack, before.HeapReleased, after.HeapReleased)
}
}
// None of the tests need any higher-level locking, so we just
// take the lock internally.
lock(pp.mheapLock)
- pp.free(base, npages, true)
+ pp.free(base, npages)
unlock(pp.mheapLock)
})
}
func (p *PageAlloc) Scavenge(nbytes uintptr) (r uintptr) {
pp := (*pageAlloc)(p)
systemstack(func() {
- r = pp.scavenge(nbytes, nil)
+ r = pp.scavenge(nbytes, nil, true)
})
return
}
p := new(pageAlloc)
// We've got an entry, so initialize the pageAlloc.
- p.init(new(mutex), testSysStat)
+ p.init(new(mutex), testSysStat, true)
lockInit(p.mheapLock, lockRankMheap)
- p.test = true
for i, init := range chunks {
addr := chunkBase(chunkIdx(i))
})
// Initialize the bitmap and update pageAlloc metadata.
- chunk := p.chunkOf(chunkIndex(addr))
+ ci := chunkIndex(addr)
+ chunk := p.chunkOf(ci)
// Clear all the scavenged bits set by grow.
chunk.scavenged.clearRange(0, pallocChunkPages)
+ // Simulate the allocation and subsequent free of all pages in
+ // the chunk for the scavenge index. This leaves the index in a
+ // state equivalent to all pages in the chunk being free.
+ p.scav.index.alloc(ci, pallocChunkPages)
+ p.scav.index.free(ci, 0, pallocChunkPages)
+
// Apply scavenge state if applicable.
if scav != nil {
if scvg, ok := scav[i]; ok {
// it and it's a no-op anyway.
if s.N != 0 {
chunk.allocRange(s.I, s.N)
- }
- }
- // Make sure the scavenge index is updated.
- //
- // This is an inefficient way to do it, but it's also the simplest way.
- minPages := physPageSize / pageSize
- if minPages < 1 {
- minPages = 1
- }
- _, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, minPages)
- if npages != 0 {
- p.scav.index.mark(addr, addr+pallocChunkBytes)
+ // Make sure the scavenge index is updated.
+ p.scav.index.alloc(ci, s.N)
+ }
}
// Update heap metadata for the allocRange calls above.
for l := 0; l < summaryLevels; l++ {
sysFreeOS(unsafe.Pointer(&p.summary[l][0]), uintptr(cap(p.summary[l]))*pallocSumBytes)
}
- // Only necessary on 64-bit. This is a global on 32-bit.
- sysFreeOS(unsafe.Pointer(&p.scav.index.chunks[0]), uintptr(cap(p.scav.index.chunks)))
} else {
resSize := uintptr(0)
for _, s := range p.summary {
sysFreeOS(unsafe.Pointer(&p.summary[0][0]), alignUp(resSize, physPageSize))
}
+ // Free extra data structures.
+ sysFreeOS(unsafe.Pointer(&p.scav.index.chunks[0]), uintptr(cap(p.scav.index.chunks))*unsafe.Sizeof(atomicScavChunkData{}))
+
// Subtract back out whatever we mapped for the summaries.
// sysUsed adds to p.sysStat and memstats.mappedReady no matter what
// (and in anger should actually be accounted for), and there's no other
func NewScavengeIndex(min, max ChunkIdx) *ScavengeIndex {
s := new(ScavengeIndex)
- s.i.chunks = make([]atomic.Uint8, uintptr(1<<heapAddrBits/pallocChunkBytes/8))
- s.i.min.Store(int32(min / 8))
- s.i.max.Store(int32(max / 8))
+ // This is a bit lazy but we easily guarantee we'll be able
+ // to reference all the relevant chunks. The worst-case
+ // memory usage here is 512 MiB, but tests generally use
+ // small offsets from BaseChunkIdx, which results in ~100s
+ // of KiB in memory use.
+ //
+ // This may still be worth making better, at least by sharing
+ // this fairly large array across calls with a sync.Pool or
+ // something. Currently, when the tests are run serially,
+ // it takes around 0.5s. Not all that much, but if we have
+ // a lot of tests like this it could add up.
+ s.i.chunks = make([]atomicScavChunkData, max)
+ s.i.min.Store(uintptr(min))
+ s.i.max.Store(uintptr(max))
+ s.i.test = true
return s
}
-func (s *ScavengeIndex) Find() (ChunkIdx, uint) {
- ci, off := s.i.find()
+func (s *ScavengeIndex) Find(force bool) (ChunkIdx, uint) {
+ ci, off := s.i.find(force)
return ChunkIdx(ci), off
}
-func (s *ScavengeIndex) Mark(base, limit uintptr) {
- s.i.mark(base, limit)
+func (s *ScavengeIndex) AllocRange(base, limit uintptr) {
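+ // limit is exclusive, so limit-1 is the last byte in the range; convert it
+ // to inclusive chunk and page indices.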
+ sc, ec := chunkIndex(base), chunkIndex(limit-1)
+ si, ei := chunkPageIndex(base), chunkPageIndex(limit-1)
+
+ if sc == ec {
+ // The range doesn't cross any chunk boundaries.
+ s.i.alloc(sc, ei+1-si)
+ } else {
+ // The range crosses at least one chunk boundary.
+ s.i.alloc(sc, pallocChunkPages-si)
+ for c := sc + 1; c < ec; c++ {
+ s.i.alloc(c, pallocChunkPages)
+ }
+ s.i.alloc(ec, ei+1)
+ }
+}
+
+func (s *ScavengeIndex) FreeRange(base, limit uintptr) {
+ sc, ec := chunkIndex(base), chunkIndex(limit-1)
+ si, ei := chunkPageIndex(base), chunkPageIndex(limit-1)
+
+ if sc == ec {
+ // The range doesn't cross any chunk boundaries.
+ s.i.free(sc, si, ei+1-si)
+ } else {
+ // The range crosses at least one chunk boundary.
+ s.i.free(sc, si, pallocChunkPages-si)
+ for c := sc + 1; c < ec; c++ {
+ s.i.free(c, 0, pallocChunkPages)
+ }
+ s.i.free(ec, 0, ei+1)
+ }
+}
+
+func (s *ScavengeIndex) ResetSearchAddrs() {
+ for _, a := range []*atomicOffAddr{&s.i.searchAddrBg, &s.i.searchAddrForce} {
+ addr, marked := a.Load()
+ if marked {
+ a.StoreUnmark(addr, addr)
+ }
+ a.Clear()
+ }
+ s.i.freeHWM = minOffAddr
+}
+
+func (s *ScavengeIndex) NextGen() {
+ s.i.nextGen()
+}
+
+func (s *ScavengeIndex) SetEmpty(ci ChunkIdx) {
+ s.i.setEmpty(chunkIdx(ci))
}
-func (s *ScavengeIndex) Clear(ci ChunkIdx) {
- s.i.clear(chunkIdx(ci))
+func (s *ScavengeIndex) SetNoHugePage(ci ChunkIdx) bool {
+ return s.i.setNoHugePage(chunkIdx(ci))
+}
+
+func CheckPackScavChunkData(gen uint32, inUse, lastInUse uint16, flags uint8) bool {
+ sc0 := scavChunkData{
+ gen: gen,
+ inUse: inUse,
+ lastInUse: lastInUse,
+ scavChunkFlags: scavChunkFlags(flags),
+ }
+ scp := sc0.pack()
+ sc1 := unpackScavChunkData(scp)
+ return sc0 == sc1
}
const GTrackingPeriod = gTrackingPeriod
var adviseUnused = uint32(_MADV_FREE)
func sysUnusedOS(v unsafe.Pointer, n uintptr) {
- // By default, Linux's "transparent huge page" support will
- // merge pages into a huge page if there's even a single
- // present regular page, undoing the effects of madvise(adviseUnused)
- // below. On amd64, that means khugepaged can turn a single
- // 4KB page to 2MB, bloating the process's RSS by as much as
- // 512X. (See issue #8832 and Linux kernel bug
- // https://bugzilla.kernel.org/show_bug.cgi?id=93111)
- //
- // To work around this, we explicitly disable transparent huge
- // pages when we release pages of the heap. However, we have
- // to do this carefully because changing this flag tends to
- // split the VMA (memory mapping) containing v in to three
- // VMAs in order to track the different values of the
- // MADV_NOHUGEPAGE flag in the different regions. There's a
- // default limit of 65530 VMAs per address space (sysctl
- // vm.max_map_count), so we must be careful not to create too
- // many VMAs (see issue #12233).
- //
- // Since huge pages are huge, there's little use in adjusting
- // the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
- // exploding the number of VMAs by only adjusting the
- // MADV_NOHUGEPAGE flag on a large granularity. This still
- // gets most of the benefit of huge pages while keeping the
- // number of VMAs under control. With hugePageSize = 2MB, even
- // a pessimal heap can reach 128GB before running out of VMAs.
- if physHugePageSize != 0 {
- // If it's a large allocation, we want to leave huge
- // pages enabled. Hence, we only adjust the huge page
- // flag on the huge pages containing v and v+n-1, and
- // only if those aren't aligned.
- var head, tail uintptr
- if uintptr(v)&(physHugePageSize-1) != 0 {
- // Compute huge page containing v.
- head = alignDown(uintptr(v), physHugePageSize)
- }
- if (uintptr(v)+n)&(physHugePageSize-1) != 0 {
- // Compute huge page containing v+n-1.
- tail = alignDown(uintptr(v)+n-1, physHugePageSize)
- }
-
- // Note that madvise will return EINVAL if the flag is
- // already set, which is quite likely. We ignore
- // errors.
- if head != 0 && head+physHugePageSize == tail {
- // head and tail are different but adjacent,
- // so do this in one call.
- madvise(unsafe.Pointer(head), 2*physHugePageSize, _MADV_NOHUGEPAGE)
- } else {
- // Advise the huge pages containing v and v+n-1.
- if head != 0 {
- madvise(unsafe.Pointer(head), physHugePageSize, _MADV_NOHUGEPAGE)
- }
- if tail != 0 && tail != head {
- madvise(unsafe.Pointer(tail), physHugePageSize, _MADV_NOHUGEPAGE)
- }
- }
- }
-
if uintptr(v)&(physPageSize-1) != 0 || n&(physPageSize-1) != 0 {
// madvise will round this to any physical page
// *covered* by this range, so an unaligned madvise
throw("runtime: cannot remap pages in address space")
}
return
-
- // Don't do the sysHugePage optimization in hard decommit mode.
- // We're breaking up pages everywhere, there's no point.
}
- // Partially undo the NOHUGEPAGE marks from sysUnused
- // for whole huge pages between v and v+n. This may
- // leave huge pages off at the end points v and v+n
- // even though allocations may cover these entire huge
- // pages. We could detect this and undo NOHUGEPAGE on
- // the end points as well, but it's probably not worth
- // the cost because when neighboring allocations are
- // freed sysUnused will just set NOHUGEPAGE again.
- sysHugePageOS(v, n)
}
func sysHugePageOS(v unsafe.Pointer, n uintptr) {
injectglist(&work.sweepWaiters.list)
unlock(&work.sweepWaiters.lock)
+ // Increment the scavenge generation now.
+ //
+ // This moment represents peak heap in use because we're
+ // about to start sweeping.
+ mheap_.pages.scav.index.nextGen()
+
// Release the CPU limiter.
gcCPULimiter.finishGCTransition(now)
// fragmentation and reduce the RSS of Go applications.
//
// Scavenging in Go happens on two fronts: there's the background
-// (asynchronous) scavenger and the heap-growth (synchronous) scavenger.
+// (asynchronous) scavenger and the allocation-time (synchronous) scavenger.
//
// The former happens on a goroutine much like the background sweeper which is
// soft-capped at using scavengePercent of the mutator's time, based on
-// order-of-magnitude estimates of the costs of scavenging. The background
-// scavenger's primary goal is to bring the estimated heap RSS of the
+// order-of-magnitude estimates of the costs of scavenging. The latter happens
+// when allocating pages from the heap.
+//
+// The scavenger's primary goal is to bring the estimated heap RSS of the
// application down to a goal.
//
// Before we consider what this looks like, we need to split the world into two
//
// The goals are updated after each GC.
//
-// The synchronous heap-growth scavenging happens whenever the heap grows in
-// size, for some definition of heap-growth. The intuition behind this is that
-// the application had to grow the heap because existing fragments were
-// not sufficiently large to satisfy a page-level memory allocation, so we
-// scavenge those fragments eagerly to offset the growth in RSS that results.
+// Synchronous scavenging happens for one of two reasons: if an allocation would
+// exceed the memory limit or whenever the heap grows in size, for some
+// definition of heap-growth. The intuition behind this second reason is that the
+// application had to grow the heap because existing fragments were not sufficiently
+// large to satisfy a page-level memory allocation, so we scavenge those fragments
+// eagerly to offset the growth in RSS that results.
+//
+// Lastly, not all pages are available for scavenging at all times and in all cases.
+// The background scavenger and heap-growth scavenger only release memory in chunks
+// that have not been densely allocated for at least 1 full GC cycle. The reason
+// behind this is the likelihood of reuse: the Go heap is allocated in a first-fit order
+// and by the end of the GC mark phase, the heap tends to be densely packed. Releasing
+// memory in these densely packed chunks while they're being packed is counter-productive,
+// and worse, it breaks up huge pages on systems that support them. The scavenger (invoked
+// during memory allocation) further ensures that chunks it identifies as "dense" are
+// immediately eligible for being backed by huge pages. Note that for the most part these
+// density heuristics are best-effort. It's totally possible (but unlikely)
+// that a chunk that just became dense is scavenged in the case of a race between memory
+// allocation and scavenging.
+//
+// When synchronously scavenging for the memory limit or for debug.FreeOSMemory, these
+// "dense" packing heuristics are ignored (in other words, scavenging is "forced") because
+// in these scenarios returning memory to the OS is more important than keeping CPU
+// overheads low.
package runtime
// This ratio is used as part of multiplicative factor to help the scavenger account
// for the additional costs of using scavenged memory in its pacing.
scavengeCostRatio = 0.7 * (goos.IsDarwin + goos.IsIos)
+
+ // scavChunkHiOccFrac indicates the fraction of pages that need to be allocated
+ // in the chunk in a single GC cycle for it to be considered high density.
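+ //
+ // With an 8 KiB runtime page and a 512-page palloc chunk, this works out
+ // to 496 pages.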
+ scavChunkHiOccFrac = 0.96875
+ scavChunkHiOccPages = uint16(scavChunkHiOccFrac * pallocChunkPages)
)
// heapRetained returns an estimate of the current heap RSS.
if s.scavenge == nil {
s.scavenge = func(n uintptr) (uintptr, int64) {
start := nanotime()
- r := mheap_.pages.scavenge(n, nil)
+ r := mheap_.pages.scavenge(n, nil, false)
end := nanotime()
if start >= end {
return r, 0
// scavenge scavenges nbytes worth of free pages, starting with the
// highest address first. Successive calls continue from where it left
-// off until the heap is exhausted. Call scavengeStartGen to bring it
-// back to the top of the heap.
+// off until the heap is exhausted. force makes all memory available to
+// scavenge, ignoring huge page heuristics.
//
// Returns the amount of memory scavenged in bytes.
//
// scavenge always tries to scavenge nbytes worth of memory, and will
// only fail to do so if the heap is exhausted for now.
-func (p *pageAlloc) scavenge(nbytes uintptr, shouldStop func() bool) uintptr {
+func (p *pageAlloc) scavenge(nbytes uintptr, shouldStop func() bool, force bool) uintptr {
released := uintptr(0)
for released < nbytes {
- ci, pageIdx := p.scav.index.find()
+ ci, pageIdx := p.scav.index.find(force)
if ci == 0 {
break
}
// Mark the range we're about to scavenge as allocated, because
// we don't want any allocating goroutines to grab it while
- // the scavenging is in progress.
- if scav := p.allocRange(addr, uintptr(npages)); scav != 0 {
- throw("double scavenge")
- }
+ // the scavenging is in progress. Be careful here -- just do the
+ // bare minimum to avoid stepping on our own scavenging stats.
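+ // In particular, p.allocRange would also feed this range into
+ // p.scav.index.alloc, making this internal bookkeeping look like a
+ // real allocation to the density statistics.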
+ p.chunkOf(ci).allocRange(base, npages)
+ p.update(addr, uintptr(npages), true, true)
+
+ // Grab whether the chunk is hugepage backed and if it is,
+ // clear it. We're about to break up this huge page.
+ shouldNoHugePage := p.scav.index.setNoHugePage(ci)
// With that done, it's safe to unlock.
unlock(p.mheapLock)
if !p.test {
pageTraceScav(getg().m.p.ptr(), 0, addr, uintptr(npages))
- // Only perform the actual scavenging if we're not in a test.
+ // Only perform sys* operations if we're not in a test.
// It's dangerous to do so otherwise.
+ if shouldNoHugePage {
+ sysNoHugePage(unsafe.Pointer(chunkBase(ci)), pallocChunkBytes)
+ }
sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)
// Update global accounting only when not in test, otherwise
// the runtime's accounting will be wrong.
- nbytes := int64(npages) * pageSize
+ nbytes := int64(npages * pageSize)
gcController.heapReleased.add(nbytes)
gcController.heapFree.add(-nbytes)
// Relock the heap, because now we need to make these pages
// available allocation. Free them back to the page allocator.
lock(p.mheapLock)
- p.free(addr, uintptr(npages), true)
+ if b := (offAddr{addr}); b.lessThan(p.searchAddr) {
+ p.searchAddr = b
+ }
+ p.chunkOf(ci).free(base, npages)
+ p.update(addr, uintptr(npages), true, false)
// Mark the range as scavenged.
p.chunkOf(ci).scavenged.setRange(base, npages)
}
}
// Mark this chunk as having no free pages.
- p.scav.index.clear(ci)
+ p.scav.index.setEmpty(ci)
unlock(p.mheapLock)
return 0
// scavengeIndex is a structure for efficiently managing which pageAlloc chunks have
// memory available to scavenge.
type scavengeIndex struct {
- // chunks is a bitmap representing the entire address space. Each bit represents
- // a single chunk, and a 1 value indicates the presence of pages available for
- // scavenging. Updates to the bitmap are serialized by the pageAlloc lock.
+ // chunks is a scavChunkData-per-chunk structure that indicates the presence of pages
+ // available for scavenging. Updates to the index are serialized by the pageAlloc lock.
//
- // The underlying storage of chunks is platform dependent and may not even be
- // totally mapped read/write. min and max reflect the extent that is safe to access.
- // min is inclusive, max is exclusive.
+ // It tracks chunk occupancy and a generation counter per chunk. If a chunk's occupancy
+ // never exceeds scavChunkHiOccPages over the course of a single GC cycle, the chunk
+ // becomes eligible for scavenging on the next cycle. If a chunk ever hits this density
+ // threshold it immediately becomes unavailable for scavenging in the current cycle as
+ // well as the next.
//
- // searchAddr is the maximum address (in the offset address space, so we have a linear
+ // For a chunk size of 4 MiB this structure will only use 2 MiB for a 1 TiB contiguous heap.
+ chunks []atomicScavChunkData
+ min, max atomic.Uintptr
+
+ // searchAddr* is the maximum address (in the offset address space, so we have a linear
// view of the address space; see mranges.go:offAddr) containing memory available to
// scavenge. It is a hint to the find operation to avoid O(n^2) behavior in repeated lookups.
//
- // searchAddr is always inclusive and should be the base address of the highest runtime
+ // searchAddr* is always inclusive and should be the base address of the highest runtime
// page available for scavenging.
//
- // searchAddr is managed by both find and mark.
+ // searchAddrForce is managed by find and free.
+ // searchAddrBg is managed by find and nextGen.
//
- // Normally, find monotonically decreases searchAddr as it finds no more free pages to
+ // Normally, find monotonically decreases searchAddr* as it finds no more free pages to
// scavenge. However, free and nextGen, when marking a new chunk at an index greater than the current
// searchAddr, set searchAddr to the *negative* index into chunks of that page. The trick here
- // is that concurrent calls to find will fail to monotonically decrease searchAddr, and so they
+ // is that concurrent calls to find will fail to monotonically decrease searchAddr*, and so they
// won't barge over new memory becoming available to scavenge. Furthermore, this ensures
// that some future caller of find *must* observe the new high index. That caller
// (or any other racing with it), then makes searchAddr positive before continuing, bringing
// A pageAlloc lock serializes updates between min, max, and searchAddr, so abs(searchAddr)
// is always guaranteed to be >= min and < max (converted to heap addresses).
//
- // TODO(mknyszek): Ideally we would use something bigger than a uint8 for faster
- // iteration like uint32, but we lack the bit twiddling intrinsics. We'd need to either
- // copy them from math/bits or fix the fact that we can't import math/bits' code from
- // the runtime due to compiler instrumentation.
- searchAddr atomicOffAddr
- chunks []atomic.Uint8
- minHeapIdx atomic.Int32
- min, max atomic.Int32
+ // searchAddrBg is increased only on each new generation and is mainly used by the
+ // background scavenger and heap-growth scavenging. searchAddrForce is increased continuously
+ // as memory gets freed and is mainly used by eager memory reclaim such as debug.FreeOSMemory
+ // and scavenging to maintain the memory limit.
+ searchAddrBg atomicOffAddr
+ searchAddrForce atomicOffAddr
+
+ // freeHWM is the highest address (in offset address space) that was freed
+ // this generation.
+ freeHWM offAddr
+
+ // Generation counter. Updated by nextGen at the end of each mark phase.
+ gen uint32
+
+ // test indicates whether or not we're in a test.
+ test bool
}
// find returns the highest chunk index that may contain pages available to scavenge.
// It also returns an offset to start searching in the highest chunk.
-func (s *scavengeIndex) find() (chunkIdx, uint) {
- searchAddr, marked := s.searchAddr.Load()
+func (s *scavengeIndex) find(force bool) (chunkIdx, uint) {
+ cursor := &s.searchAddrBg
+ if force {
+ cursor = &s.searchAddrForce
+ }
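+ // The force cursor advances as memory is freed, so forced scavenging
+ // (debug.FreeOSMemory, the memory limit) can see memory freed mid-generation;
+ // the background cursor only advances at nextGen.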
+ searchAddr, marked := cursor.Load()
if searchAddr == minOffAddr.addr() {
// We got a cleared search addr.
return 0, 0
}
- // Starting from searchAddr's chunk, and moving down to minHeapIdx,
- // iterate until we find a chunk with pages to scavenge.
- min := s.minHeapIdx.Load()
- searchChunk := chunkIndex(uintptr(searchAddr))
- start := int32(searchChunk / 8)
+ // Starting from searchAddr's chunk, iterate until we find a chunk with pages to scavenge.
+ gen := s.gen
+ min := chunkIdx(s.min.Load())
+ start := chunkIndex(uintptr(searchAddr))
for i := start; i >= min; i-- {
- // Skip over irrelevant address space.
- chunks := s.chunks[i].Load()
- if chunks == 0 {
+ // Skip over chunks that aren't eligible for scavenging.
+ if !s.chunks[i].load().shouldScavenge(gen, force) {
continue
}
- // Note that we can't have 8 leading zeroes here because
- // we necessarily skipped that case. So, what's left is
- // an index. If there are no zeroes, we want the 7th
- // index, if 1 zero, the 6th, and so on.
- n := 7 - sys.LeadingZeros8(chunks)
- ci := chunkIdx(uint(i)*8 + uint(n))
- if searchChunk == ci {
- return ci, chunkPageIndex(uintptr(searchAddr))
+ // We're still scavenging this chunk.
+ if i == start {
+ return i, chunkPageIndex(uintptr(searchAddr))
}
// Try to reduce searchAddr to newSearchAddr.
- newSearchAddr := chunkBase(ci) + pallocChunkBytes - pageSize
+ newSearchAddr := chunkBase(i) + pallocChunkBytes - pageSize
if marked {
// Attempt to be the first one to decrease the searchAddr
// after an increase. If we fail, that means there was another
// it doesn't matter. We may lose some performance having an
// incorrect search address, but it's far more important that
// we don't miss updates.
- s.searchAddr.StoreUnmark(searchAddr, newSearchAddr)
+ cursor.StoreUnmark(searchAddr, newSearchAddr)
} else {
// Decrease searchAddr.
- s.searchAddr.StoreMin(newSearchAddr)
+ cursor.StoreMin(newSearchAddr)
}
- return ci, pallocChunkPages - 1
+ return i, pallocChunkPages - 1
}
// Clear searchAddr, because we've exhausted the heap.
- s.searchAddr.Clear()
+ cursor.Clear()
return 0, 0
}
-// mark sets the inclusive range of chunks between indices start and end as
-// containing pages available to scavenge.
+// alloc updates metadata for chunk at index ci with the fact that
+// an allocation of npages occurred.
//
-// Must be serialized with other mark, markRange, and clear calls.
-func (s *scavengeIndex) mark(base, limit uintptr) {
- start, end := chunkIndex(base), chunkIndex(limit-pageSize)
- if start == end {
- // Within a chunk.
- mask := uint8(1 << (start % 8))
- s.chunks[start/8].Or(mask)
- } else if start/8 == end/8 {
- // Within the same byte in the index.
- mask := uint8(uint16(1<<(end-start+1))-1) << (start % 8)
- s.chunks[start/8].Or(mask)
- } else {
- // Crosses multiple bytes in the index.
- startAligned := chunkIdx(alignUp(uintptr(start), 8))
- endAligned := chunkIdx(alignDown(uintptr(end), 8))
-
- // Do the end of the first byte first.
- if width := startAligned - start; width > 0 {
- mask := uint8(uint16(1<<width)-1) << (start % 8)
- s.chunks[start/8].Or(mask)
- }
- // Do the middle aligned sections that take up a whole
- // byte.
- for ci := startAligned; ci < endAligned; ci += 8 {
- s.chunks[ci/8].Store(^uint8(0))
- }
- // Do the end of the last byte.
- //
- // This width check doesn't match the one above
- // for start because aligning down into the endAligned
- // block means we always have at least one chunk in this
- // block (note that end is *inclusive*). This also means
- // that if end == endAligned+n, then what we really want
- // is to fill n+1 chunks, i.e. width n+1. By induction,
- // this is true for all n.
- if width := end - endAligned + 1; width > 0 {
- mask := uint8(uint16(1<<width) - 1)
- s.chunks[end/8].Or(mask)
+// alloc may only run concurrently with find.
+func (s *scavengeIndex) alloc(ci chunkIdx, npages uint) {
+ sc := s.chunks[ci].load()
+ sc.alloc(npages, s.gen)
+ if !sc.isHugePage() && sc.inUse > scavChunkHiOccPages {
+ // Mark dense chunks as specifically backed by huge pages.
+ sc.setHugePage()
+ if !s.test {
+ sysHugePage(unsafe.Pointer(chunkBase(ci)), pallocChunkBytes)
}
}
- newSearchAddr := limit - pageSize
- searchAddr, _ := s.searchAddr.Load()
- // N.B. Because mark is serialized, it's not necessary to do a
- // full CAS here. mark only ever increases searchAddr, while
+ s.chunks[ci].store(sc)
+}
+
+// free updates metadata for chunk at index ci with the fact that
+// a free of npages occurred.
+//
+// free may only run concurrently with find.
+func (s *scavengeIndex) free(ci chunkIdx, page, npages uint) {
+ sc := s.chunks[ci].load()
+ sc.free(npages, s.gen)
+ s.chunks[ci].store(sc)
+
+ // Update scavenge search addresses.
+ addr := chunkBase(ci) + uintptr(page+npages-1)*pageSize
+ if s.freeHWM.lessThan(offAddr{addr}) {
+ s.freeHWM = offAddr{addr}
+ }
+ // N.B. Because free is serialized, it's not necessary to do a
+ // full CAS here. free only ever increases searchAddr, while
// find only ever decreases it. Since we only ever race with
// decreases, even if the value we loaded is stale, the actual
// value will never be larger.
- if (offAddr{searchAddr}).lessThan(offAddr{newSearchAddr}) {
- s.searchAddr.StoreMarked(newSearchAddr)
+ searchAddr, _ := s.searchAddrForce.Load()
+ if (offAddr{searchAddr}).lessThan(offAddr{addr}) {
+ s.searchAddrForce.StoreMarked(addr)
+ }
+}
+
+// nextGen moves the scavenger forward one generation. Must be called
+// once per GC cycle, but may be called more often to force more memory
+// to be released.
+//
+// nextGen may only run concurrently with find.
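+//
+// nextGen publishes the high-water mark of addresses freed in the generation
+// that just ended (freeHWM) as the background scavenger's new search address,
+// then resets it for the next generation.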
+func (s *scavengeIndex) nextGen() {
+ s.gen++
+ searchAddr, _ := s.searchAddrBg.Load()
+ if (offAddr{searchAddr}).lessThan(s.freeHWM) {
+ s.searchAddrBg.StoreMarked(s.freeHWM.addr())
}
+ s.freeHWM = minOffAddr
+}
+
+// setEmpty marks that the scavenger has finished looking at ci
+// for now to prevent the scavenger from getting stuck looking
+// at the same chunk.
+//
+// setEmpty may only run concurrently with find.
+func (s *scavengeIndex) setEmpty(ci chunkIdx) {
+ val := s.chunks[ci].load()
+ val.setEmpty()
+ s.chunks[ci].store(val)
}
-// clear sets the chunk at index ci as not containing pages available to scavenge.
+// setNoHugePage updates the backed-by-hugepages status of a particular chunk.
+// Returns true if the flag was newly set, that is, the chunk was previously
+// marked as backed by huge pages.
//
-// Must be serialized with other mark, markRange, and clear calls.
-func (s *scavengeIndex) clear(ci chunkIdx) {
- s.chunks[ci/8].And(^uint8(1 << (ci % 8)))
+// setNoHugePage may only run concurrently with find.
+func (s *scavengeIndex) setNoHugePage(ci chunkIdx) bool {
+ val := s.chunks[ci].load()
+ if !val.isHugePage() {
+ return false
+ }
+ val.setNoHugePage()
+ s.chunks[ci].store(val)
+ return true
+}
+
+// atomicScavChunkData is an atomic wrapper around a scavChunkData
+// that stores it in its packed form.
+type atomicScavChunkData struct {
+ value atomic.Uint64
+}
+
+// load loads and unpacks a scavChunkData.
+func (sc *atomicScavChunkData) load() scavChunkData {
+ return unpackScavChunkData(sc.value.Load())
+}
+
+// store packs and writes a new scavChunkData. store must be serialized
+// with other calls to store.
+func (sc *atomicScavChunkData) store(ssc scavChunkData) {
+ sc.value.Store(ssc.pack())
+}
+
+// scavChunkData tracks information about a palloc chunk for
+// scavenging. It packs well into 64 bits.
+//
+// The zero value always represents a valid newly-grown chunk.
+type scavChunkData struct {
+ // inUse indicates how many pages in this chunk are currently
+ // allocated.
+ //
+ // Only the first 10 bits are used.
+ inUse uint16
+
+ // lastInUse indicates how many pages in this chunk were allocated
+ // when we transitioned from gen-1 to gen.
+ //
+ // Only the first 10 bits are used.
+ lastInUse uint16
+
+ // gen is the generation counter from a scavengeIndex from the
+ // last time this scavChunkData was updated.
+ gen uint32
+
+ // scavChunkFlags represents additional flags.
+ //
+ // Note: only 6 bits are available.
+ scavChunkFlags
+}
+
+// unpackScavChunkData unpacks a scavChunkData from a uint64.
+func unpackScavChunkData(sc uint64) scavChunkData {
+ return scavChunkData{
+ inUse: uint16(sc),
+ lastInUse: uint16(sc>>16) & scavChunkInUseMask,
+ gen: uint32(sc >> 32),
+ scavChunkFlags: scavChunkFlags(uint8(sc>>(16+logScavChunkInUseMax)) & scavChunkFlagsMask),
+ }
+}
+
+// pack returns sc packed into a uint64.
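+//
+// The layout, from low to high bits, is: inUse in bits 0-15, lastInUse in
+// bits 16-25, the scavChunkFlags in bits 26-31, and gen in bits 32-63. Only
+// logScavChunkInUseMax (10) bits of each in-use count are meaningful.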
+func (sc scavChunkData) pack() uint64 {
+ return uint64(sc.inUse) |
+ (uint64(sc.lastInUse) << 16) |
+ (uint64(sc.scavChunkFlags) << (16 + logScavChunkInUseMax)) |
+ (uint64(sc.gen) << 32)
+}
+
+const (
+ // scavChunkHasFree indicates whether the chunk has anything left to
+ // scavenge. This is the opposite of "empty," used elsewhere in this
+ // file. The reason we say "HasFree" here is so the zero value is
+ // correct for a newly-grown chunk. (New memory is scavenged.)
+ scavChunkHasFree scavChunkFlags = 1 << iota
+ // scavChunkNoHugePage indicates whether this chunk has been marked
+ // sysNoHugePage. If not set, it means the chunk is marked sysHugePage.
+ // The negative here is unfortunate, but necessary to make it so that
+ // the zero value of scavChunkData accurately represents the state of
+ // a newly-grown chunk. (New memory is marked as backed by huge pages.)
+ scavChunkNoHugePage
+
+ // scavChunkMaxFlags is the maximum number of flags we can have, given how
+ // a scavChunkData is packed into 8 bytes.
+ scavChunkMaxFlags = 6
+ scavChunkFlagsMask = (1 << scavChunkMaxFlags) - 1
+
+ // logScavChunkInUseMax is the number of bits needed to represent the number
+ // of pages allocated in a single chunk. This is 1 more than log2 of the
+ // number of pages in the chunk because we need to represent a fully-allocated
+ // chunk.
+ logScavChunkInUseMax = logPallocChunkPages + 1
+ scavChunkInUseMask = (1 << logScavChunkInUseMax) - 1
+)
+
+// scavChunkFlags is a set of bit-flags for the scavenger for each palloc chunk.
+type scavChunkFlags uint8
+
+// isEmpty returns true if the hasFree flag is unset.
+func (sc *scavChunkFlags) isEmpty() bool {
+ return (*sc)&scavChunkHasFree == 0
+}
+
+// setEmpty clears the hasFree flag.
+func (sc *scavChunkFlags) setEmpty() {
+ *sc &^= scavChunkHasFree
+}
+
+// setNonEmpty sets the hasFree flag.
+func (sc *scavChunkFlags) setNonEmpty() {
+ *sc |= scavChunkHasFree
+}
+
+// isHugePage returns false if the noHugePage flag is set.
+func (sc *scavChunkFlags) isHugePage() bool {
+ return (*sc)&scavChunkNoHugePage == 0
+}
+
+// setHugePage clears the noHugePage flag.
+func (sc *scavChunkFlags) setHugePage() {
+ *sc &^= scavChunkNoHugePage
+}
+
+// setNoHugePage sets the noHugePage flag.
+func (sc *scavChunkFlags) setNoHugePage() {
+ *sc |= scavChunkNoHugePage
+}
+
+// shouldScavenge returns true if the corresponding chunk should be interrogated
+// by the scavenger.
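+//
+// For example, with scavChunkHiOccPages at 496 of 512 pages, a chunk with
+// inUse=300 and lastInUse=500 is skipped for the rest of the current
+// generation (it was dense last cycle), but becomes eligible again once
+// nextGen runs, since inUse is below the threshold.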
+func (sc scavChunkData) shouldScavenge(currGen uint32, force bool) bool {
+ if sc.isEmpty() {
+ // Nothing to scavenge.
+ return false
+ }
+ if force {
+ // We're forcing the memory to be scavenged.
+ return true
+ }
+ if sc.gen == currGen {
+ // In the current generation, if either the current or last generation
+ // is dense, then skip scavenging. Inverting that, we should scavenge
+ // if both the current and last generation were not dense.
+ return sc.inUse < scavChunkHiOccPages && sc.lastInUse < scavChunkHiOccPages
+ }
+ // If we're one or more generations ahead, we know inUse represents the current
+ // state of the chunk, since otherwise it would've been updated already.
+ return sc.inUse < scavChunkHiOccPages
+}
+
+// alloc updates sc given that npages were allocated in the corresponding chunk.
+func (sc *scavChunkData) alloc(npages uint, newGen uint32) {
+ if uint(sc.inUse)+npages > pallocChunkPages {
+ print("runtime: inUse=", sc.inUse, " npages=", npages, "\n")
+ throw("too many pages allocated in chunk?")
+ }
+ if sc.gen != newGen {
+ sc.lastInUse = sc.inUse
+ sc.gen = newGen
+ }
+ sc.inUse += uint16(npages)
+ if sc.inUse == pallocChunkPages {
+ // There's nothing for the scavenger to take from here.
+ sc.setEmpty()
+ }
+}
+
+// free updates sc given that npages were freed in the corresponding chunk.
+func (sc *scavChunkData) free(npages uint, newGen uint32) {
+ if uint(sc.inUse) < npages {
+ print("runtime: inUse=", sc.inUse, " npages=", npages, "\n")
+ throw("allocated pages below zero?")
+ }
+ if sc.gen != newGen {
+ sc.lastInUse = sc.inUse
+ sc.gen = newGen
+ }
+ sc.inUse -= uint16(npages)
+ // The scavenger can no longer be done with this chunk now that
+ // new memory has been freed into it.
+ sc.setNonEmpty()
}
type piController struct {
}
func TestScavengeIndex(t *testing.T) {
- setup := func(t *testing.T) (func(ChunkIdx, uint), func(uintptr, uintptr)) {
+ // This test suite tests the scavengeIndex data structure.
+
+ // markFunc is a function that makes the address range [base, limit)
+ // available for scavenging in a test index.
+ type markFunc func(base, limit uintptr)
+
+ // findFunc is a function that searches for the next available page
+ // to scavenge in the index. It asserts that the page is found in
+ // chunk "ci" at page "offset."
+ type findFunc func(ci ChunkIdx, offset uint)
+
+ // The structure of the tests below is as follows:
+ //
+ // setup creates a fake scavengeIndex that can be mutated and queried by
+ // the functions it returns. Those functions capture the testing.T that
+ // setup is called with, so they're bound to the subtest they're created in.
+ //
+ // Tests are then organized into test cases which mark some pages as
+ // scavenge-able then try to find them. Tests expect that the initial
+ // state of the scavengeIndex has all of the chunks as dense in the last
+ // generation and empty to the scavenger.
+ //
+ // There are a few additional tests that interleave mark and find operations,
+ // so they're defined separately, but use the same infrastructure.
+ setup := func(t *testing.T, force bool) (mark markFunc, find findFunc, nextGen func()) {
t.Helper()
// Pick some reasonable bounds. We don't need a huge range just to test.
si := NewScavengeIndex(BaseChunkIdx, BaseChunkIdx+64)
- find := func(want ChunkIdx, wantOffset uint) {
+
+ // Initialize all the chunks as dense and empty.
+ //
+ // Also, reset search addresses so that we can get page offsets.
+ si.AllocRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+64, 0))
+ si.NextGen()
+ si.FreeRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+64, 0))
+ for ci := BaseChunkIdx; ci < BaseChunkIdx+64; ci++ {
+ si.SetEmpty(ci)
+ }
+ si.ResetSearchAddrs()
+
+ // Create and return test functions.
+ mark = func(base, limit uintptr) {
t.Helper()
- got, gotOffset := si.Find()
+ si.AllocRange(base, limit)
+ si.FreeRange(base, limit)
+ }
+ find = func(want ChunkIdx, wantOffset uint) {
+ t.Helper()
+
+ got, gotOffset := si.Find(force)
if want != got {
t.Errorf("find: wanted chunk index %d, got %d", want, got)
}
- if want != got {
+ if wantOffset != gotOffset {
t.Errorf("find: wanted page offset %d, got %d", wantOffset, gotOffset)
}
if t.Failed() {
t.FailNow()
}
- si.Clear(got)
+ si.SetEmpty(got)
}
- mark := func(base, limit uintptr) {
+ nextGen = func() {
t.Helper()
- si.Mark(base, limit)
+ si.NextGen()
}
- return find, mark
+ return
}
- t.Run("Uninitialized", func(t *testing.T) {
- find, _ := setup(t)
- find(0, 0)
- })
- t.Run("OnePage", func(t *testing.T) {
- find, mark := setup(t)
- mark(PageBase(BaseChunkIdx, 3), PageBase(BaseChunkIdx, 4))
- find(BaseChunkIdx, 3)
- find(0, 0)
- })
- t.Run("FirstPage", func(t *testing.T) {
- find, mark := setup(t)
- mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx, 1))
- find(BaseChunkIdx, 0)
- find(0, 0)
- })
- t.Run("SeveralPages", func(t *testing.T) {
- find, mark := setup(t)
- mark(PageBase(BaseChunkIdx, 9), PageBase(BaseChunkIdx, 14))
- find(BaseChunkIdx, 13)
- find(0, 0)
- })
- t.Run("WholeChunk", func(t *testing.T) {
- find, mark := setup(t)
- mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0))
- find(BaseChunkIdx, PallocChunkPages-1)
- find(0, 0)
- })
- t.Run("LastPage", func(t *testing.T) {
- find, mark := setup(t)
- mark(PageBase(BaseChunkIdx, PallocChunkPages-1), PageBase(BaseChunkIdx+1, 0))
- find(BaseChunkIdx, PallocChunkPages-1)
- find(0, 0)
- })
- t.Run("TwoChunks", func(t *testing.T) {
- find, mark := setup(t)
- mark(PageBase(BaseChunkIdx, 128), PageBase(BaseChunkIdx+1, 128))
- find(BaseChunkIdx+1, 127)
- find(BaseChunkIdx, PallocChunkPages-1)
- find(0, 0)
- })
- t.Run("TwoChunksOffset", func(t *testing.T) {
- find, mark := setup(t)
- mark(PageBase(BaseChunkIdx+7, 128), PageBase(BaseChunkIdx+8, 129))
- find(BaseChunkIdx+8, 128)
- find(BaseChunkIdx+7, PallocChunkPages-1)
- find(0, 0)
- })
- t.Run("SevenChunksOffset", func(t *testing.T) {
- find, mark := setup(t)
- mark(PageBase(BaseChunkIdx+6, 11), PageBase(BaseChunkIdx+13, 15))
- find(BaseChunkIdx+13, 14)
- for i := BaseChunkIdx + 12; i >= BaseChunkIdx+6; i-- {
- find(i, PallocChunkPages-1)
- }
- find(0, 0)
- })
- t.Run("ThirtyTwoChunks", func(t *testing.T) {
- find, mark := setup(t)
- mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0))
- for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- {
- find(i, PallocChunkPages-1)
- }
- find(0, 0)
- })
- t.Run("ThirtyTwoChunksOffset", func(t *testing.T) {
- find, mark := setup(t)
- mark(PageBase(BaseChunkIdx+3, 0), PageBase(BaseChunkIdx+35, 0))
- for i := BaseChunkIdx + 34; i >= BaseChunkIdx+3; i-- {
- find(i, PallocChunkPages-1)
- }
- find(0, 0)
- })
- t.Run("Mark", func(t *testing.T) {
- find, mark := setup(t)
+
+ // Each of these test cases calls mark and then find once.
+ type testCase struct {
+ name string
+ mark func(markFunc)
+ find func(findFunc)
+ }
+ for _, test := range []testCase{
+ {
+ name: "Uninitialized",
+ mark: func(_ markFunc) {},
+ find: func(_ findFunc) {},
+ },
+ {
+ name: "OnePage",
+ mark: func(mark markFunc) {
+ mark(PageBase(BaseChunkIdx, 3), PageBase(BaseChunkIdx, 4))
+ },
+ find: func(find findFunc) {
+ find(BaseChunkIdx, 3)
+ },
+ },
+ {
+ name: "FirstPage",
+ mark: func(mark markFunc) {
+ mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx, 1))
+ },
+ find: func(find findFunc) {
+ find(BaseChunkIdx, 0)
+ },
+ },
+ {
+ name: "SeveralPages",
+ mark: func(mark markFunc) {
+ mark(PageBase(BaseChunkIdx, 9), PageBase(BaseChunkIdx, 14))
+ },
+ find: func(find findFunc) {
+ find(BaseChunkIdx, 13)
+ },
+ },
+ {
+ name: "WholeChunk",
+ mark: func(mark markFunc) {
+ mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0))
+ },
+ find: func(find findFunc) {
+ find(BaseChunkIdx, PallocChunkPages-1)
+ },
+ },
+ {
+ name: "LastPage",
+ mark: func(mark markFunc) {
+ mark(PageBase(BaseChunkIdx, PallocChunkPages-1), PageBase(BaseChunkIdx+1, 0))
+ },
+ find: func(find findFunc) {
+ find(BaseChunkIdx, PallocChunkPages-1)
+ },
+ },
+ {
+ name: "TwoChunks",
+ mark: func(mark markFunc) {
+ mark(PageBase(BaseChunkIdx, 128), PageBase(BaseChunkIdx+1, 128))
+ },
+ find: func(find findFunc) {
+ find(BaseChunkIdx+1, 127)
+ find(BaseChunkIdx, PallocChunkPages-1)
+ },
+ },
+ {
+ name: "TwoChunksOffset",
+ mark: func(mark markFunc) {
+ mark(PageBase(BaseChunkIdx+7, 128), PageBase(BaseChunkIdx+8, 129))
+ },
+ find: func(find findFunc) {
+ find(BaseChunkIdx+8, 128)
+ find(BaseChunkIdx+7, PallocChunkPages-1)
+ },
+ },
+ {
+ name: "SevenChunksOffset",
+ mark: func(mark markFunc) {
+ mark(PageBase(BaseChunkIdx+6, 11), PageBase(BaseChunkIdx+13, 15))
+ },
+ find: func(find findFunc) {
+ find(BaseChunkIdx+13, 14)
+ for i := BaseChunkIdx + 12; i >= BaseChunkIdx+6; i-- {
+ find(i, PallocChunkPages-1)
+ }
+ },
+ },
+ {
+ name: "ThirtyTwoChunks",
+ mark: func(mark markFunc) {
+ mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0))
+ },
+ find: func(find findFunc) {
+ for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- {
+ find(i, PallocChunkPages-1)
+ }
+ },
+ },
+ {
+ name: "ThirtyTwoChunksOffset",
+ mark: func(mark markFunc) {
+ mark(PageBase(BaseChunkIdx+3, 0), PageBase(BaseChunkIdx+35, 0))
+ },
+ find: func(find findFunc) {
+ for i := BaseChunkIdx + 34; i >= BaseChunkIdx+3; i-- {
+ find(i, PallocChunkPages-1)
+ }
+ },
+ },
+ {
+ name: "Mark",
+ mark: func(mark markFunc) {
+ for i := BaseChunkIdx; i < BaseChunkIdx+32; i++ {
+ mark(PageBase(i, 0), PageBase(i+1, 0))
+ }
+ },
+ find: func(find findFunc) {
+ for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- {
+ find(i, PallocChunkPages-1)
+ }
+ },
+ },
+ {
+ name: "MarkIdempotentOneChunk",
+ mark: func(mark markFunc) {
+ mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0))
+ mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0))
+ },
+ find: func(find findFunc) {
+ find(BaseChunkIdx, PallocChunkPages-1)
+ },
+ },
+ {
+ name: "MarkIdempotentThirtyTwoChunks",
+ mark: func(mark markFunc) {
+ mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0))
+ mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0))
+ },
+ find: func(find findFunc) {
+ for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- {
+ find(i, PallocChunkPages-1)
+ }
+ },
+ },
+ {
+ name: "MarkIdempotentThirtyTwoChunksOffset",
+ mark: func(mark markFunc) {
+ mark(PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+31, 0))
+ mark(PageBase(BaseChunkIdx+5, 0), PageBase(BaseChunkIdx+36, 0))
+ },
+ find: func(find findFunc) {
+ for i := BaseChunkIdx + 35; i >= BaseChunkIdx+4; i-- {
+ find(i, PallocChunkPages-1)
+ }
+ },
+ },
+ } {
+ test := test
+ t.Run("Bg/"+test.name, func(t *testing.T) {
+ mark, find, nextGen := setup(t, false)
+ test.mark(mark)
+ find(0, 0) // Make sure we find nothing at this point.
+ nextGen() // Move to the next generation.
+ test.find(find) // Now we should be able to find things.
+ find(0, 0) // The test should always fully exhaust the index.
+ })
+ t.Run("Force/"+test.name, func(t *testing.T) {
+ mark, find, _ := setup(t, true)
+ test.mark(mark)
+ test.find(find) // Finding should always work when forced.
+ find(0, 0) // The test should always fully exhaust the index.
+ })
+ }
+ t.Run("Bg/MarkInterleaved", func(t *testing.T) {
+ mark, find, nextGen := setup(t, false)
for i := BaseChunkIdx; i < BaseChunkIdx+32; i++ {
mark(PageBase(i, 0), PageBase(i+1, 0))
- }
- for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- {
+ nextGen()
find(i, PallocChunkPages-1)
}
find(0, 0)
})
- t.Run("MarkInterleaved", func(t *testing.T) {
- find, mark := setup(t)
+ t.Run("Force/MarkInterleaved", func(t *testing.T) {
+ mark, find, _ := setup(t, true)
for i := BaseChunkIdx; i < BaseChunkIdx+32; i++ {
mark(PageBase(i, 0), PageBase(i+1, 0))
find(i, PallocChunkPages-1)
}
find(0, 0)
})
- t.Run("MarkIdempotentOneChunk", func(t *testing.T) {
- find, mark := setup(t)
- mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0))
- mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0))
- find(BaseChunkIdx, PallocChunkPages-1)
- find(0, 0)
- })
- t.Run("MarkIdempotentThirtyTwoChunks", func(t *testing.T) {
- find, mark := setup(t)
- mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0))
- mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0))
- for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- {
- find(i, PallocChunkPages-1)
- }
- find(0, 0)
- })
- t.Run("MarkIdempotentThirtyTwoChunksOffset", func(t *testing.T) {
- find, mark := setup(t)
- mark(PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+31, 0))
- mark(PageBase(BaseChunkIdx+5, 0), PageBase(BaseChunkIdx+36, 0))
- for i := BaseChunkIdx + 35; i >= BaseChunkIdx+4; i-- {
- find(i, PallocChunkPages-1)
- }
- find(0, 0)
- })
+}
+
+func TestScavChunkDataPack(t *testing.T) {
+ if !CheckPackScavChunkData(1918237402, 512, 512, 0b11) {
+ t.Error("failed pack/unpack check for scavChunkData 1")
+ }
+ if !CheckPackScavChunkData(^uint32(0), 12, 0, 0b00) {
+ t.Error("failed pack/unpack check for scavChunkData 2")
+ }
}
func FuzzPIController(f *testing.F) {
c.fullUnswept(sg).reset()
}
- // Sweeping is done, so if the scavenger isn't already awake,
- // wake it up. There's definitely work for it to do at this
- // point.
+ // Sweeping is done, so there won't be any new memory to
+ // scavenge for a bit.
+ //
+ // If the scavenger isn't already awake, wake it up. There's
+ // definitely work for it to do at this point.
scavenger.wake()
nextMarkBitArenaEpoch()
h.central[i].mcentral.init(spanClass(i))
}
- h.pages.init(&h.lock, &memstats.gcMiscSys)
+ h.pages.init(&h.lock, &memstats.gcMiscSys, false)
}
// reclaim sweeps and reclaims at least npage pages into the heap.
// pages not to get touched until we return. Simultaneously, it's important
// to do this before calling sysUsed because that may commit address space.
bytesToScavenge := uintptr(0)
+ forceScavenge := false
if limit := gcController.memoryLimit.Load(); !gcCPULimiter.limiting() {
// Assist with scavenging to maintain the memory limit by the amount
// that we expect to page in.
// someone can set a really big memory limit that isn't maxInt64.
if uint64(scav)+inuse > uint64(limit) {
bytesToScavenge = uintptr(uint64(scav) + inuse - uint64(limit))
+ forceScavenge = true
}
}
if goal := scavenge.gcPercentGoal.Load(); goal != ^uint64(0) && growth > 0 {
// Scavenge, but back out if the limiter turns on.
h.pages.scavenge(bytesToScavenge, func() bool {
return gcCPULimiter.limiting()
- })
+ }, forceScavenge)
// Finish up accounting.
now = nanotime()
memstats.heapStats.release()
// Mark the space as free.
- h.pages.free(s.base(), s.npages, false)
+ h.pages.free(s.base(), s.npages)
// Free the span structure. We no longer have a use for it.
s.state.set(mSpanDead)
// scavengeAll acquires the heap lock (blocking any additional
// manipulation of the page allocator) and iterates over the whole
// heap, scavenging every free page available.
+//
+// Must run on the system stack because it acquires the heap lock.
+//
+//go:systemstack
func (h *mheap) scavengeAll() {
// Disallow malloc or panic while holding the heap lock. We do
// this here because this is a non-mallocgc entry-point to
gp := getg()
gp.m.mallocing++
- released := h.pages.scavenge(^uintptr(0), nil)
+ // Force scavenge everything.
+ released := h.pages.scavenge(^uintptr(0), nil, true)
gp.m.mallocing--
// known by the page allocator to be currently in-use (passed
// to grow).
//
- // This field is currently unused on 32-bit architectures but
- // is harmless to track. We care much more about having a
- // contiguous heap in these cases and take additional measures
- // to ensure that, so in nearly all cases this should have just
- // 1 element.
+ // We care much more about having a contiguous heap in these cases
+ // and take additional measures to ensure that, so in nearly all
+ // cases this should have just 1 element.
//
// All access is protected by the mheapLock.
inUse addrRanges
test bool
}
-func (p *pageAlloc) init(mheapLock *mutex, sysStat *sysMemStat) {
+func (p *pageAlloc) init(mheapLock *mutex, sysStat *sysMemStat, test bool) {
if levelLogPages[0] > logMaxPackedValue {
// We can't represent 1<<levelLogPages[0] pages, the maximum number
// of pages we need to represent at the root level, in a summary, which
p.inUse.init(sysStat)
// System-dependent initialization.
- p.sysInit()
+ p.sysInit(test)
// Start with the searchAddr in a state indicating there's no free memory.
p.searchAddr = maxSearchAddr()
// Set the mheapLock.
p.mheapLock = mheapLock
+
+ // Set if we're in a test.
+ p.test = test
+ p.scav.index.test = test
}
// tryChunkOf returns the bitmap data for the given chunk.
// we need to ensure this newly-free memory is visible in the
// summaries.
p.update(base, size/pageSize, true, false)
+
+ // Mark all new memory as huge page eligible.
+ if !p.test {
+ sysHugePage(unsafe.Pointer(base), size)
+ }
}
// enableChunkHugePages enables huge pages for the chunk bitmap mappings (disabled by default).
chunk := p.chunkOf(sc)
scav += chunk.scavenged.popcntRange(si, ei+1-si)
chunk.allocRange(si, ei+1-si)
+ p.scav.index.alloc(sc, ei+1-si)
} else {
// The range crosses at least one chunk boundary.
chunk := p.chunkOf(sc)
scav += chunk.scavenged.popcntRange(si, pallocChunkPages-si)
chunk.allocRange(si, pallocChunkPages-si)
+ p.scav.index.alloc(sc, pallocChunkPages-si)
for c := sc + 1; c < ec; c++ {
chunk := p.chunkOf(c)
scav += chunk.scavenged.popcntRange(0, pallocChunkPages)
chunk.allocAll()
+ p.scav.index.alloc(c, pallocChunkPages)
}
chunk = p.chunkOf(ec)
scav += chunk.scavenged.popcntRange(0, ei+1)
chunk.allocRange(0, ei+1)
+ p.scav.index.alloc(ec, ei+1)
}
p.update(base, npages, true, true)
return uintptr(scav) * pageSize
// Must run on the system stack because p.mheapLock must be held.
//
//go:systemstack
-func (p *pageAlloc) free(base, npages uintptr, scavenged bool) {
+func (p *pageAlloc) free(base, npages uintptr) {
assertLockHeld(p.mheapLock)
// If we're freeing pages below the p.searchAddr, update searchAddr.
p.searchAddr = b
}
limit := base + npages*pageSize - 1
- if !scavenged {
- p.scav.index.mark(base, limit+1)
- }
if npages == 1 {
// Fast path: we're clearing a single bit, and we know exactly
// where it is, so mark it directly.
i := chunkIndex(base)
- p.chunkOf(i).free1(chunkPageIndex(base))
+ pi := chunkPageIndex(base)
+ p.chunkOf(i).free1(pi)
+ p.scav.index.free(i, pi, 1)
} else {
// Slow path: we're clearing more bits so we may need to iterate.
sc, ec := chunkIndex(base), chunkIndex(limit)
if sc == ec {
// The range doesn't cross any chunk boundaries.
p.chunkOf(sc).free(si, ei+1-si)
+ p.scav.index.free(sc, si, ei+1-si)
} else {
// The range crosses at least one chunk boundary.
p.chunkOf(sc).free(si, pallocChunkPages-si)
+ p.scav.index.free(sc, si, pallocChunkPages-si)
for c := sc + 1; c < ec; c++ {
p.chunkOf(c).freeAll()
+ p.scav.index.free(c, 0, pallocChunkPages)
}
p.chunkOf(ec).free(0, ei+1)
+ p.scav.index.free(ec, 0, ei+1)
}
}
p.update(base, npages, true, false)
package runtime
import (
- "runtime/internal/atomic"
"unsafe"
)
// scavengeIndexArray is the backing store for p.scav.index.chunks.
// On 32-bit platforms, it's small enough to just be a global.
-var scavengeIndexArray [((1 << heapAddrBits) / pallocChunkBytes) / 8]atomic.Uint8
+var scavengeIndexArray [(1 << heapAddrBits) / pallocChunkBytes]atomicScavChunkData
// See mpagealloc_64bit.go for details.
-func (p *pageAlloc) sysInit() {
+func (p *pageAlloc) sysInit(test bool) {
// Calculate how much memory all our entries will take up.
//
// This should be around 12 KiB or less.
reservation = add(reservation, uintptr(entries)*pallocSumBytes)
}
- // Set up the scavenge index.
- p.scav.index.chunks = scavengeIndexArray[:]
+ if test {
+ // Set up the scavenge index via sysAlloc so the test can free it later.
+ scavIndexSize := uintptr(len(scavengeIndexArray)) * unsafe.Sizeof(atomicScavChunkData{})
+ p.scav.index.chunks = ((*[(1 << heapAddrBits) / pallocChunkBytes]atomicScavChunkData)(sysAlloc(scavIndexSize, p.sysStat)))[:]
+ p.summaryMappedReady += scavIndexSize
+ } else {
+ // Set up the scavenge index.
+ p.scav.index.chunks = scavengeIndexArray[:]
+ }
+ p.scav.index.min.Store(1) // The 0th chunk is never going to be mapped for the heap.
+ p.scav.index.max.Store(uintptr(len(p.scav.index.chunks)))
}
// See mpagealloc_64bit.go for details.
package runtime
import (
- "runtime/internal/atomic"
"unsafe"
)
// sysInit performs architecture-dependent initialization of fields
// in pageAlloc. pageAlloc should be uninitialized except for sysStat
// if any runtime statistic should be updated.
-func (p *pageAlloc) sysInit() {
+func (p *pageAlloc) sysInit(test bool) {
// Reserve memory for each level. This will get mapped in
// as R/W by setArenas.
for l, shift := range levelShift {
}
// Set up the scavenge index.
- nbytes := uintptr(1<<heapAddrBits) / pallocChunkBytes / 8
- r := sysReserve(nil, nbytes)
- sl := notInHeapSlice{(*notInHeap)(r), int(nbytes), int(nbytes)}
- p.scav.index.chunks = *(*[]atomic.Uint8)(unsafe.Pointer(&sl))
+ p.scav.index.sysInit()
}
// sysGrow performs architecture-dependent operations on heap
// Prune need down to what needs to be newly mapped. Some parts of it may
// already be mapped by what inUse describes due to page alignment requirements
- // for mapping. prune's invariants are guaranteed by the fact that this
- // function will never be asked to remap the same memory twice.
+ // for mapping. Because this function will never be asked to remap the same
+ // memory twice, it should never be possible to prune in such a way that causes
+ // need to be split.
if inUseIndex > 0 {
need = need.subtract(addrRangeToSumAddrRange(l, p.inUse.ranges[inUseIndex-1]))
}
}
// Update the scavenge index.
- p.summaryMappedReady += p.scav.index.grow(base, limit, p.sysStat)
+ p.summaryMappedReady += p.scav.index.sysGrow(base, limit, p.sysStat)
}
-// grow increases the index's backing store in response to a heap growth.
+// sysGrow increases the index's backing store in response to a heap growth.
//
// Returns the amount of memory added to sysStat.
-func (s *scavengeIndex) grow(base, limit uintptr, sysStat *sysMemStat) uintptr {
+func (s *scavengeIndex) sysGrow(base, limit uintptr, sysStat *sysMemStat) uintptr {
if base%pallocChunkBytes != 0 || limit%pallocChunkBytes != 0 {
print("runtime: base = ", hex(base), ", limit = ", hex(limit), "\n")
throw("sysGrow bounds not aligned to pallocChunkBytes")
}
+ scSize := unsafe.Sizeof(atomicScavChunkData{})
// Map and commit the pieces of chunks that we need.
//
// We always map the full range of the minimum heap address to the
// index.
haveMin := s.min.Load()
haveMax := s.max.Load()
- needMin := int32(alignDown(uintptr(chunkIndex(base)/8), physPageSize))
- needMax := int32(alignUp(uintptr((chunkIndex(limit)+7)/8), physPageSize))
+ needMin := alignDown(uintptr(chunkIndex(base)), physPageSize/scSize)
+ needMax := alignUp(uintptr(chunkIndex(limit)), physPageSize/scSize)
// Extend the range down to what we have, if there's no overlap.
if needMax < haveMin {
needMax = haveMin
}
- if needMin > haveMax {
+ if haveMax != 0 && needMin > haveMax {
needMin = haveMax
}
have := makeAddrRange(
// Avoid a panic from indexing one past the last element.
- uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(haveMin),
- uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(haveMax),
+ uintptr(unsafe.Pointer(&s.chunks[0]))+haveMin*scSize,
+ uintptr(unsafe.Pointer(&s.chunks[0]))+haveMax*scSize,
)
need := makeAddrRange(
// Avoid a panic from indexing one past the last element.
- uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(needMin),
- uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(needMax),
+ uintptr(unsafe.Pointer(&s.chunks[0]))+needMin*scSize,
+ uintptr(unsafe.Pointer(&s.chunks[0]))+needMax*scSize,
)
// Subtract any overlap from rounding. We can't re-map memory because
// it'll be zeroed.
s.max.Store(needMax)
}
}
- // Update minHeapIdx. Note that even if there's no mapping work to do,
- // we may still have a new, lower minimum heap address.
- minHeapIdx := s.minHeapIdx.Load()
- if baseIdx := int32(chunkIndex(base) / 8); minHeapIdx == 0 || baseIdx < minHeapIdx {
- s.minHeapIdx.Store(baseIdx)
- }
return need.size()
}
+
+// sysInit initializes the scavengeIndex's chunks array.
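+//
+// On a typical 64-bit platform (heapAddrBits = 48) this reserves 2^26 entries,
+// or 512 MiB of address space; only the parts that cover the actual heap are
+// mapped in by sysGrow.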
+func (s *scavengeIndex) sysInit() {
+ n := uintptr(1<<heapAddrBits) / pallocChunkBytes
+ nbytes := n * unsafe.Sizeof(atomicScavChunkData{})
+ r := sysReserve(nil, nbytes)
+ sl := notInHeapSlice{(*notInHeap)(r), int(n), int(n)}
+ s.chunks = *(*[]atomicScavChunkData)(unsafe.Pointer(&sl))
+}
for i := uint(0); i < 64; i++ {
if c.cache&(1<<i) != 0 {
p.chunkOf(ci).free1(pi + i)
+
+ // Update density statistics.
+ p.scav.index.free(ci, pi+i, 1)
}
if c.scav&(1<<i) != 0 {
p.chunkOf(ci).scavenged.setRange(pi+i, 1)
}
}
+
// Since this is a lot like a free, we need to make sure
// we update the searchAddr just like free does.
if b := (offAddr{c.base}); b.lessThan(p.searchAddr) {
p.searchAddr = maxSearchAddr()
return pageCache{}
}
- ci := chunkIndex(addr)
+ ci = chunkIndex(addr)
chunk = p.chunkOf(ci)
c = pageCache{
base: alignDown(addr, 64*pageSize),
// Update as an allocation, but note that it's not contiguous.
p.update(c.base, pageCachePages, false, true)
+ // Update density statistics.
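+ // c.cache has a bit set for each free page now owned by the cache, so its
+ // popcount is the number of pages leaving the free pool.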
+ p.scav.index.alloc(ci, uint(sys.OnesCount64(c.cache)))
+
// Set the search address to the last page represented by the cache.
// Since all of the pages in this block are going to the cache, and we
// searched for the first free page, we can confidently start at the