runtime: manage huge pages explicitly

author Michael Anthony Knyszek <mknyszek@google.com>

Fri, 23 Sep 2022 16:32:34 +0000 (16:32 +0000)

committer Michael Knyszek <mknyszek@google.com>

Wed, 19 Apr 2023 14:30:00 +0000 (14:30 +0000)
author Michael Anthony Knyszek <mknyszek@google.com>
Fri, 23 Sep 2022 16:32:34 +0000 (16:32 +0000)
committer Michael Knyszek <mknyszek@google.com>
Wed, 19 Apr 2023 14:30:00 +0000 (14:30 +0000)
diff --git a/src/runtime/debug/garbage_test.go b/src/runtime/debug/garbage_test.go

index 7213bbe641dd6e38c64f2e504a23080ea25b3325..cd91782d27c1cedddbbbfbd87e0efc53ce41ac1d 100644 (file)
--- a/src/runtime/debug/garbage_test.go
+++ b/src/runtime/debug/garbage_test.go
@@ -146,7 +146,7 @@ func TestFreeOSMemory(t *testing.T) {
                 return
         }
         if after.HeapReleased-before.HeapReleased < bigBytes-slack {
-               t.Fatalf("less than %d released: %d -> %d", bigBytes, before.HeapReleased, after.HeapReleased)
+               t.Fatalf("less than %d released: %d -> %d", bigBytes-slack, before.HeapReleased, after.HeapReleased)
         }
  }
  
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go

index 498c63f5b65dae9c94127cf0f044d8ef301917e7..1045d510efa5ccc6c57230ec259b7f952789586e 100644 (file)
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -831,7 +831,7 @@ func (p *PageAlloc) Free(base, npages uintptr) {
                 // None of the tests need any higher-level locking, so we just
                 // take the lock internally.
                 lock(pp.mheapLock)
-               pp.free(base, npages, true)
+               pp.free(base, npages)
                 unlock(pp.mheapLock)
         })
  }
@@ -841,7 +841,7 @@ func (p *PageAlloc) Bounds() (ChunkIdx, ChunkIdx) {
  func (p *PageAlloc) Scavenge(nbytes uintptr) (r uintptr) {
         pp := (*pageAlloc)(p)
         systemstack(func() {
-               r = pp.scavenge(nbytes, nil)
+               r = pp.scavenge(nbytes, nil, true)
         })
         return
  }
@@ -995,9 +995,8 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc {
         p := new(pageAlloc)
  
         // We've got an entry, so initialize the pageAlloc.
-       p.init(new(mutex), testSysStat)
+       p.init(new(mutex), testSysStat, true)
         lockInit(p.mheapLock, lockRankMheap)
-       p.test = true
         for i, init := range chunks {
                 addr := chunkBase(chunkIdx(i))
  
@@ -1009,11 +1008,18 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc {
                 })
  
                 // Initialize the bitmap and update pageAlloc metadata.
-               chunk := p.chunkOf(chunkIndex(addr))
+               ci := chunkIndex(addr)
+               chunk := p.chunkOf(ci)
  
                 // Clear all the scavenged bits which grow set.
                 chunk.scavenged.clearRange(0, pallocChunkPages)
  
+               // Simulate the allocation and subsequent free of all pages in
+               // the chunk for the scavenge index. This puts the index in a state
+               // equivalent to all of the chunk's pages being free.
+               p.scav.index.alloc(ci, pallocChunkPages)
+               p.scav.index.free(ci, 0, pallocChunkPages)
+
                 // Apply scavenge state if applicable.
                 if scav != nil {
                         if scvg, ok := scav[i]; ok {
@@ -1033,19 +1039,10 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc {
                         // it and it's a no-op anyway.
                         if s.N != 0 {
                                 chunk.allocRange(s.I, s.N)
-                       }
-               }
  
-               // Make sure the scavenge index is updated.
-               //
-               // This is an inefficient way to do it, but it's also the simplest way.
-               minPages := physPageSize / pageSize
-               if minPages < 1 {
-                       minPages = 1
-               }
-               _, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, minPages)
-               if npages != 0 {
-                       p.scav.index.mark(addr, addr+pallocChunkBytes)
+                               // Make sure the scavenge index is updated.
+                               p.scav.index.alloc(ci, s.N)
+                       }
                 }
  
                 // Update heap metadata for the allocRange calls above.
@@ -1070,8 +1067,6 @@ func FreePageAlloc(pp *PageAlloc) {
                 for l := 0; l < summaryLevels; l++ {
                         sysFreeOS(unsafe.Pointer(&p.summary[l][0]), uintptr(cap(p.summary[l]))*pallocSumBytes)
                 }
-               // Only necessary on 64-bit. This is a global on 32-bit.
-               sysFreeOS(unsafe.Pointer(&p.scav.index.chunks[0]), uintptr(cap(p.scav.index.chunks)))
         } else {
                 resSize := uintptr(0)
                 for _, s := range p.summary {
@@ -1080,6 +1075,9 @@ func FreePageAlloc(pp *PageAlloc) {
                 sysFreeOS(unsafe.Pointer(&p.summary[0][0]), alignUp(resSize, physPageSize))
         }
  
+       // Free extra data structures.
+       sysFreeOS(unsafe.Pointer(&p.scav.index.chunks[0]), uintptr(cap(p.scav.index.chunks))*unsafe.Sizeof(atomicScavChunkData{}))
+
         // Subtract back out whatever we mapped for the summaries.
         // sysUsed adds to p.sysStat and memstats.mappedReady no matter what
         // (and in anger should actually be accounted for), and there's no other
@@ -1629,23 +1627,96 @@ type ScavengeIndex struct {
  
  func NewScavengeIndex(min, max ChunkIdx) *ScavengeIndex {
         s := new(ScavengeIndex)
-       s.i.chunks = make([]atomic.Uint8, uintptr(1<<heapAddrBits/pallocChunkBytes/8))
-       s.i.min.Store(int32(min / 8))
-       s.i.max.Store(int32(max / 8))
+       // This is a bit lazy but we easily guarantee we'll be able
+       // to reference all the relevant chunks. The worst-case
+       // memory usage here is 512 MiB, but tests generally use
+       // small offsets from BaseChunkIdx, which results in ~100s
+       // of KiB in memory use.
+       //
+       // This may still be worth making better, at least by sharing
+       // this fairly large array across calls with a sync.Pool or
+       // something. Currently, when the tests are run serially,
+       // it takes around 0.5s. Not all that much, but if we have
+       // a lot of tests like this it could add up.
+       s.i.chunks = make([]atomicScavChunkData, max)
+       s.i.min.Store(uintptr(min))
+       s.i.max.Store(uintptr(max))
+       s.i.test = true
         return s
  }
  
-func (s *ScavengeIndex) Find() (ChunkIdx, uint) {
-       ci, off := s.i.find()
+func (s *ScavengeIndex) Find(force bool) (ChunkIdx, uint) {
+       ci, off := s.i.find(force)
         return ChunkIdx(ci), off
  }
  
-func (s *ScavengeIndex) Mark(base, limit uintptr) {
-       s.i.mark(base, limit)
+func (s *ScavengeIndex) AllocRange(base, limit uintptr) {
+       sc, ec := chunkIndex(base), chunkIndex(limit-1)
+       si, ei := chunkPageIndex(base), chunkPageIndex(limit-1)
+
+       if sc == ec {
+               // The range doesn't cross any chunk boundaries.
+               s.i.alloc(sc, ei+1-si)
+       } else {
+               // The range crosses at least one chunk boundary.
+               s.i.alloc(sc, pallocChunkPages-si)
+               for c := sc + 1; c < ec; c++ {
+                       s.i.alloc(c, pallocChunkPages)
+               }
+               s.i.alloc(ec, ei+1)
+       }
+}
+
+func (s *ScavengeIndex) FreeRange(base, limit uintptr) {
+       sc, ec := chunkIndex(base), chunkIndex(limit-1)
+       si, ei := chunkPageIndex(base), chunkPageIndex(limit-1)
+
+       if sc == ec {
+               // The range doesn't cross any chunk boundaries.
+               s.i.free(sc, si, ei+1-si)
+       } else {
+               // The range crosses at least one chunk boundary.
+               s.i.free(sc, si, pallocChunkPages-si)
+               for c := sc + 1; c < ec; c++ {
+                       s.i.free(c, 0, pallocChunkPages)
+               }
+               s.i.free(ec, 0, ei+1)
+       }
+}
+
+func (s *ScavengeIndex) ResetSearchAddrs() {
+       for _, a := range []*atomicOffAddr{&s.i.searchAddrBg, &s.i.searchAddrForce} {
+               addr, marked := a.Load()
+               if marked {
+                       a.StoreUnmark(addr, addr)
+               }
+               a.Clear()
+       }
+       s.i.freeHWM = minOffAddr
+}
+
+func (s *ScavengeIndex) NextGen() {
+       s.i.nextGen()
+}
+
+func (s *ScavengeIndex) SetEmpty(ci ChunkIdx) {
+       s.i.setEmpty(chunkIdx(ci))
  }
  
-func (s *ScavengeIndex) Clear(ci ChunkIdx) {
-       s.i.clear(chunkIdx(ci))
+func (s *ScavengeIndex) SetNoHugePage(ci ChunkIdx) bool {
+       return s.i.setNoHugePage(chunkIdx(ci))
+}
+
+func CheckPackScavChunkData(gen uint32, inUse, lastInUse uint16, flags uint8) bool {
+       sc0 := scavChunkData{
+               gen:            gen,
+               inUse:          inUse,
+               lastInUse:      lastInUse,
+               scavChunkFlags: scavChunkFlags(flags),
+       }
+       scp := sc0.pack()
+       sc1 := unpackScavChunkData(scp)
+       return sc0 == sc1
  }
  
  const GTrackingPeriod = gTrackingPeriod
diff --git a/src/runtime/mem_linux.go b/src/runtime/mem_linux.go

index 96e890eedbf14fa311ea023697482b69935414b4..31815fb421941e5d10ec62f5e00c7609b4fa4d93 100644 (file)
--- a/src/runtime/mem_linux.go
+++ b/src/runtime/mem_linux.go
@@ -37,64 +37,6 @@ func sysAllocOS(n uintptr) unsafe.Pointer {
  var adviseUnused = uint32(_MADV_FREE)
  
  func sysUnusedOS(v unsafe.Pointer, n uintptr) {
-       // By default, Linux's "transparent huge page" support will
-       // merge pages into a huge page if there's even a single
-       // present regular page, undoing the effects of madvise(adviseUnused)
-       // below. On amd64, that means khugepaged can turn a single
-       // 4KB page to 2MB, bloating the process's RSS by as much as
-       // 512X. (See issue #8832 and Linux kernel bug
-       // https://bugzilla.kernel.org/show_bug.cgi?id=93111)
-       //
-       // To work around this, we explicitly disable transparent huge
-       // pages when we release pages of the heap. However, we have
-       // to do this carefully because changing this flag tends to
-       // split the VMA (memory mapping) containing v in to three
-       // VMAs in order to track the different values of the
-       // MADV_NOHUGEPAGE flag in the different regions. There's a
-       // default limit of 65530 VMAs per address space (sysctl
-       // vm.max_map_count), so we must be careful not to create too
-       // many VMAs (see issue #12233).
-       //
-       // Since huge pages are huge, there's little use in adjusting
-       // the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
-       // exploding the number of VMAs by only adjusting the
-       // MADV_NOHUGEPAGE flag on a large granularity. This still
-       // gets most of the benefit of huge pages while keeping the
-       // number of VMAs under control. With hugePageSize = 2MB, even
-       // a pessimal heap can reach 128GB before running out of VMAs.
-       if physHugePageSize != 0 {
-               // If it's a large allocation, we want to leave huge
-               // pages enabled. Hence, we only adjust the huge page
-               // flag on the huge pages containing v and v+n-1, and
-               // only if those aren't aligned.
-               var head, tail uintptr
-               if uintptr(v)&(physHugePageSize-1) != 0 {
-                       // Compute huge page containing v.
-                       head = alignDown(uintptr(v), physHugePageSize)
-               }
-               if (uintptr(v)+n)&(physHugePageSize-1) != 0 {
-                       // Compute huge page containing v+n-1.
-                       tail = alignDown(uintptr(v)+n-1, physHugePageSize)
-               }
-
-               // Note that madvise will return EINVAL if the flag is
-               // already set, which is quite likely. We ignore
-               // errors.
-               if head != 0 && head+physHugePageSize == tail {
-                       // head and tail are different but adjacent,
-                       // so do this in one call.
-                       madvise(unsafe.Pointer(head), 2*physHugePageSize, _MADV_NOHUGEPAGE)
-               } else {
-                       // Advise the huge pages containing v and v+n-1.
-                       if head != 0 {
-                               madvise(unsafe.Pointer(head), physHugePageSize, _MADV_NOHUGEPAGE)
-                       }
-                       if tail != 0 && tail != head {
-                               madvise(unsafe.Pointer(tail), physHugePageSize, _MADV_NOHUGEPAGE)
-                       }
-               }
-       }
-
         if uintptr(v)&(physPageSize-1) != 0 || n&(physPageSize-1) != 0 {
                 // madvise will round this to any physical page
                 // *covered* by this range, so an unaligned madvise
@@ -133,19 +75,7 @@ func sysUsedOS(v unsafe.Pointer, n uintptr) {
                         throw("runtime: cannot remap pages in address space")
                 }
                 return
-
-               // Don't do the sysHugePage optimization in hard decommit mode.
-               // We're breaking up pages everywhere, there's no point.
         }
-       // Partially undo the NOHUGEPAGE marks from sysUnused
-       // for whole huge pages between v and v+n. This may
-       // leave huge pages off at the end points v and v+n
-       // even though allocations may cover these entire huge
-       // pages. We could detect this and undo NOHUGEPAGE on
-       // the end points as well, but it's probably not worth
-       // the cost because when neighboring allocations are
-       // freed sysUnused will just set NOHUGEPAGE again.
-       sysHugePageOS(v, n)
  }
  
  func sysHugePageOS(v unsafe.Pointer, n uintptr) {
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go

index bb56ab80635d8ed179f220b4dfdc18d0524c4990..d2bf3d2d2ee2dd947fe37db141ce21d9320e6b43 100644 (file)
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -1069,6 +1069,12 @@ func gcMarkTermination() {
         injectglist(&work.sweepWaiters.list)
         unlock(&work.sweepWaiters.lock)
  
+       // Increment the scavenge generation now.
+       //
+       // This moment represents peak heap in use because we're
+       // about to start sweeping.
+       mheap_.pages.scav.index.nextGen()
+
         // Release the CPU limiter.
         gcCPULimiter.finishGCTransition(now)
  
diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go

index e0c04ffbc437931e13751bf94c108250d509b0c5..5976ab49ccea3dad8040fbb2b1b6c37c81a510d9 100644 (file)
--- a/src/runtime/mgcscavenge.go
+++ b/src/runtime/mgcscavenge.go
@@ -9,12 +9,14 @@
  // fragmentation and reduce the RSS of Go applications.
  //
  // Scavenging in Go happens on two fronts: there's the background
-// (asynchronous) scavenger and the heap-growth (synchronous) scavenger.
+// (asynchronous) scavenger and the allocation-time (synchronous) scavenger.
  //
  // The former happens on a goroutine much like the background sweeper which is
  // soft-capped at using scavengePercent of the mutator's time, based on
-// order-of-magnitude estimates of the costs of scavenging. The background
-// scavenger's primary goal is to bring the estimated heap RSS of the
+// order-of-magnitude estimates of the costs of scavenging. The latter happens
+// when allocating pages from the heap.
+//
+// The scavenger's primary goal is to bring the estimated heap RSS of the
  // application down to a goal.
  //
  // Before we consider what this looks like, we need to split the world into two
@@ -61,11 +63,30 @@
  //
  // The goals are updated after each GC.
  //
-// The synchronous heap-growth scavenging happens whenever the heap grows in
-// size, for some definition of heap-growth. The intuition behind this is that
-// the application had to grow the heap because existing fragments were
-// not sufficiently large to satisfy a page-level memory allocation, so we
-// scavenge those fragments eagerly to offset the growth in RSS that results.
+// Synchronous scavenging happens for one of two reasons: if an allocation would
+// exceed the memory limit or whenever the heap grows in size, for some
+// definition of heap-growth. The intuition behind this second reason is that the
+// application had to grow the heap because existing fragments were not sufficiently
+// large to satisfy a page-level memory allocation, so we scavenge those fragments
+// eagerly to offset the growth in RSS that results.
+//
+// Lastly, not all pages are available for scavenging at all times and in all cases.
+// The background scavenger and heap-growth scavenger only release memory in chunks
+// that have not been densely-allocated for at least 1 full GC cycle. The reason
+// behind this is likelihood of reuse: the Go heap is allocated in a first-fit order
+// and by the end of the GC mark phase, the heap tends to be densely packed. Releasing
+// memory in these densely packed chunks while they're being packed is counter-productive,
+// and worse, it breaks up huge pages on systems that support them. The scavenger (invoked
+// during memory allocation) further ensures that chunks it identifies as "dense" are
+// immediately eligible for being backed by huge pages. Note that for the most part these
+// density heuristics are best-effort. It's totally possible (but unlikely)
+// that a chunk that just became dense is scavenged in the case of a race between memory
+// allocation and scavenging.
+//
+// When synchronously scavenging for the memory limit or for debug.FreeOSMemory, these
+// "dense" packing heuristics are ignored (in other words, scavenging is "forced") because
+// in these scenarios returning memory to the OS is more important than keeping CPU
+// overheads low.
  
  package runtime
  
@@ -118,6 +139,11 @@ const (
         // This ratio is used as part of multiplicative factor to help the scavenger account
         // for the additional costs of using scavenged memory in its pacing.
         scavengeCostRatio = 0.7 * (goos.IsDarwin + goos.IsIos)
+
+       // scavChunkHiOccFrac indicates the fraction of pages that need to be allocated
+       // in the chunk in a single GC cycle for it to be considered high density.
+       scavChunkHiOccFrac  = 0.96875
+       scavChunkHiOccPages = uint16(scavChunkHiOccFrac * pallocChunkPages)
  )
  
  // heapRetained returns an estimate of the current heap RSS.
@@ -366,7 +392,7 @@ func (s *scavengerState) init() {
         if s.scavenge == nil {
                 s.scavenge = func(n uintptr) (uintptr, int64) {
                         start := nanotime()
-                       r := mheap_.pages.scavenge(n, nil)
+                       r := mheap_.pages.scavenge(n, nil, false)
                         end := nanotime()
                         if start >= end {
                                 return r, 0
@@ -639,17 +665,17 @@ func bgscavenge(c chan int) {
  
  // scavenge scavenges nbytes worth of free pages, starting with the
  // highest address first. Successive calls continue from where it left
-// off until the heap is exhausted. Call scavengeStartGen to bring it
-// back to the top of the heap.
+// off until the heap is exhausted. force makes all memory available to
+// scavenge, ignoring huge page heuristics.
  //
  // Returns the amount of memory scavenged in bytes.
  //
  // scavenge always tries to scavenge nbytes worth of memory, and will
  // only fail to do so if the heap is exhausted for now.
-func (p *pageAlloc) scavenge(nbytes uintptr, shouldStop func() bool) uintptr {
+func (p *pageAlloc) scavenge(nbytes uintptr, shouldStop func() bool, force bool) uintptr {
         released := uintptr(0)
         for released < nbytes {
-               ci, pageIdx := p.scav.index.find()
+               ci, pageIdx := p.scav.index.find(force)
                 if ci == 0 {
                         break
                 }
@@ -737,10 +763,14 @@ func (p *pageAlloc) scavengeOne(ci chunkIdx, searchIdx uint, max uintptr) uintpt
  
                         // Mark the range we're about to scavenge as allocated, because
                         // we don't want any allocating goroutines to grab it while
-                       // the scavenging is in progress.
-                       if scav := p.allocRange(addr, uintptr(npages)); scav != 0 {
-                               throw("double scavenge")
-                       }
+                       // the scavenging is in progress. Be careful here -- just do the
+                       // bare minimum to avoid stepping on our own scavenging stats.
+                       p.chunkOf(ci).allocRange(base, npages)
+                       p.update(addr, uintptr(npages), true, true)
+
+                       // Grab whether the chunk is hugepage backed and if it is,
+                       // clear it. We're about to break up this huge page.
+                       shouldNoHugePage := p.scav.index.setNoHugePage(ci)
  
                         // With that done, it's safe to unlock.
                         unlock(p.mheapLock)
@@ -748,13 +778,16 @@ func (p *pageAlloc) scavengeOne(ci chunkIdx, searchIdx uint, max uintptr) uintpt
                         if !p.test {
                                 pageTraceScav(getg().m.p.ptr(), 0, addr, uintptr(npages))
  
-                               // Only perform the actual scavenging if we're not in a test.
+                               // Only perform sys* operations if we're not in a test.
                                 // It's dangerous to do so otherwise.
+                               if shouldNoHugePage {
+                                       sysNoHugePage(unsafe.Pointer(chunkBase(ci)), pallocChunkBytes)
+                               }
                                 sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)
  
                                 // Update global accounting only when not in test, otherwise
                                 // the runtime's accounting will be wrong.
-                               nbytes := int64(npages) * pageSize
+                               nbytes := int64(npages * pageSize)
                                 gcController.heapReleased.add(nbytes)
                                 gcController.heapFree.add(-nbytes)
  
@@ -767,7 +800,11 @@ func (p *pageAlloc) scavengeOne(ci chunkIdx, searchIdx uint, max uintptr) uintpt
                         // Relock the heap, because now we need to make these pages
                         // available allocation. Free them back to the page allocator.
                         lock(p.mheapLock)
-                       p.free(addr, uintptr(npages), true)
+                       if b := (offAddr{addr}); b.lessThan(p.searchAddr) {
+                               p.searchAddr = b
+                       }
+                       p.chunkOf(ci).free(base, npages)
+                       p.update(addr, uintptr(npages), true, false)
  
                         // Mark the range as scavenged.
                         p.chunkOf(ci).scavenged.setRange(base, npages)
@@ -777,7 +814,7 @@ func (p *pageAlloc) scavengeOne(ci chunkIdx, searchIdx uint, max uintptr) uintpt
                 }
         }
         // Mark this chunk as having no free pages.
-       p.scav.index.clear(ci)
+       p.scav.index.setEmpty(ci)
         unlock(p.mheapLock)
  
         return 0
@@ -965,27 +1002,33 @@ func (m *pallocData) findScavengeCandidate(searchIdx uint, min, max uintptr) (ui
  // scavengeIndex is a structure for efficiently managing which pageAlloc chunks have
  // memory available to scavenge.
  type scavengeIndex struct {
-       // chunks is a bitmap representing the entire address space. Each bit represents
-       // a single chunk, and a 1 value indicates the presence of pages available for
-       // scavenging. Updates to the bitmap are serialized by the pageAlloc lock.
+       // chunks is a scavChunkData-per-chunk structure that indicates the presence of pages
+       // available for scavenging. Updates to the index are serialized by the pageAlloc lock.
         //
-       // The underlying storage of chunks is platform dependent and may not even be
-       // totally mapped read/write. min and max reflect the extent that is safe to access.
-       // min is inclusive, max is exclusive.
+       // It tracks chunk occupancy and a generation counter per chunk. If a chunk's occupancy
+       // never exceeds scavChunkHiOccPages over the course of a single GC cycle, the chunk
+       // becomes eligible for scavenging on the next cycle. If a chunk ever hits this density
+       // threshold it immediately becomes unavailable for scavenging in the current cycle as
+       // well as the next.
         //
-       // searchAddr is the maximum address (in the offset address space, so we have a linear
+       // For a chunk size of 4 MiB this structure will only use 2 MiB for a 1 TiB contiguous heap.
+       chunks   []atomicScavChunkData
+       min, max atomic.Uintptr
+
+       // searchAddr* is the maximum address (in the offset address space, so we have a linear
         // view of the address space; see mranges.go:offAddr) containing memory available to
         // scavenge. It is a hint to the find operation to avoid O(n^2) behavior in repeated lookups.
         //
-       // searchAddr is always inclusive and should be the base address of the highest runtime
+       // searchAddr* is always inclusive and should be the base address of the highest runtime
         // page available for scavenging.
         //
-       // searchAddr is managed by both find and mark.
+       // searchAddrForce is managed by find and free.
+       // searchAddrBg is managed by find and nextGen.
         //
-       // Normally, find monotonically decreases searchAddr as it finds no more free pages to
+       // Normally, find monotonically decreases searchAddr* as it finds no more free pages to
         // scavenge. However, mark, when marking a new chunk at an index greater than the current
         // searchAddr, sets searchAddr to the *negative* index into chunks of that page. The trick here
-       // is that concurrent calls to find will fail to monotonically decrease searchAddr, and so they
+       // is that concurrent calls to find will fail to monotonically decrease searchAddr*, and so they
         // won't barge over new memory becoming available to scavenge. Furthermore, this ensures
         // that some future caller of find *must* observe the new high index. That caller
         // (or any other racing with it), then makes searchAddr positive before continuing, bringing
@@ -994,47 +1037,52 @@ type scavengeIndex struct {
         // A pageAlloc lock serializes updates between min, max, and searchAddr, so abs(searchAddr)
         // is always guaranteed to be >= min and < max (converted to heap addresses).
         //
-       // TODO(mknyszek): Ideally we would use something bigger than a uint8 for faster
-       // iteration like uint32, but we lack the bit twiddling intrinsics. We'd need to either
-       // copy them from math/bits or fix the fact that we can't import math/bits' code from
-       // the runtime due to compiler instrumentation.
-       searchAddr atomicOffAddr
-       chunks     []atomic.Uint8
-       minHeapIdx atomic.Int32
-       min, max   atomic.Int32
+       // searchAddrBg is increased only on each new generation and is mainly used by the
+       // background scavenger and heap-growth scavenging. searchAddrForce is increased continuously
+       // as memory gets freed and is mainly used by eager memory reclaim such as debug.FreeOSMemory
+       // and scavenging to maintain the memory limit.
+       searchAddrBg    atomicOffAddr
+       searchAddrForce atomicOffAddr
+
+       // freeHWM is the highest address (in offset address space) that was freed
+       // this generation.
+       freeHWM offAddr
+
+       // Generation counter. Updated by nextGen at the end of each mark phase.
+       gen uint32
+
+       // test indicates whether or not we're in a test.
+       test bool
  }
  
  // find returns the highest chunk index that may contain pages available to scavenge.
  // It also returns an offset to start searching in the highest chunk.
-func (s *scavengeIndex) find() (chunkIdx, uint) {
-       searchAddr, marked := s.searchAddr.Load()
+func (s *scavengeIndex) find(force bool) (chunkIdx, uint) {
+       cursor := &s.searchAddrBg
+       if force {
+               cursor = &s.searchAddrForce
+       }
+       searchAddr, marked := cursor.Load()
         if searchAddr == minOffAddr.addr() {
                 // We got a cleared search addr.
                 return 0, 0
         }
  
-       // Starting from searchAddr's chunk, and moving down to minHeapIdx,
-       // iterate until we find a chunk with pages to scavenge.
-       min := s.minHeapIdx.Load()
-       searchChunk := chunkIndex(uintptr(searchAddr))
-       start := int32(searchChunk / 8)
+       // Starting from searchAddr's chunk, iterate until we find a chunk with pages to scavenge.
+       gen := s.gen
+       min := chunkIdx(s.min.Load())
+       start := chunkIndex(uintptr(searchAddr))
         for i := start; i >= min; i-- {
-               // Skip over irrelevant address space.
-               chunks := s.chunks[i].Load()
-               if chunks == 0 {
+               // Skip over chunks that aren't eligible for scavenging.
+               if !s.chunks[i].load().shouldScavenge(gen, force) {
                         continue
                 }
-               // Note that we can't have 8 leading zeroes here because
-               // we necessarily skipped that case. So, what's left is
-               // an index. If there are no zeroes, we want the 7th
-               // index, if 1 zero, the 6th, and so on.
-               n := 7 - sys.LeadingZeros8(chunks)
-               ci := chunkIdx(uint(i)*8 + uint(n))
-               if searchChunk == ci {
-                       return ci, chunkPageIndex(uintptr(searchAddr))
+               // We're still scavenging this chunk.
+               if i == start {
+                       return i, chunkPageIndex(uintptr(searchAddr))
                 }
                 // Try to reduce searchAddr to newSearchAddr.
-               newSearchAddr := chunkBase(ci) + pallocChunkBytes - pageSize
+               newSearchAddr := chunkBase(i) + pallocChunkBytes - pageSize
                 if marked {
                         // Attempt to be the first one to decrease the searchAddr
                         // after an increase. If we fail, that means there was another
@@ -1042,78 +1090,273 @@ func (s *scavengeIndex) find() (chunkIdx, uint) {
                         // it doesn't matter. We may lose some performance having an
                         // incorrect search address, but it's far more important that
                         // we don't miss updates.
-                       s.searchAddr.StoreUnmark(searchAddr, newSearchAddr)
+                       cursor.StoreUnmark(searchAddr, newSearchAddr)
                 } else {
                         // Decrease searchAddr.
-                       s.searchAddr.StoreMin(newSearchAddr)
+                       cursor.StoreMin(newSearchAddr)
                 }
-               return ci, pallocChunkPages - 1
+               return i, pallocChunkPages - 1
         }
         // Clear searchAddr, because we've exhausted the heap.
-       s.searchAddr.Clear()
+       cursor.Clear()
         return 0, 0
  }
  
-// mark sets the inclusive range of chunks between indices start and end as
-// containing pages available to scavenge.
+// alloc updates metadata for chunk at index ci with the fact that
+// an allocation of npages occurred.
  //
-// Must be serialized with other mark, markRange, and clear calls.
-func (s *scavengeIndex) mark(base, limit uintptr) {
-       start, end := chunkIndex(base), chunkIndex(limit-pageSize)
-       if start == end {
-               // Within a chunk.
-               mask := uint8(1 << (start % 8))
-               s.chunks[start/8].Or(mask)
-       } else if start/8 == end/8 {
-               // Within the same byte in the index.
-               mask := uint8(uint16(1<<(end-start+1))-1) << (start % 8)
-               s.chunks[start/8].Or(mask)
-       } else {
-               // Crosses multiple bytes in the index.
-               startAligned := chunkIdx(alignUp(uintptr(start), 8))
-               endAligned := chunkIdx(alignDown(uintptr(end), 8))
-
-               // Do the end of the first byte first.
-               if width := startAligned - start; width > 0 {
-                       mask := uint8(uint16(1<<width)-1) << (start % 8)
-                       s.chunks[start/8].Or(mask)
-               }
-               // Do the middle aligned sections that take up a whole
-               // byte.
-               for ci := startAligned; ci < endAligned; ci += 8 {
-                       s.chunks[ci/8].Store(^uint8(0))
-               }
-               // Do the end of the last byte.
-               //
-               // This width check doesn't match the one above
-               // for start because aligning down into the endAligned
-               // block means we always have at least one chunk in this
-               // block (note that end is *inclusive*). This also means
-               // that if end == endAligned+n, then what we really want
-               // is to fill n+1 chunks, i.e. width n+1. By induction,
-               // this is true for all n.
-               if width := end - endAligned + 1; width > 0 {
-                       mask := uint8(uint16(1<<width) - 1)
-                       s.chunks[end/8].Or(mask)
+// alloc may only run concurrently with find.
+func (s *scavengeIndex) alloc(ci chunkIdx, npages uint) {
+       sc := s.chunks[ci].load()
+       sc.alloc(npages, s.gen)
+       if !sc.isHugePage() && sc.inUse > scavChunkHiOccPages {
+               // Mark dense chunks as specifically backed by huge pages.
+               sc.setHugePage()
+               if !s.test {
+                       sysHugePage(unsafe.Pointer(chunkBase(ci)), pallocChunkBytes)
                 }
         }
-       newSearchAddr := limit - pageSize
-       searchAddr, _ := s.searchAddr.Load()
-       // N.B. Because mark is serialized, it's not necessary to do a
-       // full CAS here. mark only ever increases searchAddr, while
+       s.chunks[ci].store(sc)
+}
+
+// free updates metadata for chunk at index ci with the fact that
+// a free of npages occurred.
+//
+// free may only run concurrently with find.
+func (s *scavengeIndex) free(ci chunkIdx, page, npages uint) {
+       sc := s.chunks[ci].load()
+       sc.free(npages, s.gen)
+       s.chunks[ci].store(sc)
+
+       // Update scavenge search addresses.
+       addr := chunkBase(ci) + uintptr(page+npages-1)*pageSize
+       if s.freeHWM.lessThan(offAddr{addr}) {
+               s.freeHWM = offAddr{addr}
+       }
+       // N.B. Because free is serialized, it's not necessary to do a
+       // full CAS here. free only ever increases searchAddr, while
         // find only ever decreases it. Since we only ever race with
         // decreases, even if the value we loaded is stale, the actual
         // value will never be larger.
-       if (offAddr{searchAddr}).lessThan(offAddr{newSearchAddr}) {
-               s.searchAddr.StoreMarked(newSearchAddr)
+       searchAddr, _ := s.searchAddrForce.Load()
+       if (offAddr{searchAddr}).lessThan(offAddr{addr}) {
+               s.searchAddrForce.StoreMarked(addr)
+       }
+}
+
+// nextGen moves the scavenger forward one generation. Must be called
+// once per GC cycle, but may be called more often to force more memory
+// to be released.
+//
+// nextGen may only run concurrently with find.
+func (s *scavengeIndex) nextGen() {
+       s.gen++
+       searchAddr, _ := s.searchAddrBg.Load()
+       if (offAddr{searchAddr}).lessThan(s.freeHWM) {
+               s.searchAddrBg.StoreMarked(s.freeHWM.addr())
         }
+       s.freeHWM = minOffAddr
+}
+
+// setEmpty marks that the scavenger has finished looking at ci
+// for now to prevent the scavenger from getting stuck looking
+// at the same chunk.
+//
+// setEmpty may only run concurrently with find.
+func (s *scavengeIndex) setEmpty(ci chunkIdx) {
+       val := s.chunks[ci].load()
+       val.setEmpty()
+       s.chunks[ci].store(val)
  }
  
-// clear sets the chunk at index ci as not containing pages available to scavenge.
+// setNoHugePage updates the backed-by-hugepages status of a particular chunk.
+// Returns true if the status changed, i.e. the chunk was previously marked as backed by huge pages.
  //
-// Must be serialized with other mark, markRange, and clear calls.
-func (s *scavengeIndex) clear(ci chunkIdx) {
-       s.chunks[ci/8].And(^uint8(1 << (ci % 8)))
+// setNoHugePage may only run concurrently with find.
+func (s *scavengeIndex) setNoHugePage(ci chunkIdx) bool {
+       val := s.chunks[ci].load()
+       if !val.isHugePage() {
+               return false
+       }
+       val.setNoHugePage()
+       s.chunks[ci].store(val)
+       return true
+}
+
+// atomicScavChunkData is an atomic wrapper around a scavChunkData
+// that stores it in its packed form.
+type atomicScavChunkData struct {
+       value atomic.Uint64
+}
+
+// load loads and unpacks a scavChunkData.
+func (sc *atomicScavChunkData) load() scavChunkData {
+       return unpackScavChunkData(sc.value.Load())
+}
+
+// store packs and writes a new scavChunkData. store must be serialized
+// with other calls to store.
+func (sc *atomicScavChunkData) store(ssc scavChunkData) {
+       sc.value.Store(ssc.pack())
+}
+
+// scavChunkData tracks information about a palloc chunk for
+// scavenging. It packs well into 64 bits.
+//
+// The zero value always represents a valid newly-grown chunk.
+type scavChunkData struct {
+       // inUse indicates how many pages in this chunk are currently
+       // allocated.
+       //
+       // Only the first 10 bits are used.
+       inUse uint16
+
+       // lastInUse indicates how many pages in this chunk were allocated
+       // when we transitioned from gen-1 to gen.
+       //
+       // Only the first 10 bits are used.
+       lastInUse uint16
+
+       // gen is the generation counter from a scavengeIndex from the
+       // last time this scavChunkData was updated.
+       gen uint32
+
+       // scavChunkFlags represents additional flags.
+       //
+       // Note: only 6 bits are available.
+       scavChunkFlags
+}
+
+// unpackScavChunkData unpacks a scavChunkData from a uint64.
+func unpackScavChunkData(sc uint64) scavChunkData {
+       return scavChunkData{
+               inUse:          uint16(sc),
+               lastInUse:      uint16(sc>>16) & scavChunkInUseMask,
+               gen:            uint32(sc >> 32),
+               scavChunkFlags: scavChunkFlags(uint8(sc>>(16+logScavChunkInUseMax)) & scavChunkFlagsMask),
+       }
+}
+
+// pack returns sc packed into a uint64.
+func (sc scavChunkData) pack() uint64 {
+       return uint64(sc.inUse) |
+               (uint64(sc.lastInUse) << 16) |
+               (uint64(sc.scavChunkFlags) << (16 + logScavChunkInUseMax)) |
+               (uint64(sc.gen) << 32)
+}
+
+const (
+       // scavChunkHasFree indicates whether the chunk has anything left to
+       // scavenge. This is the opposite of "empty," used elsewhere in this
+       // file. The reason we say "HasFree" here is so the zero value is
+       // correct for a newly-grown chunk. (New memory is scavenged.)
+       scavChunkHasFree scavChunkFlags = 1 << iota
+       // scavChunkNoHugePage indicates whether this chunk has been marked
+       // sysNoHugePage. If not set, it means the chunk is marked sysHugePage.
+       // The negative here is unfortunate, but necessary to make it so that
+       // the zero value of scavChunkData accurately represents the state of
+       // a newly-grown chunk. (New memory is marked as backed by huge pages.)
+       scavChunkNoHugePage
+
+       // scavChunkMaxFlags is the maximum number of flags we can have, given how
+       // a scavChunkData is packed into 8 bytes.
+       scavChunkMaxFlags  = 6
+       scavChunkFlagsMask = (1 << scavChunkMaxFlags) - 1
+
+       // logScavChunkInUseMax is the number of bits needed to represent the number
+       // of pages allocated in a single chunk. This is 1 more than log2 of the
+       // number of pages in the chunk because we need to represent a fully-allocated
+       // chunk.
+       logScavChunkInUseMax = logPallocChunkPages + 1
+       scavChunkInUseMask   = (1 << logScavChunkInUseMax) - 1
+)
+
+// scavChunkFlags is a set of bit-flags for the scavenger for each palloc chunk.
+type scavChunkFlags uint8
+
+// isEmpty returns true if the hasFree flag is unset.
+func (sc *scavChunkFlags) isEmpty() bool {
+       return (*sc)&scavChunkHasFree == 0
+}
+
+// setEmpty clears the hasFree flag.
+func (sc *scavChunkFlags) setEmpty() {
+       *sc &^= scavChunkHasFree
+}
+
+// setNonEmpty sets the hasFree flag.
+func (sc *scavChunkFlags) setNonEmpty() {
+       *sc |= scavChunkHasFree
+}
+
+// isHugePage returns false if the noHugePage flag is set.
+func (sc *scavChunkFlags) isHugePage() bool {
+       return (*sc)&scavChunkNoHugePage == 0
+}
+
+// setHugePage clears the noHugePage flag.
+func (sc *scavChunkFlags) setHugePage() {
+       *sc &^= scavChunkNoHugePage
+}
+
+// setNoHugePage sets the noHugePage flag.
+func (sc *scavChunkFlags) setNoHugePage() {
+       *sc |= scavChunkNoHugePage
+}
+
+// shouldScavenge returns true if the corresponding chunk should be interrogated
+// by the scavenger.
+func (sc scavChunkData) shouldScavenge(currGen uint32, force bool) bool {
+       if sc.isEmpty() {
+               // Nothing to scavenge.
+               return false
+       }
+       if force {
+               // We're forcing the memory to be scavenged.
+               return true
+       }
+       if sc.gen == currGen {
+               // In the current generation, if either the current or last generation
+               // is dense, then skip scavenging. Inverting that, we should scavenge
+               // if both the current and last generation were not dense.
+               return sc.inUse < scavChunkHiOccPages && sc.lastInUse < scavChunkHiOccPages
+       }
+       // If we're one or more generations ahead, we know inUse represents the current
+       // state of the chunk, since otherwise it would've been updated already.
+       return sc.inUse < scavChunkHiOccPages
+}
+
+// alloc updates sc given that npages were allocated in the corresponding chunk.
+func (sc *scavChunkData) alloc(npages uint, newGen uint32) {
+       if uint(sc.inUse)+npages > pallocChunkPages {
+               print("runtime: inUse=", sc.inUse, " npages=", npages, "\n")
+               throw("too many pages allocated in chunk?")
+       }
+       if sc.gen != newGen {
+               sc.lastInUse = sc.inUse
+               sc.gen = newGen
+       }
+       sc.inUse += uint16(npages)
+       if sc.inUse == pallocChunkPages {
+               // There's nothing for the scavenger to take from here.
+               sc.setEmpty()
+       }
+}
+
+// free updates sc given that npages were freed in the corresponding chunk.
+func (sc *scavChunkData) free(npages uint, newGen uint32) {
+       if uint(sc.inUse) < npages {
+               print("runtime: inUse=", sc.inUse, " npages=", npages, "\n")
+               throw("allocated pages below zero?")
+       }
+       if sc.gen != newGen {
+               sc.lastInUse = sc.inUse
+               sc.gen = newGen
+       }
+       sc.inUse -= uint16(npages)
+       // The scavenger can no longer be done with this chunk now that
+       // new memory has been freed into it.
+       sc.setNonEmpty()
  }
  
  type piController struct {
diff --git a/src/runtime/mgcscavenge_test.go b/src/runtime/mgcscavenge_test.go

index c436ff060fa7434d1a48cc54e978a2d9f7160a29..d7624d6d725581e8379622ea95e748f3e2d079cd 100644 (file)
--- a/src/runtime/mgcscavenge_test.go
+++ b/src/runtime/mgcscavenge_test.go
@@ -564,149 +564,278 @@ func TestScavenger(t *testing.T) {
  }
  
  func TestScavengeIndex(t *testing.T) {
-       setup := func(t *testing.T) (func(ChunkIdx, uint), func(uintptr, uintptr)) {
+       // This test suite tests the scavengeIndex data structure.
+
+       // markFunc is a function that makes the address range [base, limit)
+       // available for scavenging in a test index.
+       type markFunc func(base, limit uintptr)
+
+       // findFunc is a function that searches for the next available page
+       // to scavenge in the index. It asserts that the page is found in
+       // chunk "ci" at page "offset."
+       type findFunc func(ci ChunkIdx, offset uint)
+
+       // The structure of the tests below is as follows:
+       //
+       // setup creates a fake scavengeIndex that can be mutated and queried by
+       // the functions it returns. Those functions capture the testing.T that
+       // setup is called with, so they're bound to the subtest they're created in.
+       //
+       // Tests are then organized into test cases which mark some pages as
+       // scavenge-able then try to find them. Tests expect that the initial
+       // state of the scavengeIndex has all of the chunks as dense in the last
+       // generation and empty to the scavenger.
+       //
+       // There are a few additional tests that interleave mark and find operations,
+       // so they're defined separately, but use the same infrastructure.
+       setup := func(t *testing.T, force bool) (mark markFunc, find findFunc, nextGen func()) {
                 t.Helper()
  
                 // Pick some reasonable bounds. We don't need a huge range just to test.
                 si := NewScavengeIndex(BaseChunkIdx, BaseChunkIdx+64)
-               find := func(want ChunkIdx, wantOffset uint) {
+
+               // Initialize all the chunks as dense and empty.
+               //
+               // Also, reset search addresses so that we can get page offsets.
+               si.AllocRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+64, 0))
+               si.NextGen()
+               si.FreeRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+64, 0))
+               for ci := BaseChunkIdx; ci < BaseChunkIdx+64; ci++ {
+                       si.SetEmpty(ci)
+               }
+               si.ResetSearchAddrs()
+
+               // Create and return test functions.
+               mark = func(base, limit uintptr) {
                         t.Helper()
  
-                       got, gotOffset := si.Find()
+                       si.AllocRange(base, limit)
+                       si.FreeRange(base, limit)
+               }
+               find = func(want ChunkIdx, wantOffset uint) {
+                       t.Helper()
+
+                       got, gotOffset := si.Find(force)
                         if want != got {
                                 t.Errorf("find: wanted chunk index %d, got %d", want, got)
                         }
-                       if want != got {
+                       if wantOffset != gotOffset {
                                 t.Errorf("find: wanted page offset %d, got %d", wantOffset, gotOffset)
                         }
                         if t.Failed() {
                                 t.FailNow()
                         }
-                       si.Clear(got)
+                       si.SetEmpty(got)
                 }
-               mark := func(base, limit uintptr) {
+               nextGen = func() {
                         t.Helper()
  
-                       si.Mark(base, limit)
+                       si.NextGen()
                 }
-               return find, mark
+               return
         }
-       t.Run("Uninitialized", func(t *testing.T) {
-               find, _ := setup(t)
-               find(0, 0)
-       })
-       t.Run("OnePage", func(t *testing.T) {
-               find, mark := setup(t)
-               mark(PageBase(BaseChunkIdx, 3), PageBase(BaseChunkIdx, 4))
-               find(BaseChunkIdx, 3)
-               find(0, 0)
-       })
-       t.Run("FirstPage", func(t *testing.T) {
-               find, mark := setup(t)
-               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx, 1))
-               find(BaseChunkIdx, 0)
-               find(0, 0)
-       })
-       t.Run("SeveralPages", func(t *testing.T) {
-               find, mark := setup(t)
-               mark(PageBase(BaseChunkIdx, 9), PageBase(BaseChunkIdx, 14))
-               find(BaseChunkIdx, 13)
-               find(0, 0)
-       })
-       t.Run("WholeChunk", func(t *testing.T) {
-               find, mark := setup(t)
-               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0))
-               find(BaseChunkIdx, PallocChunkPages-1)
-               find(0, 0)
-       })
-       t.Run("LastPage", func(t *testing.T) {
-               find, mark := setup(t)
-               mark(PageBase(BaseChunkIdx, PallocChunkPages-1), PageBase(BaseChunkIdx+1, 0))
-               find(BaseChunkIdx, PallocChunkPages-1)
-               find(0, 0)
-       })
-       t.Run("TwoChunks", func(t *testing.T) {
-               find, mark := setup(t)
-               mark(PageBase(BaseChunkIdx, 128), PageBase(BaseChunkIdx+1, 128))
-               find(BaseChunkIdx+1, 127)
-               find(BaseChunkIdx, PallocChunkPages-1)
-               find(0, 0)
-       })
-       t.Run("TwoChunksOffset", func(t *testing.T) {
-               find, mark := setup(t)
-               mark(PageBase(BaseChunkIdx+7, 128), PageBase(BaseChunkIdx+8, 129))
-               find(BaseChunkIdx+8, 128)
-               find(BaseChunkIdx+7, PallocChunkPages-1)
-               find(0, 0)
-       })
-       t.Run("SevenChunksOffset", func(t *testing.T) {
-               find, mark := setup(t)
-               mark(PageBase(BaseChunkIdx+6, 11), PageBase(BaseChunkIdx+13, 15))
-               find(BaseChunkIdx+13, 14)
-               for i := BaseChunkIdx + 12; i >= BaseChunkIdx+6; i-- {
-                       find(i, PallocChunkPages-1)
-               }
-               find(0, 0)
-       })
-       t.Run("ThirtyTwoChunks", func(t *testing.T) {
-               find, mark := setup(t)
-               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0))
-               for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- {
-                       find(i, PallocChunkPages-1)
-               }
-               find(0, 0)
-       })
-       t.Run("ThirtyTwoChunksOffset", func(t *testing.T) {
-               find, mark := setup(t)
-               mark(PageBase(BaseChunkIdx+3, 0), PageBase(BaseChunkIdx+35, 0))
-               for i := BaseChunkIdx + 34; i >= BaseChunkIdx+3; i-- {
-                       find(i, PallocChunkPages-1)
-               }
-               find(0, 0)
-       })
-       t.Run("Mark", func(t *testing.T) {
-               find, mark := setup(t)
+
+       // Each of these test cases calls mark and then find once.
+       type testCase struct {
+               name string
+               mark func(markFunc)
+               find func(findFunc)
+       }
+       for _, test := range []testCase{
+               {
+                       name: "Uninitialized",
+                       mark: func(_ markFunc) {},
+                       find: func(_ findFunc) {},
+               },
+               {
+                       name: "OnePage",
+                       mark: func(mark markFunc) {
+                               mark(PageBase(BaseChunkIdx, 3), PageBase(BaseChunkIdx, 4))
+                       },
+                       find: func(find findFunc) {
+                               find(BaseChunkIdx, 3)
+                       },
+               },
+               {
+                       name: "FirstPage",
+                       mark: func(mark markFunc) {
+                               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx, 1))
+                       },
+                       find: func(find findFunc) {
+                               find(BaseChunkIdx, 0)
+                       },
+               },
+               {
+                       name: "SeveralPages",
+                       mark: func(mark markFunc) {
+                               mark(PageBase(BaseChunkIdx, 9), PageBase(BaseChunkIdx, 14))
+                       },
+                       find: func(find findFunc) {
+                               find(BaseChunkIdx, 13)
+                       },
+               },
+               {
+                       name: "WholeChunk",
+                       mark: func(mark markFunc) {
+                               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0))
+                       },
+                       find: func(find findFunc) {
+                               find(BaseChunkIdx, PallocChunkPages-1)
+                       },
+               },
+               {
+                       name: "LastPage",
+                       mark: func(mark markFunc) {
+                               mark(PageBase(BaseChunkIdx, PallocChunkPages-1), PageBase(BaseChunkIdx+1, 0))
+                       },
+                       find: func(find findFunc) {
+                               find(BaseChunkIdx, PallocChunkPages-1)
+                       },
+               },
+               {
+                       name: "TwoChunks",
+                       mark: func(mark markFunc) {
+                               mark(PageBase(BaseChunkIdx, 128), PageBase(BaseChunkIdx+1, 128))
+                       },
+                       find: func(find findFunc) {
+                               find(BaseChunkIdx+1, 127)
+                               find(BaseChunkIdx, PallocChunkPages-1)
+                       },
+               },
+               {
+                       name: "TwoChunksOffset",
+                       mark: func(mark markFunc) {
+                               mark(PageBase(BaseChunkIdx+7, 128), PageBase(BaseChunkIdx+8, 129))
+                       },
+                       find: func(find findFunc) {
+                               find(BaseChunkIdx+8, 128)
+                               find(BaseChunkIdx+7, PallocChunkPages-1)
+                       },
+               },
+               {
+                       name: "SevenChunksOffset",
+                       mark: func(mark markFunc) {
+                               mark(PageBase(BaseChunkIdx+6, 11), PageBase(BaseChunkIdx+13, 15))
+                       },
+                       find: func(find findFunc) {
+                               find(BaseChunkIdx+13, 14)
+                               for i := BaseChunkIdx + 12; i >= BaseChunkIdx+6; i-- {
+                                       find(i, PallocChunkPages-1)
+                               }
+                       },
+               },
+               {
+                       name: "ThirtyTwoChunks",
+                       mark: func(mark markFunc) {
+                               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0))
+                       },
+                       find: func(find findFunc) {
+                               for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- {
+                                       find(i, PallocChunkPages-1)
+                               }
+                       },
+               },
+               {
+                       name: "ThirtyTwoChunksOffset",
+                       mark: func(mark markFunc) {
+                               mark(PageBase(BaseChunkIdx+3, 0), PageBase(BaseChunkIdx+35, 0))
+                       },
+                       find: func(find findFunc) {
+                               for i := BaseChunkIdx + 34; i >= BaseChunkIdx+3; i-- {
+                                       find(i, PallocChunkPages-1)
+                               }
+                       },
+               },
+               {
+                       name: "Mark",
+                       mark: func(mark markFunc) {
+                               for i := BaseChunkIdx; i < BaseChunkIdx+32; i++ {
+                                       mark(PageBase(i, 0), PageBase(i+1, 0))
+                               }
+                       },
+                       find: func(find findFunc) {
+                               for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- {
+                                       find(i, PallocChunkPages-1)
+                               }
+                       },
+               },
+               {
+                       name: "MarkIdempotentOneChunk",
+                       mark: func(mark markFunc) {
+                               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0))
+                               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0))
+                       },
+                       find: func(find findFunc) {
+                               find(BaseChunkIdx, PallocChunkPages-1)
+                       },
+               },
+               {
+                       name: "MarkIdempotentThirtyTwoChunks",
+                       mark: func(mark markFunc) {
+                               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0))
+                               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0))
+                       },
+                       find: func(find findFunc) {
+                               for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- {
+                                       find(i, PallocChunkPages-1)
+                               }
+                       },
+               },
+               {
+                       name: "MarkIdempotentThirtyTwoChunksOffset",
+                       mark: func(mark markFunc) {
+                               mark(PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+31, 0))
+                               mark(PageBase(BaseChunkIdx+5, 0), PageBase(BaseChunkIdx+36, 0))
+                       },
+                       find: func(find findFunc) {
+                               for i := BaseChunkIdx + 35; i >= BaseChunkIdx+4; i-- {
+                                       find(i, PallocChunkPages-1)
+                               }
+                       },
+               },
+       } {
+               test := test
+               t.Run("Bg/"+test.name, func(t *testing.T) {
+                       mark, find, nextGen := setup(t, false)
+                       test.mark(mark)
+                       find(0, 0)      // Make sure we find nothing at this point.
+                       nextGen()       // Move to the next generation.
+                       test.find(find) // Now we should be able to find things.
+                       find(0, 0)      // The test should always fully exhaust the index.
+               })
+               t.Run("Force/"+test.name, func(t *testing.T) {
+                       mark, find, _ := setup(t, true)
+                       test.mark(mark)
+                       test.find(find) // Finding should always work when forced.
+                       find(0, 0)      // The test should always fully exhaust the index.
+               })
+       }
+       t.Run("Bg/MarkInterleaved", func(t *testing.T) {
+               mark, find, nextGen := setup(t, false)
                 for i := BaseChunkIdx; i < BaseChunkIdx+32; i++ {
                         mark(PageBase(i, 0), PageBase(i+1, 0))
-               }
-               for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- {
+                       nextGen()
                         find(i, PallocChunkPages-1)
                 }
                 find(0, 0)
         })
-       t.Run("MarkInterleaved", func(t *testing.T) {
-               find, mark := setup(t)
+       t.Run("Force/MarkInterleaved", func(t *testing.T) {
+               mark, find, _ := setup(t, true)
                 for i := BaseChunkIdx; i < BaseChunkIdx+32; i++ {
                         mark(PageBase(i, 0), PageBase(i+1, 0))
                         find(i, PallocChunkPages-1)
                 }
                 find(0, 0)
         })
-       t.Run("MarkIdempotentOneChunk", func(t *testing.T) {
-               find, mark := setup(t)
-               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0))
-               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0))
-               find(BaseChunkIdx, PallocChunkPages-1)
-               find(0, 0)
-       })
-       t.Run("MarkIdempotentThirtyTwoChunks", func(t *testing.T) {
-               find, mark := setup(t)
-               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0))
-               mark(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+32, 0))
-               for i := BaseChunkIdx + 31; i >= BaseChunkIdx; i-- {
-                       find(i, PallocChunkPages-1)
-               }
-               find(0, 0)
-       })
-       t.Run("MarkIdempotentThirtyTwoChunksOffset", func(t *testing.T) {
-               find, mark := setup(t)
-               mark(PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+31, 0))
-               mark(PageBase(BaseChunkIdx+5, 0), PageBase(BaseChunkIdx+36, 0))
-               for i := BaseChunkIdx + 35; i >= BaseChunkIdx+4; i-- {
-                       find(i, PallocChunkPages-1)
-               }
-               find(0, 0)
-       })
+}
+
+func TestScavChunkDataPack(t *testing.T) {
+       if !CheckPackScavChunkData(1918237402, 512, 512, 0b11) {
+               t.Error("failed pack/unpack check for scavChunkData 1")
+       }
+       if !CheckPackScavChunkData(^uint32(0), 12, 0, 0b00) {
+               t.Error("failed pack/unpack check for scavChunkData 2")
+       }
  }
  
  func FuzzPIController(f *testing.F) {
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go

index 773e27e646f69300a535f1c39c313eba60314f86..febe51975096edb14d90eca07fb156bb3764b8ee 100644 (file)
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -260,9 +260,11 @@ func finishsweep_m() {
                 c.fullUnswept(sg).reset()
         }
  
-       // Sweeping is done, so if the scavenger isn't already awake,
-       // wake it up. There's definitely work for it to do at this
-       // point.
+       // Sweeping is done, so there won't be any new memory to
+       // scavenge for a bit.
+       //
+       // If the scavenger isn't already awake, wake it up. There's
+       // definitely work for it to do at this point.
         scavenger.wake()
  
         nextMarkBitArenaEpoch()
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go

index 06592fe95b79cacd02206bf5bb4082b5959afb23..ee005978fb096cce4ce2773d403ab0ff1d2f038e 100644 (file)
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -773,7 +773,7 @@ func (h *mheap) init() {
                 h.central[i].mcentral.init(spanClass(i))
         }
  
-       h.pages.init(&h.lock, &memstats.gcMiscSys)
+       h.pages.init(&h.lock, &memstats.gcMiscSys, false)
  }
  
  // reclaim sweeps and reclaims at least npage pages into the heap.
@@ -1274,6 +1274,7 @@ HaveSpan:
         // pages not to get touched until we return. Simultaneously, it's important
         // to do this before calling sysUsed because that may commit address space.
         bytesToScavenge := uintptr(0)
+       forceScavenge := false
         if limit := gcController.memoryLimit.Load(); !gcCPULimiter.limiting() {
                 // Assist with scavenging to maintain the memory limit by the amount
                 // that we expect to page in.
@@ -1282,6 +1283,7 @@ HaveSpan:
                 // someone can set a really big memory limit that isn't maxInt64.
                 if uint64(scav)+inuse > uint64(limit) {
                         bytesToScavenge = uintptr(uint64(scav) + inuse - uint64(limit))
+                       forceScavenge = true
                 }
         }
         if goal := scavenge.gcPercentGoal.Load(); goal != ^uint64(0) && growth > 0 {
@@ -1323,7 +1325,7 @@ HaveSpan:
                 // Scavenge, but back out if the limiter turns on.
                 h.pages.scavenge(bytesToScavenge, func() bool {
                         return gcCPULimiter.limiting()
-               })
+               }, forceScavenge)
  
                 // Finish up accounting.
                 now = nanotime()
@@ -1629,7 +1631,7 @@ func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) {
         memstats.heapStats.release()
  
         // Mark the space as free.
-       h.pages.free(s.base(), s.npages, false)
+       h.pages.free(s.base(), s.npages)
  
         // Free the span structure. We no longer have a use for it.
         s.state.set(mSpanDead)
@@ -1639,6 +1641,10 @@ func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) {
  // scavengeAll acquires the heap lock (blocking any additional
  // manipulation of the page allocator) and iterates over the whole
  // heap, scavenging every free page available.
+//
+// Must run on the system stack because it acquires the heap lock.
+//
+//go:systemstack
  func (h *mheap) scavengeAll() {
         // Disallow malloc or panic while holding the heap lock. We do
         // this here because this is a non-mallocgc entry-point to
@@ -1646,7 +1652,8 @@ func (h *mheap) scavengeAll() {
         gp := getg()
         gp.m.mallocing++
  
-       released := h.pages.scavenge(^uintptr(0), nil)
+       // Force scavenge everything.
+       released := h.pages.scavenge(^uintptr(0), nil, true)
  
         gp.m.mallocing--
  
diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go

index 4f35cafc2404a37d2269f91c20fdc90acf2a0607..da1b14e5a4f5e01b772b8a10bad80f5a1c979935 100644 (file)
--- a/src/runtime/mpagealloc.go
+++ b/src/runtime/mpagealloc.go
@@ -257,11 +257,9 @@ type pageAlloc struct {
         // known by the page allocator to be currently in-use (passed
         // to grow).
         //
-       // This field is currently unused on 32-bit architectures but
-       // is harmless to track. We care much more about having a
-       // contiguous heap in these cases and take additional measures
-       // to ensure that, so in nearly all cases this should have just
-       // 1 element.
+       // On 32-bit architectures we care much more about having a
+       // contiguous heap and take additional measures to ensure that,
+       // so in nearly all such cases this should have just 1 element.
         //
         // All access is protected by the mheapLock.
         inUse addrRanges
@@ -300,7 +298,7 @@ type pageAlloc struct {
         test bool
  }
  
-func (p *pageAlloc) init(mheapLock *mutex, sysStat *sysMemStat) {
+func (p *pageAlloc) init(mheapLock *mutex, sysStat *sysMemStat, test bool) {
         if levelLogPages[0] > logMaxPackedValue {
                 // We can't represent 1<<levelLogPages[0] pages, the maximum number
                 // of pages we need to represent at the root level, in a summary, which
@@ -315,13 +313,17 @@ func (p *pageAlloc) init(mheapLock *mutex, sysStat *sysMemStat) {
         p.inUse.init(sysStat)
  
         // System-dependent initialization.
-       p.sysInit()
+       p.sysInit(test)
  
         // Start with the searchAddr in a state indicating there's no free memory.
         p.searchAddr = maxSearchAddr()
  
         // Set the mheapLock.
         p.mheapLock = mheapLock
+
+       // Record whether we're in a test.
+       p.test = test
+       p.scav.index.test = test
  }
  
  // tryChunkOf returns the bitmap data for the given chunk.
@@ -415,6 +417,11 @@ func (p *pageAlloc) grow(base, size uintptr) {
         // we need to ensure this newly-free memory is visible in the
         // summaries.
         p.update(base, size/pageSize, true, false)
+
+       // Mark all new memory as huge page eligible.
+       if !p.test {
+               sysHugePage(unsafe.Pointer(base), size)
+       }
  }
  
  // enableChunkHugePages enables huge pages for the chunk bitmap mappings (disabled by default).
@@ -568,19 +575,23 @@ func (p *pageAlloc) allocRange(base, npages uintptr) uintptr {
                 chunk := p.chunkOf(sc)
                 scav += chunk.scavenged.popcntRange(si, ei+1-si)
                 chunk.allocRange(si, ei+1-si)
+               p.scav.index.alloc(sc, ei+1-si)
         } else {
                 // The range crosses at least one chunk boundary.
                 chunk := p.chunkOf(sc)
                 scav += chunk.scavenged.popcntRange(si, pallocChunkPages-si)
                 chunk.allocRange(si, pallocChunkPages-si)
+               p.scav.index.alloc(sc, pallocChunkPages-si)
                 for c := sc + 1; c < ec; c++ {
                         chunk := p.chunkOf(c)
                         scav += chunk.scavenged.popcntRange(0, pallocChunkPages)
                         chunk.allocAll()
+                       p.scav.index.alloc(c, pallocChunkPages)
                 }
                 chunk = p.chunkOf(ec)
                 scav += chunk.scavenged.popcntRange(0, ei+1)
                 chunk.allocRange(0, ei+1)
+               p.scav.index.alloc(ec, ei+1)
         }
         p.update(base, npages, true, true)
         return uintptr(scav) * pageSize
@@ -914,7 +925,7 @@ Found:
  // Must run on the system stack because p.mheapLock must be held.
  //
  //go:systemstack
-func (p *pageAlloc) free(base, npages uintptr, scavenged bool) {
+func (p *pageAlloc) free(base, npages uintptr) {
         assertLockHeld(p.mheapLock)
  
         // If we're freeing pages below the p.searchAddr, update searchAddr.
@@ -922,14 +933,13 @@ func (p *pageAlloc) free(base, npages uintptr, scavenged bool) {
                 p.searchAddr = b
         }
         limit := base + npages*pageSize - 1
-       if !scavenged {
-               p.scav.index.mark(base, limit+1)
-       }
         if npages == 1 {
                 // Fast path: we're clearing a single bit, and we know exactly
                 // where it is, so mark it directly.
                 i := chunkIndex(base)
-               p.chunkOf(i).free1(chunkPageIndex(base))
+               pi := chunkPageIndex(base)
+               p.chunkOf(i).free1(pi)
+               p.scav.index.free(i, pi, 1)
         } else {
                 // Slow path: we're clearing more bits so we may need to iterate.
                 sc, ec := chunkIndex(base), chunkIndex(limit)
@@ -938,13 +948,17 @@ func (p *pageAlloc) free(base, npages uintptr, scavenged bool) {
                 if sc == ec {
                         // The range doesn't cross any chunk boundaries.
                         p.chunkOf(sc).free(si, ei+1-si)
+                       p.scav.index.free(sc, si, ei+1-si)
                 } else {
                         // The range crosses at least one chunk boundary.
                         p.chunkOf(sc).free(si, pallocChunkPages-si)
+                       p.scav.index.free(sc, si, pallocChunkPages-si)
                         for c := sc + 1; c < ec; c++ {
                                 p.chunkOf(c).freeAll()
+                               p.scav.index.free(c, 0, pallocChunkPages)
                         }
                         p.chunkOf(ec).free(0, ei+1)
+                       p.scav.index.free(ec, 0, ei+1)
                 }
         }
         p.update(base, npages, true, false)
diff --git a/src/runtime/mpagealloc_32bit.go b/src/runtime/mpagealloc_32bit.go

index 859c61d8a5e78e6ce47c3db9aa608c5605464fd2..03990e47cf63108f13d8f15f06ef0b8dba1ef9d8 100644 (file)
--- a/src/runtime/mpagealloc_32bit.go
+++ b/src/runtime/mpagealloc_32bit.go
@@ -12,7 +12,6 @@
  package runtime
  
  import (
-       "runtime/internal/atomic"
         "unsafe"
  )
  
@@ -58,10 +57,10 @@ var levelLogPages = [summaryLevels]uint{
  
  // scavengeIndexArray is the backing store for p.scav.index.chunks.
  // On 32-bit platforms, it's small enough to just be a global.
-var scavengeIndexArray [((1 << heapAddrBits) / pallocChunkBytes) / 8]atomic.Uint8
+var scavengeIndexArray [(1 << heapAddrBits) / pallocChunkBytes]atomicScavChunkData
  
  // See mpagealloc_64bit.go for details.
-func (p *pageAlloc) sysInit() {
+func (p *pageAlloc) sysInit(test bool) {
         // Calculate how much memory all our entries will take up.
         //
         // This should be around 12 KiB or less.
@@ -95,8 +94,17 @@ func (p *pageAlloc) sysInit() {
                 reservation = add(reservation, uintptr(entries)*pallocSumBytes)
         }
  
-       // Set up the scavenge index.
-       p.scav.index.chunks = scavengeIndexArray[:]
+       if test {
+               // Set up the scavenge index via sysAlloc so the test can free it later.
+               scavIndexSize := uintptr(len(scavengeIndexArray)) * unsafe.Sizeof(atomicScavChunkData{})
+               p.scav.index.chunks = ((*[(1 << heapAddrBits) / pallocChunkBytes]atomicScavChunkData)(sysAlloc(scavIndexSize, p.sysStat)))[:]
+               p.summaryMappedReady += scavIndexSize
+       } else {
+               // Set up the scavenge index.
+               p.scav.index.chunks = scavengeIndexArray[:]
+       }
+       p.scav.index.min.Store(1) // The 0th chunk is never going to be mapped for the heap.
+       p.scav.index.max.Store(uintptr(len(p.scav.index.chunks)))
  }
  
  // See mpagealloc_64bit.go for details.
diff --git a/src/runtime/mpagealloc_64bit.go b/src/runtime/mpagealloc_64bit.go

index 48859a7d018bb523f821f8977d6ff5025e1301cb..a6f1954679a6c7a56156fb2074e17a0793831dab 100644 (file)
--- a/src/runtime/mpagealloc_64bit.go
+++ b/src/runtime/mpagealloc_64bit.go
@@ -7,7 +7,6 @@
  package runtime
  
  import (
-       "runtime/internal/atomic"
         "unsafe"
  )
  
@@ -69,7 +68,7 @@ var levelLogPages = [summaryLevels]uint{
  // sysInit performs architecture-dependent initialization of fields
  // in pageAlloc. pageAlloc should be uninitialized except for sysStat
  // if any runtime statistic should be updated.
-func (p *pageAlloc) sysInit() {
+func (p *pageAlloc) sysInit(test bool) {
         // Reserve memory for each level. This will get mapped in
         // as R/W by setArenas.
         for l, shift := range levelShift {
@@ -88,10 +87,7 @@ func (p *pageAlloc) sysInit() {
         }
  
         // Set up the scavenge index.
-       nbytes := uintptr(1<<heapAddrBits) / pallocChunkBytes / 8
-       r := sysReserve(nil, nbytes)
-       sl := notInHeapSlice{(*notInHeap)(r), int(nbytes), int(nbytes)}
-       p.scav.index.chunks = *(*[]atomic.Uint8)(unsafe.Pointer(&sl))
+       p.scav.index.sysInit()
  }
  
  // sysGrow performs architecture-dependent operations on heap
@@ -168,8 +164,9 @@ func (p *pageAlloc) sysGrow(base, limit uintptr) {
  
                 // Prune need down to what needs to be newly mapped. Some parts of it may
                 // already be mapped by what inUse describes due to page alignment requirements
-               // for mapping. prune's invariants are guaranteed by the fact that this
-               // function will never be asked to remap the same memory twice.
+               // for mapping. Because this function will never be asked to remap the same
+               // memory twice, it should never be possible to prune in such a way that causes
+               // need to be split.
                 if inUseIndex > 0 {
                         need = need.subtract(addrRangeToSumAddrRange(l, p.inUse.ranges[inUseIndex-1]))
                 }
@@ -188,17 +185,18 @@ func (p *pageAlloc) sysGrow(base, limit uintptr) {
         }
  
         // Update the scavenge index.
-       p.summaryMappedReady += p.scav.index.grow(base, limit, p.sysStat)
+       p.summaryMappedReady += p.scav.index.sysGrow(base, limit, p.sysStat)
  }
  
-// grow increases the index's backing store in response to a heap growth.
+// sysGrow increases the index's backing store in response to a heap growth.
  //
  // Returns the amount of memory added to sysStat.
-func (s *scavengeIndex) grow(base, limit uintptr, sysStat *sysMemStat) uintptr {
+func (s *scavengeIndex) sysGrow(base, limit uintptr, sysStat *sysMemStat) uintptr {
         if base%pallocChunkBytes != 0 || limit%pallocChunkBytes != 0 {
                 print("runtime: base = ", hex(base), ", limit = ", hex(limit), "\n")
                 throw("sysGrow bounds not aligned to pallocChunkBytes")
         }
+       scSize := unsafe.Sizeof(atomicScavChunkData{})
         // Map and commit the pieces of chunks that we need.
         //
         // We always map the full range of the minimum heap address to the
@@ -212,24 +210,24 @@ func (s *scavengeIndex) grow(base, limit uintptr, sysStat *sysMemStat) uintptr {
         // index.
         haveMin := s.min.Load()
         haveMax := s.max.Load()
-       needMin := int32(alignDown(uintptr(chunkIndex(base)/8), physPageSize))
-       needMax := int32(alignUp(uintptr((chunkIndex(limit)+7)/8), physPageSize))
+       needMin := alignDown(uintptr(chunkIndex(base)), physPageSize/scSize)
+       needMax := alignUp(uintptr(chunkIndex(limit)), physPageSize/scSize)
         // Extend the range down to what we have, if there's no overlap.
         if needMax < haveMin {
                 needMax = haveMin
         }
-       if needMin > haveMax {
+       if haveMax != 0 && needMin > haveMax {
                 needMin = haveMax
         }
         have := makeAddrRange(
                 // Avoid a panic from indexing one past the last element.
-               uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(haveMin),
-               uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(haveMax),
+               uintptr(unsafe.Pointer(&s.chunks[0]))+haveMin*scSize,
+               uintptr(unsafe.Pointer(&s.chunks[0]))+haveMax*scSize,
         )
         need := makeAddrRange(
                 // Avoid a panic from indexing one past the last element.
-               uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(needMin),
-               uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(needMax),
+               uintptr(unsafe.Pointer(&s.chunks[0]))+needMin*scSize,
+               uintptr(unsafe.Pointer(&s.chunks[0]))+needMax*scSize,
         )
         // Subtract any overlap from rounding. We can't re-map memory because
         // it'll be zeroed.
@@ -247,11 +245,14 @@ func (s *scavengeIndex) grow(base, limit uintptr, sysStat *sysMemStat) uintptr {
                         s.max.Store(needMax)
                 }
         }
-       // Update minHeapIdx. Note that even if there's no mapping work to do,
-       // we may still have a new, lower minimum heap address.
-       minHeapIdx := s.minHeapIdx.Load()
-       if baseIdx := int32(chunkIndex(base) / 8); minHeapIdx == 0 || baseIdx < minHeapIdx {
-               s.minHeapIdx.Store(baseIdx)
-       }
         return need.size()
  }
+
+// sysInit initializes the scavengeIndex's chunks array.
+func (s *scavengeIndex) sysInit() {
+       n := uintptr(1<<heapAddrBits) / pallocChunkBytes
+       nbytes := n * unsafe.Sizeof(atomicScavChunkData{})
+       r := sysReserve(nil, nbytes)
+       sl := notInHeapSlice{(*notInHeap)(r), int(n), int(n)}
+       s.chunks = *(*[]atomicScavChunkData)(unsafe.Pointer(&sl))
+}
diff --git a/src/runtime/mpagecache.go b/src/runtime/mpagecache.go

index 5bc9c84408db525e107d274864362635f4e2f162..245b0cbfef2cf40165338cd71377c1dc24345b8d 100644 (file)
--- a/src/runtime/mpagecache.go
+++ b/src/runtime/mpagecache.go
@@ -89,11 +89,15 @@ func (c *pageCache) flush(p *pageAlloc) {
         for i := uint(0); i < 64; i++ {
                 if c.cache&(1<<i) != 0 {
                         p.chunkOf(ci).free1(pi + i)
+
+                       // Update density statistics.
+                       p.scav.index.free(ci, pi+i, 1)
                 }
                 if c.scav&(1<<i) != 0 {
                         p.chunkOf(ci).scavenged.setRange(pi+i, 1)
                 }
         }
+
         // Since this is a lot like a free, we need to make sure
         // we update the searchAddr just like free does.
         if b := (offAddr{c.base}); b.lessThan(p.searchAddr) {
@@ -145,7 +149,7 @@ func (p *pageAlloc) allocToCache() pageCache {
                         p.searchAddr = maxSearchAddr()
                         return pageCache{}
                 }
-               ci := chunkIndex(addr)
+               ci = chunkIndex(addr)
                 chunk = p.chunkOf(ci)
                 c = pageCache{
                         base:  alignDown(addr, 64*pageSize),
@@ -163,6 +167,9 @@ func (p *pageAlloc) allocToCache() pageCache {
         // Update as an allocation, but note that it's not contiguous.
         p.update(c.base, pageCachePages, false, true)
  
+       // Update density statistics.
+       p.scav.index.alloc(ci, uint(sys.OnesCount64(c.cache)))
+
         // Set the search address to the last page represented by the cache.
         // Since all of the pages in this block are going to the cache, and we
         // searched for the first free page, we can confidently start at the
author	Michael Anthony Knyszek <mknyszek@google.com>
	Fri, 23 Sep 2022 16:32:34 +0000 (16:32 +0000)
committer	Michael Knyszek <mknyszek@google.com>
	Wed, 19 Apr 2023 14:30:00 +0000 (14:30 +0000)
src/runtime/debug/garbage_test.go		patch \| blob \| history
src/runtime/export_test.go		patch \| blob \| history
src/runtime/mem_linux.go		patch \| blob \| history
src/runtime/mgc.go		patch \| blob \| history
src/runtime/mgcscavenge.go		patch \| blob \| history
src/runtime/mgcscavenge_test.go		patch \| blob \| history
src/runtime/mgcsweep.go		patch \| blob \| history
src/runtime/mheap.go		patch \| blob \| history
src/runtime/mpagealloc.go		patch \| blob \| history
src/runtime/mpagealloc_32bit.go		patch \| blob \| history
src/runtime/mpagealloc_64bit.go		patch \| blob \| history
src/runtime/mpagecache.go		patch \| blob \| history