src/runtime/mheap.go

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // Page heap.
   6 //
   7 // See malloc.go for overview.
   8
   9 package runtime
  10
  11 import (
  12         "internal/cpu"
  13         "internal/goarch"
  14         "internal/goexperiment"
  15         "runtime/internal/atomic"
  16         "runtime/internal/sys"
  17         "unsafe"
  18 )
  19
  20 const (
  21         // minPhysPageSize is a lower-bound on the physical page size. The
  22         // true physical page size may be larger than this. In contrast,
  23         // sys.PhysPageSize is an upper-bound on the physical page size.
  24         minPhysPageSize = 4096
  25
  26         // maxPhysPageSize is the maximum page size the runtime supports.
  27         maxPhysPageSize = 512 << 10
  28
  29         // maxPhysHugePageSize sets an upper-bound on the maximum huge page size
  30         // that the runtime supports.
  31         maxPhysHugePageSize = pallocChunkBytes
  32
  33         // pagesPerReclaimerChunk indicates how many pages to scan from the
  34         // pageInUse bitmap at a time. Used by the page reclaimer.
  35         //
  36         // Higher values reduce contention on scanning indexes (such as
  37         // h.reclaimIndex), but increase the minimum latency of the
  38         // operation.
  39         //
  40         // The time required to scan this many pages can vary a lot depending
  41         // on how many spans are actually freed. Experimentally, it can
  42         // scan for pages at ~300 GB/ms on a 2.6GHz Core i7, but can only
  43         // free spans at ~32 MB/ms. Using 512 pages bounds this at
  44         // roughly 100µs.
  45         //
  46         // Must be a multiple of the pageInUse bitmap element size and
  47         // must also evenly divide pagesPerArena.
  48         pagesPerReclaimerChunk = 512
  49
  50         // physPageAlignedStacks indicates whether stack allocations must be
  51         // physical page aligned. This is a requirement for MAP_STACK on
  52         // OpenBSD.
  53         physPageAlignedStacks = GOOS == "openbsd"
  54 )
  55
  56 // Main malloc heap.
  57 // The heap itself is the "free" and "scav" treaps,
  58 // but all the other global data is here too.
  59 //
  60 // mheap must not be heap-allocated because it contains mSpanLists,
  61 // which must not be heap-allocated.
  62 type mheap struct {
  63         _ sys.NotInHeap
  64
  65         // lock must only be acquired on the system stack, otherwise a g
  66         // could self-deadlock if its stack grows with the lock held.
  67         lock mutex
  68
  69         pages pageAlloc // page allocation data structure
  70
  71         sweepgen uint32 // sweep generation, see comment in mspan; written during STW
  72
  73         // allspans is a slice of all mspans ever created. Each mspan
  74         // appears exactly once.
  75         //
  76         // The memory for allspans is manually managed and can be
  77         // reallocated and move as the heap grows.
  78         //
  79         // In general, allspans is protected by mheap_.lock, which
  80         // prevents concurrent access as well as freeing the backing
  81         // store. Accesses during STW might not hold the lock, but
  82         // must ensure that allocation cannot happen around the
  83         // access (since that may free the backing store).
  84         allspans []*mspan // all spans out there
  85
  86         // Proportional sweep
  87         //
  88         // These parameters represent a linear function from gcController.heapLive
  89         // to page sweep count. The proportional sweep system works to
  90         // stay in the black by keeping the current page sweep count
  91         // above this line at the current gcController.heapLive.
  92         //
  93         // The line has slope sweepPagesPerByte and passes through a
  94         // basis point at (sweepHeapLiveBasis, pagesSweptBasis). At
  95         // any given time, the system is at (gcController.heapLive,
  96         // pagesSwept) in this space.
  97         //
  98         // It is important that the line pass through a point we
  99         // control rather than simply starting at a 0,0 origin
 100         // because that lets us adjust sweep pacing at any time while
 101         // accounting for current progress. If we could only adjust
 102         // the slope, it would create a discontinuity in debt if any
 103         // progress has already been made.
 104         pagesInUse         atomic.Uintptr // pages of spans in stats mSpanInUse
 105         pagesSwept         atomic.Uint64  // pages swept this cycle
 106         pagesSweptBasis    atomic.Uint64  // pagesSwept to use as the origin of the sweep ratio
 107         sweepHeapLiveBasis uint64         // value of gcController.heapLive to use as the origin of sweep ratio; written with lock, read without
 108         sweepPagesPerByte  float64        // proportional sweep ratio; written with lock, read without
 109
 110         // Page reclaimer state
 111
 112         // reclaimIndex is the page index in allArenas of next page to
 113         // reclaim. Specifically, it refers to page (i %
 114         // pagesPerArena) of arena allArenas[i / pagesPerArena].
 115         //
 116         // If this is >= 1<<63, the page reclaimer is done scanning
 117         // the page marks.
 118         reclaimIndex atomic.Uint64
 119
 120         // reclaimCredit is spare credit for extra pages swept. Since
 121         // the page reclaimer works in large chunks, it may reclaim
 122         // more than requested. Any spare pages released go to this
 123         // credit pool.
 124         reclaimCredit atomic.Uintptr
 125
 126         _ cpu.CacheLinePad // prevents false-sharing between arenas and preceding variables
 127
 128         // arenas is the heap arena map. It points to the metadata for
 129         // the heap for every arena frame of the entire usable virtual
 130         // address space.
 131         //
 132         // Use arenaIndex to compute indexes into this array.
 133         //
 134         // For regions of the address space that are not backed by the
 135         // Go heap, the arena map contains nil.
 136         //
 137         // Modifications are protected by mheap_.lock. Reads can be
 138         // performed without locking; however, a given entry can
 139         // transition from nil to non-nil at any time when the lock
 140         // isn't held. (Entries never transitions back to nil.)
 141         //
 142         // In general, this is a two-level mapping consisting of an L1
 143         // map and possibly many L2 maps. This saves space when there
 144         // are a huge number of arena frames. However, on many
 145         // platforms (even 64-bit), arenaL1Bits is 0, making this
 146         // effectively a single-level map. In this case, arenas[0]
 147         // will never be nil.
 148         arenas [1 << arenaL1Bits]*[1 << arenaL2Bits]*heapArena
 149
 150         // arenasHugePages indicates whether arenas' L2 entries are eligible
 151         // to be backed by huge pages.
 152         arenasHugePages bool
 153
 154         // heapArenaAlloc is pre-reserved space for allocating heapArena
 155         // objects. This is only used on 32-bit, where we pre-reserve
 156         // this space to avoid interleaving it with the heap itself.
 157         heapArenaAlloc linearAlloc
 158
 159         // arenaHints is a list of addresses at which to attempt to
 160         // add more heap arenas. This is initially populated with a
 161         // set of general hint addresses, and grown with the bounds of
 162         // actual heap arena ranges.
 163         arenaHints *arenaHint
 164
 165         // arena is a pre-reserved space for allocating heap arenas
 166         // (the actual arenas). This is only used on 32-bit.
 167         arena linearAlloc
 168
 169         // allArenas is the arenaIndex of every mapped arena. This can
 170         // be used to iterate through the address space.
 171         //
 172         // Access is protected by mheap_.lock. However, since this is
 173         // append-only and old backing arrays are never freed, it is
 174         // safe to acquire mheap_.lock, copy the slice header, and
 175         // then release mheap_.lock.
 176         allArenas []arenaIdx
 177
 178         // sweepArenas is a snapshot of allArenas taken at the
 179         // beginning of the sweep cycle. This can be read safely by
 180         // simply blocking GC (by disabling preemption).
 181         sweepArenas []arenaIdx
 182
 183         // markArenas is a snapshot of allArenas taken at the beginning
 184         // of the mark cycle. Because allArenas is append-only, neither
 185         // this slice nor its contents will change during the mark, so
 186         // it can be read safely.
 187         markArenas []arenaIdx
 188
 189         // curArena is the arena that the heap is currently growing
 190         // into. This should always be physPageSize-aligned.
 191         curArena struct {
 192                 base, end uintptr
 193         }
 194
 195         // central free lists for small size classes.
 196         // the padding makes sure that the mcentrals are
 197         // spaced CacheLinePadSize bytes apart, so that each mcentral.lock
 198         // gets its own cache line.
 199         // central is indexed by spanClass.
 200         central [numSpanClasses]struct {
 201                 mcentral mcentral
 202                 pad      [(cpu.CacheLinePadSize - unsafe.Sizeof(mcentral{})%cpu.CacheLinePadSize) % cpu.CacheLinePadSize]byte
 203         }
 204
 205         spanalloc              fixalloc // allocator for span*
 206         cachealloc             fixalloc // allocator for mcache*
 207         specialfinalizeralloc  fixalloc // allocator for specialfinalizer*
 208         specialprofilealloc    fixalloc // allocator for specialprofile*
 209         specialReachableAlloc  fixalloc // allocator for specialReachable
 210         specialPinCounterAlloc fixalloc // allocator for specialPinCounter
 211         speciallock            mutex    // lock for special record allocators.
 212         arenaHintAlloc         fixalloc // allocator for arenaHints
 213
 214         // User arena state.
 215         //
 216         // Protected by mheap_.lock.
 217         userArena struct {
 218                 // arenaHints is a list of addresses at which to attempt to
 219                 // add more heap arenas for user arena chunks. This is initially
 220                 // populated with a set of general hint addresses, and grown with
 221                 // the bounds of actual heap arena ranges.
 222                 arenaHints *arenaHint
 223
 224                 // quarantineList is a list of user arena spans that have been set to fault, but
 225                 // are waiting for all pointers into them to go away. Sweeping handles
 226                 // identifying when this is true, and moves the span to the ready list.
 227                 quarantineList mSpanList
 228
 229                 // readyList is a list of empty user arena spans that are ready for reuse.
 230                 readyList mSpanList
 231         }
 232
 233         unused *specialfinalizer // never set, just here to force the specialfinalizer type into DWARF
 234 }
 235
 236 var mheap_ mheap
 237
 238 // A heapArena stores metadata for a heap arena. heapArenas are stored
 239 // outside of the Go heap and accessed via the mheap_.arenas index.
 240 type heapArena struct {
 241         _ sys.NotInHeap
 242
 243         // heapArenaPtrScalar contains pointer/scalar data about the heap for this heap arena.
 244         heapArenaPtrScalar
 245
 246         // spans maps from virtual address page ID within this arena to *mspan.
 247         // For allocated spans, their pages map to the span itself.
 248         // For free spans, only the lowest and highest pages map to the span itself.
 249         // Internal pages map to an arbitrary span.
 250         // For pages that have never been allocated, spans entries are nil.
 251         //
 252         // Modifications are protected by mheap.lock. Reads can be
 253         // performed without locking, but ONLY from indexes that are
 254         // known to contain in-use or stack spans. This means there
 255         // must not be a safe-point between establishing that an
 256         // address is live and looking it up in the spans array.
 257         spans [pagesPerArena]*mspan
 258
 259         // pageInUse is a bitmap that indicates which spans are in
 260         // state mSpanInUse. This bitmap is indexed by page number,
 261         // but only the bit corresponding to the first page in each
 262         // span is used.
 263         //
 264         // Reads and writes are atomic.
 265         pageInUse [pagesPerArena / 8]uint8
 266
 267         // pageMarks is a bitmap that indicates which spans have any
 268         // marked objects on them. Like pageInUse, only the bit
 269         // corresponding to the first page in each span is used.
 270         //
 271         // Writes are done atomically during marking. Reads are
 272         // non-atomic and lock-free since they only occur during
 273         // sweeping (and hence never race with writes).
 274         //
 275         // This is used to quickly find whole spans that can be freed.
 276         //
 277         // TODO(austin): It would be nice if this was uint64 for
 278         // faster scanning, but we don't have 64-bit atomic bit
 279         // operations.
 280         pageMarks [pagesPerArena / 8]uint8
 281
 282         // pageSpecials is a bitmap that indicates which spans have
 283         // specials (finalizers or other). Like pageInUse, only the bit
 284         // corresponding to the first page in each span is used.
 285         //
 286         // Writes are done atomically whenever a special is added to
 287         // a span and whenever the last special is removed from a span.
 288         // Reads are done atomically to find spans containing specials
 289         // during marking.
 290         pageSpecials [pagesPerArena / 8]uint8
 291
 292         // checkmarks stores the debug.gccheckmark state. It is only
 293         // used if debug.gccheckmark > 0.
 294         checkmarks *checkmarksMap
 295
 296         // zeroedBase marks the first byte of the first page in this
 297         // arena which hasn't been used yet and is therefore already
 298         // zero. zeroedBase is relative to the arena base.
 299         // Increases monotonically until it hits heapArenaBytes.
 300         //
 301         // This field is sufficient to determine if an allocation
 302         // needs to be zeroed because the page allocator follows an
 303         // address-ordered first-fit policy.
 304         //
 305         // Read atomically and written with an atomic CAS.
 306         zeroedBase uintptr
 307 }
 308
 309 // arenaHint is a hint for where to grow the heap arenas. See
 310 // mheap_.arenaHints.
 311 type arenaHint struct {
 312         _    sys.NotInHeap
 313         addr uintptr
 314         down bool
 315         next *arenaHint
 316 }
 317
 318 // An mspan is a run of pages.
 319 //
 320 // When a mspan is in the heap free treap, state == mSpanFree
 321 // and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span.
 322 // If the mspan is in the heap scav treap, then in addition to the
 323 // above scavenged == true. scavenged == false in all other cases.
 324 //
 325 // When a mspan is allocated, state == mSpanInUse or mSpanManual
 326 // and heapmap(i) == span for all s->start <= i < s->start+s->npages.
 327
 328 // Every mspan is in one doubly-linked list, either in the mheap's
 329 // busy list or one of the mcentral's span lists.
 330
 331 // An mspan representing actual memory has state mSpanInUse,
 332 // mSpanManual, or mSpanFree. Transitions between these states are
 333 // constrained as follows:
 334 //
 335 //   - A span may transition from free to in-use or manual during any GC
 336 //     phase.
 337 //
 338 //   - During sweeping (gcphase == _GCoff), a span may transition from
 339 //     in-use to free (as a result of sweeping) or manual to free (as a
 340 //     result of stacks being freed).
 341 //
 342 //   - During GC (gcphase != _GCoff), a span *must not* transition from
 343 //     manual or in-use to free. Because concurrent GC may read a pointer
 344 //     and then look up its span, the span state must be monotonic.
 345 //
 346 // Setting mspan.state to mSpanInUse or mSpanManual must be done
 347 // atomically and only after all other span fields are valid.
 348 // Likewise, if inspecting a span is contingent on it being
 349 // mSpanInUse, the state should be loaded atomically and checked
 350 // before depending on other fields. This allows the garbage collector
 351 // to safely deal with potentially invalid pointers, since resolving
 352 // such pointers may race with a span being allocated.
 353 type mSpanState uint8
 354
 355 const (
 356         mSpanDead   mSpanState = iota
 357         mSpanInUse             // allocated for garbage collected heap
 358         mSpanManual            // allocated for manual management (e.g., stack allocator)
 359 )
 360
 361 // mSpanStateNames are the names of the span states, indexed by
 362 // mSpanState.
 363 var mSpanStateNames = []string{
 364         "mSpanDead",
 365         "mSpanInUse",
 366         "mSpanManual",
 367 }
 368
 369 // mSpanStateBox holds an atomic.Uint8 to provide atomic operations on
 370 // an mSpanState. This is a separate type to disallow accidental comparison
 371 // or assignment with mSpanState.
 372 type mSpanStateBox struct {
 373         s atomic.Uint8
 374 }
 375
 376 // It is nosplit to match get, below.
 377
 378 //go:nosplit
 379 func (b *mSpanStateBox) set(s mSpanState) {
 380         b.s.Store(uint8(s))
 381 }
 382
 383 // It is nosplit because it's called indirectly by typedmemclr,
 384 // which must not be preempted.
 385
 386 //go:nosplit
 387 func (b *mSpanStateBox) get() mSpanState {
 388         return mSpanState(b.s.Load())
 389 }
 390
 391 // mSpanList heads a linked list of spans.
 392 type mSpanList struct {
 393         _     sys.NotInHeap
 394         first *mspan // first span in list, or nil if none
 395         last  *mspan // last span in list, or nil if none
 396 }
 397
 398 type mspan struct {
 399         _    sys.NotInHeap
 400         next *mspan     // next span in list, or nil if none
 401         prev *mspan     // previous span in list, or nil if none
 402         list *mSpanList // For debugging. TODO: Remove.
 403
 404         startAddr uintptr // address of first byte of span aka s.base()
 405         npages    uintptr // number of pages in span
 406
 407         manualFreeList gclinkptr // list of free objects in mSpanManual spans
 408
 409         // freeindex is the slot index between 0 and nelems at which to begin scanning
 410         // for the next free object in this span.
 411         // Each allocation scans allocBits starting at freeindex until it encounters a 0
 412         // indicating a free object. freeindex is then adjusted so that subsequent scans begin
 413         // just past the newly discovered free object.
 414         //
 415         // If freeindex == nelem, this span has no free objects.
 416         //
 417         // allocBits is a bitmap of objects in this span.
 418         // If n >= freeindex and allocBits[n/8] & (1<<(n%8)) is 0
 419         // then object n is free;
 420         // otherwise, object n is allocated. Bits starting at nelem are
 421         // undefined and should never be referenced.
 422         //
 423         // Object n starts at address n*elemsize + (start << pageShift).
 424         freeindex uint16
 425         // TODO: Look up nelems from sizeclass and remove this field if it
 426         // helps performance.
 427         nelems uint16 // number of object in the span.
 428         // freeIndexForScan is like freeindex, except that freeindex is
 429         // used by the allocator whereas freeIndexForScan is used by the
 430         // GC scanner. They are two fields so that the GC sees the object
 431         // is allocated only when the object and the heap bits are
 432         // initialized (see also the assignment of freeIndexForScan in
 433         // mallocgc, and issue 54596).
 434         freeIndexForScan uint16
 435
 436         // Cache of the allocBits at freeindex. allocCache is shifted
 437         // such that the lowest bit corresponds to the bit freeindex.
 438         // allocCache holds the complement of allocBits, thus allowing
 439         // ctz (count trailing zero) to use it directly.
 440         // allocCache may contain bits beyond s.nelems; the caller must ignore
 441         // these.
 442         allocCache uint64
 443
 444         // allocBits and gcmarkBits hold pointers to a span's mark and
 445         // allocation bits. The pointers are 8 byte aligned.
 446         // There are three arenas where this data is held.
 447         // free: Dirty arenas that are no longer accessed
 448         //       and can be reused.
 449         // next: Holds information to be used in the next GC cycle.
 450         // current: Information being used during this GC cycle.
 451         // previous: Information being used during the last GC cycle.
 452         // A new GC cycle starts with the call to finishsweep_m.
 453         // finishsweep_m moves the previous arena to the free arena,
 454         // the current arena to the previous arena, and
 455         // the next arena to the current arena.
 456         // The next arena is populated as the spans request
 457         // memory to hold gcmarkBits for the next GC cycle as well
 458         // as allocBits for newly allocated spans.
 459         //
 460         // The pointer arithmetic is done "by hand" instead of using
 461         // arrays to avoid bounds checks along critical performance
 462         // paths.
 463         // The sweep will free the old allocBits and set allocBits to the
 464         // gcmarkBits. The gcmarkBits are replaced with a fresh zeroed
 465         // out memory.
 466         allocBits  *gcBits
 467         gcmarkBits *gcBits
 468         pinnerBits *gcBits // bitmap for pinned objects; accessed atomically
 469
 470         // sweep generation:
 471         // if sweepgen == h->sweepgen - 2, the span needs sweeping
 472         // if sweepgen == h->sweepgen - 1, the span is currently being swept
 473         // if sweepgen == h->sweepgen, the span is swept and ready to use
 474         // if sweepgen == h->sweepgen + 1, the span was cached before sweep began and is still cached, and needs sweeping
 475         // if sweepgen == h->sweepgen + 3, the span was swept and then cached and is still cached
 476         // h->sweepgen is incremented by 2 after every GC
 477
 478         sweepgen              uint32
 479         divMul                uint32        // for divide by elemsize
 480         allocCount            uint16        // number of allocated objects
 481         spanclass             spanClass     // size class and noscan (uint8)
 482         state                 mSpanStateBox // mSpanInUse etc; accessed atomically (get/set methods)
 483         needzero              uint8         // needs to be zeroed before allocation
 484         isUserArenaChunk      bool          // whether or not this span represents a user arena
 485         allocCountBeforeCache uint16        // a copy of allocCount that is stored just before this span is cached
 486         elemsize              uintptr       // computed from sizeclass or from npages
 487         limit                 uintptr       // end of data in span
 488         speciallock           mutex         // guards specials list and changes to pinnerBits
 489         specials              *special      // linked list of special records sorted by offset.
 490         userArenaChunkFree    addrRange     // interval for managing chunk allocation
 491         largeType             *_type        // malloc header for large objects.
 492 }
 493
 494 func (s *mspan) base() uintptr {
 495         return s.startAddr
 496 }
 497
 498 func (s *mspan) layout() (size, n, total uintptr) {
 499         total = s.npages << _PageShift
 500         size = s.elemsize
 501         if size > 0 {
 502                 n = total / size
 503         }
 504         return
 505 }
 506
 507 // recordspan adds a newly allocated span to h.allspans.
 508 //
 509 // This only happens the first time a span is allocated from
 510 // mheap.spanalloc (it is not called when a span is reused).
 511 //
 512 // Write barriers are disallowed here because it can be called from
 513 // gcWork when allocating new workbufs. However, because it's an
 514 // indirect call from the fixalloc initializer, the compiler can't see
 515 // this.
 516 //
 517 // The heap lock must be held.
 518 //
 519 //go:nowritebarrierrec
 520 func recordspan(vh unsafe.Pointer, p unsafe.Pointer) {
 521         h := (*mheap)(vh)
 522         s := (*mspan)(p)
 523
 524         assertLockHeld(&h.lock)
 525
 526         if len(h.allspans) >= cap(h.allspans) {
 527                 n := 64 * 1024 / goarch.PtrSize
 528                 if n < cap(h.allspans)*3/2 {
 529                         n = cap(h.allspans) * 3 / 2
 530                 }
 531                 var new []*mspan
 532                 sp := (*slice)(unsafe.Pointer(&new))
 533                 sp.array = sysAlloc(uintptr(n)*goarch.PtrSize, &memstats.other_sys)
 534                 if sp.array == nil {
 535                         throw("runtime: cannot allocate memory")
 536                 }
 537                 sp.len = len(h.allspans)
 538                 sp.cap = n
 539                 if len(h.allspans) > 0 {
 540                         copy(new, h.allspans)
 541                 }
 542                 oldAllspans := h.allspans
 543                 *(*notInHeapSlice)(unsafe.Pointer(&h.allspans)) = *(*notInHeapSlice)(unsafe.Pointer(&new))
 544                 if len(oldAllspans) != 0 {
 545                         sysFree(unsafe.Pointer(&oldAllspans[0]), uintptr(cap(oldAllspans))*unsafe.Sizeof(oldAllspans[0]), &memstats.other_sys)
 546                 }
 547         }
 548         h.allspans = h.allspans[:len(h.allspans)+1]
 549         h.allspans[len(h.allspans)-1] = s
 550 }
 551
 552 // A spanClass represents the size class and noscan-ness of a span.
 553 //
 554 // Each size class has a noscan spanClass and a scan spanClass. The
 555 // noscan spanClass contains only noscan objects, which do not contain
 556 // pointers and thus do not need to be scanned by the garbage
 557 // collector.
 558 type spanClass uint8
 559
 560 const (
 561         numSpanClasses = _NumSizeClasses << 1
 562         tinySpanClass  = spanClass(tinySizeClass<<1 | 1)
 563 )
 564
 565 func makeSpanClass(sizeclass uint8, noscan bool) spanClass {
 566         return spanClass(sizeclass<<1) | spanClass(bool2int(noscan))
 567 }
 568
 569 //go:nosplit
 570 func (sc spanClass) sizeclass() int8 {
 571         return int8(sc >> 1)
 572 }
 573
 574 //go:nosplit
 575 func (sc spanClass) noscan() bool {
 576         return sc&1 != 0
 577 }
 578
 579 // arenaIndex returns the index into mheap_.arenas of the arena
 580 // containing metadata for p. This index combines of an index into the
 581 // L1 map and an index into the L2 map and should be used as
 582 // mheap_.arenas[ai.l1()][ai.l2()].
 583 //
 584 // If p is outside the range of valid heap addresses, either l1() or
 585 // l2() will be out of bounds.
 586 //
 587 // It is nosplit because it's called by spanOf and several other
 588 // nosplit functions.
 589 //
 590 //go:nosplit
 591 func arenaIndex(p uintptr) arenaIdx {
 592         return arenaIdx((p - arenaBaseOffset) / heapArenaBytes)
 593 }
 594
 595 // arenaBase returns the low address of the region covered by heap
 596 // arena i.
 597 func arenaBase(i arenaIdx) uintptr {
 598         return uintptr(i)*heapArenaBytes + arenaBaseOffset
 599 }
 600
 601 type arenaIdx uint
 602
 603 // l1 returns the "l1" portion of an arenaIdx.
 604 //
 605 // Marked nosplit because it's called by spanOf and other nosplit
 606 // functions.
 607 //
 608 //go:nosplit
 609 func (i arenaIdx) l1() uint {
 610         if arenaL1Bits == 0 {
 611                 // Let the compiler optimize this away if there's no
 612                 // L1 map.
 613                 return 0
 614         } else {
 615                 return uint(i) >> arenaL1Shift
 616         }
 617 }
 618
 619 // l2 returns the "l2" portion of an arenaIdx.
 620 //
 621 // Marked nosplit because it's called by spanOf and other nosplit funcs.
 622 // functions.
 623 //
 624 //go:nosplit
 625 func (i arenaIdx) l2() uint {
 626         if arenaL1Bits == 0 {
 627                 return uint(i)
 628         } else {
 629                 return uint(i) & (1<<arenaL2Bits - 1)
 630         }
 631 }
 632
 633 // inheap reports whether b is a pointer into a (potentially dead) heap object.
 634 // It returns false for pointers into mSpanManual spans.
 635 // Non-preemptible because it is used by write barriers.
 636 //
 637 //go:nowritebarrier
 638 //go:nosplit
 639 func inheap(b uintptr) bool {
 640         return spanOfHeap(b) != nil
 641 }
 642
 643 // inHeapOrStack is a variant of inheap that returns true for pointers
 644 // into any allocated heap span.
 645 //
 646 //go:nowritebarrier
 647 //go:nosplit
 648 func inHeapOrStack(b uintptr) bool {
 649         s := spanOf(b)
 650         if s == nil || b < s.base() {
 651                 return false
 652         }
 653         switch s.state.get() {
 654         case mSpanInUse, mSpanManual:
 655                 return b < s.limit
 656         default:
 657                 return false
 658         }
 659 }
 660
 661 // spanOf returns the span of p. If p does not point into the heap
 662 // arena or no span has ever contained p, spanOf returns nil.
 663 //
 664 // If p does not point to allocated memory, this may return a non-nil
 665 // span that does *not* contain p. If this is a possibility, the
 666 // caller should either call spanOfHeap or check the span bounds
 667 // explicitly.
 668 //
 669 // Must be nosplit because it has callers that are nosplit.
 670 //
 671 //go:nosplit
 672 func spanOf(p uintptr) *mspan {
 673         // This function looks big, but we use a lot of constant
 674         // folding around arenaL1Bits to get it under the inlining
 675         // budget. Also, many of the checks here are safety checks
 676         // that Go needs to do anyway, so the generated code is quite
 677         // short.
 678         ri := arenaIndex(p)
 679         if arenaL1Bits == 0 {
 680                 // If there's no L1, then ri.l1() can't be out of bounds but ri.l2() can.
 681                 if ri.l2() >= uint(len(mheap_.arenas[0])) {
 682                         return nil
 683                 }
 684         } else {
 685                 // If there's an L1, then ri.l1() can be out of bounds but ri.l2() can't.
 686                 if ri.l1() >= uint(len(mheap_.arenas)) {
 687                         return nil
 688                 }
 689         }
 690         l2 := mheap_.arenas[ri.l1()]
 691         if arenaL1Bits != 0 && l2 == nil { // Should never happen if there's no L1.
 692                 return nil
 693         }
 694         ha := l2[ri.l2()]
 695         if ha == nil {
 696                 return nil
 697         }
 698         return ha.spans[(p/pageSize)%pagesPerArena]
 699 }
 700
 701 // spanOfUnchecked is equivalent to spanOf, but the caller must ensure
 702 // that p points into an allocated heap arena.
 703 //
 704 // Must be nosplit because it has callers that are nosplit.
 705 //
 706 //go:nosplit
 707 func spanOfUnchecked(p uintptr) *mspan {
 708         ai := arenaIndex(p)
 709         return mheap_.arenas[ai.l1()][ai.l2()].spans[(p/pageSize)%pagesPerArena]
 710 }
 711
 712 // spanOfHeap is like spanOf, but returns nil if p does not point to a
 713 // heap object.
 714 //
 715 // Must be nosplit because it has callers that are nosplit.
 716 //
 717 //go:nosplit
 718 func spanOfHeap(p uintptr) *mspan {
 719         s := spanOf(p)
 720         // s is nil if it's never been allocated. Otherwise, we check
 721         // its state first because we don't trust this pointer, so we
 722         // have to synchronize with span initialization. Then, it's
 723         // still possible we picked up a stale span pointer, so we
 724         // have to check the span's bounds.
 725         if s == nil || s.state.get() != mSpanInUse || p < s.base() || p >= s.limit {
 726                 return nil
 727         }
 728         return s
 729 }
 730
 731 // pageIndexOf returns the arena, page index, and page mask for pointer p.
 732 // The caller must ensure p is in the heap.
 733 func pageIndexOf(p uintptr) (arena *heapArena, pageIdx uintptr, pageMask uint8) {
 734         ai := arenaIndex(p)
 735         arena = mheap_.arenas[ai.l1()][ai.l2()]
 736         pageIdx = ((p / pageSize) / 8) % uintptr(len(arena.pageInUse))
 737         pageMask = byte(1 << ((p / pageSize) % 8))
 738         return
 739 }
 740
 741 // Initialize the heap.
 742 func (h *mheap) init() {
 743         lockInit(&h.lock, lockRankMheap)
 744         lockInit(&h.speciallock, lockRankMheapSpecial)
 745
 746         h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys)
 747         h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys)
 748         h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys)
 749         h.specialprofilealloc.init(unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys)
 750         h.specialReachableAlloc.init(unsafe.Sizeof(specialReachable{}), nil, nil, &memstats.other_sys)
 751         h.specialPinCounterAlloc.init(unsafe.Sizeof(specialPinCounter{}), nil, nil, &memstats.other_sys)
 752         h.arenaHintAlloc.init(unsafe.Sizeof(arenaHint{}), nil, nil, &memstats.other_sys)
 753
 754         // Don't zero mspan allocations. Background sweeping can
 755         // inspect a span concurrently with allocating it, so it's
 756         // important that the span's sweepgen survive across freeing
 757         // and re-allocating a span to prevent background sweeping
 758         // from improperly cas'ing it from 0.
 759         //
 760         // This is safe because mspan contains no heap pointers.
 761         h.spanalloc.zero = false
 762
 763         // h->mapcache needs no init
 764
 765         for i := range h.central {
 766                 h.central[i].mcentral.init(spanClass(i))
 767         }
 768
 769         h.pages.init(&h.lock, &memstats.gcMiscSys, false)
 770 }
 771
 772 // reclaim sweeps and reclaims at least npage pages into the heap.
 773 // It is called before allocating npage pages to keep growth in check.
 774 //
 775 // reclaim implements the page-reclaimer half of the sweeper.
 776 //
 777 // h.lock must NOT be held.
 778 func (h *mheap) reclaim(npage uintptr) {
 779         // TODO(austin): Half of the time spent freeing spans is in
 780         // locking/unlocking the heap (even with low contention). We
 781         // could make the slow path here several times faster by
 782         // batching heap frees.
 783
 784         // Bail early if there's no more reclaim work.
 785         if h.reclaimIndex.Load() >= 1<<63 {
 786                 return
 787         }
 788
 789         // Disable preemption so the GC can't start while we're
 790         // sweeping, so we can read h.sweepArenas, and so
 791         // traceGCSweepStart/Done pair on the P.
 792         mp := acquirem()
 793
 794         if traceEnabled() {
 795                 traceGCSweepStart()
 796         }
 797
 798         arenas := h.sweepArenas
 799         locked := false
 800         for npage > 0 {
 801                 // Pull from accumulated credit first.
 802                 if credit := h.reclaimCredit.Load(); credit > 0 {
 803                         take := credit
 804                         if take > npage {
 805                                 // Take only what we need.
 806                                 take = npage
 807                         }
 808                         if h.reclaimCredit.CompareAndSwap(credit, credit-take) {
 809                                 npage -= take
 810                         }
 811                         continue
 812                 }
 813
 814                 // Claim a chunk of work.
 815                 idx := uintptr(h.reclaimIndex.Add(pagesPerReclaimerChunk) - pagesPerReclaimerChunk)
 816                 if idx/pagesPerArena >= uintptr(len(arenas)) {
 817                         // Page reclaiming is done.
 818                         h.reclaimIndex.Store(1 << 63)
 819                         break
 820                 }
 821
 822                 if !locked {
 823                         // Lock the heap for reclaimChunk.
 824                         lock(&h.lock)
 825                         locked = true
 826                 }
 827
 828                 // Scan this chunk.
 829                 nfound := h.reclaimChunk(arenas, idx, pagesPerReclaimerChunk)
 830                 if nfound <= npage {
 831                         npage -= nfound
 832                 } else {
 833                         // Put spare pages toward global credit.
 834                         h.reclaimCredit.Add(nfound - npage)
 835                         npage = 0
 836                 }
 837         }
 838         if locked {
 839                 unlock(&h.lock)
 840         }
 841
 842         if traceEnabled() {
 843                 traceGCSweepDone()
 844         }
 845         releasem(mp)
 846 }
 847
 848 // reclaimChunk sweeps unmarked spans that start at page indexes [pageIdx, pageIdx+n).
 849 // It returns the number of pages returned to the heap.
 850 //
 851 // h.lock must be held and the caller must be non-preemptible. Note: h.lock may be
 852 // temporarily unlocked and re-locked in order to do sweeping or if tracing is
 853 // enabled.
 854 func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr {
 855         // The heap lock must be held because this accesses the
 856         // heapArena.spans arrays using potentially non-live pointers.
 857         // In particular, if a span were freed and merged concurrently
 858         // with this probing heapArena.spans, it would be possible to
 859         // observe arbitrary, stale span pointers.
 860         assertLockHeld(&h.lock)
 861
 862         n0 := n
 863         var nFreed uintptr
 864         sl := sweep.active.begin()
 865         if !sl.valid {
 866                 return 0
 867         }
 868         for n > 0 {
 869                 ai := arenas[pageIdx/pagesPerArena]
 870                 ha := h.arenas[ai.l1()][ai.l2()]
 871
 872                 // Get a chunk of the bitmap to work on.
 873                 arenaPage := uint(pageIdx % pagesPerArena)
 874                 inUse := ha.pageInUse[arenaPage/8:]
 875                 marked := ha.pageMarks[arenaPage/8:]
 876                 if uintptr(len(inUse)) > n/8 {
 877                         inUse = inUse[:n/8]
 878                         marked = marked[:n/8]
 879                 }
 880
 881                 // Scan this bitmap chunk for spans that are in-use
 882                 // but have no marked objects on them.
 883                 for i := range inUse {
 884                         inUseUnmarked := atomic.Load8(&inUse[i]) &^ marked[i]
 885                         if inUseUnmarked == 0 {
 886                                 continue
 887                         }
 888
 889                         for j := uint(0); j < 8; j++ {
 890                                 if inUseUnmarked&(1<<j) != 0 {
 891                                         s := ha.spans[arenaPage+uint(i)*8+j]
 892                                         if s, ok := sl.tryAcquire(s); ok {
 893                                                 npages := s.npages
 894                                                 unlock(&h.lock)
 895                                                 if s.sweep(false) {
 896                                                         nFreed += npages
 897                                                 }
 898                                                 lock(&h.lock)
 899                                                 // Reload inUse. It's possible nearby
 900                                                 // spans were freed when we dropped the
 901                                                 // lock and we don't want to get stale
 902                                                 // pointers from the spans array.
 903                                                 inUseUnmarked = atomic.Load8(&inUse[i]) &^ marked[i]
 904                                         }
 905                                 }
 906                         }
 907                 }
 908
 909                 // Advance.
 910                 pageIdx += uintptr(len(inUse) * 8)
 911                 n -= uintptr(len(inUse) * 8)
 912         }
 913         sweep.active.end(sl)
 914         if traceEnabled() {
 915                 unlock(&h.lock)
 916                 // Account for pages scanned but not reclaimed.
 917                 traceGCSweepSpan((n0 - nFreed) * pageSize)
 918                 lock(&h.lock)
 919         }
 920
 921         assertLockHeld(&h.lock) // Must be locked on return.
 922         return nFreed
 923 }
 924
 925 // spanAllocType represents the type of allocation to make, or
 926 // the type of allocation to be freed.
 927 type spanAllocType uint8
 928
 929 const (
 930         spanAllocHeap          spanAllocType = iota // heap span
 931         spanAllocStack                              // stack span
 932         spanAllocPtrScalarBits                      // unrolled GC prog bitmap span
 933         spanAllocWorkBuf                            // work buf span
 934 )
 935
 936 // manual returns true if the span allocation is manually managed.
 937 func (s spanAllocType) manual() bool {
 938         return s != spanAllocHeap
 939 }
 940
 941 // alloc allocates a new span of npage pages from the GC'd heap.
 942 //
 943 // spanclass indicates the span's size class and scannability.
 944 //
 945 // Returns a span that has been fully initialized. span.needzero indicates
 946 // whether the span has been zeroed. Note that it may not be.
 947 func (h *mheap) alloc(npages uintptr, spanclass spanClass) *mspan {
 948         // Don't do any operations that lock the heap on the G stack.
 949         // It might trigger stack growth, and the stack growth code needs
 950         // to be able to allocate heap.
 951         var s *mspan
 952         systemstack(func() {
 953                 // To prevent excessive heap growth, before allocating n pages
 954                 // we need to sweep and reclaim at least n pages.
 955                 if !isSweepDone() {
 956                         h.reclaim(npages)
 957                 }
 958                 s = h.allocSpan(npages, spanAllocHeap, spanclass)
 959         })
 960         return s
 961 }
 962
 963 // allocManual allocates a manually-managed span of npage pages.
 964 // allocManual returns nil if allocation fails.
 965 //
 966 // allocManual adds the bytes used to *stat, which should be a
 967 // memstats in-use field. Unlike allocations in the GC'd heap, the
 968 // allocation does *not* count toward heapInUse.
 969 //
 970 // The memory backing the returned span may not be zeroed if
 971 // span.needzero is set.
 972 //
 973 // allocManual must be called on the system stack because it may
 974 // acquire the heap lock via allocSpan. See mheap for details.
 975 //
 976 // If new code is written to call allocManual, do NOT use an
 977 // existing spanAllocType value and instead declare a new one.
 978 //
 979 //go:systemstack
 980 func (h *mheap) allocManual(npages uintptr, typ spanAllocType) *mspan {
 981         if !typ.manual() {
 982                 throw("manual span allocation called with non-manually-managed type")
 983         }
 984         return h.allocSpan(npages, typ, 0)
 985 }
 986
 987 // setSpans modifies the span map so [spanOf(base), spanOf(base+npage*pageSize))
 988 // is s.
 989 func (h *mheap) setSpans(base, npage uintptr, s *mspan) {
 990         p := base / pageSize
 991         ai := arenaIndex(base)
 992         ha := h.arenas[ai.l1()][ai.l2()]
 993         for n := uintptr(0); n < npage; n++ {
 994                 i := (p + n) % pagesPerArena
 995                 if i == 0 {
 996                         ai = arenaIndex(base + n*pageSize)
 997                         ha = h.arenas[ai.l1()][ai.l2()]
 998                 }
 999                 ha.spans[i] = s
1000         }
1001 }
1002
1003 // allocNeedsZero checks if the region of address space [base, base+npage*pageSize),
1004 // assumed to be allocated, needs to be zeroed, updating heap arena metadata for
1005 // future allocations.
1006 //
1007 // This must be called each time pages are allocated from the heap, even if the page
1008 // allocator can otherwise prove the memory it's allocating is already zero because
1009 // they're fresh from the operating system. It updates heapArena metadata that is
1010 // critical for future page allocations.
1011 //
1012 // There are no locking constraints on this method.
1013 func (h *mheap) allocNeedsZero(base, npage uintptr) (needZero bool) {
1014         for npage > 0 {
1015                 ai := arenaIndex(base)
1016                 ha := h.arenas[ai.l1()][ai.l2()]
1017
1018                 zeroedBase := atomic.Loaduintptr(&ha.zeroedBase)
1019                 arenaBase := base % heapArenaBytes
1020                 if arenaBase < zeroedBase {
1021                         // We extended into the non-zeroed part of the
1022                         // arena, so this region needs to be zeroed before use.
1023                         //
1024                         // zeroedBase is monotonically increasing, so if we see this now then
1025                         // we can be sure we need to zero this memory region.
1026                         //
1027                         // We still need to update zeroedBase for this arena, and
1028                         // potentially more arenas.
1029                         needZero = true
1030                 }
1031                 // We may observe arenaBase > zeroedBase if we're racing with one or more
1032                 // allocations which are acquiring memory directly before us in the address
1033                 // space. But, because we know no one else is acquiring *this* memory, it's
1034                 // still safe to not zero.
1035
1036                 // Compute how far into the arena we extend into, capped
1037                 // at heapArenaBytes.
1038                 arenaLimit := arenaBase + npage*pageSize
1039                 if arenaLimit > heapArenaBytes {
1040                         arenaLimit = heapArenaBytes
1041                 }
1042                 // Increase ha.zeroedBase so it's >= arenaLimit.
1043                 // We may be racing with other updates.
1044                 for arenaLimit > zeroedBase {
1045                         if atomic.Casuintptr(&ha.zeroedBase, zeroedBase, arenaLimit) {
1046                                 break
1047                         }
1048                         zeroedBase = atomic.Loaduintptr(&ha.zeroedBase)
1049                         // Double check basic conditions of zeroedBase.
1050                         if zeroedBase <= arenaLimit && zeroedBase > arenaBase {
1051                                 // The zeroedBase moved into the space we were trying to
1052                                 // claim. That's very bad, and indicates someone allocated
1053                                 // the same region we did.
1054                                 throw("potentially overlapping in-use allocations detected")
1055                         }
1056                 }
1057
1058                 // Move base forward and subtract from npage to move into
1059                 // the next arena, or finish.
1060                 base += arenaLimit - arenaBase
1061                 npage -= (arenaLimit - arenaBase) / pageSize
1062         }
1063         return
1064 }
1065
1066 // tryAllocMSpan attempts to allocate an mspan object from
1067 // the P-local cache, but may fail.
1068 //
1069 // h.lock need not be held.
1070 //
1071 // This caller must ensure that its P won't change underneath
1072 // it during this function. Currently to ensure that we enforce
1073 // that the function is run on the system stack, because that's
1074 // the only place it is used now. In the future, this requirement
1075 // may be relaxed if its use is necessary elsewhere.
1076 //
1077 //go:systemstack
1078 func (h *mheap) tryAllocMSpan() *mspan {
1079         pp := getg().m.p.ptr()
1080         // If we don't have a p or the cache is empty, we can't do
1081         // anything here.
1082         if pp == nil || pp.mspancache.len == 0 {
1083                 return nil
1084         }
1085         // Pull off the last entry in the cache.
1086         s := pp.mspancache.buf[pp.mspancache.len-1]
1087         pp.mspancache.len--
1088         return s
1089 }
1090
1091 // allocMSpanLocked allocates an mspan object.
1092 //
1093 // h.lock must be held.
1094 //
1095 // allocMSpanLocked must be called on the system stack because
1096 // its caller holds the heap lock. See mheap for details.
1097 // Running on the system stack also ensures that we won't
1098 // switch Ps during this function. See tryAllocMSpan for details.
1099 //
1100 //go:systemstack
1101 func (h *mheap) allocMSpanLocked() *mspan {
1102         assertLockHeld(&h.lock)
1103
1104         pp := getg().m.p.ptr()
1105         if pp == nil {
1106                 // We don't have a p so just do the normal thing.
1107                 return (*mspan)(h.spanalloc.alloc())
1108         }
1109         // Refill the cache if necessary.
1110         if pp.mspancache.len == 0 {
1111                 const refillCount = len(pp.mspancache.buf) / 2
1112                 for i := 0; i < refillCount; i++ {
1113                         pp.mspancache.buf[i] = (*mspan)(h.spanalloc.alloc())
1114                 }
1115                 pp.mspancache.len = refillCount
1116         }
1117         // Pull off the last entry in the cache.
1118         s := pp.mspancache.buf[pp.mspancache.len-1]
1119         pp.mspancache.len--
1120         return s
1121 }
1122
1123 // freeMSpanLocked free an mspan object.
1124 //
1125 // h.lock must be held.
1126 //
1127 // freeMSpanLocked must be called on the system stack because
1128 // its caller holds the heap lock. See mheap for details.
1129 // Running on the system stack also ensures that we won't
1130 // switch Ps during this function. See tryAllocMSpan for details.
1131 //
1132 //go:systemstack
1133 func (h *mheap) freeMSpanLocked(s *mspan) {
1134         assertLockHeld(&h.lock)
1135
1136         pp := getg().m.p.ptr()
1137         // First try to free the mspan directly to the cache.
1138         if pp != nil && pp.mspancache.len < len(pp.mspancache.buf) {
1139                 pp.mspancache.buf[pp.mspancache.len] = s
1140                 pp.mspancache.len++
1141                 return
1142         }
1143         // Failing that (or if we don't have a p), just free it to
1144         // the heap.
1145         h.spanalloc.free(unsafe.Pointer(s))
1146 }
1147
1148 // allocSpan allocates an mspan which owns npages worth of memory.
1149 //
1150 // If typ.manual() == false, allocSpan allocates a heap span of class spanclass
1151 // and updates heap accounting. If manual == true, allocSpan allocates a
1152 // manually-managed span (spanclass is ignored), and the caller is
1153 // responsible for any accounting related to its use of the span. Either
1154 // way, allocSpan will atomically add the bytes in the newly allocated
1155 // span to *sysStat.
1156 //
1157 // The returned span is fully initialized.
1158 //
1159 // h.lock must not be held.
1160 //
1161 // allocSpan must be called on the system stack both because it acquires
1162 // the heap lock and because it must block GC transitions.
1163 //
1164 //go:systemstack
1165 func (h *mheap) allocSpan(npages uintptr, typ spanAllocType, spanclass spanClass) (s *mspan) {
1166         // Function-global state.
1167         gp := getg()
1168         base, scav := uintptr(0), uintptr(0)
1169         growth := uintptr(0)
1170
1171         // On some platforms we need to provide physical page aligned stack
1172         // allocations. Where the page size is less than the physical page
1173         // size, we already manage to do this by default.
1174         needPhysPageAlign := physPageAlignedStacks && typ == spanAllocStack && pageSize < physPageSize
1175
1176         // If the allocation is small enough, try the page cache!
1177         // The page cache does not support aligned allocations, so we cannot use
1178         // it if we need to provide a physical page aligned stack allocation.
1179         pp := gp.m.p.ptr()
1180         if !needPhysPageAlign && pp != nil && npages < pageCachePages/4 {
1181                 c := &pp.pcache
1182
1183                 // If the cache is empty, refill it.
1184                 if c.empty() {
1185                         lock(&h.lock)
1186                         *c = h.pages.allocToCache()
1187                         unlock(&h.lock)
1188                 }
1189
1190                 // Try to allocate from the cache.
1191                 base, scav = c.alloc(npages)
1192                 if base != 0 {
1193                         s = h.tryAllocMSpan()
1194                         if s != nil {
1195                                 goto HaveSpan
1196                         }
1197                         // We have a base but no mspan, so we need
1198                         // to lock the heap.
1199                 }
1200         }
1201
1202         // For one reason or another, we couldn't get the
1203         // whole job done without the heap lock.
1204         lock(&h.lock)
1205
1206         if needPhysPageAlign {
1207                 // Overallocate by a physical page to allow for later alignment.
1208                 extraPages := physPageSize / pageSize
1209
1210                 // Find a big enough region first, but then only allocate the
1211                 // aligned portion. We can't just allocate and then free the
1212                 // edges because we need to account for scavenged memory, and
1213                 // that's difficult with alloc.
1214                 //
1215                 // Note that we skip updates to searchAddr here. It's OK if
1216                 // it's stale and higher than normal; it'll operate correctly,
1217                 // just come with a performance cost.
1218                 base, _ = h.pages.find(npages + extraPages)
1219                 if base == 0 {
1220                         var ok bool
1221                         growth, ok = h.grow(npages + extraPages)
1222                         if !ok {
1223                                 unlock(&h.lock)
1224                                 return nil
1225                         }
1226                         base, _ = h.pages.find(npages + extraPages)
1227                         if base == 0 {
1228                                 throw("grew heap, but no adequate free space found")
1229                         }
1230                 }
1231                 base = alignUp(base, physPageSize)
1232                 scav = h.pages.allocRange(base, npages)
1233         }
1234
1235         if base == 0 {
1236                 // Try to acquire a base address.
1237                 base, scav = h.pages.alloc(npages)
1238                 if base == 0 {
1239                         var ok bool
1240                         growth, ok = h.grow(npages)
1241                         if !ok {
1242                                 unlock(&h.lock)
1243                                 return nil
1244                         }
1245                         base, scav = h.pages.alloc(npages)
1246                         if base == 0 {
1247                                 throw("grew heap, but no adequate free space found")
1248                         }
1249                 }
1250         }
1251         if s == nil {
1252                 // We failed to get an mspan earlier, so grab
1253                 // one now that we have the heap lock.
1254                 s = h.allocMSpanLocked()
1255         }
1256         unlock(&h.lock)
1257
1258 HaveSpan:
1259         // Decide if we need to scavenge in response to what we just allocated.
1260         // Specifically, we track the maximum amount of memory to scavenge of all
1261         // the alternatives below, assuming that the maximum satisfies *all*
1262         // conditions we check (e.g. if we need to scavenge X to satisfy the
1263         // memory limit and Y to satisfy heap-growth scavenging, and Y > X, then
1264         // it's fine to pick Y, because the memory limit is still satisfied).
1265         //
1266         // It's fine to do this after allocating because we expect any scavenged
1267         // pages not to get touched until we return. Simultaneously, it's important
1268         // to do this before calling sysUsed because that may commit address space.
1269         bytesToScavenge := uintptr(0)
1270         forceScavenge := false
1271         if limit := gcController.memoryLimit.Load(); !gcCPULimiter.limiting() {
1272                 // Assist with scavenging to maintain the memory limit by the amount
1273                 // that we expect to page in.
1274                 inuse := gcController.mappedReady.Load()
1275                 // Be careful about overflow, especially with uintptrs. Even on 32-bit platforms
1276                 // someone can set a really big memory limit that isn't maxInt64.
1277                 if uint64(scav)+inuse > uint64(limit) {
1278                         bytesToScavenge = uintptr(uint64(scav) + inuse - uint64(limit))
1279                         forceScavenge = true
1280                 }
1281         }
1282         if goal := scavenge.gcPercentGoal.Load(); goal != ^uint64(0) && growth > 0 {
1283                 // We just caused a heap growth, so scavenge down what will soon be used.
1284                 // By scavenging inline we deal with the failure to allocate out of
1285                 // memory fragments by scavenging the memory fragments that are least
1286                 // likely to be re-used.
1287                 //
1288                 // Only bother with this because we're not using a memory limit. We don't
1289                 // care about heap growths as long as we're under the memory limit, and the
1290                 // previous check for scaving already handles that.
1291                 if retained := heapRetained(); retained+uint64(growth) > goal {
1292                         // The scavenging algorithm requires the heap lock to be dropped so it
1293                         // can acquire it only sparingly. This is a potentially expensive operation
1294                         // so it frees up other goroutines to allocate in the meanwhile. In fact,
1295                         // they can make use of the growth we just created.
1296                         todo := growth
1297                         if overage := uintptr(retained + uint64(growth) - goal); todo > overage {
1298                                 todo = overage
1299                         }
1300                         if todo > bytesToScavenge {
1301                                 bytesToScavenge = todo
1302                         }
1303                 }
1304         }
1305         // There are a few very limited circumstances where we won't have a P here.
1306         // It's OK to simply skip scavenging in these cases. Something else will notice
1307         // and pick up the tab.
1308         var now int64
1309         if pp != nil && bytesToScavenge > 0 {
1310                 // Measure how long we spent scavenging and add that measurement to the assist
1311                 // time so we can track it for the GC CPU limiter.
1312                 //
1313                 // Limiter event tracking might be disabled if we end up here
1314                 // while on a mark worker.
1315                 start := nanotime()
1316                 track := pp.limiterEvent.start(limiterEventScavengeAssist, start)
1317
1318                 // Scavenge, but back out if the limiter turns on.
1319                 released := h.pages.scavenge(bytesToScavenge, func() bool {
1320                         return gcCPULimiter.limiting()
1321                 }, forceScavenge)
1322
1323                 mheap_.pages.scav.releasedEager.Add(released)
1324
1325                 // Finish up accounting.
1326                 now = nanotime()
1327                 if track {
1328                         pp.limiterEvent.stop(limiterEventScavengeAssist, now)
1329                 }
1330                 scavenge.assistTime.Add(now - start)
1331         }
1332
1333         // Initialize the span.
1334         h.initSpan(s, typ, spanclass, base, npages)
1335
1336         // Commit and account for any scavenged memory that the span now owns.
1337         nbytes := npages * pageSize
1338         if scav != 0 {
1339                 // sysUsed all the pages that are actually available
1340                 // in the span since some of them might be scavenged.
1341                 sysUsed(unsafe.Pointer(base), nbytes, scav)
1342                 gcController.heapReleased.add(-int64(scav))
1343         }
1344         // Update stats.
1345         gcController.heapFree.add(-int64(nbytes - scav))
1346         if typ == spanAllocHeap {
1347                 gcController.heapInUse.add(int64(nbytes))
1348         }
1349         // Update consistent stats.
1350         stats := memstats.heapStats.acquire()
1351         atomic.Xaddint64(&stats.committed, int64(scav))
1352         atomic.Xaddint64(&stats.released, -int64(scav))
1353         switch typ {
1354         case spanAllocHeap:
1355                 atomic.Xaddint64(&stats.inHeap, int64(nbytes))
1356         case spanAllocStack:
1357                 atomic.Xaddint64(&stats.inStacks, int64(nbytes))
1358         case spanAllocPtrScalarBits:
1359                 atomic.Xaddint64(&stats.inPtrScalarBits, int64(nbytes))
1360         case spanAllocWorkBuf:
1361                 atomic.Xaddint64(&stats.inWorkBufs, int64(nbytes))
1362         }
1363         memstats.heapStats.release()
1364
1365         pageTraceAlloc(pp, now, base, npages)
1366         return s
1367 }
1368
1369 // initSpan initializes a blank span s which will represent the range
1370 // [base, base+npages*pageSize). typ is the type of span being allocated.
1371 func (h *mheap) initSpan(s *mspan, typ spanAllocType, spanclass spanClass, base, npages uintptr) {
1372         // At this point, both s != nil and base != 0, and the heap
1373         // lock is no longer held. Initialize the span.
1374         s.init(base, npages)
1375         if h.allocNeedsZero(base, npages) {
1376                 s.needzero = 1
1377         }
1378         nbytes := npages * pageSize
1379         if typ.manual() {
1380                 s.manualFreeList = 0
1381                 s.nelems = 0
1382                 s.limit = s.base() + s.npages*pageSize
1383                 s.state.set(mSpanManual)
1384         } else {
1385                 // We must set span properties before the span is published anywhere
1386                 // since we're not holding the heap lock.
1387                 s.spanclass = spanclass
1388                 if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
1389                         s.elemsize = nbytes
1390                         s.nelems = 1
1391                         s.divMul = 0
1392                 } else {
1393                         s.elemsize = uintptr(class_to_size[sizeclass])
1394                         if goexperiment.AllocHeaders && !s.spanclass.noscan() && heapBitsInSpan(s.elemsize) {
1395                                 // In the allocheaders experiment, reserve space for the pointer/scan bitmap at the end.
1396                                 s.nelems = uint16((nbytes - (nbytes / goarch.PtrSize / 8)) / s.elemsize)
1397                         } else {
1398                                 s.nelems = uint16(nbytes / s.elemsize)
1399                         }
1400                         s.divMul = class_to_divmagic[sizeclass]
1401                 }
1402
1403                 // Initialize mark and allocation structures.
1404                 s.freeindex = 0
1405                 s.freeIndexForScan = 0
1406                 s.allocCache = ^uint64(0) // all 1s indicating all free.
1407                 s.gcmarkBits = newMarkBits(uintptr(s.nelems))
1408                 s.allocBits = newAllocBits(uintptr(s.nelems))
1409
1410                 // It's safe to access h.sweepgen without the heap lock because it's
1411                 // only ever updated with the world stopped and we run on the
1412                 // systemstack which blocks a STW transition.
1413                 atomic.Store(&s.sweepgen, h.sweepgen)
1414
1415                 // Now that the span is filled in, set its state. This
1416                 // is a publication barrier for the other fields in
1417                 // the span. While valid pointers into this span
1418                 // should never be visible until the span is returned,
1419                 // if the garbage collector finds an invalid pointer,
1420                 // access to the span may race with initialization of
1421                 // the span. We resolve this race by atomically
1422                 // setting the state after the span is fully
1423                 // initialized, and atomically checking the state in
1424                 // any situation where a pointer is suspect.
1425                 s.state.set(mSpanInUse)
1426         }
1427
1428         // Publish the span in various locations.
1429
1430         // This is safe to call without the lock held because the slots
1431         // related to this span will only ever be read or modified by
1432         // this thread until pointers into the span are published (and
1433         // we execute a publication barrier at the end of this function
1434         // before that happens) or pageInUse is updated.
1435         h.setSpans(s.base(), npages, s)
1436
1437         if !typ.manual() {
1438                 // Mark in-use span in arena page bitmap.
1439                 //
1440                 // This publishes the span to the page sweeper, so
1441                 // it's imperative that the span be completely initialized
1442                 // prior to this line.
1443                 arena, pageIdx, pageMask := pageIndexOf(s.base())
1444                 atomic.Or8(&arena.pageInUse[pageIdx], pageMask)
1445
1446                 // Update related page sweeper stats.
1447                 h.pagesInUse.Add(npages)
1448         }
1449
1450         // Make sure the newly allocated span will be observed
1451         // by the GC before pointers into the span are published.
1452         publicationBarrier()
1453 }
1454
1455 // Try to add at least npage pages of memory to the heap,
1456 // returning how much the heap grew by and whether it worked.
1457 //
1458 // h.lock must be held.
1459 func (h *mheap) grow(npage uintptr) (uintptr, bool) {
1460         assertLockHeld(&h.lock)
1461
1462         // We must grow the heap in whole palloc chunks.
1463         // We call sysMap below but note that because we
1464         // round up to pallocChunkPages which is on the order
1465         // of MiB (generally >= to the huge page size) we
1466         // won't be calling it too much.
1467         ask := alignUp(npage, pallocChunkPages) * pageSize
1468
1469         totalGrowth := uintptr(0)
1470         // This may overflow because ask could be very large
1471         // and is otherwise unrelated to h.curArena.base.
1472         end := h.curArena.base + ask
1473         nBase := alignUp(end, physPageSize)
1474         if nBase > h.curArena.end || /* overflow */ end < h.curArena.base {
1475                 // Not enough room in the current arena. Allocate more
1476                 // arena space. This may not be contiguous with the
1477                 // current arena, so we have to request the full ask.
1478                 av, asize := h.sysAlloc(ask, &h.arenaHints, true)
1479                 if av == nil {
1480                         inUse := gcController.heapFree.load() + gcController.heapReleased.load() + gcController.heapInUse.load()
1481                         print("runtime: out of memory: cannot allocate ", ask, "-byte block (", inUse, " in use)\n")
1482                         return 0, false
1483                 }
1484
1485                 if uintptr(av) == h.curArena.end {
1486                         // The new space is contiguous with the old
1487                         // space, so just extend the current space.
1488                         h.curArena.end = uintptr(av) + asize
1489                 } else {
1490                         // The new space is discontiguous. Track what
1491                         // remains of the current space and switch to
1492                         // the new space. This should be rare.
1493                         if size := h.curArena.end - h.curArena.base; size != 0 {
1494                                 // Transition this space from Reserved to Prepared and mark it
1495                                 // as released since we'll be able to start using it after updating
1496                                 // the page allocator and releasing the lock at any time.
1497                                 sysMap(unsafe.Pointer(h.curArena.base), size, &gcController.heapReleased)
1498                                 // Update stats.
1499                                 stats := memstats.heapStats.acquire()
1500                                 atomic.Xaddint64(&stats.released, int64(size))
1501                                 memstats.heapStats.release()
1502                                 // Update the page allocator's structures to make this
1503                                 // space ready for allocation.
1504                                 h.pages.grow(h.curArena.base, size)
1505                                 totalGrowth += size
1506                         }
1507                         // Switch to the new space.
1508                         h.curArena.base = uintptr(av)
1509                         h.curArena.end = uintptr(av) + asize
1510                 }
1511
1512                 // Recalculate nBase.
1513                 // We know this won't overflow, because sysAlloc returned
1514                 // a valid region starting at h.curArena.base which is at
1515                 // least ask bytes in size.
1516                 nBase = alignUp(h.curArena.base+ask, physPageSize)
1517         }
1518
1519         // Grow into the current arena.
1520         v := h.curArena.base
1521         h.curArena.base = nBase
1522
1523         // Transition the space we're going to use from Reserved to Prepared.
1524         //
1525         // The allocation is always aligned to the heap arena
1526         // size which is always > physPageSize, so its safe to
1527         // just add directly to heapReleased.
1528         sysMap(unsafe.Pointer(v), nBase-v, &gcController.heapReleased)
1529
1530         // The memory just allocated counts as both released
1531         // and idle, even though it's not yet backed by spans.
1532         stats := memstats.heapStats.acquire()
1533         atomic.Xaddint64(&stats.released, int64(nBase-v))
1534         memstats.heapStats.release()
1535
1536         // Update the page allocator's structures to make this
1537         // space ready for allocation.
1538         h.pages.grow(v, nBase-v)
1539         totalGrowth += nBase - v
1540         return totalGrowth, true
1541 }
1542
1543 // Free the span back into the heap.
1544 func (h *mheap) freeSpan(s *mspan) {
1545         systemstack(func() {
1546                 pageTraceFree(getg().m.p.ptr(), 0, s.base(), s.npages)
1547
1548                 lock(&h.lock)
1549                 if msanenabled {
1550                         // Tell msan that this entire span is no longer in use.
1551                         base := unsafe.Pointer(s.base())
1552                         bytes := s.npages << _PageShift
1553                         msanfree(base, bytes)
1554                 }
1555                 if asanenabled {
1556                         // Tell asan that this entire span is no longer in use.
1557                         base := unsafe.Pointer(s.base())
1558                         bytes := s.npages << _PageShift
1559                         asanpoison(base, bytes)
1560                 }
1561                 h.freeSpanLocked(s, spanAllocHeap)
1562                 unlock(&h.lock)
1563         })
1564 }
1565
1566 // freeManual frees a manually-managed span returned by allocManual.
1567 // typ must be the same as the spanAllocType passed to the allocManual that
1568 // allocated s.
1569 //
1570 // This must only be called when gcphase == _GCoff. See mSpanState for
1571 // an explanation.
1572 //
1573 // freeManual must be called on the system stack because it acquires
1574 // the heap lock. See mheap for details.
1575 //
1576 //go:systemstack
1577 func (h *mheap) freeManual(s *mspan, typ spanAllocType) {
1578         pageTraceFree(getg().m.p.ptr(), 0, s.base(), s.npages)
1579
1580         s.needzero = 1
1581         lock(&h.lock)
1582         h.freeSpanLocked(s, typ)
1583         unlock(&h.lock)
1584 }
1585
1586 func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) {
1587         assertLockHeld(&h.lock)
1588
1589         switch s.state.get() {
1590         case mSpanManual:
1591                 if s.allocCount != 0 {
1592                         throw("mheap.freeSpanLocked - invalid stack free")
1593                 }
1594         case mSpanInUse:
1595                 if s.isUserArenaChunk {
1596                         throw("mheap.freeSpanLocked - invalid free of user arena chunk")
1597                 }
1598                 if s.allocCount != 0 || s.sweepgen != h.sweepgen {
1599                         print("mheap.freeSpanLocked - span ", s, " ptr ", hex(s.base()), " allocCount ", s.allocCount, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
1600                         throw("mheap.freeSpanLocked - invalid free")
1601                 }
1602                 h.pagesInUse.Add(-s.npages)
1603
1604                 // Clear in-use bit in arena page bitmap.
1605                 arena, pageIdx, pageMask := pageIndexOf(s.base())
1606                 atomic.And8(&arena.pageInUse[pageIdx], ^pageMask)
1607         default:
1608                 throw("mheap.freeSpanLocked - invalid span state")
1609         }
1610
1611         // Update stats.
1612         //
1613         // Mirrors the code in allocSpan.
1614         nbytes := s.npages * pageSize
1615         gcController.heapFree.add(int64(nbytes))
1616         if typ == spanAllocHeap {
1617                 gcController.heapInUse.add(-int64(nbytes))
1618         }
1619         // Update consistent stats.
1620         stats := memstats.heapStats.acquire()
1621         switch typ {
1622         case spanAllocHeap:
1623                 atomic.Xaddint64(&stats.inHeap, -int64(nbytes))
1624         case spanAllocStack:
1625                 atomic.Xaddint64(&stats.inStacks, -int64(nbytes))
1626         case spanAllocPtrScalarBits:
1627                 atomic.Xaddint64(&stats.inPtrScalarBits, -int64(nbytes))
1628         case spanAllocWorkBuf:
1629                 atomic.Xaddint64(&stats.inWorkBufs, -int64(nbytes))
1630         }
1631         memstats.heapStats.release()
1632
1633         // Mark the space as free.
1634         h.pages.free(s.base(), s.npages)
1635
1636         // Free the span structure. We no longer have a use for it.
1637         s.state.set(mSpanDead)
1638         h.freeMSpanLocked(s)
1639 }
1640
1641 // scavengeAll acquires the heap lock (blocking any additional
1642 // manipulation of the page allocator) and iterates over the whole
1643 // heap, scavenging every free page available.
1644 //
1645 // Must run on the system stack because it acquires the heap lock.
1646 //
1647 //go:systemstack
1648 func (h *mheap) scavengeAll() {
1649         // Disallow malloc or panic while holding the heap lock. We do
1650         // this here because this is a non-mallocgc entry-point to
1651         // the mheap API.
1652         gp := getg()
1653         gp.m.mallocing++
1654
1655         // Force scavenge everything.
1656         released := h.pages.scavenge(^uintptr(0), nil, true)
1657
1658         gp.m.mallocing--
1659
1660         if debug.scavtrace > 0 {
1661                 printScavTrace(0, released, true)
1662         }
1663 }
1664
1665 //go:linkname runtime_debug_freeOSMemory runtime/debug.freeOSMemory
1666 func runtime_debug_freeOSMemory() {
1667         GC()
1668         systemstack(func() { mheap_.scavengeAll() })
1669 }
1670
1671 // Initialize a new span with the given start and npages.
1672 func (span *mspan) init(base uintptr, npages uintptr) {
1673         // span is *not* zeroed.
1674         span.next = nil
1675         span.prev = nil
1676         span.list = nil
1677         span.startAddr = base
1678         span.npages = npages
1679         span.allocCount = 0
1680         span.spanclass = 0
1681         span.elemsize = 0
1682         span.speciallock.key = 0
1683         span.specials = nil
1684         span.needzero = 0
1685         span.freeindex = 0
1686         span.freeIndexForScan = 0
1687         span.allocBits = nil
1688         span.gcmarkBits = nil
1689         span.pinnerBits = nil
1690         span.state.set(mSpanDead)
1691         lockInit(&span.speciallock, lockRankMspanSpecial)
1692 }
1693
1694 func (span *mspan) inList() bool {
1695         return span.list != nil
1696 }
1697
1698 // Initialize an empty doubly-linked list.
1699 func (list *mSpanList) init() {
1700         list.first = nil
1701         list.last = nil
1702 }
1703
1704 func (list *mSpanList) remove(span *mspan) {
1705         if span.list != list {
1706                 print("runtime: failed mSpanList.remove span.npages=", span.npages,
1707                         " span=", span, " prev=", span.prev, " span.list=", span.list, " list=", list, "\n")
1708                 throw("mSpanList.remove")
1709         }
1710         if list.first == span {
1711                 list.first = span.next
1712         } else {
1713                 span.prev.next = span.next
1714         }
1715         if list.last == span {
1716                 list.last = span.prev
1717         } else {
1718                 span.next.prev = span.prev
1719         }
1720         span.next = nil
1721         span.prev = nil
1722         span.list = nil
1723 }
1724
1725 func (list *mSpanList) isEmpty() bool {
1726         return list.first == nil
1727 }
1728
1729 func (list *mSpanList) insert(span *mspan) {
1730         if span.next != nil || span.prev != nil || span.list != nil {
1731                 println("runtime: failed mSpanList.insert", span, span.next, span.prev, span.list)
1732                 throw("mSpanList.insert")
1733         }
1734         span.next = list.first
1735         if list.first != nil {
1736                 // The list contains at least one span; link it in.
1737                 // The last span in the list doesn't change.
1738                 list.first.prev = span
1739         } else {
1740                 // The list contains no spans, so this is also the last span.
1741                 list.last = span
1742         }
1743         list.first = span
1744         span.list = list
1745 }
1746
1747 func (list *mSpanList) insertBack(span *mspan) {
1748         if span.next != nil || span.prev != nil || span.list != nil {
1749                 println("runtime: failed mSpanList.insertBack", span, span.next, span.prev, span.list)
1750                 throw("mSpanList.insertBack")
1751         }
1752         span.prev = list.last
1753         if list.last != nil {
1754                 // The list contains at least one span.
1755                 list.last.next = span
1756         } else {
1757                 // The list contains no spans, so this is also the first span.
1758                 list.first = span
1759         }
1760         list.last = span
1761         span.list = list
1762 }
1763
1764 // takeAll removes all spans from other and inserts them at the front
1765 // of list.
1766 func (list *mSpanList) takeAll(other *mSpanList) {
1767         if other.isEmpty() {
1768                 return
1769         }
1770
1771         // Reparent everything in other to list.
1772         for s := other.first; s != nil; s = s.next {
1773                 s.list = list
1774         }
1775
1776         // Concatenate the lists.
1777         if list.isEmpty() {
1778                 *list = *other
1779         } else {
1780                 // Neither list is empty. Put other before list.
1781                 other.last.next = list.first
1782                 list.first.prev = other.last
1783                 list.first = other.first
1784         }
1785
1786         other.first, other.last = nil, nil
1787 }
1788
1789 const (
1790         _KindSpecialFinalizer = 1
1791         _KindSpecialProfile   = 2
1792         // _KindSpecialReachable is a special used for tracking
1793         // reachability during testing.
1794         _KindSpecialReachable = 3
1795         // _KindSpecialPinCounter is a special used for objects that are pinned
1796         // multiple times
1797         _KindSpecialPinCounter = 4
1798         // Note: The finalizer special must be first because if we're freeing
1799         // an object, a finalizer special will cause the freeing operation
1800         // to abort, and we want to keep the other special records around
1801         // if that happens.
1802 )
1803
1804 type special struct {
1805         _      sys.NotInHeap
1806         next   *special // linked list in span
1807         offset uint16   // span offset of object
1808         kind   byte     // kind of special
1809 }
1810
1811 // spanHasSpecials marks a span as having specials in the arena bitmap.
1812 func spanHasSpecials(s *mspan) {
1813         arenaPage := (s.base() / pageSize) % pagesPerArena
1814         ai := arenaIndex(s.base())
1815         ha := mheap_.arenas[ai.l1()][ai.l2()]
1816         atomic.Or8(&ha.pageSpecials[arenaPage/8], uint8(1)<<(arenaPage%8))
1817 }
1818
1819 // spanHasNoSpecials marks a span as having no specials in the arena bitmap.
1820 func spanHasNoSpecials(s *mspan) {
1821         arenaPage := (s.base() / pageSize) % pagesPerArena
1822         ai := arenaIndex(s.base())
1823         ha := mheap_.arenas[ai.l1()][ai.l2()]
1824         atomic.And8(&ha.pageSpecials[arenaPage/8], ^(uint8(1) << (arenaPage % 8)))
1825 }
1826
1827 // Adds the special record s to the list of special records for
1828 // the object p. All fields of s should be filled in except for
1829 // offset & next, which this routine will fill in.
1830 // Returns true if the special was successfully added, false otherwise.
1831 // (The add will fail only if a record with the same p and s->kind
1832 // already exists.)
1833 func addspecial(p unsafe.Pointer, s *special) bool {
1834         span := spanOfHeap(uintptr(p))
1835         if span == nil {
1836                 throw("addspecial on invalid pointer")
1837         }
1838
1839         // Ensure that the span is swept.
1840         // Sweeping accesses the specials list w/o locks, so we have
1841         // to synchronize with it. And it's just much safer.
1842         mp := acquirem()
1843         span.ensureSwept()
1844
1845         offset := uintptr(p) - span.base()
1846         kind := s.kind
1847
1848         lock(&span.speciallock)
1849
1850         // Find splice point, check for existing record.
1851         iter, exists := span.specialFindSplicePoint(offset, kind)
1852         if !exists {
1853                 // Splice in record, fill in offset.
1854                 s.offset = uint16(offset)
1855                 s.next = *iter
1856                 *iter = s
1857                 spanHasSpecials(span)
1858         }
1859
1860         unlock(&span.speciallock)
1861         releasem(mp)
1862         return !exists // already exists
1863 }
1864
1865 // Removes the Special record of the given kind for the object p.
1866 // Returns the record if the record existed, nil otherwise.
1867 // The caller must FixAlloc_Free the result.
1868 func removespecial(p unsafe.Pointer, kind uint8) *special {
1869         span := spanOfHeap(uintptr(p))
1870         if span == nil {
1871                 throw("removespecial on invalid pointer")
1872         }
1873
1874         // Ensure that the span is swept.
1875         // Sweeping accesses the specials list w/o locks, so we have
1876         // to synchronize with it. And it's just much safer.
1877         mp := acquirem()
1878         span.ensureSwept()
1879
1880         offset := uintptr(p) - span.base()
1881
1882         var result *special
1883         lock(&span.speciallock)
1884
1885         iter, exists := span.specialFindSplicePoint(offset, kind)
1886         if exists {
1887                 s := *iter
1888                 *iter = s.next
1889                 result = s
1890         }
1891         if span.specials == nil {
1892                 spanHasNoSpecials(span)
1893         }
1894         unlock(&span.speciallock)
1895         releasem(mp)
1896         return result
1897 }
1898
1899 // Find a splice point in the sorted list and check for an already existing
1900 // record. Returns a pointer to the next-reference in the list predecessor.
1901 // Returns true, if the referenced item is an exact match.
1902 func (span *mspan) specialFindSplicePoint(offset uintptr, kind byte) (**special, bool) {
1903         // Find splice point, check for existing record.
1904         iter := &span.specials
1905         found := false
1906         for {
1907                 s := *iter
1908                 if s == nil {
1909                         break
1910                 }
1911                 if offset == uintptr(s.offset) && kind == s.kind {
1912                         found = true
1913                         break
1914                 }
1915                 if offset < uintptr(s.offset) || (offset == uintptr(s.offset) && kind < s.kind) {
1916                         break
1917                 }
1918                 iter = &s.next
1919         }
1920         return iter, found
1921 }
1922
1923 // The described object has a finalizer set for it.
1924 //
1925 // specialfinalizer is allocated from non-GC'd memory, so any heap
1926 // pointers must be specially handled.
1927 type specialfinalizer struct {
1928         _       sys.NotInHeap
1929         special special
1930         fn      *funcval // May be a heap pointer.
1931         nret    uintptr
1932         fint    *_type   // May be a heap pointer, but always live.
1933         ot      *ptrtype // May be a heap pointer, but always live.
1934 }
1935
1936 // Adds a finalizer to the object p. Returns true if it succeeded.
1937 func addfinalizer(p unsafe.Pointer, f *funcval, nret uintptr, fint *_type, ot *ptrtype) bool {
1938         lock(&mheap_.speciallock)
1939         s := (*specialfinalizer)(mheap_.specialfinalizeralloc.alloc())
1940         unlock(&mheap_.speciallock)
1941         s.special.kind = _KindSpecialFinalizer
1942         s.fn = f
1943         s.nret = nret
1944         s.fint = fint
1945         s.ot = ot
1946         if addspecial(p, &s.special) {
1947                 // This is responsible for maintaining the same
1948                 // GC-related invariants as markrootSpans in any
1949                 // situation where it's possible that markrootSpans
1950                 // has already run but mark termination hasn't yet.
1951                 if gcphase != _GCoff {
1952                         base, span, _ := findObject(uintptr(p), 0, 0)
1953                         mp := acquirem()
1954                         gcw := &mp.p.ptr().gcw
1955                         // Mark everything reachable from the object
1956                         // so it's retained for the finalizer.
1957                         if !span.spanclass.noscan() {
1958                                 scanobject(base, gcw)
1959                         }
1960                         // Mark the finalizer itself, since the
1961                         // special isn't part of the GC'd heap.
1962                         scanblock(uintptr(unsafe.Pointer(&s.fn)), goarch.PtrSize, &oneptrmask[0], gcw, nil)
1963                         releasem(mp)
1964                 }
1965                 return true
1966         }
1967
1968         // There was an old finalizer
1969         lock(&mheap_.speciallock)
1970         mheap_.specialfinalizeralloc.free(unsafe.Pointer(s))
1971         unlock(&mheap_.speciallock)
1972         return false
1973 }
1974
1975 // Removes the finalizer (if any) from the object p.
1976 func removefinalizer(p unsafe.Pointer) {
1977         s := (*specialfinalizer)(unsafe.Pointer(removespecial(p, _KindSpecialFinalizer)))
1978         if s == nil {
1979                 return // there wasn't a finalizer to remove
1980         }
1981         lock(&mheap_.speciallock)
1982         mheap_.specialfinalizeralloc.free(unsafe.Pointer(s))
1983         unlock(&mheap_.speciallock)
1984 }
1985
1986 // The described object is being heap profiled.
1987 type specialprofile struct {
1988         _       sys.NotInHeap
1989         special special
1990         b       *bucket
1991 }
1992
1993 // Set the heap profile bucket associated with addr to b.
1994 func setprofilebucket(p unsafe.Pointer, b *bucket) {
1995         lock(&mheap_.speciallock)
1996         s := (*specialprofile)(mheap_.specialprofilealloc.alloc())
1997         unlock(&mheap_.speciallock)
1998         s.special.kind = _KindSpecialProfile
1999         s.b = b
2000         if !addspecial(p, &s.special) {
2001                 throw("setprofilebucket: profile already set")
2002         }
2003 }
2004
2005 // specialReachable tracks whether an object is reachable on the next
2006 // GC cycle. This is used by testing.
2007 type specialReachable struct {
2008         special   special
2009         done      bool
2010         reachable bool
2011 }
2012
2013 // specialPinCounter tracks whether an object is pinned multiple times.
2014 type specialPinCounter struct {
2015         special special
2016         counter uintptr
2017 }
2018
2019 // specialsIter helps iterate over specials lists.
2020 type specialsIter struct {
2021         pprev **special
2022         s     *special
2023 }
2024
2025 func newSpecialsIter(span *mspan) specialsIter {
2026         return specialsIter{&span.specials, span.specials}
2027 }
2028
2029 func (i *specialsIter) valid() bool {
2030         return i.s != nil
2031 }
2032
2033 func (i *specialsIter) next() {
2034         i.pprev = &i.s.next
2035         i.s = *i.pprev
2036 }
2037
2038 // unlinkAndNext removes the current special from the list and moves
2039 // the iterator to the next special. It returns the unlinked special.
2040 func (i *specialsIter) unlinkAndNext() *special {
2041         cur := i.s
2042         i.s = cur.next
2043         *i.pprev = i.s
2044         return cur
2045 }
2046
2047 // freeSpecial performs any cleanup on special s and deallocates it.
2048 // s must already be unlinked from the specials list.
2049 func freeSpecial(s *special, p unsafe.Pointer, size uintptr) {
2050         switch s.kind {
2051         case _KindSpecialFinalizer:
2052                 sf := (*specialfinalizer)(unsafe.Pointer(s))
2053                 queuefinalizer(p, sf.fn, sf.nret, sf.fint, sf.ot)
2054                 lock(&mheap_.speciallock)
2055                 mheap_.specialfinalizeralloc.free(unsafe.Pointer(sf))
2056                 unlock(&mheap_.speciallock)
2057         case _KindSpecialProfile:
2058                 sp := (*specialprofile)(unsafe.Pointer(s))
2059                 mProf_Free(sp.b, size)
2060                 lock(&mheap_.speciallock)
2061                 mheap_.specialprofilealloc.free(unsafe.Pointer(sp))
2062                 unlock(&mheap_.speciallock)
2063         case _KindSpecialReachable:
2064                 sp := (*specialReachable)(unsafe.Pointer(s))
2065                 sp.done = true
2066                 // The creator frees these.
2067         case _KindSpecialPinCounter:
2068                 lock(&mheap_.speciallock)
2069                 mheap_.specialPinCounterAlloc.free(unsafe.Pointer(s))
2070                 unlock(&mheap_.speciallock)
2071         default:
2072                 throw("bad special kind")
2073                 panic("not reached")
2074         }
2075 }
2076
2077 // gcBits is an alloc/mark bitmap. This is always used as gcBits.x.
2078 type gcBits struct {
2079         _ sys.NotInHeap
2080         x uint8
2081 }
2082
2083 // bytep returns a pointer to the n'th byte of b.
2084 func (b *gcBits) bytep(n uintptr) *uint8 {
2085         return addb(&b.x, n)
2086 }
2087
2088 // bitp returns a pointer to the byte containing bit n and a mask for
2089 // selecting that bit from *bytep.
2090 func (b *gcBits) bitp(n uintptr) (bytep *uint8, mask uint8) {
2091         return b.bytep(n / 8), 1 << (n % 8)
2092 }
2093
2094 const gcBitsChunkBytes = uintptr(64 << 10)
2095 const gcBitsHeaderBytes = unsafe.Sizeof(gcBitsHeader{})
2096
2097 type gcBitsHeader struct {
2098         free uintptr // free is the index into bits of the next free byte.
2099         next uintptr // *gcBits triggers recursive type bug. (issue 14620)
2100 }
2101
2102 type gcBitsArena struct {
2103         _ sys.NotInHeap
2104         // gcBitsHeader // side step recursive type bug (issue 14620) by including fields by hand.
2105         free uintptr // free is the index into bits of the next free byte; read/write atomically
2106         next *gcBitsArena
2107         bits [gcBitsChunkBytes - gcBitsHeaderBytes]gcBits
2108 }
2109
2110 var gcBitsArenas struct {
2111         lock     mutex
2112         free     *gcBitsArena
2113         next     *gcBitsArena // Read atomically. Write atomically under lock.
2114         current  *gcBitsArena
2115         previous *gcBitsArena
2116 }
2117
2118 // tryAlloc allocates from b or returns nil if b does not have enough room.
2119 // This is safe to call concurrently.
2120 func (b *gcBitsArena) tryAlloc(bytes uintptr) *gcBits {
2121         if b == nil || atomic.Loaduintptr(&b.free)+bytes > uintptr(len(b.bits)) {
2122                 return nil
2123         }
2124         // Try to allocate from this block.
2125         end := atomic.Xadduintptr(&b.free, bytes)
2126         if end > uintptr(len(b.bits)) {
2127                 return nil
2128         }
2129         // There was enough room.
2130         start := end - bytes
2131         return &b.bits[start]
2132 }
2133
2134 // newMarkBits returns a pointer to 8 byte aligned bytes
2135 // to be used for a span's mark bits.
2136 func newMarkBits(nelems uintptr) *gcBits {
2137         blocksNeeded := (nelems + 63) / 64
2138         bytesNeeded := blocksNeeded * 8
2139
2140         // Try directly allocating from the current head arena.
2141         head := (*gcBitsArena)(atomic.Loadp(unsafe.Pointer(&gcBitsArenas.next)))
2142         if p := head.tryAlloc(bytesNeeded); p != nil {
2143                 return p
2144         }
2145
2146         // There's not enough room in the head arena. We may need to
2147         // allocate a new arena.
2148         lock(&gcBitsArenas.lock)
2149         // Try the head arena again, since it may have changed. Now
2150         // that we hold the lock, the list head can't change, but its
2151         // free position still can.
2152         if p := gcBitsArenas.next.tryAlloc(bytesNeeded); p != nil {
2153                 unlock(&gcBitsArenas.lock)
2154                 return p
2155         }
2156
2157         // Allocate a new arena. This may temporarily drop the lock.
2158         fresh := newArenaMayUnlock()
2159         // If newArenaMayUnlock dropped the lock, another thread may
2160         // have put a fresh arena on the "next" list. Try allocating
2161         // from next again.
2162         if p := gcBitsArenas.next.tryAlloc(bytesNeeded); p != nil {
2163                 // Put fresh back on the free list.
2164                 // TODO: Mark it "already zeroed"
2165                 fresh.next = gcBitsArenas.free
2166                 gcBitsArenas.free = fresh
2167                 unlock(&gcBitsArenas.lock)
2168                 return p
2169         }
2170
2171         // Allocate from the fresh arena. We haven't linked it in yet, so
2172         // this cannot race and is guaranteed to succeed.
2173         p := fresh.tryAlloc(bytesNeeded)
2174         if p == nil {
2175                 throw("markBits overflow")
2176         }
2177
2178         // Add the fresh arena to the "next" list.
2179         fresh.next = gcBitsArenas.next
2180         atomic.StorepNoWB(unsafe.Pointer(&gcBitsArenas.next), unsafe.Pointer(fresh))
2181
2182         unlock(&gcBitsArenas.lock)
2183         return p
2184 }
2185
2186 // newAllocBits returns a pointer to 8 byte aligned bytes
2187 // to be used for this span's alloc bits.
2188 // newAllocBits is used to provide newly initialized spans
2189 // allocation bits. For spans not being initialized the
2190 // mark bits are repurposed as allocation bits when
2191 // the span is swept.
2192 func newAllocBits(nelems uintptr) *gcBits {
2193         return newMarkBits(nelems)
2194 }
2195
2196 // nextMarkBitArenaEpoch establishes a new epoch for the arenas
2197 // holding the mark bits. The arenas are named relative to the
2198 // current GC cycle which is demarcated by the call to finishweep_m.
2199 //
2200 // All current spans have been swept.
2201 // During that sweep each span allocated room for its gcmarkBits in
2202 // gcBitsArenas.next block. gcBitsArenas.next becomes the gcBitsArenas.current
2203 // where the GC will mark objects and after each span is swept these bits
2204 // will be used to allocate objects.
2205 // gcBitsArenas.current becomes gcBitsArenas.previous where the span's
2206 // gcAllocBits live until all the spans have been swept during this GC cycle.
2207 // The span's sweep extinguishes all the references to gcBitsArenas.previous
2208 // by pointing gcAllocBits into the gcBitsArenas.current.
2209 // The gcBitsArenas.previous is released to the gcBitsArenas.free list.
2210 func nextMarkBitArenaEpoch() {
2211         lock(&gcBitsArenas.lock)
2212         if gcBitsArenas.previous != nil {
2213                 if gcBitsArenas.free == nil {
2214                         gcBitsArenas.free = gcBitsArenas.previous
2215                 } else {
2216                         // Find end of previous arenas.
2217                         last := gcBitsArenas.previous
2218                         for last = gcBitsArenas.previous; last.next != nil; last = last.next {
2219                         }
2220                         last.next = gcBitsArenas.free
2221                         gcBitsArenas.free = gcBitsArenas.previous
2222                 }
2223         }
2224         gcBitsArenas.previous = gcBitsArenas.current
2225         gcBitsArenas.current = gcBitsArenas.next
2226         atomic.StorepNoWB(unsafe.Pointer(&gcBitsArenas.next), nil) // newMarkBits calls newArena when needed
2227         unlock(&gcBitsArenas.lock)
2228 }
2229
2230 // newArenaMayUnlock allocates and zeroes a gcBits arena.
2231 // The caller must hold gcBitsArena.lock. This may temporarily release it.
2232 func newArenaMayUnlock() *gcBitsArena {
2233         var result *gcBitsArena
2234         if gcBitsArenas.free == nil {
2235                 unlock(&gcBitsArenas.lock)
2236                 result = (*gcBitsArena)(sysAlloc(gcBitsChunkBytes, &memstats.gcMiscSys))
2237                 if result == nil {
2238                         throw("runtime: cannot allocate memory")
2239                 }
2240                 lock(&gcBitsArenas.lock)
2241         } else {
2242                 result = gcBitsArenas.free
2243                 gcBitsArenas.free = gcBitsArenas.free.next
2244                 memclrNoHeapPointers(unsafe.Pointer(result), gcBitsChunkBytes)
2245         }
2246         result.next = nil
2247         // If result.bits is not 8 byte aligned adjust index so
2248         // that &result.bits[result.free] is 8 byte aligned.
2249         if unsafe.Offsetof(gcBitsArena{}.bits)&7 == 0 {
2250                 result.free = 0
2251         } else {
2252                 result.free = 8 - (uintptr(unsafe.Pointer(&result.bits[0])) & 7)
2253         }
2254         return result
2255 }