// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// Garbage collector: type and heap bitmaps.
-//
-// Stack, data, and bss bitmaps
-//
-// Stack frames and global variables in the data and bss sections are
-// described by bitmaps with 1 bit per pointer-sized word. A "1" bit
-// means the word is a live pointer to be visited by the GC (referred to
-// as "pointer"). A "0" bit means the word should be ignored by GC
-// (referred to as "scalar", though it could be a dead pointer value).
-//
-// Heap bitmap
-//
-// The heap bitmap comprises 2 bits for each pointer-sized word in the heap,
-// stored in the heapArena metadata backing each heap arena.
-// That is, if ha is the heapArena for the arena starting a start,
-// then ha.bitmap[0] holds the 2-bit entries for the four words start
-// through start+3*ptrSize, ha.bitmap[1] holds the entries for
-// start+4*ptrSize through start+7*ptrSize, and so on.
-//
-// In each 2-bit entry, the lower bit is a pointer/scalar bit, just
-// like in the stack/data bitmaps described above. The upper bit
-// indicates scan/dead: a "1" value ("scan") indicates that there may
-// be pointers in later words of the allocation, and a "0" value
-// ("dead") indicates there are no more pointers in the allocation. If
-// the upper bit is 0, the lower bit must also be 0, and this
-// indicates scanning can ignore the rest of the allocation.
-//
-// The 2-bit entries are split when written into the byte, so that the top half
-// of the byte contains 4 high (scan) bits and the bottom half contains 4 low
-// (pointer) bits. This form allows a copy from the 1-bit to the 4-bit form to
-// keep the pointer bits contiguous, instead of having to space them out.
-//
-// The code makes use of the fact that the zero value for a heap
-// bitmap means scalar/dead. This property must be preserved when
-// modifying the encoding.
-//
-// The bitmap for noscan spans is not maintained. Code must ensure
-// that an object is scannable before consulting its bitmap by
-// checking either the noscan bit in the span or by consulting its
-// type's information.
-
package runtime
import (
"unsafe"
)
-const (
- bitPointer = 1 << 0
- bitScan = 1 << 4
-
- heapBitsShift = 1 // shift offset between successive bitPointer or bitScan entries
- wordsPerBitmapByte = 8 / 2 // heap words described by one bitmap byte
-
- // all scan/pointer bits in a byte
- bitScanAll = bitScan | bitScan<<heapBitsShift | bitScan<<(2*heapBitsShift) | bitScan<<(3*heapBitsShift)
- bitPointerAll = bitPointer | bitPointer<<heapBitsShift | bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
-)
-
// addb returns the byte pointer p+n.
+//
//go:nowritebarrier
//go:nosplit
func addb(p *byte, n uintptr) *byte {
}
// subtractb returns the byte pointer p-n.
+//
//go:nowritebarrier
//go:nosplit
func subtractb(p *byte, n uintptr) *byte {
}
// add1 returns the byte pointer p+1.
+//
//go:nowritebarrier
//go:nosplit
func add1(p *byte) *byte {
}
// subtract1 returns the byte pointer p-1.
-//go:nowritebarrier
//
// nosplit because it is used during write barriers and must not be preempted.
+//
+//go:nowritebarrier
//go:nosplit
func subtract1(p *byte) *byte {
// Note: wrote out full expression instead of calling subtractb(p, 1)
return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
}
-// heapBits provides access to the bitmap bits for a single heap word.
-// The methods on heapBits take value receivers so that the compiler
-// can more easily inline calls to those methods and registerize the
-// struct fields independently.
-type heapBits struct {
- bitp *uint8
- shift uint32
- arena uint32 // Index of heap arena containing bitp
- last *uint8 // Last byte arena's bitmap
-}
-
-// Make the compiler check that heapBits.arena is large enough to hold
-// the maximum arena frame number.
-var _ = heapBits{arena: (1<<heapAddrBits)/heapArenaBytes - 1}
-
// markBits provides access to the mark bit for an object in the heap.
// bytep points to the byte holding the mark bit.
// mask is a byte with a single bit set that can be &ed with *bytep
// and negates them so that ctz (count trailing zeros) instructions
// can be used. It then places these 8 bytes into the cached 64 bit
// s.allocCache.
-func (s *mspan) refillAllocCache(whichByte uintptr) {
- bytes := (*[8]uint8)(unsafe.Pointer(s.allocBits.bytep(whichByte)))
+func (s *mspan) refillAllocCache(whichByte uint16) {
+ bytes := (*[8]uint8)(unsafe.Pointer(s.allocBits.bytep(uintptr(whichByte))))
aCache := uint64(0)
aCache |= uint64(bytes[0])
aCache |= uint64(bytes[1]) << (1 * 8)
// or after s.freeindex.
// There are hardware instructions that can be used to make this
// faster if profiling warrants it.
-func (s *mspan) nextFreeIndex() uintptr {
+func (s *mspan) nextFreeIndex() uint16 {
sfreeindex := s.freeindex
snelems := s.nelems
if sfreeindex == snelems {
aCache := s.allocCache
- bitIndex := sys.Ctz64(aCache)
+ bitIndex := sys.TrailingZeros64(aCache)
for bitIndex == 64 {
// Move index to start of next cached bits.
sfreeindex = (sfreeindex + 64) &^ (64 - 1)
// Refill s.allocCache with the next 64 alloc bits.
s.refillAllocCache(whichByte)
aCache = s.allocCache
- bitIndex = sys.Ctz64(aCache)
+ bitIndex = sys.TrailingZeros64(aCache)
// nothing available in cached bits
// grab the next 8 bytes and try again.
}
- result := sfreeindex + uintptr(bitIndex)
+ result := sfreeindex + uint16(bitIndex)
if result >= snelems {
s.freeindex = snelems
return snelems
// been no preemption points since ensuring this (which could allow a
// GC transition, which would allow the state to change).
func (s *mspan) isFree(index uintptr) bool {
- if index < s.freeindex {
+ if index < uintptr(s.freeIndexForScan) {
return false
}
bytep, mask := s.allocBits.bitp(index)
// n must be within [0, s.npages*_PageSize),
// or may be exactly s.npages*_PageSize
// if s.elemsize is from sizeclasses.go.
+//
+// nosplit, because it is called by objIndex, which is nosplit
+//
+//go:nosplit
func (s *mspan) divideByElemSize(n uintptr) uintptr {
const doubleCheck = false
return q
}
+// nosplit, because it is called by other nosplit code like findObject
+//
+//go:nosplit
func (s *mspan) objIndex(p uintptr) uintptr {
return s.divideByElemSize(p - s.base())
}
}
func (s *mspan) markBitsForBase() markBits {
- return markBits{(*uint8)(s.gcmarkBits), uint8(1), 0}
+ return markBits{&s.gcmarkBits.x, uint8(1), 0}
}
// isMarked reports whether mark bit m is set.
m.index++
}
-// heapBitsForAddr returns the heapBits for the address addr.
-// The caller must ensure addr is in an allocated span.
-// In particular, be careful not to point past the end of an object.
-//
-// nosplit because it is used during write barriers and must not be preempted.
-//go:nosplit
-func heapBitsForAddr(addr uintptr) (h heapBits) {
- // 2 bits per word, 4 pairs per byte, and a mask is hard coded.
- arena := arenaIndex(addr)
- ha := mheap_.arenas[arena.l1()][arena.l2()]
- // The compiler uses a load for nil checking ha, but in this
- // case we'll almost never hit that cache line again, so it
- // makes more sense to do a value check.
- if ha == nil {
- // addr is not in the heap. Return nil heapBits, which
- // we expect to crash in the caller.
- return
- }
- h.bitp = &ha.bitmap[(addr/(goarch.PtrSize*4))%heapArenaBitmapBytes]
- h.shift = uint32((addr / goarch.PtrSize) & 3)
- h.arena = uint32(arena)
- h.last = &ha.bitmap[len(ha.bitmap)-1]
- return
-}
-
// clobberdeadPtr is a special value that is used by the compiler to
// clobber dead stack slots, when -clobberdead flag is set.
const clobberdeadPtr = uintptr(0xdeaddead | 0xdeaddead<<((^uintptr(0)>>63)*32))
//
// It is nosplit so it is safe for p to be a pointer to the current goroutine's stack.
// Since p is a uintptr, it would not be adjusted if the stack were to move.
+//
//go:nosplit
func findObject(p, refBase, refOff uintptr) (base uintptr, s *mspan, objIndex uintptr) {
s = spanOf(p)
return
}
-// next returns the heapBits describing the next pointer-sized word in memory.
-// That is, if h describes address p, h.next() describes p+ptrSize.
-// Note that next does not modify h. The caller must record the result.
-//
-// nosplit because it is used during write barriers and must not be preempted.
-//go:nosplit
-func (h heapBits) next() heapBits {
- if h.shift < 3*heapBitsShift {
- h.shift += heapBitsShift
- } else if h.bitp != h.last {
- h.bitp, h.shift = add1(h.bitp), 0
- } else {
- // Move to the next arena.
- return h.nextArena()
- }
- return h
-}
-
-// nextArena advances h to the beginning of the next heap arena.
-//
-// This is a slow-path helper to next. gc's inliner knows that
-// heapBits.next can be inlined even though it calls this. This is
-// marked noinline so it doesn't get inlined into next and cause next
-// to be too big to inline.
+// reflect_verifyNotInHeapPtr reports whether converting the not-in-heap pointer into a unsafe.Pointer is ok.
//
-//go:nosplit
-//go:noinline
-func (h heapBits) nextArena() heapBits {
- h.arena++
- ai := arenaIdx(h.arena)
- l2 := mheap_.arenas[ai.l1()]
- if l2 == nil {
- // We just passed the end of the object, which
- // was also the end of the heap. Poison h. It
- // should never be dereferenced at this point.
- return heapBits{}
- }
- ha := l2[ai.l2()]
- if ha == nil {
- return heapBits{}
- }
- h.bitp, h.shift = &ha.bitmap[0], 0
- h.last = &ha.bitmap[len(ha.bitmap)-1]
- return h
-}
-
-// forward returns the heapBits describing n pointer-sized words ahead of h in memory.
-// That is, if h describes address p, h.forward(n) describes p+n*ptrSize.
-// h.forward(1) is equivalent to h.next(), just slower.
-// Note that forward does not modify h. The caller must record the result.
-// bits returns the heap bits for the current word.
-//go:nosplit
-func (h heapBits) forward(n uintptr) heapBits {
- n += uintptr(h.shift) / heapBitsShift
- nbitp := uintptr(unsafe.Pointer(h.bitp)) + n/4
- h.shift = uint32(n%4) * heapBitsShift
- if nbitp <= uintptr(unsafe.Pointer(h.last)) {
- h.bitp = (*uint8)(unsafe.Pointer(nbitp))
- return h
- }
-
- // We're in a new heap arena.
- past := nbitp - (uintptr(unsafe.Pointer(h.last)) + 1)
- h.arena += 1 + uint32(past/heapArenaBitmapBytes)
- ai := arenaIdx(h.arena)
- if l2 := mheap_.arenas[ai.l1()]; l2 != nil && l2[ai.l2()] != nil {
- a := l2[ai.l2()]
- h.bitp = &a.bitmap[past%heapArenaBitmapBytes]
- h.last = &a.bitmap[len(a.bitmap)-1]
- } else {
- h.bitp, h.last = nil, nil
- }
- return h
-}
-
-// forwardOrBoundary is like forward, but stops at boundaries between
-// contiguous sections of the bitmap. It returns the number of words
-// advanced over, which will be <= n.
-func (h heapBits) forwardOrBoundary(n uintptr) (heapBits, uintptr) {
- maxn := 4 * ((uintptr(unsafe.Pointer(h.last)) + 1) - uintptr(unsafe.Pointer(h.bitp)))
- if n > maxn {
- n = maxn
- }
- return h.forward(n), n
-}
-
-// The caller can test morePointers and isPointer by &-ing with bitScan and bitPointer.
-// The result includes in its higher bits the bits for subsequent words
-// described by the same bitmap byte.
-//
-// nosplit because it is used during write barriers and must not be preempted.
-//go:nosplit
-func (h heapBits) bits() uint32 {
- // The (shift & 31) eliminates a test and conditional branch
- // from the generated code.
- return uint32(*h.bitp) >> (h.shift & 31)
-}
-
-// morePointers reports whether this word and all remaining words in this object
-// are scalars.
-// h must not describe the second word of the object.
-func (h heapBits) morePointers() bool {
- return h.bits()&bitScan != 0
-}
-
-// isPointer reports whether the heap bits describe a pointer word.
-//
-// nosplit because it is used during write barriers and must not be preempted.
-//go:nosplit
-func (h heapBits) isPointer() bool {
- return h.bits()&bitPointer != 0
-}
-
-// bulkBarrierPreWrite executes a write barrier
-// for every pointer slot in the memory range [src, src+size),
-// using pointer/scalar information from [dst, dst+size).
-// This executes the write barriers necessary before a memmove.
-// src, dst, and size must be pointer-aligned.
-// The range [dst, dst+size) must lie within a single object.
-// It does not perform the actual writes.
-//
-// As a special case, src == 0 indicates that this is being used for a
-// memclr. bulkBarrierPreWrite will pass 0 for the src of each write
-// barrier.
-//
-// Callers should call bulkBarrierPreWrite immediately before
-// calling memmove(dst, src, size). This function is marked nosplit
-// to avoid being preempted; the GC must not stop the goroutine
-// between the memmove and the execution of the barriers.
-// The caller is also responsible for cgo pointer checks if this
-// may be writing Go pointers into non-Go memory.
-//
-// The pointer bitmap is not maintained for allocations containing
-// no pointers at all; any caller of bulkBarrierPreWrite must first
-// make sure the underlying allocation contains pointers, usually
-// by checking typ.ptrdata.
-//
-// Callers must perform cgo checks if writeBarrier.cgo.
-//
-//go:nosplit
-func bulkBarrierPreWrite(dst, src, size uintptr) {
- if (dst|src|size)&(goarch.PtrSize-1) != 0 {
- throw("bulkBarrierPreWrite: unaligned arguments")
- }
- if !writeBarrier.needed {
- return
- }
- if s := spanOf(dst); s == nil {
- // If dst is a global, use the data or BSS bitmaps to
- // execute write barriers.
- for _, datap := range activeModules() {
- if datap.data <= dst && dst < datap.edata {
- bulkBarrierBitmap(dst, src, size, dst-datap.data, datap.gcdatamask.bytedata)
- return
- }
- }
- for _, datap := range activeModules() {
- if datap.bss <= dst && dst < datap.ebss {
- bulkBarrierBitmap(dst, src, size, dst-datap.bss, datap.gcbssmask.bytedata)
- return
- }
- }
- return
- } else if s.state.get() != mSpanInUse || dst < s.base() || s.limit <= dst {
- // dst was heap memory at some point, but isn't now.
- // It can't be a global. It must be either our stack,
- // or in the case of direct channel sends, it could be
- // another stack. Either way, no need for barriers.
- // This will also catch if dst is in a freed span,
- // though that should never have.
- return
- }
-
- buf := &getg().m.p.ptr().wbBuf
- h := heapBitsForAddr(dst)
- if src == 0 {
- for i := uintptr(0); i < size; i += goarch.PtrSize {
- if h.isPointer() {
- dstx := (*uintptr)(unsafe.Pointer(dst + i))
- if !buf.putFast(*dstx, 0) {
- wbBufFlush(nil, 0)
- }
- }
- h = h.next()
- }
- } else {
- for i := uintptr(0); i < size; i += goarch.PtrSize {
- if h.isPointer() {
- dstx := (*uintptr)(unsafe.Pointer(dst + i))
- srcx := (*uintptr)(unsafe.Pointer(src + i))
- if !buf.putFast(*dstx, *srcx) {
- wbBufFlush(nil, 0)
- }
- }
- h = h.next()
- }
- }
+//go:linkname reflect_verifyNotInHeapPtr reflect.verifyNotInHeapPtr
+func reflect_verifyNotInHeapPtr(p uintptr) bool {
+ // Conversion to a pointer is ok as long as findObject above does not call badPointer.
+ // Since we're already promised that p doesn't point into the heap, just disallow heap
+ // pointers and the special clobbered pointer.
+ return spanOf(p) == nil && p != clobberdeadPtr
}
-// bulkBarrierPreWriteSrcOnly is like bulkBarrierPreWrite but
-// does not execute write barriers for [dst, dst+size).
-//
-// In addition to the requirements of bulkBarrierPreWrite
-// callers need to ensure [dst, dst+size) is zeroed.
-//
-// This is used for special cases where e.g. dst was just
-// created and zeroed with malloc.
-//go:nosplit
-func bulkBarrierPreWriteSrcOnly(dst, src, size uintptr) {
- if (dst|src|size)&(goarch.PtrSize-1) != 0 {
- throw("bulkBarrierPreWrite: unaligned arguments")
- }
- if !writeBarrier.needed {
- return
- }
- buf := &getg().m.p.ptr().wbBuf
- h := heapBitsForAddr(dst)
- for i := uintptr(0); i < size; i += goarch.PtrSize {
- if h.isPointer() {
- srcx := (*uintptr)(unsafe.Pointer(src + i))
- if !buf.putFast(0, *srcx) {
- wbBufFlush(nil, 0)
- }
- }
- h = h.next()
- }
-}
+const ptrBits = 8 * goarch.PtrSize
// bulkBarrierBitmap executes write barriers for copying from [src,
// src+size) to [dst, dst+size) using a 1-bit pointer bitmap. src is
if *bits&mask != 0 {
dstx := (*uintptr)(unsafe.Pointer(dst + i))
if src == 0 {
- if !buf.putFast(*dstx, 0) {
- wbBufFlush(nil, 0)
- }
+ p := buf.get1()
+ p[0] = *dstx
} else {
srcx := (*uintptr)(unsafe.Pointer(src + i))
- if !buf.putFast(*dstx, *srcx) {
- wbBufFlush(nil, 0)
- }
+ p := buf.get2()
+ p[0] = *dstx
+ p[1] = *srcx
}
}
mask <<= 1
// Must not be preempted because it typically runs right before memmove,
// and the GC must observe them as an atomic action.
//
-// Callers must perform cgo checks if writeBarrier.cgo.
+// Callers must perform cgo checks if goexperiment.CgoCheck2.
//
//go:nosplit
func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {
if typ == nil {
throw("runtime: typeBitsBulkBarrier without type")
}
- if typ.size != size {
- println("runtime: typeBitsBulkBarrier with type ", typ.string(), " of size ", typ.size, " but memory size", size)
+ if typ.Size_ != size {
+ println("runtime: typeBitsBulkBarrier with type ", toRType(typ).string(), " of size ", typ.Size_, " but memory size", size)
throw("runtime: invalid typeBitsBulkBarrier")
}
- if typ.kind&kindGCProg != 0 {
- println("runtime: typeBitsBulkBarrier with type ", typ.string(), " with GC prog")
+ if typ.Kind_&kindGCProg != 0 {
+ println("runtime: typeBitsBulkBarrier with type ", toRType(typ).string(), " with GC prog")
throw("runtime: invalid typeBitsBulkBarrier")
}
- if !writeBarrier.needed {
+ if !writeBarrier.enabled {
return
}
- ptrmask := typ.gcdata
+ ptrmask := typ.GCData
buf := &getg().m.p.ptr().wbBuf
var bits uint32
- for i := uintptr(0); i < typ.ptrdata; i += goarch.PtrSize {
+ for i := uintptr(0); i < typ.PtrBytes; i += goarch.PtrSize {
if i&(goarch.PtrSize*8-1) == 0 {
bits = uint32(*ptrmask)
ptrmask = addb(ptrmask, 1)
if bits&1 != 0 {
dstx := (*uintptr)(unsafe.Pointer(dst + i))
srcx := (*uintptr)(unsafe.Pointer(src + i))
- if !buf.putFast(*dstx, *srcx) {
- wbBufFlush(nil, 0)
- }
- }
- }
-}
-
-// The methods operating on spans all require that h has been returned
-// by heapBitsForSpan and that size, n, total are the span layout description
-// returned by the mspan's layout method.
-// If total > size*n, it means that there is extra leftover memory in the span,
-// usually due to rounding.
-//
-// TODO(rsc): Perhaps introduce a different heapBitsSpan type.
-
-// initSpan initializes the heap bitmap for a span.
-// If this is a span of pointer-sized objects, it initializes all
-// words to pointer/scan.
-// Otherwise, it initializes all words to scalar/dead.
-func (h heapBits) initSpan(s *mspan) {
- // Clear bits corresponding to objects.
- nw := (s.npages << _PageShift) / goarch.PtrSize
- if nw%wordsPerBitmapByte != 0 {
- throw("initSpan: unaligned length")
- }
- if h.shift != 0 {
- throw("initSpan: unaligned base")
- }
- isPtrs := goarch.PtrSize == 8 && s.elemsize == goarch.PtrSize
- for nw > 0 {
- hNext, anw := h.forwardOrBoundary(nw)
- nbyte := anw / wordsPerBitmapByte
- if isPtrs {
- bitp := h.bitp
- for i := uintptr(0); i < nbyte; i++ {
- *bitp = bitPointerAll | bitScanAll
- bitp = add1(bitp)
- }
- } else {
- memclrNoHeapPointers(unsafe.Pointer(h.bitp), nbyte)
+ p := buf.get2()
+ p[0] = *dstx
+ p[1] = *srcx
}
- h = hNext
- nw -= anw
}
}
// scanning the allocation bitmap.
func (s *mspan) countAlloc() int {
count := 0
- bytes := divRoundUp(s.nelems, 8)
+ bytes := divRoundUp(uintptr(s.nelems), 8)
// Iterate over each 8-byte chunk and count allocations
// with an intrinsic. Note that newMarkBits guarantees that
// gcmarkBits will be 8-byte aligned, so we don't have to
return count
}
-// heapBitsSetType records that the new allocation [x, x+size)
-// holds in [x, x+dataSize) one or more values of type typ.
-// (The number of values is given by dataSize / typ.size.)
-// If dataSize < size, the fragment [x+dataSize, x+size) is
-// recorded as non-pointer data.
-// It is known that the type has pointers somewhere;
-// malloc does not call heapBitsSetType when there are no pointers,
-// because all free objects are marked as noscan during
-// heapBitsSweepSpan.
-//
-// There can only be one allocation from a given span active at a time,
-// and the bitmap for a span always falls on byte boundaries,
-// so there are no write-write races for access to the heap bitmap.
-// Hence, heapBitsSetType can access the bitmap without atomics.
-//
-// There can be read-write races between heapBitsSetType and things
-// that read the heap bitmap like scanobject. However, since
-// heapBitsSetType is only used for objects that have not yet been
-// made reachable, readers will ignore bits being modified by this
-// function. This does mean this function cannot transiently modify
-// bits that belong to neighboring objects. Also, on weakly-ordered
-// machines, callers must execute a store/store (publication) barrier
-// between calling this function and making the object reachable.
-func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
- const doubleCheck = false // slow but helpful; enable to test modifications to this code
-
- const (
- mask1 = bitPointer | bitScan // 00010001
- mask2 = bitPointer | bitScan | mask1<<heapBitsShift // 00110011
- mask3 = bitPointer | bitScan | mask2<<heapBitsShift // 01110111
- )
-
- // dataSize is always size rounded up to the next malloc size class,
- // except in the case of allocating a defer block, in which case
- // size is sizeof(_defer{}) (at least 6 words) and dataSize may be
- // arbitrarily larger.
- //
- // The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore
- // assume that dataSize == size without checking it explicitly.
-
- if goarch.PtrSize == 8 && size == goarch.PtrSize {
- // It's one word and it has pointers, it must be a pointer.
- // Since all allocated one-word objects are pointers
- // (non-pointers are aggregated into tinySize allocations),
- // initSpan sets the pointer bits for us. Nothing to do here.
- if doubleCheck {
- h := heapBitsForAddr(x)
- if !h.isPointer() {
- throw("heapBitsSetType: pointer bit missing")
- }
- if !h.morePointers() {
- throw("heapBitsSetType: scan bit missing")
- }
- }
- return
- }
-
- h := heapBitsForAddr(x)
- ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below)
-
- // 2-word objects only have 4 bitmap bits and 3-word objects only have 6 bitmap bits.
- // Therefore, these objects share a heap bitmap byte with the objects next to them.
- // These are called out as a special case primarily so the code below can assume all
- // objects are at least 4 words long and that their bitmaps start either at the beginning
- // of a bitmap byte, or half-way in (h.shift of 0 and 2 respectively).
-
- if size == 2*goarch.PtrSize {
- if typ.size == goarch.PtrSize {
- // We're allocating a block big enough to hold two pointers.
- // On 64-bit, that means the actual object must be two pointers,
- // or else we'd have used the one-pointer-sized block.
- // On 32-bit, however, this is the 8-byte block, the smallest one.
- // So it could be that we're allocating one pointer and this was
- // just the smallest block available. Distinguish by checking dataSize.
- // (In general the number of instances of typ being allocated is
- // dataSize/typ.size.)
- if goarch.PtrSize == 4 && dataSize == goarch.PtrSize {
- // 1 pointer object. On 32-bit machines clear the bit for the
- // unused second word.
- *h.bitp &^= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << h.shift
- *h.bitp |= (bitPointer | bitScan) << h.shift
- } else {
- // 2-element array of pointer.
- *h.bitp |= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << h.shift
- }
- return
- }
- // Otherwise typ.size must be 2*sys.PtrSize,
- // and typ.kind&kindGCProg == 0.
- if doubleCheck {
- if typ.size != 2*goarch.PtrSize || typ.kind&kindGCProg != 0 {
- print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
- throw("heapBitsSetType")
- }
- }
- b := uint32(*ptrmask)
- hb := b & 3
- hb |= bitScanAll & ((bitScan << (typ.ptrdata / goarch.PtrSize)) - 1)
- // Clear the bits for this object so we can set the
- // appropriate ones.
- *h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift
- *h.bitp |= uint8(hb << h.shift)
- return
- } else if size == 3*goarch.PtrSize {
- b := uint8(*ptrmask)
- if doubleCheck {
- if b == 0 {
- println("runtime: invalid type ", typ.string())
- throw("heapBitsSetType: called with non-pointer type")
- }
- if goarch.PtrSize != 8 {
- throw("heapBitsSetType: unexpected 3 pointer wide size class on 32 bit")
- }
- if typ.kind&kindGCProg != 0 {
- throw("heapBitsSetType: unexpected GC prog for 3 pointer wide size class")
- }
- if typ.size == 2*goarch.PtrSize {
- print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, "\n")
- throw("heapBitsSetType: inconsistent object sizes")
- }
- }
- if typ.size == goarch.PtrSize {
- // The type contains a pointer otherwise heapBitsSetType wouldn't have been called.
- // Since the type is only 1 pointer wide and contains a pointer, its gcdata must be exactly 1.
- if doubleCheck && *typ.gcdata != 1 {
- print("runtime: heapBitsSetType size=", size, " typ.size=", typ.size, "but *typ.gcdata", *typ.gcdata, "\n")
- throw("heapBitsSetType: unexpected gcdata for 1 pointer wide type size in 3 pointer wide size class")
- }
- // 3 element array of pointers. Unrolling ptrmask 3 times into p yields 00000111.
- b = 7
- }
-
- hb := b & 7
- // Set bitScan bits for all pointers.
- hb |= hb << wordsPerBitmapByte
- // First bitScan bit is always set since the type contains pointers.
- hb |= bitScan
- // Second bitScan bit needs to also be set if the third bitScan bit is set.
- hb |= hb & (bitScan << (2 * heapBitsShift)) >> 1
-
- // For h.shift > 1 heap bits cross a byte boundary and need to be written part
- // to h.bitp and part to the next h.bitp.
- switch h.shift {
- case 0:
- *h.bitp &^= mask3 << 0
- *h.bitp |= hb << 0
- case 1:
- *h.bitp &^= mask3 << 1
- *h.bitp |= hb << 1
- case 2:
- *h.bitp &^= mask2 << 2
- *h.bitp |= (hb & mask2) << 2
- // Two words written to the first byte.
- // Advance two words to get to the next byte.
- h = h.next().next()
- *h.bitp &^= mask1
- *h.bitp |= (hb >> 2) & mask1
- case 3:
- *h.bitp &^= mask1 << 3
- *h.bitp |= (hb & mask1) << 3
- // One word written to the first byte.
- // Advance one word to get to the next byte.
- h = h.next()
- *h.bitp &^= mask2
- *h.bitp |= (hb >> 1) & mask2
- }
- return
- }
-
- // Copy from 1-bit ptrmask into 2-bit bitmap.
- // The basic approach is to use a single uintptr as a bit buffer,
- // alternating between reloading the buffer and writing bitmap bytes.
- // In general, one load can supply two bitmap byte writes.
- // This is a lot of lines of code, but it compiles into relatively few
- // machine instructions.
-
- outOfPlace := false
- if arenaIndex(x+size-1) != arenaIdx(h.arena) || (doubleCheck && fastrand()%2 == 0) {
- // This object spans heap arenas, so the bitmap may be
- // discontiguous. Unroll it into the object instead
- // and then copy it out.
- //
- // In doubleCheck mode, we randomly do this anyway to
- // stress test the bitmap copying path.
- outOfPlace = true
- h.bitp = (*uint8)(unsafe.Pointer(x))
- h.last = nil
- }
-
- var (
- // Ptrmask input.
- p *byte // last ptrmask byte read
- b uintptr // ptrmask bits already loaded
- nb uintptr // number of bits in b at next read
- endp *byte // final ptrmask byte to read (then repeat)
- endnb uintptr // number of valid bits in *endp
- pbits uintptr // alternate source of bits
-
- // Heap bitmap output.
- w uintptr // words processed
- nw uintptr // number of words to process
- hbitp *byte // next heap bitmap byte to write
- hb uintptr // bits being prepared for *hbitp
- )
-
- hbitp = h.bitp
-
- // Handle GC program. Delayed until this part of the code
- // so that we can use the same double-checking mechanism
- // as the 1-bit case. Nothing above could have encountered
- // GC programs: the cases were all too small.
- if typ.kind&kindGCProg != 0 {
- heapBitsSetTypeGCProg(h, typ.ptrdata, typ.size, dataSize, size, addb(typ.gcdata, 4))
- if doubleCheck {
- // Double-check the heap bits written by GC program
- // by running the GC program to create a 1-bit pointer mask
- // and then jumping to the double-check code below.
- // This doesn't catch bugs shared between the 1-bit and 4-bit
- // GC program execution, but it does catch mistakes specific
- // to just one of those and bugs in heapBitsSetTypeGCProg's
- // implementation of arrays.
- lock(&debugPtrmask.lock)
- if debugPtrmask.data == nil {
- debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys))
- }
- ptrmask = debugPtrmask.data
- runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1)
- }
- goto Phase4
- }
-
- // Note about sizes:
- //
- // typ.size is the number of words in the object,
- // and typ.ptrdata is the number of words in the prefix
- // of the object that contains pointers. That is, the final
- // typ.size - typ.ptrdata words contain no pointers.
- // This allows optimization of a common pattern where
- // an object has a small header followed by a large scalar
- // buffer. If we know the pointers are over, we don't have
- // to scan the buffer's heap bitmap at all.
- // The 1-bit ptrmasks are sized to contain only bits for
- // the typ.ptrdata prefix, zero padded out to a full byte
- // of bitmap. This code sets nw (below) so that heap bitmap
- // bits are only written for the typ.ptrdata prefix; if there is
- // more room in the allocated object, the next heap bitmap
- // entry is a 00, indicating that there are no more pointers
- // to scan. So only the ptrmask for the ptrdata bytes is needed.
- //
- // Replicated copies are not as nice: if there is an array of
- // objects with scalar tails, all but the last tail does have to
- // be initialized, because there is no way to say "skip forward".
- // However, because of the possibility of a repeated type with
- // size not a multiple of 4 pointers (one heap bitmap byte),
- // the code already must handle the last ptrmask byte specially
- // by treating it as containing only the bits for endnb pointers,
- // where endnb <= 4. We represent large scalar tails that must
- // be expanded in the replication by setting endnb larger than 4.
- // This will have the effect of reading many bits out of b,
- // but once the real bits are shifted out, b will supply as many
- // zero bits as we try to read, which is exactly what we need.
-
- p = ptrmask
- if typ.size < dataSize {
- // Filling in bits for an array of typ.
- // Set up for repetition of ptrmask during main loop.
- // Note that ptrmask describes only a prefix of
- const maxBits = goarch.PtrSize*8 - 7
- if typ.ptrdata/goarch.PtrSize <= maxBits {
- // Entire ptrmask fits in uintptr with room for a byte fragment.
- // Load into pbits and never read from ptrmask again.
- // This is especially important when the ptrmask has
- // fewer than 8 bits in it; otherwise the reload in the middle
- // of the Phase 2 loop would itself need to loop to gather
- // at least 8 bits.
-
- // Accumulate ptrmask into b.
- // ptrmask is sized to describe only typ.ptrdata, but we record
- // it as describing typ.size bytes, since all the high bits are zero.
- nb = typ.ptrdata / goarch.PtrSize
- for i := uintptr(0); i < nb; i += 8 {
- b |= uintptr(*p) << i
- p = add1(p)
- }
- nb = typ.size / goarch.PtrSize
-
- // Replicate ptrmask to fill entire pbits uintptr.
- // Doubling and truncating is fewer steps than
- // iterating by nb each time. (nb could be 1.)
- // Since we loaded typ.ptrdata/sys.PtrSize bits
- // but are pretending to have typ.size/sys.PtrSize,
- // there might be no replication necessary/possible.
- pbits = b
- endnb = nb
- if nb+nb <= maxBits {
- for endnb <= goarch.PtrSize*8 {
- pbits |= pbits << endnb
- endnb += endnb
- }
- // Truncate to a multiple of original ptrmask.
- // Because nb+nb <= maxBits, nb fits in a byte.
- // Byte division is cheaper than uintptr division.
- endnb = uintptr(maxBits/byte(nb)) * nb
- pbits &= 1<<endnb - 1
- b = pbits
- nb = endnb
- }
-
- // Clear p and endp as sentinel for using pbits.
- // Checked during Phase 2 loop.
- p = nil
- endp = nil
- } else {
- // Ptrmask is larger. Read it multiple times.
- n := (typ.ptrdata/goarch.PtrSize+7)/8 - 1
- endp = addb(ptrmask, n)
- endnb = typ.size/goarch.PtrSize - n*8
- }
- }
- if p != nil {
- b = uintptr(*p)
- p = add1(p)
- nb = 8
- }
-
- if typ.size == dataSize {
- // Single entry: can stop once we reach the non-pointer data.
- nw = typ.ptrdata / goarch.PtrSize
- } else {
- // Repeated instances of typ in an array.
- // Have to process first N-1 entries in full, but can stop
- // once we reach the non-pointer data in the final entry.
- nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / goarch.PtrSize
- }
- if nw == 0 {
- // No pointers! Caller was supposed to check.
- println("runtime: invalid type ", typ.string())
- throw("heapBitsSetType: called with non-pointer type")
- return
- }
-
- // Phase 1: Special case for leading byte (shift==0) or half-byte (shift==2).
- // The leading byte is special because it contains the bits for word 1,
- // which does not have the scan bit set.
- // The leading half-byte is special because it's a half a byte,
- // so we have to be careful with the bits already there.
- switch {
- default:
- throw("heapBitsSetType: unexpected shift")
-
- case h.shift == 0:
- // Ptrmask and heap bitmap are aligned.
- //
- // This is a fast path for small objects.
- //
- // The first byte we write out covers the first four
- // words of the object. The scan/dead bit on the first
- // word must be set to scan since there are pointers
- // somewhere in the object.
- // In all following words, we set the scan/dead
- // appropriately to indicate that the object continues
- // to the next 2-bit entry in the bitmap.
- //
- // We set four bits at a time here, but if the object
- // is fewer than four words, phase 3 will clear
- // unnecessary bits.
- hb = b & bitPointerAll
- hb |= bitScanAll
- if w += 4; w >= nw {
- goto Phase3
- }
- *hbitp = uint8(hb)
- hbitp = add1(hbitp)
- b >>= 4
- nb -= 4
-
- case h.shift == 2:
- // Ptrmask and heap bitmap are misaligned.
- //
- // On 32 bit architectures only the 6-word object that corresponds
- // to a 24 bytes size class can start with h.shift of 2 here since
- // all other non 16 byte aligned size classes have been handled by
- // special code paths at the beginning of heapBitsSetType on 32 bit.
- //
- // Many size classes are only 16 byte aligned. On 64 bit architectures
- // this results in a heap bitmap position starting with a h.shift of 2.
- //
- // The bits for the first two words are in a byte shared
- // with another object, so we must be careful with the bits
- // already there.
- //
- // We took care of 1-word, 2-word, and 3-word objects above,
- // so this is at least a 6-word object.
- hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift)
- hb |= bitScan << (2 * heapBitsShift)
- if nw > 1 {
- hb |= bitScan << (3 * heapBitsShift)
- }
- b >>= 2
- nb -= 2
- *hbitp &^= uint8((bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << (2 * heapBitsShift))
- *hbitp |= uint8(hb)
- hbitp = add1(hbitp)
- if w += 2; w >= nw {
- // We know that there is more data, because we handled 2-word and 3-word objects above.
- // This must be at least a 6-word object. If we're out of pointer words,
- // mark no scan in next bitmap byte and finish.
- hb = 0
- w += 4
- goto Phase3
- }
- }
-
- // Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap.
- // The loop computes the bits for that last write but does not execute the write;
- // it leaves the bits in hb for processing by phase 3.
- // To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to
- // use in the first half of the loop right now, and then we only adjust nb explicitly
- // if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop.
- nb -= 4
- for {
- // Emit bitmap byte.
- // b has at least nb+4 bits, with one exception:
- // if w+4 >= nw, then b has only nw-w bits,
- // but we'll stop at the break and then truncate
- // appropriately in Phase 3.
- hb = b & bitPointerAll
- hb |= bitScanAll
- if w += 4; w >= nw {
- break
- }
- *hbitp = uint8(hb)
- hbitp = add1(hbitp)
- b >>= 4
-
- // Load more bits. b has nb right now.
- if p != endp {
- // Fast path: keep reading from ptrmask.
- // nb unmodified: we just loaded 8 bits,
- // and the next iteration will consume 8 bits,
- // leaving us with the same nb the next time we're here.
- if nb < 8 {
- b |= uintptr(*p) << nb
- p = add1(p)
- } else {
- // Reduce the number of bits in b.
- // This is important if we skipped
- // over a scalar tail, since nb could
- // be larger than the bit width of b.
- nb -= 8
- }
- } else if p == nil {
- // Almost as fast path: track bit count and refill from pbits.
- // For short repetitions.
- if nb < 8 {
- b |= pbits << nb
- nb += endnb
- }
- nb -= 8 // for next iteration
- } else {
- // Slow path: reached end of ptrmask.
- // Process final partial byte and rewind to start.
- b |= uintptr(*p) << nb
- nb += endnb
- if nb < 8 {
- b |= uintptr(*ptrmask) << nb
- p = add1(ptrmask)
- } else {
- nb -= 8
- p = ptrmask
- }
- }
-
- // Emit bitmap byte.
- hb = b & bitPointerAll
- hb |= bitScanAll
- if w += 4; w >= nw {
- break
- }
- *hbitp = uint8(hb)
- hbitp = add1(hbitp)
- b >>= 4
- }
-
-Phase3:
- // Phase 3: Write last byte or partial byte and zero the rest of the bitmap entries.
- if w > nw {
- // Counting the 4 entries in hb not yet written to memory,
- // there are more entries than possible pointer slots.
- // Discard the excess entries (can't be more than 3).
- mask := uintptr(1)<<(4-(w-nw)) - 1
- hb &= mask | mask<<4 // apply mask to both pointer bits and scan bits
- }
-
- // Change nw from counting possibly-pointer words to total words in allocation.
- nw = size / goarch.PtrSize
-
- // Write whole bitmap bytes.
- // The first is hb, the rest are zero.
- if w <= nw {
- *hbitp = uint8(hb)
- hbitp = add1(hbitp)
- hb = 0 // for possible final half-byte below
- for w += 4; w <= nw; w += 4 {
- *hbitp = 0
- hbitp = add1(hbitp)
- }
- }
-
- // Write final partial bitmap byte if any.
- // We know w > nw, or else we'd still be in the loop above.
- // It can be bigger only due to the 4 entries in hb that it counts.
- // If w == nw+4 then there's nothing left to do: we wrote all nw entries
- // and can discard the 4 sitting in hb.
- // But if w == nw+2, we need to write first two in hb.
- // The byte is shared with the next object, so be careful with
- // existing bits.
- if w == nw+2 {
- *hbitp = *hbitp&^(bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift) | uint8(hb)
- }
-
-Phase4:
- // Phase 4: Copy unrolled bitmap to per-arena bitmaps, if necessary.
- if outOfPlace {
- // TODO: We could probably make this faster by
- // handling [x+dataSize, x+size) specially.
- h := heapBitsForAddr(x)
- // cnw is the number of heap words, or bit pairs
- // remaining (like nw above).
- cnw := size / goarch.PtrSize
- src := (*uint8)(unsafe.Pointer(x))
- // We know the first and last byte of the bitmap are
- // not the same, but it's still possible for small
- // objects span arenas, so it may share bitmap bytes
- // with neighboring objects.
- //
- // Handle the first byte specially if it's shared. See
- // Phase 1 for why this is the only special case we need.
- if doubleCheck {
- if !(h.shift == 0 || h.shift == 2) {
- print("x=", x, " size=", size, " cnw=", h.shift, "\n")
- throw("bad start shift")
- }
- }
- if h.shift == 2 {
- *h.bitp = *h.bitp&^((bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift)<<(2*heapBitsShift)) | *src
- h = h.next().next()
- cnw -= 2
- src = addb(src, 1)
- }
- // We're now byte aligned. Copy out to per-arena
- // bitmaps until the last byte (which may again be
- // partial).
- for cnw >= 4 {
- // This loop processes four words at a time,
- // so round cnw down accordingly.
- hNext, words := h.forwardOrBoundary(cnw / 4 * 4)
-
- // n is the number of bitmap bytes to copy.
- n := words / 4
- memmove(unsafe.Pointer(h.bitp), unsafe.Pointer(src), n)
- cnw -= words
- h = hNext
- src = addb(src, n)
- }
- if doubleCheck && h.shift != 0 {
- print("cnw=", cnw, " h.shift=", h.shift, "\n")
- throw("bad shift after block copy")
- }
- // Handle the last byte if it's shared.
- if cnw == 2 {
- *h.bitp = *h.bitp&^(bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift) | *src
- src = addb(src, 1)
- h = h.next().next()
- }
- if doubleCheck {
- if uintptr(unsafe.Pointer(src)) > x+size {
- throw("copy exceeded object size")
- }
- if !(cnw == 0 || cnw == 2) {
- print("x=", x, " size=", size, " cnw=", cnw, "\n")
- throw("bad number of remaining words")
- }
- // Set up hbitp so doubleCheck code below can check it.
- hbitp = h.bitp
- }
- // Zero the object where we wrote the bitmap.
- memclrNoHeapPointers(unsafe.Pointer(x), uintptr(unsafe.Pointer(src))-x)
- }
-
- // Double check the whole bitmap.
- if doubleCheck {
- // x+size may not point to the heap, so back up one
- // word and then advance it the way we do above.
- end := heapBitsForAddr(x + size - goarch.PtrSize)
- if outOfPlace {
- // In out-of-place copying, we just advance
- // using next.
- end = end.next()
- } else {
- // Don't use next because that may advance to
- // the next arena and the in-place logic
- // doesn't do that.
- end.shift += heapBitsShift
- if end.shift == 4*heapBitsShift {
- end.bitp, end.shift = add1(end.bitp), 0
- }
- }
- if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
- println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size)
- print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
- print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
- h0 := heapBitsForAddr(x)
- print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
- print("ended at hbitp=", hbitp, " but next starts at bitp=", end.bitp, " shift=", end.shift, "\n")
- throw("bad heapBitsSetType")
- }
-
- // Double-check that bits to be written were written correctly.
- // Does not check that other bits were not written, unfortunately.
- h := heapBitsForAddr(x)
- nptr := typ.ptrdata / goarch.PtrSize
- ndata := typ.size / goarch.PtrSize
- count := dataSize / typ.size
- totalptr := ((count-1)*typ.size + typ.ptrdata) / goarch.PtrSize
- for i := uintptr(0); i < size/goarch.PtrSize; i++ {
- j := i % ndata
- var have, want uint8
- have = (*h.bitp >> h.shift) & (bitPointer | bitScan)
- if i >= totalptr {
- if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 {
- // heapBitsSetTypeGCProg always fills
- // in full nibbles of bitScan.
- want = bitScan
- }
- } else {
- if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 {
- want |= bitPointer
- }
- want |= bitScan
- }
- if have != want {
- println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size)
- print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
- print("kindGCProg=", typ.kind&kindGCProg != 0, " outOfPlace=", outOfPlace, "\n")
- print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
- h0 := heapBitsForAddr(x)
- print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
- print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n")
- print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n")
- println("at word", i, "offset", i*goarch.PtrSize, "have", hex(have), "want", hex(want))
- if typ.kind&kindGCProg != 0 {
- println("GC program:")
- dumpGCProg(addb(typ.gcdata, 4))
- }
- throw("bad heapBitsSetType")
- }
- h = h.next()
- }
- if ptrmask == debugPtrmask.data {
- unlock(&debugPtrmask.lock)
+// Read the bytes starting at the aligned pointer p into a uintptr.
+// Read is little-endian.
+func readUintptr(p *byte) uintptr {
+ x := *(*uintptr)(unsafe.Pointer(p))
+ if goarch.BigEndian {
+ if goarch.PtrSize == 8 {
+ return uintptr(sys.Bswap64(uint64(x)))
}
+ return uintptr(sys.Bswap32(uint32(x)))
}
+ return x
}
var debugPtrmask struct {
data *byte
}
-// heapBitsSetTypeGCProg implements heapBitsSetType using a GC program.
-// progSize is the size of the memory described by the program.
-// elemSize is the size of the element that the GC program describes (a prefix of).
-// dataSize is the total size of the intended data, a multiple of elemSize.
-// allocSize is the total size of the allocated memory.
-//
-// GC programs are only used for large allocations.
-// heapBitsSetType requires that allocSize is a multiple of 4 words,
-// so that the relevant bitmap bytes are not shared with surrounding
-// objects.
-func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize uintptr, prog *byte) {
- if goarch.PtrSize == 8 && allocSize%(4*goarch.PtrSize) != 0 {
- // Alignment will be wrong.
- throw("heapBitsSetTypeGCProg: small allocation")
- }
- var totalBits uintptr
- if elemSize == dataSize {
- totalBits = runGCProg(prog, nil, h.bitp, 2)
- if totalBits*goarch.PtrSize != progSize {
- println("runtime: heapBitsSetTypeGCProg: total bits", totalBits, "but progSize", progSize)
- throw("heapBitsSetTypeGCProg: unexpected bit count")
- }
- } else {
- count := dataSize / elemSize
-
- // Piece together program trailer to run after prog that does:
- // literal(0)
- // repeat(1, elemSize-progSize-1) // zeros to fill element size
- // repeat(elemSize, count-1) // repeat that element for count
- // This zero-pads the data remaining in the first element and then
- // repeats that first element to fill the array.
- var trailer [40]byte // 3 varints (max 10 each) + some bytes
- i := 0
- if n := elemSize/goarch.PtrSize - progSize/goarch.PtrSize; n > 0 {
- // literal(0)
- trailer[i] = 0x01
- i++
- trailer[i] = 0
- i++
- if n > 1 {
- // repeat(1, n-1)
- trailer[i] = 0x81
- i++
- n--
- for ; n >= 0x80; n >>= 7 {
- trailer[i] = byte(n | 0x80)
- i++
- }
- trailer[i] = byte(n)
- i++
- }
- }
- // repeat(elemSize/ptrSize, count-1)
- trailer[i] = 0x80
- i++
- n := elemSize / goarch.PtrSize
- for ; n >= 0x80; n >>= 7 {
- trailer[i] = byte(n | 0x80)
- i++
- }
- trailer[i] = byte(n)
- i++
- n = count - 1
- for ; n >= 0x80; n >>= 7 {
- trailer[i] = byte(n | 0x80)
- i++
- }
- trailer[i] = byte(n)
- i++
- trailer[i] = 0
- i++
-
- runGCProg(prog, &trailer[0], h.bitp, 2)
-
- // Even though we filled in the full array just now,
- // record that we only filled in up to the ptrdata of the
- // last element. This will cause the code below to
- // memclr the dead section of the final array element,
- // so that scanobject can stop early in the final element.
- totalBits = (elemSize*(count-1) + progSize) / goarch.PtrSize
- }
- endProg := unsafe.Pointer(addb(h.bitp, (totalBits+3)/4))
- endAlloc := unsafe.Pointer(addb(h.bitp, allocSize/goarch.PtrSize/wordsPerBitmapByte))
- memclrNoHeapPointers(endProg, uintptr(endAlloc)-uintptr(endProg))
-}
-
// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
// size the size of the region described by prog, in bytes.
-// The resulting bitvector will have no more than size/sys.PtrSize bits.
+// The resulting bitvector will have no more than size/goarch.PtrSize bits.
func progToPointerMask(prog *byte, size uintptr) bitvector {
n := (size/goarch.PtrSize + 7) / 8
x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
x[len(x)-1] = 0xa1 // overflow check sentinel
- n = runGCProg(prog, nil, &x[0], 1)
+ n = runGCProg(prog, &x[0])
if x[len(x)-1] != 0xa1 {
throw("progToPointerMask: overflow")
}
// 10000000 n c: repeat the previous n bits c times; n, c are varints
// 1nnnnnnn c: repeat the previous n bits c times; c is a varint
-// runGCProg executes the GC program prog, and then trailer if non-nil,
-// writing to dst with entries of the given size.
-// If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst.
-// If size == 2, dst is the 2-bit heap bitmap, and writes move backward
-// starting at dst (because the heap bitmap does). In this case, the caller guarantees
-// that only whole bytes in dst need to be written.
-//
-// runGCProg returns the number of 1- or 2-bit entries written to memory.
-func runGCProg(prog, trailer, dst *byte, size int) uintptr {
+// runGCProg returns the number of 1-bit entries written to memory.
+func runGCProg(prog, dst *byte) uintptr {
dstStart := dst
// Bits waiting to be written to memory.
// Flush accumulated full bytes.
// The rest of the loop assumes that nbits <= 7.
for ; nbits >= 8; nbits -= 8 {
- if size == 1 {
- *dst = uint8(bits)
- dst = add1(dst)
- bits >>= 8
- } else {
- v := bits&bitPointerAll | bitScanAll
- *dst = uint8(v)
- dst = add1(dst)
- bits >>= 4
- v = bits&bitPointerAll | bitScanAll
- *dst = uint8(v)
- dst = add1(dst)
- bits >>= 4
- }
+ *dst = uint8(bits)
+ dst = add1(dst)
+ bits >>= 8
}
// Process one instruction.
if inst&0x80 == 0 {
// Literal bits; n == 0 means end of program.
if n == 0 {
- // Program is over; continue in trailer if present.
- if trailer != nil {
- p = trailer
- trailer = nil
- continue
- }
+ // Program is over.
break Run
}
nbyte := n / 8
for i := uintptr(0); i < nbyte; i++ {
bits |= uintptr(*p) << nbits
p = add1(p)
- if size == 1 {
- *dst = uint8(bits)
- dst = add1(dst)
- bits >>= 8
- } else {
- v := bits&0xf | bitScanAll
- *dst = uint8(v)
- dst = add1(dst)
- bits >>= 4
- v = bits&0xf | bitScanAll
- *dst = uint8(v)
- dst = add1(dst)
- bits >>= 4
- }
+ *dst = uint8(bits)
+ dst = add1(dst)
+ bits >>= 8
}
if n %= 8; n > 0 {
bits |= uintptr(*p) << nbits
// into a register and use that register for the entire loop
// instead of repeatedly reading from memory.
// Handling fewer than 8 bits here makes the general loop simpler.
- // The cutoff is sys.PtrSize*8 - 7 to guarantee that when we add
+ // The cutoff is goarch.PtrSize*8 - 7 to guarantee that when we add
// the pattern to a bit buffer holding at most 7 bits (a partial byte)
// it will not overflow.
src := dst
npattern := nbits
// If we need more bits, fetch them from memory.
- if size == 1 {
- src = subtract1(src)
- for npattern < n {
- pattern <<= 8
- pattern |= uintptr(*src)
- src = subtract1(src)
- npattern += 8
- }
- } else {
+ src = subtract1(src)
+ for npattern < n {
+ pattern <<= 8
+ pattern |= uintptr(*src)
src = subtract1(src)
- for npattern < n {
- pattern <<= 4
- pattern |= uintptr(*src) & 0xf
- src = subtract1(src)
- npattern += 4
- }
+ npattern += 8
}
// We started with the whole bit output buffer,
for ; c >= npattern; c -= npattern {
bits |= pattern << nbits
nbits += npattern
- if size == 1 {
- for nbits >= 8 {
- *dst = uint8(bits)
- dst = add1(dst)
- bits >>= 8
- nbits -= 8
- }
- } else {
- for nbits >= 4 {
- *dst = uint8(bits&0xf | bitScanAll)
- dst = add1(dst)
- bits >>= 4
- nbits -= 4
- }
+ for nbits >= 8 {
+ *dst = uint8(bits)
+ dst = add1(dst)
+ bits >>= 8
+ nbits -= 8
}
}
// Since nbits <= 7, we know the first few bytes of repeated data
// are already written to memory.
off := n - nbits // n > nbits because n > maxBits and nbits <= 7
- if size == 1 {
- // Leading src fragment.
- src = subtractb(src, (off+7)/8)
- if frag := off & 7; frag != 0 {
- bits |= uintptr(*src) >> (8 - frag) << nbits
- src = add1(src)
- nbits += frag
- c -= frag
- }
- // Main loop: load one byte, write another.
- // The bits are rotating through the bit buffer.
- for i := c / 8; i > 0; i-- {
- bits |= uintptr(*src) << nbits
- src = add1(src)
- *dst = uint8(bits)
- dst = add1(dst)
- bits >>= 8
- }
- // Final src fragment.
- if c %= 8; c > 0 {
- bits |= (uintptr(*src) & (1<<c - 1)) << nbits
- nbits += c
- }
- } else {
- // Leading src fragment.
- src = subtractb(src, (off+3)/4)
- if frag := off & 3; frag != 0 {
- bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits
- src = add1(src)
- nbits += frag
- c -= frag
- }
- // Main loop: load one byte, write another.
- // The bits are rotating through the bit buffer.
- for i := c / 4; i > 0; i-- {
- bits |= (uintptr(*src) & 0xf) << nbits
- src = add1(src)
- *dst = uint8(bits&0xf | bitScanAll)
- dst = add1(dst)
- bits >>= 4
- }
- // Final src fragment.
- if c %= 4; c > 0 {
- bits |= (uintptr(*src) & (1<<c - 1)) << nbits
- nbits += c
- }
- }
- }
-
- // Write any final bits out, using full-byte writes, even for the final byte.
- var totalBits uintptr
- if size == 1 {
- totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
- nbits += -nbits & 7
- for ; nbits > 0; nbits -= 8 {
+ // Leading src fragment.
+ src = subtractb(src, (off+7)/8)
+ if frag := off & 7; frag != 0 {
+ bits |= uintptr(*src) >> (8 - frag) << nbits
+ src = add1(src)
+ nbits += frag
+ c -= frag
+ }
+ // Main loop: load one byte, write another.
+ // The bits are rotating through the bit buffer.
+ for i := c / 8; i > 0; i-- {
+ bits |= uintptr(*src) << nbits
+ src = add1(src)
*dst = uint8(bits)
dst = add1(dst)
bits >>= 8
}
- } else {
- totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*4 + nbits
- nbits += -nbits & 3
- for ; nbits > 0; nbits -= 4 {
- v := bits&0xf | bitScanAll
- *dst = uint8(v)
- dst = add1(dst)
- bits >>= 4
+ // Final src fragment.
+ if c %= 8; c > 0 {
+ bits |= (uintptr(*src) & (1<<c - 1)) << nbits
+ nbits += c
}
}
+
+ // Write any final bits out, using full-byte writes, even for the final byte.
+ totalBits := (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
+ nbits += -nbits & 7
+ for ; nbits > 0; nbits -= 8 {
+ *dst = uint8(bits)
+ dst = add1(dst)
+ bits >>= 8
+ }
return totalBits
}
// Compute the number of pages needed for bitmapBytes.
pages := divRoundUp(bitmapBytes, pageSize)
s := mheap_.allocManual(pages, spanAllocPtrScalarBits)
- runGCProg(addb(prog, 4), nil, (*byte)(unsafe.Pointer(s.startAddr)), 1)
+ runGCProg(addb(prog, 4), (*byte)(unsafe.Pointer(s.startAddr)))
return s
}
func dematerializeGCProg(s *mspan) {
// Testing.
-func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool {
- target := (*stkframe)(ctxt)
- if frame.sp <= target.sp && target.sp < frame.varp {
- *target = *frame
- return false
- }
- return true
-}
-
-// gcbits returns the GC type info for x, for testing.
+// reflect_gcbits returns the GC type info for x, for testing.
// The result is the bitmap entries (0 or 1), one entry per byte.
+//
//go:linkname reflect_gcbits reflect.gcbits
-func reflect_gcbits(x interface{}) []byte {
- ret := getgcmask(x)
- typ := (*ptrtype)(unsafe.Pointer(efaceOf(&x)._type)).elem
- nptr := typ.ptrdata / goarch.PtrSize
- for uintptr(len(ret)) > nptr && ret[len(ret)-1] == 0 {
- ret = ret[:len(ret)-1]
- }
- return ret
-}
-
-// Returns GC type info for the pointer stored in ep for testing.
-// If ep points to the stack, only static live information will be returned
-// (i.e. not for objects which are only dynamically live stack objects).
-func getgcmask(ep interface{}) (mask []byte) {
- e := *efaceOf(&ep)
- p := e.data
- t := e._type
- // data or bss
- for _, datap := range activeModules() {
- // data
- if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
- bitmap := datap.gcdatamask.bytedata
- n := (*ptrtype)(unsafe.Pointer(t)).elem.size
- mask = make([]byte, n/goarch.PtrSize)
- for i := uintptr(0); i < n; i += goarch.PtrSize {
- off := (uintptr(p) + i - datap.data) / goarch.PtrSize
- mask[i/goarch.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
- }
- return
- }
-
- // bss
- if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss {
- bitmap := datap.gcbssmask.bytedata
- n := (*ptrtype)(unsafe.Pointer(t)).elem.size
- mask = make([]byte, n/goarch.PtrSize)
- for i := uintptr(0); i < n; i += goarch.PtrSize {
- off := (uintptr(p) + i - datap.bss) / goarch.PtrSize
- mask[i/goarch.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
- }
- return
- }
- }
-
- // heap
- if base, s, _ := findObject(uintptr(p), 0, 0); base != 0 {
- hbits := heapBitsForAddr(base)
- n := s.elemsize
- mask = make([]byte, n/goarch.PtrSize)
- for i := uintptr(0); i < n; i += goarch.PtrSize {
- if hbits.isPointer() {
- mask[i/goarch.PtrSize] = 1
- }
- if !hbits.morePointers() {
- mask = mask[:i/goarch.PtrSize]
- break
- }
- hbits = hbits.next()
- }
- return
- }
-
- // stack
- if _g_ := getg(); _g_.m.curg.stack.lo <= uintptr(p) && uintptr(p) < _g_.m.curg.stack.hi {
- var frame stkframe
- frame.sp = uintptr(p)
- _g_ := getg()
- gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
- if frame.fn.valid() {
- locals, _, _ := getStackMap(&frame, nil, false)
- if locals.n == 0 {
- return
- }
- size := uintptr(locals.n) * goarch.PtrSize
- n := (*ptrtype)(unsafe.Pointer(t)).elem.size
- mask = make([]byte, n/goarch.PtrSize)
- for i := uintptr(0); i < n; i += goarch.PtrSize {
- off := (uintptr(p) + i - frame.varp + size) / goarch.PtrSize
- mask[i/goarch.PtrSize] = locals.ptrbit(off)
- }
- }
- return
- }
-
- // otherwise, not something the GC knows about.
- // possibly read-only data, like malloc(0).
- // must not have pointers
- return
+func reflect_gcbits(x any) []byte {
+ return getgcmask(x)
}