src/runtime/mbitmap.go

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package runtime
   6
   7 import (
   8         "internal/goarch"
   9         "runtime/internal/atomic"
  10         "runtime/internal/sys"
  11         "unsafe"
  12 )
  13
  14 // addb returns the byte pointer p+n.
  15 //
  16 //go:nowritebarrier
  17 //go:nosplit
  18 func addb(p *byte, n uintptr) *byte {
  19         // Note: wrote out full expression instead of calling add(p, n)
  20         // to reduce the number of temporaries generated by the
  21         // compiler for this trivial expression during inlining.
  22         return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n))
  23 }
  24
  25 // subtractb returns the byte pointer p-n.
  26 //
  27 //go:nowritebarrier
  28 //go:nosplit
  29 func subtractb(p *byte, n uintptr) *byte {
  30         // Note: wrote out full expression instead of calling add(p, -n)
  31         // to reduce the number of temporaries generated by the
  32         // compiler for this trivial expression during inlining.
  33         return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n))
  34 }
  35
  36 // add1 returns the byte pointer p+1.
  37 //
  38 //go:nowritebarrier
  39 //go:nosplit
  40 func add1(p *byte) *byte {
  41         // Note: wrote out full expression instead of calling addb(p, 1)
  42         // to reduce the number of temporaries generated by the
  43         // compiler for this trivial expression during inlining.
  44         return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1))
  45 }
  46
  47 // subtract1 returns the byte pointer p-1.
  48 //
  49 // nosplit because it is used during write barriers and must not be preempted.
  50 //
  51 //go:nowritebarrier
  52 //go:nosplit
  53 func subtract1(p *byte) *byte {
  54         // Note: wrote out full expression instead of calling subtractb(p, 1)
  55         // to reduce the number of temporaries generated by the
  56         // compiler for this trivial expression during inlining.
  57         return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
  58 }
  59
  60 // markBits provides access to the mark bit for an object in the heap.
  61 // bytep points to the byte holding the mark bit.
  62 // mask is a byte with a single bit set that can be &ed with *bytep
  63 // to see if the bit has been set.
  64 // *m.byte&m.mask != 0 indicates the mark bit is set.
  65 // index can be used along with span information to generate
  66 // the address of the object in the heap.
  67 // We maintain one set of mark bits for allocation and one for
  68 // marking purposes.
  69 type markBits struct {
  70         bytep *uint8
  71         mask  uint8
  72         index uintptr
  73 }
  74
  75 //go:nosplit
  76 func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits {
  77         bytep, mask := s.allocBits.bitp(allocBitIndex)
  78         return markBits{bytep, mask, allocBitIndex}
  79 }
  80
  81 // refillAllocCache takes 8 bytes s.allocBits starting at whichByte
  82 // and negates them so that ctz (count trailing zeros) instructions
  83 // can be used. It then places these 8 bytes into the cached 64 bit
  84 // s.allocCache.
  85 func (s *mspan) refillAllocCache(whichByte uint16) {
  86         bytes := (*[8]uint8)(unsafe.Pointer(s.allocBits.bytep(uintptr(whichByte))))
  87         aCache := uint64(0)
  88         aCache |= uint64(bytes[0])
  89         aCache |= uint64(bytes[1]) << (1 * 8)
  90         aCache |= uint64(bytes[2]) << (2 * 8)
  91         aCache |= uint64(bytes[3]) << (3 * 8)
  92         aCache |= uint64(bytes[4]) << (4 * 8)
  93         aCache |= uint64(bytes[5]) << (5 * 8)
  94         aCache |= uint64(bytes[6]) << (6 * 8)
  95         aCache |= uint64(bytes[7]) << (7 * 8)
  96         s.allocCache = ^aCache
  97 }
  98
  99 // nextFreeIndex returns the index of the next free object in s at
 100 // or after s.freeindex.
 101 // There are hardware instructions that can be used to make this
 102 // faster if profiling warrants it.
 103 func (s *mspan) nextFreeIndex() uint16 {
 104         sfreeindex := s.freeindex
 105         snelems := s.nelems
 106         if sfreeindex == snelems {
 107                 return sfreeindex
 108         }
 109         if sfreeindex > snelems {
 110                 throw("s.freeindex > s.nelems")
 111         }
 112
 113         aCache := s.allocCache
 114
 115         bitIndex := sys.TrailingZeros64(aCache)
 116         for bitIndex == 64 {
 117                 // Move index to start of next cached bits.
 118                 sfreeindex = (sfreeindex + 64) &^ (64 - 1)
 119                 if sfreeindex >= snelems {
 120                         s.freeindex = snelems
 121                         return snelems
 122                 }
 123                 whichByte := sfreeindex / 8
 124                 // Refill s.allocCache with the next 64 alloc bits.
 125                 s.refillAllocCache(whichByte)
 126                 aCache = s.allocCache
 127                 bitIndex = sys.TrailingZeros64(aCache)
 128                 // nothing available in cached bits
 129                 // grab the next 8 bytes and try again.
 130         }
 131         result := sfreeindex + uint16(bitIndex)
 132         if result >= snelems {
 133                 s.freeindex = snelems
 134                 return snelems
 135         }
 136
 137         s.allocCache >>= uint(bitIndex + 1)
 138         sfreeindex = result + 1
 139
 140         if sfreeindex%64 == 0 && sfreeindex != snelems {
 141                 // We just incremented s.freeindex so it isn't 0.
 142                 // As each 1 in s.allocCache was encountered and used for allocation
 143                 // it was shifted away. At this point s.allocCache contains all 0s.
 144                 // Refill s.allocCache so that it corresponds
 145                 // to the bits at s.allocBits starting at s.freeindex.
 146                 whichByte := sfreeindex / 8
 147                 s.refillAllocCache(whichByte)
 148         }
 149         s.freeindex = sfreeindex
 150         return result
 151 }
 152
 153 // isFree reports whether the index'th object in s is unallocated.
 154 //
 155 // The caller must ensure s.state is mSpanInUse, and there must have
 156 // been no preemption points since ensuring this (which could allow a
 157 // GC transition, which would allow the state to change).
 158 func (s *mspan) isFree(index uintptr) bool {
 159         if index < uintptr(s.freeIndexForScan) {
 160                 return false
 161         }
 162         bytep, mask := s.allocBits.bitp(index)
 163         return *bytep&mask == 0
 164 }
 165
 166 // divideByElemSize returns n/s.elemsize.
 167 // n must be within [0, s.npages*_PageSize),
 168 // or may be exactly s.npages*_PageSize
 169 // if s.elemsize is from sizeclasses.go.
 170 //
 171 // nosplit, because it is called by objIndex, which is nosplit
 172 //
 173 //go:nosplit
 174 func (s *mspan) divideByElemSize(n uintptr) uintptr {
 175         const doubleCheck = false
 176
 177         // See explanation in mksizeclasses.go's computeDivMagic.
 178         q := uintptr((uint64(n) * uint64(s.divMul)) >> 32)
 179
 180         if doubleCheck && q != n/s.elemsize {
 181                 println(n, "/", s.elemsize, "should be", n/s.elemsize, "but got", q)
 182                 throw("bad magic division")
 183         }
 184         return q
 185 }
 186
 187 // nosplit, because it is called by other nosplit code like findObject
 188 //
 189 //go:nosplit
 190 func (s *mspan) objIndex(p uintptr) uintptr {
 191         return s.divideByElemSize(p - s.base())
 192 }
 193
 194 func markBitsForAddr(p uintptr) markBits {
 195         s := spanOf(p)
 196         objIndex := s.objIndex(p)
 197         return s.markBitsForIndex(objIndex)
 198 }
 199
 200 func (s *mspan) markBitsForIndex(objIndex uintptr) markBits {
 201         bytep, mask := s.gcmarkBits.bitp(objIndex)
 202         return markBits{bytep, mask, objIndex}
 203 }
 204
 205 func (s *mspan) markBitsForBase() markBits {
 206         return markBits{&s.gcmarkBits.x, uint8(1), 0}
 207 }
 208
 209 // isMarked reports whether mark bit m is set.
 210 func (m markBits) isMarked() bool {
 211         return *m.bytep&m.mask != 0
 212 }
 213
 214 // setMarked sets the marked bit in the markbits, atomically.
 215 func (m markBits) setMarked() {
 216         // Might be racing with other updates, so use atomic update always.
 217         // We used to be clever here and use a non-atomic update in certain
 218         // cases, but it's not worth the risk.
 219         atomic.Or8(m.bytep, m.mask)
 220 }
 221
 222 // setMarkedNonAtomic sets the marked bit in the markbits, non-atomically.
 223 func (m markBits) setMarkedNonAtomic() {
 224         *m.bytep |= m.mask
 225 }
 226
 227 // clearMarked clears the marked bit in the markbits, atomically.
 228 func (m markBits) clearMarked() {
 229         // Might be racing with other updates, so use atomic update always.
 230         // We used to be clever here and use a non-atomic update in certain
 231         // cases, but it's not worth the risk.
 232         atomic.And8(m.bytep, ^m.mask)
 233 }
 234
 235 // markBitsForSpan returns the markBits for the span base address base.
 236 func markBitsForSpan(base uintptr) (mbits markBits) {
 237         mbits = markBitsForAddr(base)
 238         if mbits.mask != 1 {
 239                 throw("markBitsForSpan: unaligned start")
 240         }
 241         return mbits
 242 }
 243
 244 // advance advances the markBits to the next object in the span.
 245 func (m *markBits) advance() {
 246         if m.mask == 1<<7 {
 247                 m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
 248                 m.mask = 1
 249         } else {
 250                 m.mask = m.mask << 1
 251         }
 252         m.index++
 253 }
 254
 255 // clobberdeadPtr is a special value that is used by the compiler to
 256 // clobber dead stack slots, when -clobberdead flag is set.
 257 const clobberdeadPtr = uintptr(0xdeaddead | 0xdeaddead<<((^uintptr(0)>>63)*32))
 258
 259 // badPointer throws bad pointer in heap panic.
 260 func badPointer(s *mspan, p, refBase, refOff uintptr) {
 261         // Typically this indicates an incorrect use
 262         // of unsafe or cgo to store a bad pointer in
 263         // the Go heap. It may also indicate a runtime
 264         // bug.
 265         //
 266         // TODO(austin): We could be more aggressive
 267         // and detect pointers to unallocated objects
 268         // in allocated spans.
 269         printlock()
 270         print("runtime: pointer ", hex(p))
 271         if s != nil {
 272                 state := s.state.get()
 273                 if state != mSpanInUse {
 274                         print(" to unallocated span")
 275                 } else {
 276                         print(" to unused region of span")
 277                 }
 278                 print(" span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", state)
 279         }
 280         print("\n")
 281         if refBase != 0 {
 282                 print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
 283                 gcDumpObject("object", refBase, refOff)
 284         }
 285         getg().m.traceback = 2
 286         throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
 287 }
 288
 289 // findObject returns the base address for the heap object containing
 290 // the address p, the object's span, and the index of the object in s.
 291 // If p does not point into a heap object, it returns base == 0.
 292 //
 293 // If p points is an invalid heap pointer and debug.invalidptr != 0,
 294 // findObject panics.
 295 //
 296 // refBase and refOff optionally give the base address of the object
 297 // in which the pointer p was found and the byte offset at which it
 298 // was found. These are used for error reporting.
 299 //
 300 // It is nosplit so it is safe for p to be a pointer to the current goroutine's stack.
 301 // Since p is a uintptr, it would not be adjusted if the stack were to move.
 302 //
 303 //go:nosplit
 304 func findObject(p, refBase, refOff uintptr) (base uintptr, s *mspan, objIndex uintptr) {
 305         s = spanOf(p)
 306         // If s is nil, the virtual address has never been part of the heap.
 307         // This pointer may be to some mmap'd region, so we allow it.
 308         if s == nil {
 309                 if (GOARCH == "amd64" || GOARCH == "arm64") && p == clobberdeadPtr && debug.invalidptr != 0 {
 310                         // Crash if clobberdeadPtr is seen. Only on AMD64 and ARM64 for now,
 311                         // as they are the only platform where compiler's clobberdead mode is
 312                         // implemented. On these platforms clobberdeadPtr cannot be a valid address.
 313                         badPointer(s, p, refBase, refOff)
 314                 }
 315                 return
 316         }
 317         // If p is a bad pointer, it may not be in s's bounds.
 318         //
 319         // Check s.state to synchronize with span initialization
 320         // before checking other fields. See also spanOfHeap.
 321         if state := s.state.get(); state != mSpanInUse || p < s.base() || p >= s.limit {
 322                 // Pointers into stacks are also ok, the runtime manages these explicitly.
 323                 if state == mSpanManual {
 324                         return
 325                 }
 326                 // The following ensures that we are rigorous about what data
 327                 // structures hold valid pointers.
 328                 if debug.invalidptr != 0 {
 329                         badPointer(s, p, refBase, refOff)
 330                 }
 331                 return
 332         }
 333
 334         objIndex = s.objIndex(p)
 335         base = s.base() + objIndex*s.elemsize
 336         return
 337 }
 338
 339 // reflect_verifyNotInHeapPtr reports whether converting the not-in-heap pointer into a unsafe.Pointer is ok.
 340 //
 341 //go:linkname reflect_verifyNotInHeapPtr reflect.verifyNotInHeapPtr
 342 func reflect_verifyNotInHeapPtr(p uintptr) bool {
 343         // Conversion to a pointer is ok as long as findObject above does not call badPointer.
 344         // Since we're already promised that p doesn't point into the heap, just disallow heap
 345         // pointers and the special clobbered pointer.
 346         return spanOf(p) == nil && p != clobberdeadPtr
 347 }
 348
 349 const ptrBits = 8 * goarch.PtrSize
 350
 351 // bulkBarrierBitmap executes write barriers for copying from [src,
 352 // src+size) to [dst, dst+size) using a 1-bit pointer bitmap. src is
 353 // assumed to start maskOffset bytes into the data covered by the
 354 // bitmap in bits (which may not be a multiple of 8).
 355 //
 356 // This is used by bulkBarrierPreWrite for writes to data and BSS.
 357 //
 358 //go:nosplit
 359 func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
 360         word := maskOffset / goarch.PtrSize
 361         bits = addb(bits, word/8)
 362         mask := uint8(1) << (word % 8)
 363
 364         buf := &getg().m.p.ptr().wbBuf
 365         for i := uintptr(0); i < size; i += goarch.PtrSize {
 366                 if mask == 0 {
 367                         bits = addb(bits, 1)
 368                         if *bits == 0 {
 369                                 // Skip 8 words.
 370                                 i += 7 * goarch.PtrSize
 371                                 continue
 372                         }
 373                         mask = 1
 374                 }
 375                 if *bits&mask != 0 {
 376                         dstx := (*uintptr)(unsafe.Pointer(dst + i))
 377                         if src == 0 {
 378                                 p := buf.get1()
 379                                 p[0] = *dstx
 380                         } else {
 381                                 srcx := (*uintptr)(unsafe.Pointer(src + i))
 382                                 p := buf.get2()
 383                                 p[0] = *dstx
 384                                 p[1] = *srcx
 385                         }
 386                 }
 387                 mask <<= 1
 388         }
 389 }
 390
 391 // typeBitsBulkBarrier executes a write barrier for every
 392 // pointer that would be copied from [src, src+size) to [dst,
 393 // dst+size) by a memmove using the type bitmap to locate those
 394 // pointer slots.
 395 //
 396 // The type typ must correspond exactly to [src, src+size) and [dst, dst+size).
 397 // dst, src, and size must be pointer-aligned.
 398 // The type typ must have a plain bitmap, not a GC program.
 399 // The only use of this function is in channel sends, and the
 400 // 64 kB channel element limit takes care of this for us.
 401 //
 402 // Must not be preempted because it typically runs right before memmove,
 403 // and the GC must observe them as an atomic action.
 404 //
 405 // Callers must perform cgo checks if goexperiment.CgoCheck2.
 406 //
 407 //go:nosplit
 408 func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {
 409         if typ == nil {
 410                 throw("runtime: typeBitsBulkBarrier without type")
 411         }
 412         if typ.Size_ != size {
 413                 println("runtime: typeBitsBulkBarrier with type ", toRType(typ).string(), " of size ", typ.Size_, " but memory size", size)
 414                 throw("runtime: invalid typeBitsBulkBarrier")
 415         }
 416         if typ.Kind_&kindGCProg != 0 {
 417                 println("runtime: typeBitsBulkBarrier with type ", toRType(typ).string(), " with GC prog")
 418                 throw("runtime: invalid typeBitsBulkBarrier")
 419         }
 420         if !writeBarrier.enabled {
 421                 return
 422         }
 423         ptrmask := typ.GCData
 424         buf := &getg().m.p.ptr().wbBuf
 425         var bits uint32
 426         for i := uintptr(0); i < typ.PtrBytes; i += goarch.PtrSize {
 427                 if i&(goarch.PtrSize*8-1) == 0 {
 428                         bits = uint32(*ptrmask)
 429                         ptrmask = addb(ptrmask, 1)
 430                 } else {
 431                         bits = bits >> 1
 432                 }
 433                 if bits&1 != 0 {
 434                         dstx := (*uintptr)(unsafe.Pointer(dst + i))
 435                         srcx := (*uintptr)(unsafe.Pointer(src + i))
 436                         p := buf.get2()
 437                         p[0] = *dstx
 438                         p[1] = *srcx
 439                 }
 440         }
 441 }
 442
 443 // countAlloc returns the number of objects allocated in span s by
 444 // scanning the allocation bitmap.
 445 func (s *mspan) countAlloc() int {
 446         count := 0
 447         bytes := divRoundUp(uintptr(s.nelems), 8)
 448         // Iterate over each 8-byte chunk and count allocations
 449         // with an intrinsic. Note that newMarkBits guarantees that
 450         // gcmarkBits will be 8-byte aligned, so we don't have to
 451         // worry about edge cases, irrelevant bits will simply be zero.
 452         for i := uintptr(0); i < bytes; i += 8 {
 453                 // Extract 64 bits from the byte pointer and get a OnesCount.
 454                 // Note that the unsafe cast here doesn't preserve endianness,
 455                 // but that's OK. We only care about how many bits are 1, not
 456                 // about the order we discover them in.
 457                 mrkBits := *(*uint64)(unsafe.Pointer(s.gcmarkBits.bytep(i)))
 458                 count += sys.OnesCount64(mrkBits)
 459         }
 460         return count
 461 }
 462
 463 // Read the bytes starting at the aligned pointer p into a uintptr.
 464 // Read is little-endian.
 465 func readUintptr(p *byte) uintptr {
 466         x := *(*uintptr)(unsafe.Pointer(p))
 467         if goarch.BigEndian {
 468                 if goarch.PtrSize == 8 {
 469                         return uintptr(sys.Bswap64(uint64(x)))
 470                 }
 471                 return uintptr(sys.Bswap32(uint32(x)))
 472         }
 473         return x
 474 }
 475
 476 var debugPtrmask struct {
 477         lock mutex
 478         data *byte
 479 }
 480
 481 // progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
 482 // size the size of the region described by prog, in bytes.
 483 // The resulting bitvector will have no more than size/goarch.PtrSize bits.
 484 func progToPointerMask(prog *byte, size uintptr) bitvector {
 485         n := (size/goarch.PtrSize + 7) / 8
 486         x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
 487         x[len(x)-1] = 0xa1 // overflow check sentinel
 488         n = runGCProg(prog, &x[0])
 489         if x[len(x)-1] != 0xa1 {
 490                 throw("progToPointerMask: overflow")
 491         }
 492         return bitvector{int32(n), &x[0]}
 493 }
 494
 495 // Packed GC pointer bitmaps, aka GC programs.
 496 //
 497 // For large types containing arrays, the type information has a
 498 // natural repetition that can be encoded to save space in the
 499 // binary and in the memory representation of the type information.
 500 //
 501 // The encoding is a simple Lempel-Ziv style bytecode machine
 502 // with the following instructions:
 503 //
 504 //      00000000: stop
 505 //      0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes
 506 //      10000000 n c: repeat the previous n bits c times; n, c are varints
 507 //      1nnnnnnn c: repeat the previous n bits c times; c is a varint
 508
 509 // runGCProg returns the number of 1-bit entries written to memory.
 510 func runGCProg(prog, dst *byte) uintptr {
 511         dstStart := dst
 512
 513         // Bits waiting to be written to memory.
 514         var bits uintptr
 515         var nbits uintptr
 516
 517         p := prog
 518 Run:
 519         for {
 520                 // Flush accumulated full bytes.
 521                 // The rest of the loop assumes that nbits <= 7.
 522                 for ; nbits >= 8; nbits -= 8 {
 523                         *dst = uint8(bits)
 524                         dst = add1(dst)
 525                         bits >>= 8
 526                 }
 527
 528                 // Process one instruction.
 529                 inst := uintptr(*p)
 530                 p = add1(p)
 531                 n := inst & 0x7F
 532                 if inst&0x80 == 0 {
 533                         // Literal bits; n == 0 means end of program.
 534                         if n == 0 {
 535                                 // Program is over.
 536                                 break Run
 537                         }
 538                         nbyte := n / 8
 539                         for i := uintptr(0); i < nbyte; i++ {
 540                                 bits |= uintptr(*p) << nbits
 541                                 p = add1(p)
 542                                 *dst = uint8(bits)
 543                                 dst = add1(dst)
 544                                 bits >>= 8
 545                         }
 546                         if n %= 8; n > 0 {
 547                                 bits |= uintptr(*p) << nbits
 548                                 p = add1(p)
 549                                 nbits += n
 550                         }
 551                         continue Run
 552                 }
 553
 554                 // Repeat. If n == 0, it is encoded in a varint in the next bytes.
 555                 if n == 0 {
 556                         for off := uint(0); ; off += 7 {
 557                                 x := uintptr(*p)
 558                                 p = add1(p)
 559                                 n |= (x & 0x7F) << off
 560                                 if x&0x80 == 0 {
 561                                         break
 562                                 }
 563                         }
 564                 }
 565
 566                 // Count is encoded in a varint in the next bytes.
 567                 c := uintptr(0)
 568                 for off := uint(0); ; off += 7 {
 569                         x := uintptr(*p)
 570                         p = add1(p)
 571                         c |= (x & 0x7F) << off
 572                         if x&0x80 == 0 {
 573                                 break
 574                         }
 575                 }
 576                 c *= n // now total number of bits to copy
 577
 578                 // If the number of bits being repeated is small, load them
 579                 // into a register and use that register for the entire loop
 580                 // instead of repeatedly reading from memory.
 581                 // Handling fewer than 8 bits here makes the general loop simpler.
 582                 // The cutoff is goarch.PtrSize*8 - 7 to guarantee that when we add
 583                 // the pattern to a bit buffer holding at most 7 bits (a partial byte)
 584                 // it will not overflow.
 585                 src := dst
 586                 const maxBits = goarch.PtrSize*8 - 7
 587                 if n <= maxBits {
 588                         // Start with bits in output buffer.
 589                         pattern := bits
 590                         npattern := nbits
 591
 592                         // If we need more bits, fetch them from memory.
 593                         src = subtract1(src)
 594                         for npattern < n {
 595                                 pattern <<= 8
 596                                 pattern |= uintptr(*src)
 597                                 src = subtract1(src)
 598                                 npattern += 8
 599                         }
 600
 601                         // We started with the whole bit output buffer,
 602                         // and then we loaded bits from whole bytes.
 603                         // Either way, we might now have too many instead of too few.
 604                         // Discard the extra.
 605                         if npattern > n {
 606                                 pattern >>= npattern - n
 607                                 npattern = n
 608                         }
 609
 610                         // Replicate pattern to at most maxBits.
 611                         if npattern == 1 {
 612                                 // One bit being repeated.
 613                                 // If the bit is 1, make the pattern all 1s.
 614                                 // If the bit is 0, the pattern is already all 0s,
 615                                 // but we can claim that the number of bits
 616                                 // in the word is equal to the number we need (c),
 617                                 // because right shift of bits will zero fill.
 618                                 if pattern == 1 {
 619                                         pattern = 1<<maxBits - 1
 620                                         npattern = maxBits
 621                                 } else {
 622                                         npattern = c
 623                                 }
 624                         } else {
 625                                 b := pattern
 626                                 nb := npattern
 627                                 if nb+nb <= maxBits {
 628                                         // Double pattern until the whole uintptr is filled.
 629                                         for nb <= goarch.PtrSize*8 {
 630                                                 b |= b << nb
 631                                                 nb += nb
 632                                         }
 633                                         // Trim away incomplete copy of original pattern in high bits.
 634                                         // TODO(rsc): Replace with table lookup or loop on systems without divide?
 635                                         nb = maxBits / npattern * npattern
 636                                         b &= 1<<nb - 1
 637                                         pattern = b
 638                                         npattern = nb
 639                                 }
 640                         }
 641
 642                         // Add pattern to bit buffer and flush bit buffer, c/npattern times.
 643                         // Since pattern contains >8 bits, there will be full bytes to flush
 644                         // on each iteration.
 645                         for ; c >= npattern; c -= npattern {
 646                                 bits |= pattern << nbits
 647                                 nbits += npattern
 648                                 for nbits >= 8 {
 649                                         *dst = uint8(bits)
 650                                         dst = add1(dst)
 651                                         bits >>= 8
 652                                         nbits -= 8
 653                                 }
 654                         }
 655
 656                         // Add final fragment to bit buffer.
 657                         if c > 0 {
 658                                 pattern &= 1<<c - 1
 659                                 bits |= pattern << nbits
 660                                 nbits += c
 661                         }
 662                         continue Run
 663                 }
 664
 665                 // Repeat; n too large to fit in a register.
 666                 // Since nbits <= 7, we know the first few bytes of repeated data
 667                 // are already written to memory.
 668                 off := n - nbits // n > nbits because n > maxBits and nbits <= 7
 669                 // Leading src fragment.
 670                 src = subtractb(src, (off+7)/8)
 671                 if frag := off & 7; frag != 0 {
 672                         bits |= uintptr(*src) >> (8 - frag) << nbits
 673                         src = add1(src)
 674                         nbits += frag
 675                         c -= frag
 676                 }
 677                 // Main loop: load one byte, write another.
 678                 // The bits are rotating through the bit buffer.
 679                 for i := c / 8; i > 0; i-- {
 680                         bits |= uintptr(*src) << nbits
 681                         src = add1(src)
 682                         *dst = uint8(bits)
 683                         dst = add1(dst)
 684                         bits >>= 8
 685                 }
 686                 // Final src fragment.
 687                 if c %= 8; c > 0 {
 688                         bits |= (uintptr(*src) & (1<<c - 1)) << nbits
 689                         nbits += c
 690                 }
 691         }
 692
 693         // Write any final bits out, using full-byte writes, even for the final byte.
 694         totalBits := (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
 695         nbits += -nbits & 7
 696         for ; nbits > 0; nbits -= 8 {
 697                 *dst = uint8(bits)
 698                 dst = add1(dst)
 699                 bits >>= 8
 700         }
 701         return totalBits
 702 }
 703
 704 // materializeGCProg allocates space for the (1-bit) pointer bitmask
 705 // for an object of size ptrdata.  Then it fills that space with the
 706 // pointer bitmask specified by the program prog.
 707 // The bitmask starts at s.startAddr.
 708 // The result must be deallocated with dematerializeGCProg.
 709 func materializeGCProg(ptrdata uintptr, prog *byte) *mspan {
 710         // Each word of ptrdata needs one bit in the bitmap.
 711         bitmapBytes := divRoundUp(ptrdata, 8*goarch.PtrSize)
 712         // Compute the number of pages needed for bitmapBytes.
 713         pages := divRoundUp(bitmapBytes, pageSize)
 714         s := mheap_.allocManual(pages, spanAllocPtrScalarBits)
 715         runGCProg(addb(prog, 4), (*byte)(unsafe.Pointer(s.startAddr)))
 716         return s
 717 }
 718 func dematerializeGCProg(s *mspan) {
 719         mheap_.freeManual(s, spanAllocPtrScalarBits)
 720 }
 721
 722 func dumpGCProg(p *byte) {
 723         nptr := 0
 724         for {
 725                 x := *p
 726                 p = add1(p)
 727                 if x == 0 {
 728                         print("\t", nptr, " end\n")
 729                         break
 730                 }
 731                 if x&0x80 == 0 {
 732                         print("\t", nptr, " lit ", x, ":")
 733                         n := int(x+7) / 8
 734                         for i := 0; i < n; i++ {
 735                                 print(" ", hex(*p))
 736                                 p = add1(p)
 737                         }
 738                         print("\n")
 739                         nptr += int(x)
 740                 } else {
 741                         nbit := int(x &^ 0x80)
 742                         if nbit == 0 {
 743                                 for nb := uint(0); ; nb += 7 {
 744                                         x := *p
 745                                         p = add1(p)
 746                                         nbit |= int(x&0x7f) << nb
 747                                         if x&0x80 == 0 {
 748                                                 break
 749                                         }
 750                                 }
 751                         }
 752                         count := 0
 753                         for nb := uint(0); ; nb += 7 {
 754                                 x := *p
 755                                 p = add1(p)
 756                                 count |= int(x&0x7f) << nb
 757                                 if x&0x80 == 0 {
 758                                         break
 759                                 }
 760                         }
 761                         print("\t", nptr, " repeat ", nbit, " × ", count, "\n")
 762                         nptr += nbit * count
 763                 }
 764         }
 765 }
 766
 767 // Testing.
 768
 769 // reflect_gcbits returns the GC type info for x, for testing.
 770 // The result is the bitmap entries (0 or 1), one entry per byte.
 771 //
 772 //go:linkname reflect_gcbits reflect.gcbits
 773 func reflect_gcbits(x any) []byte {
 774         return getgcmask(x)
 775 }