[dev.garbage] Merge remote-tracking branch 'origin/master' into HEAD

author Rick Hudson <rlh@golang.org>

Wed, 27 Apr 2016 22:19:16 +0000 (18:19 -0400)

committer Rick Hudson <rlh@golang.org>

Wed, 27 Apr 2016 22:46:52 +0000 (18:46 -0400)
author Rick Hudson <rlh@golang.org>
Wed, 27 Apr 2016 22:19:16 +0000 (18:19 -0400)
committer Rick Hudson <rlh@golang.org>
Wed, 27 Apr 2016 22:46:52 +0000 (18:46 -0400)
diff --git a/dev.garbage b/dev.garbage

new file mode 100644 (file)

index 0000000..b8c3a3f
--- /dev/null
+++ b/dev.garbage
@@ -0,0 +1 @@
+Reviving dev.garbage branch for use in new garbage collection experiment.
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go

index 0afab0909511ceb8cfe7676079fd6ba7f5f483d3..6085c6866cf8c6526f267ad44f2835d324236cf9 100644 (file)
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
@@ -473,9 +473,13 @@ func dumpobjs() {
                 if n > uintptr(len(freemark)) {
                         throw("freemark array doesn't have enough entries")
                 }
-               for l := s.freelist; l.ptr() != nil; l = l.ptr().next {
-                       freemark[(uintptr(l)-p)/size] = true
+
+               for freeIndex := s.freeindex; freeIndex < s.nelems; freeIndex++ {
+                       if s.isFree(freeIndex) {
+                               freemark[freeIndex] = true
+                       }
                 }
+
                 for j := uintptr(0); j < n; j, p = j+1, p+size {
                         if freemark[j] {
                                 freemark[j] = false
@@ -710,7 +714,7 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector {
         i := uintptr(0)
         hbits := heapBitsForAddr(p)
         for ; i < nptr; i++ {
-               if i >= 2 && !hbits.isMarked() {
+               if i >= 2 && !hbits.morePointers() {
                         break // end of object
                 }
                 if hbits.isPointer() {
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go

index 081d1419cb67082d90e957ddeb8e628c5529a21b..6fe46566033143a0a85196885f62b7e969845e0a 100644 (file)
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -94,6 +94,9 @@ const (
         pageShift = _PageShift
         pageSize  = _PageSize
         pageMask  = _PageMask
+       // By construction, single page spans of the smallest object class
+       // have the most objects per span.
+       maxObjsPerSpan = pageSize / 8
  
         mSpanInUse = _MSpanInUse
  
@@ -484,6 +487,71 @@ func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer {
  // base address for all 0-byte allocations
  var zerobase uintptr
  
+// nextFreeFast returns the next free object if one is quickly available.
+// Otherwise it returns 0.
+func (c *mcache) nextFreeFast(sizeclass int8) gclinkptr {
+       s := c.alloc[sizeclass]
+       ctzIndex := uint8(s.allocCache & 0xff)
+       if ctzIndex != 0 {
+               theBit := uint64(ctzVals[ctzIndex])
+               freeidx := s.freeindex // help the pre ssa compiler out here with cse.
+               result := freeidx + uintptr(theBit)
+               if result < s.nelems {
+                       s.allocCache >>= (theBit + 1)
+                       freeidx = result + 1
+                       if freeidx%64 == 0 && freeidx != s.nelems {
+                               // We just incremented s.freeindex so it isn't 0
+                               // so we are moving to the next aCache.
+                               whichByte := freeidx / 8
+                               s.refillAllocCache(whichByte)
+                       }
+                       s.freeindex = freeidx
+                       v := gclinkptr(result*s.elemsize + s.base())
+                       s.allocCount++
+                       return v
+               }
+       }
+       return 0
+}
+
+// nextFree returns the next free object from the cached span if one is available.
+// Otherwise it refills the cache with a span with an available object and
+// returns that object along with a flag indicating that this was a heavy
+// weight allocation. If it is a heavy weight allocation the caller must
+// determine whether a new GC cycle needs to be started or if the GC is active
+// whether this goroutine needs to assist the GC.
+func (c *mcache) nextFree(sizeclass int8) (v gclinkptr, shouldhelpgc bool) {
+       s := c.alloc[sizeclass]
+       shouldhelpgc = false
+       freeIndex := s.nextFreeIndex()
+       if freeIndex == s.nelems {
+               // The span is full.
+               if uintptr(s.allocCount) != s.nelems {
+                       println("runtime: s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
+                       throw("s.allocCount != s.nelems && freeIndex == s.nelems")
+               }
+               systemstack(func() {
+                       c.refill(int32(sizeclass))
+               })
+               shouldhelpgc = true
+               s = c.alloc[sizeclass]
+
+               freeIndex = s.nextFreeIndex()
+       }
+
+       if freeIndex >= s.nelems {
+               throw("freeIndex is not valid")
+       }
+
+       v = gclinkptr(freeIndex*s.elemsize + s.base())
+       s.allocCount++
+       if uintptr(s.allocCount) > s.nelems {
+               println("s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
+               throw("s.allocCount > s.nelems")
+       }
+       return
+}
+
  // Allocate an object of size bytes.
  // Small objects are allocated from the per-P cache's free lists.
  // Large objects (> 32 kB) are allocated straight from the heap.
@@ -538,7 +606,6 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
         shouldhelpgc := false
         dataSize := size
         c := gomcache()
-       var s *mspan
         var x unsafe.Pointer
         noscan := typ == nil || typ.kind&kindNoPointers != 0
         if size <= maxSmallSize {
@@ -591,20 +658,11 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
                                 return x
                         }
                         // Allocate a new maxTinySize block.
-                       s = c.alloc[tinySizeClass]
-                       v := s.freelist
-                       if v.ptr() == nil {
-                               systemstack(func() {
-                                       c.refill(tinySizeClass)
-                               })
-                               shouldhelpgc = true
-                               s = c.alloc[tinySizeClass]
-                               v = s.freelist
+                       var v gclinkptr
+                       v = c.nextFreeFast(tinySizeClass)
+                       if v == 0 {
+                               v, shouldhelpgc = c.nextFree(tinySizeClass)
                         }
-                       s.freelist = v.ptr().next
-                       s.ref++
-                       // prefetchnta offers best performance, see change list message.
-                       prefetchnta(uintptr(v.ptr().next))
                         x = unsafe.Pointer(v)
                         (*[2]uint64)(x)[0] = 0
                         (*[2]uint64)(x)[1] = 0
@@ -623,26 +681,15 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
                                 sizeclass = size_to_class128[(size-1024+127)>>7]
                         }
                         size = uintptr(class_to_size[sizeclass])
-                       s = c.alloc[sizeclass]
-                       v := s.freelist
-                       if v.ptr() == nil {
-                               systemstack(func() {
-                                       c.refill(int32(sizeclass))
-                               })
-                               shouldhelpgc = true
-                               s = c.alloc[sizeclass]
-                               v = s.freelist
+                       var v gclinkptr
+                       v = c.nextFreeFast(sizeclass)
+                       if v == 0 {
+                               v, shouldhelpgc = c.nextFree(sizeclass)
                         }
-                       s.freelist = v.ptr().next
-                       s.ref++
-                       // prefetchnta offers best performance, see change list message.
-                       prefetchnta(uintptr(v.ptr().next))
                         x = unsafe.Pointer(v)
                         if needzero {
-                               v.ptr().next = 0
-                               if size > 2*sys.PtrSize && ((*[2]uintptr)(x))[1] != 0 {
-                                       memclr(unsafe.Pointer(v), size)
-                               }
+                               memclr(unsafe.Pointer(v), size)
+                               // TODO:(rlh) Only clear if object is not known to be zeroed.
                         }
                 }
         } else {
@@ -651,13 +698,14 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
                 systemstack(func() {
                         s = largeAlloc(size, needzero)
                 })
-               x = unsafe.Pointer(uintptr(s.start << pageShift))
+               s.freeindex = 1
+               x = unsafe.Pointer(s.base())
                 size = s.elemsize
         }
  
         var scanSize uintptr
         if noscan {
-               // All objects are pre-marked as noscan. Nothing to do.
+               heapBitsSetTypeNoScan(uintptr(x), size)
         } else {
                 // If allocating a defer+arg block, now that we've picked a malloc size
                 // large enough to hold everything, cut the "asked for" size down to
@@ -698,9 +746,27 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
                 gcmarknewobject(uintptr(x), size, scanSize)
         }
  
+       // The object x is about to be reused but tracefree and msanfree
+       // need to be informed.
+       // TODO:(rlh) It is quite possible that this object is being allocated
+       // out of a fresh span and that there is no preceding call to
+       // tracealloc with this object. If this is an issue then initialization
+       // of the fresh span needs to leave some crumbs around that can be used to
+       // avoid these calls. Furthermore these crumbs are likely the same as
+       // those needed to determine if the object needs to be zeroed.
+       // In the case of msanfree it does not make sense to call msanfree
+       // followed by msanmalloc. msanfree indicates that the bytes are not
+       // initialized but msanmalloc is about to indicate that they are.
+       // It makes no difference whether msanmalloc has been called on these
+       // bytes or not.
+       if debug.allocfreetrace != 0 {
+               tracefree(unsafe.Pointer(x), size)
+       }
+
         if raceenabled {
                 racemalloc(x, size)
         }
+
         if msanenabled {
                 msanmalloc(x, size)
         }
@@ -755,8 +821,8 @@ func largeAlloc(size uintptr, needzero bool) *mspan {
         if s == nil {
                 throw("out of memory")
         }
-       s.limit = uintptr(s.start)<<_PageShift + size
-       heapBitsForSpan(s.base()).initSpan(s.layout())
+       s.limit = s.base() + size
+       heapBitsForSpan(s.base()).initSpan(s)
         return s
  }
  
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go

index 3df697ee5c6355d30f9bf72e065846780e33c11a..af89577703a52ef1e416349eb5904e4e127b0ca7 100644 (file)
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -24,7 +24,7 @@
  // In each 2-bit entry, the lower bit holds the same information as in the 1-bit
  // bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
  // The meaning of the high bit depends on the position of the word being described
-// in its allocated object. In the first word, the high bit is the GC ``marked'' bit.
+// in its allocated object. In the first word, the high bit is unused.
  // In the second word, the high bit is the GC ``checkmarked'' bit (see below).
  // In the third and later words, the high bit indicates that the object is still
  // being described. In these words, if a bit pair with a high bit 0 is encountered,
@@ -33,12 +33,13 @@
  // in the object are uninteresting to the garbage collector.
  //
  // The 2-bit entries are split when written into the byte, so that the top half
-// of the byte contains 4 mark bits and the bottom half contains 4 pointer bits.
+// of the byte contains 4 high bits and the bottom half contains 4 low (pointer)
+// bits.
  // This form allows a copy from the 1-bit to the 4-bit form to keep the
  // pointer bits contiguous, instead of having to space them out.
  //
  // The code makes use of the fact that the zero value for a heap bitmap
-// has no live pointer bit set and is (depending on position), not marked,
+// has no live pointer bit set and is (depending on position), not used,
  // not checkmarked, and is the dead encoding.
  // These properties must be preserved when modifying the encoding.
  //
@@ -63,6 +64,7 @@
  // It is still used in general, except in checkmark the type bit is repurposed
  // as the checkmark bit and then reinitialized (to 1) as the type bit when
  // finished.
+//
  
  package runtime
  
@@ -95,6 +97,8 @@ func addb(p *byte, n uintptr) *byte {
  }
  
  // subtractb returns the byte pointer p-n.
+// subtractb is typically used when traversing the pointer tables referred to by hbits
+// which are arranged in reverse order.
  //go:nowritebarrier
  //go:nosplit
  func subtractb(p *byte, n uintptr) *byte {
@@ -115,6 +119,8 @@ func add1(p *byte) *byte {
  }
  
  // subtract1 returns the byte pointer p-1.
+// subtract1 is typically used when traversing the pointer tables referred to by hbits
+// which are arranged in reverse order.
  //go:nowritebarrier
  //
  // nosplit because it is used during write barriers and must not be preempted.
@@ -161,6 +167,234 @@ type heapBits struct {
         shift uint32
  }
  
+// markBits provides access to the mark bit for an object in the heap.
+// bytep points to the byte holding the mark bit.
+// mask is a byte with a single bit set that can be &ed with *bytep
+// to see if the bit has been set.
+// *m.bytep&m.mask != 0 indicates the mark bit is set.
+// index can be used along with span information to generate
+// the address of the object in the heap.
+// We maintain one set of mark bits for allocation and one for
+// marking purposes.
+type markBits struct {
+       bytep *uint8
+       mask  uint8
+       index uintptr
+}
+
+//go:nosplit
+func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits {
+       whichByte := allocBitIndex / 8
+       whichBit := allocBitIndex % 8
+       return markBits{&s.allocBits[whichByte], uint8(1 << whichBit), allocBitIndex}
+}
+
+// ctzVals[i] is the count of trailing zero bits in the byte value i.
+// ctzVals[0] is 8, indicating that all 8 bits of a zero byte are zero.
+var ctzVals = [256]int8{
+       8, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       4, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       5, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       4, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       6, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       4, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       5, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       4, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       7, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       4, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       5, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       4, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       6, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       4, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       5, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       4, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0}
+
+// ctz64 returns the number of trailing zero bits in markBits, or 64 if markBits is 0.
+// It is a temporary stand in for the count trailing zero (ctz) instruction; IA bsf works on a 64 bit non-zero word.
+func ctz64(markBits uint64) uint64 {
+       ctz8 := ctzVals[markBits&0xff]
+       if ctz8 != 8 {
+               return uint64(ctz8)
+       } else if markBits == 0 { // low byte is zero; check the full word.
+               return 64 // bits in a 64 bit word; returning here ensures the loop below terminates
+       }
+       result := uint64(8)
+       markBits >>= 8
+       for ctz8 = ctzVals[markBits&0xff]; ctz8 == 8; ctz8 = ctzVals[markBits&0xff] {
+               result += 8
+               markBits >>= 8
+       }
+       result += uint64(ctz8)
+       return result
+}
+
+// refillAllocCache takes 8 bytes of s.allocBits starting at whichByte
+// and complements them (bitwise NOT) so that ctz (count trailing zeros)
+// instructions can be used to find free objects. It then places these 8 bytes,
+// with bytes[0] in the low-order bits, into the cached 64 bit s.allocCache.
+func (s *mspan) refillAllocCache(whichByte uintptr) {
+       bytes := s.allocBits[whichByte : whichByte+8]
+       aCache := uint64(0)
+       aCache |= uint64(bytes[0])
+       aCache |= uint64(bytes[1]) << (1 * 8)
+       aCache |= uint64(bytes[2]) << (2 * 8)
+       aCache |= uint64(bytes[3]) << (3 * 8)
+       aCache |= uint64(bytes[4]) << (4 * 8)
+       aCache |= uint64(bytes[5]) << (5 * 8)
+       aCache |= uint64(bytes[6]) << (6 * 8)
+       aCache |= uint64(bytes[7]) << (7 * 8)
+       s.allocCache = ^aCache
+}
+
+// nextFreeIndex returns the index of the next free object in s at
+// or after s.freeindex.
+// There are hardware instructions that can be used to make this
+// faster if profiling warrants it.
+func (s *mspan) nextFreeIndex() uintptr {
+       sfreeindex := s.freeindex
+       snelems := s.nelems
+       if sfreeindex == snelems {
+               return sfreeindex
+       }
+       if sfreeindex > snelems {
+               throw("s.freeindex > s.nelems")
+       }
+
+       aCache := s.allocCache
+       bitIndex := ctz64(aCache)
+       for bitIndex == 64 {
+               // Move index to start of next cached bits.
+               sfreeindex = (sfreeindex + 64) &^ (64 - 1)
+               if sfreeindex >= snelems {
+                       s.freeindex = snelems
+                       return snelems
+               }
+               whichByte := sfreeindex / 8
+               // Refill s.allocCache with the next 64 alloc bits.
+               s.refillAllocCache(whichByte)
+               aCache = s.allocCache
+               bitIndex = ctz64(aCache)
+               // Nothing was available try again now allocCache has been refilled.
+       }
+       result := sfreeindex + uintptr(bitIndex)
+       if result >= snelems {
+               s.freeindex = snelems
+               return snelems
+       }
+
+       s.allocCache >>= (bitIndex + 1)
+       sfreeindex = result + 1
+
+       if sfreeindex%64 == 0 && sfreeindex != snelems {
+               // We just incremented s.freeindex so it isn't 0.
+               // As each 1 in s.allocCache was encountered and used for allocation
+               // it was shifted away. At this point s.allocCache contains all 0s.
+               // Refill s.allocCache so that it corresponds
+               // to the bits at s.allocBits starting at s.freeindex.
+               whichByte := sfreeindex / 8
+               s.refillAllocCache(whichByte)
+       }
+       s.freeindex = sfreeindex
+       return result
+}
+
+func (s *mspan) isFree(index uintptr) bool {
+       whichByte := index / 8
+       whichBit := index % 8
+       return s.allocBits[whichByte]&uint8(1<<whichBit) == 0
+}
+
+func markBitsForAddr(p uintptr) markBits {
+       s := spanOf(p)
+       return s.markBitsForAddr(p)
+}
+
+func (s *mspan) markBitsForAddr(p uintptr) markBits {
+       byteOffset := p - s.base()
+       markBitIndex := uintptr(0)
+       if byteOffset != 0 {
+               // markBitIndex := (p - s.base()) / s.elemsize, using division by multiplication
+               markBitIndex = uintptr(uint64(byteOffset) >> s.divShift * uint64(s.divMul) >> s.divShift2)
+       }
+       whichByte := markBitIndex / 8
+       whichBit := markBitIndex % 8
+       return markBits{&s.gcmarkBits[whichByte], uint8(1 << whichBit), markBitIndex}
+}
+
+func (s *mspan) markBitsForBase() markBits {
+       return markBits{&s.gcmarkBits[0], uint8(1), 0}
+}
+
+// isMarked reports whether mark bit m is set.
+func (m markBits) isMarked() bool {
+       return *m.bytep&m.mask != 0
+}
+
+// setMarked sets the marked bit in the markbits, atomically.
+func (m markBits) setMarked() {
+       // Might be racing with other updates, so use atomic update always.
+       // We used to be clever here and use a non-atomic update in certain
+       // cases, but it's not worth the risk.
+       atomic.Or8(m.bytep, m.mask)
+}
+
+// setMarkedNonAtomic sets the marked bit in the markbits, non-atomically.
+func (m markBits) setMarkedNonAtomic() {
+       *m.bytep |= m.mask
+}
+
+// clearMarked clears the marked bit in the markbits, atomically.
+func (m markBits) clearMarked() {
+       // Might be racing with other updates, so use atomic update always.
+       // We used to be clever here and use a non-atomic update in certain
+       // cases, but it's not worth the risk.
+       atomic.And8(m.bytep, ^m.mask)
+}
+
+// clearMarkedNonAtomic clears the marked bit non-atomically (AND NOT, so an already clear bit stays clear).
+func (m markBits) clearMarkedNonAtomic() {
+       *m.bytep &^= m.mask
+}
+
+// markBitsForSpan returns the markBits for the span base address base; it throws if base is outside the arena or its mark bit is not the first bit of a byte.
+func markBitsForSpan(base uintptr) (mbits markBits) {
+       if base < mheap_.arena_start || base >= mheap_.arena_used {
+               throw("markBitsForSpan: base out of range")
+       }
+       mbits = markBitsForAddr(base)
+       if mbits.mask != 1 {
+               throw("markBitsForSpan: unaligned start")
+       }
+       return mbits
+}
+
+// advance advances the markBits to the next object in the span.
+func (m *markBits) advance() {
+       if m.mask == 1<<7 {
+               m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
+               m.mask = 1
+       } else {
+               m.mask = m.mask << 1
+       }
+       m.index++
+}
+
  // heapBitsForAddr returns the heapBits for the address addr.
  // The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
  //
@@ -177,11 +411,7 @@ func heapBitsForSpan(base uintptr) (hbits heapBits) {
         if base < mheap_.arena_start || base >= mheap_.arena_used {
                 throw("heapBitsForSpan: base out of range")
         }
-       hbits = heapBitsForAddr(base)
-       if hbits.shift != 0 {
-               throw("heapBitsForSpan: unaligned start")
-       }
-       return hbits
+       return heapBitsForAddr(base)
  }
  
  // heapBitsForObject returns the base address for the heap object
@@ -230,7 +460,7 @@ func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits
                         } else {
                                 print(" to unused region of span")
                         }
-                       print("idx=", hex(idx), " span.start=", hex(s.start<<_PageShift), " span.limit=", hex(s.limit), " span.state=", s.state, "\n")
+                       print("idx=", hex(idx), " span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", s.state, "\n")
                         if refBase != 0 {
                                 print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
                                 gcDumpObject("object", refBase, refOff)
@@ -298,28 +528,13 @@ func (h heapBits) bits() uint32 {
         return uint32(*h.bitp) >> (h.shift & 31)
  }
  
-// isMarked reports whether the heap bits have the marked bit set.
-// h must describe the initial word of the object.
-func (h heapBits) isMarked() bool {
+// morePointers reports whether the object is still being described at this word;
+// it returns false when this word and all remaining words in this object are scalars.
+// h must not describe the first or second word of the object.
+func (h heapBits) morePointers() bool {
         return *h.bitp&(bitMarked<<h.shift) != 0
  }
  
-// setMarked sets the marked bit in the heap bits, atomically.
-// h must describe the initial word of the object.
-func (h heapBits) setMarked() {
-       // Each byte of GC bitmap holds info for four words.
-       // Might be racing with other updates, so use atomic update always.
-       // We used to be clever here and use a non-atomic update in certain
-       // cases, but it's not worth the risk.
-       atomic.Or8(h.bitp, bitMarked<<h.shift)
-}
-
-// setMarkedNonAtomic sets the marked bit in the heap bits, non-atomically.
-// h must describe the initial word of the object.
-func (h heapBits) setMarkedNonAtomic() {
-       *h.bitp |= bitMarked << h.shift
-}
-
  // isPointer reports whether the heap bits describe a pointer word.
  // h must describe the initial word of the object.
  //
@@ -536,6 +751,22 @@ func typeBitsBulkBarrier(typ *_type, p, size uintptr) {
         }
  }
  
+func (s *mspan) clearGCMarkBits() {
+       bytesInMarkBits := (s.nelems + 7) / 8
+       bits := s.gcmarkBits[:bytesInMarkBits]
+       for i := range bits {
+               bits[i] = 0
+       }
+}
+
+func (s *mspan) clearAllocBits() {
+       bytesInMarkBits := (s.nelems + 7) / 8
+       bits := s.allocBits[:bytesInMarkBits]
+       for i := range bits {
+               bits[i] = 0
+       }
+}
+
  // The methods operating on spans all require that h has been returned
  // by heapBitsForSpan and that size, n, total are the span layout description
  // returned by the mspan's layout method.
@@ -549,7 +780,19 @@ func typeBitsBulkBarrier(typ *_type, p, size uintptr) {
  // If this is a span of pointer-sized objects, it initializes all
  // words to pointer (and there are no dead bits).
  // Otherwise, it initializes all words to scalar/dead.
-func (h heapBits) initSpan(size, n, total uintptr) {
+func (h heapBits) initSpan(s *mspan) {
+       size, n, total := s.layout()
+
+       // Init the markbit structures
+       s.allocBits = &s.markbits1
+       s.gcmarkBits = &s.markbits2
+       s.freeindex = 0
+       s.allocCache = ^uint64(0) // all 1s indicating all free.
+       s.nelems = n
+       s.clearAllocBits()
+       s.clearGCMarkBits()
+
+       // Clear bits corresponding to objects.
         if total%heapBitmapScale != 0 {
                 throw("initSpan: unaligned length")
         }
@@ -610,106 +853,60 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
         }
  }
  
-// heapBitsSweepSpan coordinates the sweeping of a span by reading
-// and updating the corresponding heap bitmap entries.
-// For each free object in the span, heapBitsSweepSpan sets the type
-// bits for the first four words (less for smaller objects) to scalar/dead
-// and then calls f(p), where p is the object's base address.
-// f is expected to add the object to a free list.
-// For non-free objects, heapBitsSweepSpan turns off the marked bit.
-func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) {
-       h := heapBitsForSpan(base)
-       switch {
-       default:
-               throw("heapBitsSweepSpan")
-       case sys.PtrSize == 8 && size == sys.PtrSize:
-               // Consider mark bits in all four 2-bit entries of each bitmap byte.
-               bitp := h.bitp
-               for i := uintptr(0); i < n; i += 4 {
-                       x := uint32(*bitp)
-                       // Note that unlike the other size cases, we leave the pointer bits set here.
-                       // These are initialized during initSpan when the span is created and left
-                       // in place the whole time the span is used for pointer-sized objects.
-                       // That lets heapBitsSetType avoid an atomic update to set the pointer bit
-                       // during allocation.
-                       if x&bitMarked != 0 {
-                               x &^= bitMarked
-                       } else {
-                               f(base + i*sys.PtrSize)
-                       }
-                       if x&(bitMarked<<heapBitsShift) != 0 {
-                               x &^= bitMarked << heapBitsShift
-                       } else {
-                               f(base + (i+1)*sys.PtrSize)
-                       }
-                       if x&(bitMarked<<(2*heapBitsShift)) != 0 {
-                               x &^= bitMarked << (2 * heapBitsShift)
-                       } else {
-                               f(base + (i+2)*sys.PtrSize)
-                       }
-                       if x&(bitMarked<<(3*heapBitsShift)) != 0 {
-                               x &^= bitMarked << (3 * heapBitsShift)
-                       } else {
-                               f(base + (i+3)*sys.PtrSize)
-                       }
-                       *bitp = uint8(x)
-                       bitp = subtract1(bitp)
-               }
-
-       case size%(4*sys.PtrSize) == 0:
-               // Mark bit is in first word of each object.
-               // Each object starts at bit 0 of a heap bitmap byte.
-               bitp := h.bitp
-               step := size / heapBitmapScale
-               for i := uintptr(0); i < n; i++ {
-                       x := uint32(*bitp)
-                       if x&bitMarked != 0 {
-                               x &^= bitMarked
-                       } else {
-                               x = 0
-                               f(base + i*size)
-                       }
-                       *bitp = uint8(x)
-                       bitp = subtractb(bitp, step)
-               }
+// oneBitCount is indexed by byte and produces the
+// number of 1 bits in that byte. For example 128 has 1 bit set
+// and oneBitCount[128] will hold 1.
+var oneBitCount = [256]uint8{
+       0, 1, 1, 2, 1, 2, 2, 3,
+       1, 2, 2, 3, 2, 3, 3, 4,
+       1, 2, 2, 3, 2, 3, 3, 4,
+       2, 3, 3, 4, 3, 4, 4, 5,
+       1, 2, 2, 3, 2, 3, 3, 4,
+       2, 3, 3, 4, 3, 4, 4, 5,
+       2, 3, 3, 4, 3, 4, 4, 5,
+       3, 4, 4, 5, 4, 5, 5, 6,
+       1, 2, 2, 3, 2, 3, 3, 4,
+       2, 3, 3, 4, 3, 4, 4, 5,
+       2, 3, 3, 4, 3, 4, 4, 5,
+       3, 4, 4, 5, 4, 5, 5, 6,
+       2, 3, 3, 4, 3, 4, 4, 5,
+       3, 4, 4, 5, 4, 5, 5, 6,
+       3, 4, 4, 5, 4, 5, 5, 6,
+       4, 5, 5, 6, 5, 6, 6, 7,
+       1, 2, 2, 3, 2, 3, 3, 4,
+       2, 3, 3, 4, 3, 4, 4, 5,
+       2, 3, 3, 4, 3, 4, 4, 5,
+       3, 4, 4, 5, 4, 5, 5, 6,
+       2, 3, 3, 4, 3, 4, 4, 5,
+       3, 4, 4, 5, 4, 5, 5, 6,
+       3, 4, 4, 5, 4, 5, 5, 6,
+       4, 5, 5, 6, 5, 6, 6, 7,
+       2, 3, 3, 4, 3, 4, 4, 5,
+       3, 4, 4, 5, 4, 5, 5, 6,
+       3, 4, 4, 5, 4, 5, 5, 6,
+       4, 5, 5, 6, 5, 6, 6, 7,
+       3, 4, 4, 5, 4, 5, 5, 6,
+       4, 5, 5, 6, 5, 6, 6, 7,
+       4, 5, 5, 6, 5, 6, 6, 7,
+       5, 6, 6, 7, 6, 7, 7, 8}
+
+// countFree runs through the mark bits in a span and counts the number of free objects
+// in the span.
+// TODO:(rlh) Use popcount intrinsic.
+func (s *mspan) countFree() int {
+       count := 0
+       maxIndex := s.nelems / 8
+       for i := uintptr(0); i < maxIndex; i++ {
+               count += int(oneBitCount[s.gcmarkBits[i]])
+       }
  
-       case size%(4*sys.PtrSize) == 2*sys.PtrSize:
-               // Mark bit is in first word of each object,
-               // but every other object starts halfway through a heap bitmap byte.
-               // Unroll loop 2x to handle alternating shift count and step size.
-               bitp := h.bitp
-               step := size / heapBitmapScale
-               var i uintptr
-               for i = uintptr(0); i < n; i += 2 {
-                       x := uint32(*bitp)
-                       if x&bitMarked != 0 {
-                               x &^= bitMarked
-                       } else {
-                               x &^= bitMarked | bitPointer | (bitMarked|bitPointer)<<heapBitsShift
-                               f(base + i*size)
-                               if size > 2*sys.PtrSize {
-                                       x = 0
-                               }
-                       }
-                       *bitp = uint8(x)
-                       if i+1 >= n {
-                               break
-                       }
-                       bitp = subtractb(bitp, step)
-                       x = uint32(*bitp)
-                       if x&(bitMarked<<(2*heapBitsShift)) != 0 {
-                               x &^= bitMarked << (2 * heapBitsShift)
-                       } else {
-                               x &^= (bitMarked|bitPointer)<<(2*heapBitsShift) | (bitMarked|bitPointer)<<(3*heapBitsShift)
-                               f(base + (i+1)*size)
-                               if size > 2*sys.PtrSize {
-                                       *subtract1(bitp) = 0
-                               }
-                       }
-                       *bitp = uint8(x)
-                       bitp = subtractb(bitp, step+1)
-               }
+       if bitsInLastByte := s.nelems % 8; bitsInLastByte != 0 {
+               markBits := uint8(s.gcmarkBits[maxIndex])
+               mask := uint8((1 << bitsInLastByte) - 1)
+               bits := markBits & mask
+               count += int(oneBitCount[bits])
         }
+       return int(s.nelems) - count
  }
  
  // heapBitsSetType records that the new allocation [x, x+size)
@@ -739,7 +936,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
         // size is sizeof(_defer{}) (at least 6 words) and dataSize may be
         // arbitrarily larger.
         //
-       // The checks for size == ptrSize and size == 2*ptrSize can therefore
+       // The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore
         // assume that dataSize == size without checking it explicitly.
  
         if sys.PtrSize == 8 && size == sys.PtrSize {
@@ -779,10 +976,13 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
                         // (In general the number of instances of typ being allocated is
                         // dataSize/typ.size.)
                         if sys.PtrSize == 4 && dataSize == sys.PtrSize {
-                               // 1 pointer.
+                               // 1 pointer object. On 32-bit machines clear the bit for the
+                               // unused second word.
                                 if gcphase == _GCoff {
+                                       *h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
                                         *h.bitp |= bitPointer << h.shift
                                 } else {
+                                       atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
                                         atomic.Or8(h.bitp, bitPointer<<h.shift)
                                 }
                         } else {
@@ -795,7 +995,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
                         }
                         return
                 }
-               // Otherwise typ.size must be 2*ptrSize, and typ.kind&kindGCProg == 0.
+               // Otherwise typ.size must be 2*sys.PtrSize,
+               // and typ.kind&kindGCProg == 0.
                 if doubleCheck {
                         if typ.size != 2*sys.PtrSize || typ.kind&kindGCProg != 0 {
                                 print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
@@ -805,8 +1006,19 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
                 b := uint32(*ptrmask)
                 hb := b & 3
                 if gcphase == _GCoff {
+                       // bitPointer == 1, bitMarked is 1 << 4, heapBitsShift is 1.
+                       // 110011 is shifted by h.shift and complemented.
+                       // This clears out the bits that are about to be
+                       // ored into *h.bitp in the next instruction.
+                       *h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
                         *h.bitp |= uint8(hb << h.shift)
                 } else {
+                       // TODO:(rlh) since the GC is not concurrently setting the
+                       // mark bits in the heap map anymore and malloc
+                       // owns the span we are allocating in why does this have
+                       // to be atomic?
+
+                       atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
                         atomic.Or8(h.bitp, uint8(hb<<h.shift))
                 }
                 return
@@ -920,8 +1132,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
                         // Replicate ptrmask to fill entire pbits uintptr.
                         // Doubling and truncating is fewer steps than
                         // iterating by nb each time. (nb could be 1.)
-                       // Since we loaded typ.ptrdata/ptrSize bits
-                       // but are pretending to have typ.size/ptrSize,
+                       // Since we loaded typ.ptrdata/sys.PtrSize bits
+                       // but are pretending to have typ.size/sys.PtrSize,
                         // there might be no replication necessary/possible.
                         pbits = b
                         endnb = nb
@@ -1012,13 +1224,15 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
                 // not with its mark bit. Since there is only one allocation
                 // from a given span at a time, we should be able to set
                 // these bits non-atomically. Not worth the risk right now.
-               hb = (b & 3) << (2 * heapBitsShift)
+               hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift)
                 b >>= 2
                 nb -= 2
                 // Note: no bitMarker in hb because the first two words don't get markers from us.
                 if gcphase == _GCoff {
+                       *hbitp &^= uint8((bitPointer | (bitPointer << heapBitsShift)) << (2 * heapBitsShift))
                         *hbitp |= uint8(hb)
                 } else {
+                       atomic.And8(hbitp, ^(uint8(bitPointer|bitPointer<<heapBitsShift) << (2 * heapBitsShift)))
                         atomic.Or8(hbitp, uint8(hb))
                 }
                 hbitp = subtract1(hbitp)
@@ -1208,6 +1422,41 @@ Phase4:
         }
  }
  
+// heapBitsSetTypeNoScan marks x as noscan. For objects with 1 or 2
+// words set their bitPointers to off (0).
+// All other objects have the first 3 bitPointers set to
+// off (0) and the scan word in the third word
+// also set to off (0).
+func heapBitsSetTypeNoScan(x, size uintptr) {
+       h := heapBitsForAddr(uintptr(x))
+       bitp := h.bitp
+
+       if sys.PtrSize == 8 && size == sys.PtrSize {
+               // If this is truly noScan the tinyAlloc logic should have noticed
+               // and combined such objects.
+               throw("noscan object is too small")
+       } else if size%(4*sys.PtrSize) == 0 {
+               *bitp &^= bitPointer | bitPointer<<heapBitsShift | (bitMarked|bitPointer)<<(2*heapBitsShift)
+       } else if size%(4*sys.PtrSize) == 2*sys.PtrSize {
+               if h.shift == 0 {
+                       *bitp &^= (bitPointer | bitPointer<<heapBitsShift)
+                       if size > 2*sys.PtrSize {
+                               *bitp &^= (bitPointer | bitMarked) << (2 * heapBitsShift)
+                       }
+               } else if h.shift == 2 {
+                       *bitp &^= bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
+                       if size > 2*sys.PtrSize {
+                               bitp = subtract1(bitp)
+                               *bitp &^= bitPointer | bitMarked
+                       }
+               } else {
+                       throw("Type has unrecognized size")
+               }
+       } else {
+               throw("Type has unrecognized size")
+       }
+}
+
  var debugPtrmask struct {
         lock mutex
         data *byte
@@ -1301,7 +1550,7 @@ func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize u
  
  // progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
  // size the size of the region described by prog, in bytes.
-// The resulting bitvector will have no more than size/ptrSize bits.
+// The resulting bitvector will have no more than size/sys.PtrSize bits.
  func progToPointerMask(prog *byte, size uintptr) bitvector {
         n := (size/sys.PtrSize + 7) / 8
         x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
@@ -1437,7 +1686,7 @@ Run:
                 // into a register and use that register for the entire loop
                 // instead of repeatedly reading from memory.
                 // Handling fewer than 8 bits here makes the general loop simpler.
-               // The cutoff is ptrSize*8 - 7 to guarantee that when we add
+               // The cutoff is sys.PtrSize*8 - 7 to guarantee that when we add
                 // the pattern to a bit buffer holding at most 7 bits (a partial byte)
                 // it will not overflow.
                 src := dst
@@ -1732,7 +1981,7 @@ func getgcmask(ep interface{}) (mask []byte) {
                         if hbits.isPointer() {
                                 mask[i/sys.PtrSize] = 1
                         }
-                       if i >= 2*sys.PtrSize && !hbits.isMarked() {
+                       if i >= 2*sys.PtrSize && !hbits.morePointers() {
                                 mask = mask[:i/sys.PtrSize]
                                 break
                         }
diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go

index 2230c5c200d2cbd0de19001c13597419f0b886d7..5938e53ca8ced0115c45d0b67367ac14613cca17 100644 (file)
--- a/src/runtime/mcache.go
+++ b/src/runtime/mcache.go
@@ -108,9 +108,11 @@ func (c *mcache) refill(sizeclass int32) *mspan {
         _g_.m.locks++
         // Return the current cached span to the central lists.
         s := c.alloc[sizeclass]
-       if s.freelist.ptr() != nil {
-               throw("refill on a nonempty span")
+
+       if uintptr(s.allocCount) != s.nelems {
+               throw("refill of span with free space remaining")
         }
+
         if s != &emptymspan {
                 s.incache = false
         }
@@ -120,10 +122,11 @@ func (c *mcache) refill(sizeclass int32) *mspan {
         if s == nil {
                 throw("out of memory")
         }
-       if s.freelist.ptr() == nil {
-               println(s.ref, (s.npages<<_PageShift)/s.elemsize)
-               throw("empty span")
+
+       if uintptr(s.allocCount) == s.nelems {
+               throw("span has no free space")
         }
+
         c.alloc[sizeclass] = s
         _g_.m.locks--
         return s
diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go

index 4f0b86c228cbbbc4480bcac0069a027835b12ccf..bbbfb18fbf88a5ce9e9f2aa22d857dc0ab94a9bc 100644 (file)
--- a/src/runtime/mcentral.go
+++ b/src/runtime/mcentral.go
@@ -18,7 +18,7 @@ import "runtime/internal/atomic"
  type mcentral struct {
         lock      mutex
         sizeclass int32
-       nonempty  mSpanList // list of spans with a free object
+       nonempty  mSpanList // list of spans with a free object, i.e. a nonempty free list
         empty     mSpanList // list of spans with no free objects (or cached in an mcache)
  }
  
@@ -67,7 +67,9 @@ retry:
                         c.empty.insertBack(s)
                         unlock(&c.lock)
                         s.sweep(true)
-                       if s.freelist.ptr() != nil {
+                       freeIndex := s.nextFreeIndex()
+                       if freeIndex != s.nelems {
+                               s.freeindex = freeIndex
                                 goto havespan
                         }
                         lock(&c.lock)
@@ -98,11 +100,11 @@ retry:
         // c is unlocked.
  havespan:
         cap := int32((s.npages << _PageShift) / s.elemsize)
-       n := cap - int32(s.ref)
-       if n == 0 {
-               throw("empty span")
+       n := cap - int32(s.allocCount)
+       if n == 0 || s.freeindex == s.nelems || uintptr(s.allocCount) == s.nelems {
+               throw("span has no free objects")
         }
-       usedBytes := uintptr(s.ref) * s.elemsize
+       usedBytes := uintptr(s.allocCount) * s.elemsize
         if usedBytes > 0 {
                 reimburseSweepCredit(usedBytes)
         }
@@ -115,10 +117,16 @@ havespan:
                 // heap_live changed.
                 gcController.revise()
         }
-       if s.freelist.ptr() == nil {
-               throw("freelist empty")
-       }
         s.incache = true
+       freeByteBase := s.freeindex &^ (64 - 1)
+       whichByte := freeByteBase / 8
+       // Init alloc bits cache.
+       s.refillAllocCache(whichByte)
+
+       // Adjust the allocCache so that s.freeindex corresponds to the low bit in
+       // s.allocCache.
+       s.allocCache >>= s.freeindex % 64
+
         return s
  }
  
@@ -128,12 +136,12 @@ func (c *mcentral) uncacheSpan(s *mspan) {
  
         s.incache = false
  
-       if s.ref == 0 {
-               throw("uncaching full span")
+       if s.allocCount == 0 {
+               throw("uncaching span but s.allocCount == 0")
         }
  
         cap := int32((s.npages << _PageShift) / s.elemsize)
-       n := cap - int32(s.ref)
+       n := cap - int32(s.allocCount)
         if n > 0 {
                 c.empty.remove(s)
                 c.nonempty.insert(s)
@@ -144,23 +152,19 @@ func (c *mcentral) uncacheSpan(s *mspan) {
         unlock(&c.lock)
  }
  
-// Free n objects from a span s back into the central free list c.
-// Called during sweep.
-// Returns true if the span was returned to heap. Sets sweepgen to
-// the latest generation.
-// If preserve=true, don't return the span to heap nor relink in MCentral lists;
-// caller takes care of it.
-func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, preserve bool) bool {
+// freeSpan updates c and s after sweeping s.
+// It sets s's sweepgen to the latest generation,
+// and, based on the number of free objects in s,
+// moves s to the appropriate list of c or returns it
+// to the heap.
+// freeSpan returns true if s was returned to the heap.
+// If preserve=true, it does not move s (the caller
+// must take care of it).
+func (c *mcentral) freeSpan(s *mspan, start gclinkptr, end gclinkptr, preserve bool, wasempty bool) bool {
         if s.incache {
-               throw("freespan into cached span")
+               throw("freeSpan given cached span")
         }
  
-       // Add the objects back to s's free list.
-       wasempty := s.freelist.ptr() == nil
-       end.ptr().next = s.freelist
-       s.freelist = start
-       s.ref -= uint16(n)
-
         if preserve {
                 // preserve is set only when called from MCentral_CacheSpan above,
                 // the span must be in the empty list.
@@ -185,21 +189,19 @@ func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, p
         // lock of c above.)
         atomic.Store(&s.sweepgen, mheap_.sweepgen)
  
-       if s.ref != 0 {
+       if s.allocCount != 0 {
                 unlock(&c.lock)
                 return false
         }
  
-       // s is completely freed, return it to the heap.
         c.nonempty.remove(s)
         s.needzero = 1
-       s.freelist = 0
         unlock(&c.lock)
         mheap_.freeSpan(s, 0)
         return true
  }
  
-// Fetch a new span from the heap and carve into objects for the free list.
+// grow allocates a new empty span from the heap and initializes it for c's size class.
  func (c *mcentral) grow() *mspan {
         npages := uintptr(class_to_allocnpages[c.sizeclass])
         size := uintptr(class_to_size[c.sizeclass])
@@ -210,21 +212,9 @@ func (c *mcentral) grow() *mspan {
                 return nil
         }
  
-       p := uintptr(s.start << _PageShift)
+       p := s.base()
         s.limit = p + size*n
-       head := gclinkptr(p)
-       tail := gclinkptr(p)
-       // i==0 iteration already done
-       for i := uintptr(1); i < n; i++ {
-               p += size
-               tail.ptr().next = gclinkptr(p)
-               tail = gclinkptr(p)
-       }
-       if s.freelist.ptr() != nil {
-               throw("freelist not empty")
-       }
-       tail.ptr().next = 0
-       s.freelist = head
-       heapBitsForSpan(s.base()).initSpan(s.layout())
+
+       heapBitsForSpan(s.base()).initSpan(s)
         return s
  }
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go

index b5a9ff9b5681188864b1871e92eefc9db7dc45e0..3704164527b69d363dd2e22268dc1a7167787367 100644 (file)
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -360,7 +360,7 @@ func markrootSpans(gcw *gcWork, shard int) {
                         // retain everything it points to.
                         spf := (*specialfinalizer)(unsafe.Pointer(sp))
                         // A finalizer can be set for an inner byte of an object, find object beginning.
-                       p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize
+                       p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize
  
                         // Mark everything that can be reached from
                         // the object (but *not* the object itself or
@@ -962,7 +962,10 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
                 if blocking {
                         b = gcw.get()
                 } else {
-                       b = gcw.tryGet()
+                       b = gcw.tryGetFast()
+                       if b == 0 {
+                               b = gcw.tryGet()
+                       }
                 }
                 if b == 0 {
                         // work barrier reached or tryGet failed.
@@ -1025,7 +1028,11 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 {
                 //         PREFETCH(wbuf->obj[wbuf.nobj - 3];
                 //  }
                 //
-               b := gcw.tryGet()
+               b := gcw.tryGetFast()
+               if b == 0 {
+                       b = gcw.tryGet()
+               }
+
                 if b == 0 {
                         break
                 }
@@ -1175,9 +1182,9 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork
         if obj&(sys.PtrSize-1) != 0 {
                 throw("greyobject: obj not pointer-aligned")
         }
-
+       mbits := span.markBitsForAddr(obj)
         if useCheckmark {
-               if !hbits.isMarked() {
+               if !mbits.isMarked() {
                         printlock()
                         print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
                         print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")
@@ -1199,10 +1206,10 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork
                 }
         } else {
                 // If marked we have nothing to do.
-               if hbits.isMarked() {
+               if mbits.isMarked() {
                         return
                 }
-               hbits.setMarked()
+               mbits.setMarked()
  
                 // If this is a noscan object, fast-track it to black
                 // instead of greying it.
@@ -1218,8 +1225,9 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork
         // Previously we put the obj in an 8 element buffer that is drained at a rate
         // to give the PREFETCH time to do its work.
         // Use of PREFETCHNTA might be more appropriate than PREFETCH
-
-       gcw.put(obj)
+       if !gcw.putFast(obj) {
+               gcw.put(obj)
+       }
  }
  
  // gcDumpObject dumps the contents of obj for debugging and marks the
@@ -1274,7 +1282,7 @@ func gcmarknewobject(obj, size, scanSize uintptr) {
         if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen.
                 throw("gcmarknewobject called while doing checkmark")
         }
-       heapBitsForAddr(obj).setMarked()
+       markBitsForAddr(obj).setMarked()
         gcw := &getg().m.p.ptr().gcw
         gcw.bytesMarked += uint64(size)
         gcw.scanWork += int64(scanSize)
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go

index 31d1a801831790ac1dbbb3f41d9191fe4b4f8fdd..9316cc6f499dcaebda3a9c83b638da536cd26de1 100644 (file)
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -8,7 +8,6 @@ package runtime
  
  import (
         "runtime/internal/atomic"
-       "runtime/internal/sys"
         "unsafe"
  )
  
@@ -192,16 +191,13 @@ func (s *mspan) sweep(preserve bool) bool {
         c := _g_.m.mcache
         freeToHeap := false
  
-       // Mark any free objects in this span so we don't collect them.
-       sstart := uintptr(s.start << _PageShift)
-       for link := s.freelist; link.ptr() != nil; link = link.ptr().next {
-               if uintptr(link) < sstart || s.limit <= uintptr(link) {
-                       // Free list is corrupted.
-                       dumpFreeList(s)
-                       throw("free list corrupted")
-               }
-               heapBitsForAddr(uintptr(link)).setMarkedNonAtomic()
-       }
+       // The allocBits indicate which unmarked objects don't need to be
+       // processed since they were free at the end of the last GC cycle
+       // and were not allocated since then.
+       // If the allocBits index is >= s.freeindex and the bit
+       // is not marked then the object remains unallocated
+       // since the last GC.
+       // This situation is analogous to being on a freelist.
  
         // Unlink & free special records for any objects we're about to free.
         // Two complications here:
@@ -215,17 +211,17 @@ func (s *mspan) sweep(preserve bool) bool {
         special := *specialp
         for special != nil {
                 // A finalizer can be set for an inner byte of an object, find object beginning.
-               p := uintptr(s.start<<_PageShift) + uintptr(special.offset)/size*size
-               hbits := heapBitsForAddr(p)
-               if !hbits.isMarked() {
+               p := s.base() + uintptr(special.offset)/size*size
+               mbits := s.markBitsForAddr(p)
+               if !mbits.isMarked() {
                         // This object is not marked and has at least one special record.
                         // Pass 1: see if it has at least one finalizer.
                         hasFin := false
-                       endOffset := p - uintptr(s.start<<_PageShift) + size
+                       endOffset := p - s.base() + size
                         for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next {
                                 if tmp.kind == _KindSpecialFinalizer {
                                         // Stop freeing of object if it has a finalizer.
-                                       hbits.setMarkedNonAtomic()
+                                       mbits.setMarkedNonAtomic()
                                         hasFin = true
                                         break
                                 }
@@ -234,7 +230,7 @@ func (s *mspan) sweep(preserve bool) bool {
                         for special != nil && uintptr(special.offset) < endOffset {
                                 // Find the exact byte for which the special was setup
                                 // (as opposed to object beginning).
-                               p := uintptr(s.start<<_PageShift) + uintptr(special.offset)
+                               p := s.base() + uintptr(special.offset)
                                 if special.kind == _KindSpecialFinalizer || !hasFin {
                                         // Splice out special record.
                                         y := special
@@ -255,49 +251,24 @@ func (s *mspan) sweep(preserve bool) bool {
                 }
         }
  
-       // Sweep through n objects of given size starting at p.
-       // This thread owns the span now, so it can manipulate
-       // the block bitmap without atomic operations.
+       // Count the number of free objects in this span.
+       nfree = s.countFree()
+       if cl == 0 && nfree != 0 {
+               s.needzero = 1
+               freeToHeap = true
+       }
  
-       size, n, _ := s.layout()
-       heapBitsSweepSpan(s.base(), size, n, func(p uintptr) {
-               // At this point we know that we are looking at garbage object
-               // that needs to be collected.
-               if debug.allocfreetrace != 0 {
-                       tracefree(unsafe.Pointer(p), size)
-               }
-               if msanenabled {
-                       msanfree(unsafe.Pointer(p), size)
-               }
+       s.allocCount = uint16(s.nelems) - uint16(nfree)
+       wasempty := s.nextFreeIndex() == s.nelems
  
-               // Reset to allocated+noscan.
-               if cl == 0 {
-                       // Free large span.
-                       if preserve {
-                               throw("can't preserve large span")
-                       }
-                       s.needzero = 1
+       s.freeindex = 0 // reset allocation index to start of span.
  
-                       // Free the span after heapBitsSweepSpan
-                       // returns, since it's not done with the span.
-                       freeToHeap = true
-               } else {
-                       // Free small object.
-                       if size > 2*sys.PtrSize {
-                               *(*uintptr)(unsafe.Pointer(p + sys.PtrSize)) = uintptrMask & 0xdeaddeaddeaddead // mark as "needs to be zeroed"
-                       } else if size > sys.PtrSize {
-                               *(*uintptr)(unsafe.Pointer(p + sys.PtrSize)) = 0
-                       }
-                       if head.ptr() == nil {
-                               head = gclinkptr(p)
-                       } else {
-                               end.ptr().next = gclinkptr(p)
-                       }
-                       end = gclinkptr(p)
-                       end.ptr().next = gclinkptr(0x0bade5)
-                       nfree++
-               }
-       })
+       // Swap role of allocBits with gcmarkBits
+       // Clear gcmarkBits in preparation for next GC
+       s.allocBits, s.gcmarkBits = s.gcmarkBits, s.allocBits
+       s.clearGCMarkBits() // prepare for next GC
+       // Initialize alloc bits cache.
+       s.refillAllocCache(0)
  
         // We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
         // because of the potential for a concurrent free/SetFinalizer.
@@ -311,11 +282,15 @@ func (s *mspan) sweep(preserve bool) bool {
                         print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
                         throw("MSpan_Sweep: bad span state after sweep")
                 }
+               // Serialization point.
+               // At this point the mark bits are cleared and allocation is ready
+               // to go, so release the span.
                 atomic.Store(&s.sweepgen, sweepgen)
         }
-       if nfree > 0 {
+
+       if nfree > 0 && cl != 0 {
                 c.local_nsmallfree[cl] += uintptr(nfree)
-               res = mheap_.central[cl].mcentral.freeSpan(s, int32(nfree), head, end, preserve)
+               res = mheap_.central[cl].mcentral.freeSpan(s, head, end, preserve, wasempty)
                 // MCentral_FreeSpan updates sweepgen
         } else if freeToHeap {
                 // Free large span to heap
@@ -336,7 +311,7 @@ func (s *mspan) sweep(preserve bool) bool {
                 // implement and then call some kind of MHeap_DeleteSpan.
                 if debug.efence > 0 {
                         s.limit = 0 // prevent mlookup from finding this span
-                       sysFault(unsafe.Pointer(uintptr(s.start<<_PageShift)), size)
+                       sysFault(unsafe.Pointer(s.base()), size)
                 } else {
                         mheap_.freeSpan(s, 1)
                 }
@@ -399,27 +374,3 @@ func reimburseSweepCredit(unusableBytes uintptr) {
                 throw("spanBytesAlloc underflow")
         }
  }
-
-func dumpFreeList(s *mspan) {
-       printlock()
-       print("runtime: free list of span ", s, ":\n")
-       sstart := uintptr(s.start << _PageShift)
-       link := s.freelist
-       for i := 0; i < int(s.npages*_PageSize/s.elemsize); i++ {
-               if i != 0 {
-                       print(" -> ")
-               }
-               print(hex(link))
-               if link.ptr() == nil {
-                       break
-               }
-               if uintptr(link) < sstart || s.limit <= uintptr(link) {
-                       // Bad link. Stop walking before we crash.
-                       print(" (BAD)")
-                       break
-               }
-               link = link.ptr().next
-       }
-       print("\n")
-       printunlock()
-}
diff --git a/src/runtime/mgcwork.go b/src/runtime/mgcwork.go

index 63a3ade3a663f1158b44ae53985db37a6aba2654..d04840b68671a6ada200d600c55ba20e8d70a7b3 100644 (file)
--- a/src/runtime/mgcwork.go
+++ b/src/runtime/mgcwork.go
@@ -116,6 +116,22 @@ func (w *gcWork) put(obj uintptr) {
         wbuf.nobj++
  }
  
+// putFast does a put and returns true if it can be done quickly;
+// otherwise it returns false and the caller needs to call put.
+//go:nowritebarrier
+func (w *gcWork) putFast(obj uintptr) bool {
+       wbuf := w.wbuf1.ptr()
+       if wbuf == nil {
+               return false
+       } else if wbuf.nobj == len(wbuf.obj) {
+               return false
+       }
+
+       wbuf.obj[wbuf.nobj] = obj
+       wbuf.nobj++
+       return true
+}
+
  // tryGet dequeues a pointer for the garbage collector to trace.
  //
  // If there are no pointers remaining in this gcWork or in the global
@@ -147,6 +163,23 @@ func (w *gcWork) tryGet() uintptr {
         return wbuf.obj[wbuf.nobj]
  }
  
+// tryGetFast dequeues a pointer for the garbage collector to trace
+// if one is readily available. Otherwise it returns 0 and
+// the caller is expected to call tryGet().
+//go:nowritebarrier
+func (w *gcWork) tryGetFast() uintptr {
+       wbuf := w.wbuf1.ptr()
+       if wbuf == nil {
+               return 0
+       }
+       if wbuf.nobj == 0 {
+               return 0
+       }
+
+       wbuf.nobj--
+       return wbuf.obj[wbuf.nobj]
+}
+
  // get dequeues a pointer for the garbage collector to trace, blocking
  // if necessary to ensure all pointers from all queues and caches have
  // been retrieved.  get returns 0 if there are no pointers remaining.
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go

index 99f7b54fc8ebe82029b01391f7999f493145ccab..1333dd696bbaa5a6949cf3e76719e9224cfac53c 100644 (file)
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -116,10 +116,50 @@ type mspan struct {
         next *mspan     // next span in list, or nil if none
         prev **mspan    // previous span's next field, or list head's first field if none
         list *mSpanList // For debugging. TODO: Remove.
+       //TODO:(rlh) Eliminate start field and use startAddr >> PageShift instead.
+       startAddr     uintptr   // uintptr(s.start << _PageShift) aka s.base()
+       start         pageID    // starting page number
+       npages        uintptr   // number of pages in span
+       stackfreelist gclinkptr // list of free stacks, avoids overloading freelist
+
+       // freeindex is the slot index between 0 and nelems at which to begin scanning
+       // for the next free object in this span.
+       // Each allocation scans allocBits starting at freeindex until it encounters a 0
+       // indicating a free object. freeindex is then adjusted so that subsequent scans begin
+       // just past the newly discovered free object.
+       //
+       // If freeindex == nelems, this span has no free objects.
+       //
+       // allocBits is a bitmap of objects in this span.
+       // If n >= freeindex and allocBits[n/8] & (1<<(n%8)) is 0
+       // then object n is free;
+       // otherwise, object n is allocated. Bits starting at nelems are
+       // undefined and should never be referenced.
+       //
+       // Object n starts at address n*elemsize + (start << _PageShift).
+       freeindex uintptr
+       // TODO: Look up nelems from sizeclass and remove this field if it
+       // helps performance.
+       nelems uintptr // number of objects in the span.
+
+       // Cache of the allocBits at freeindex. allocCache is shifted
+       // such that the lowest bit corresponds to the bit freeindex.
+       // allocCache holds the complement of allocBits, thus allowing
+       // ctz64 (count trailing zero) to use it directly.
+       // allocCache may contain bits beyond s.nelems; the caller must ignore
+       // these.
+       allocCache uint64
+       allocBits  *[maxObjsPerSpan / 8]uint8
+       gcmarkBits *[maxObjsPerSpan / 8]uint8
+
+       // allocBits and gcmarkBits currently point to either markbits1
+       // or markbits2. At the end of a GC cycle allocBits and
+       // gcmarkBits swap roles simply by swapping pointers.
+       // This level of indirection also facilitates an implementation
+       // where markbits1 and markbits2 are not inlined in mspan.
+       markbits1 [maxObjsPerSpan / 8]uint8 // A bit for each obj.
+       markbits2 [maxObjsPerSpan / 8]uint8 // A bit for each obj.
  
-       start    pageID    // starting page number
-       npages   uintptr   // number of pages in span
-       freelist gclinkptr // list of free objects
         // sweep generation:
         // if sweepgen == h->sweepgen - 2, the span needs sweeping
         // if sweepgen == h->sweepgen - 1, the span is currently being swept
@@ -128,7 +168,7 @@ type mspan struct {
  
         sweepgen    uint32
         divMul      uint32   // for divide by elemsize - divMagic.mul
-       ref         uint16   // capacity - number of objects in freelist
+       allocCount  uint16   // number of allocated objects
         sizeclass   uint8    // size class
         incache     bool     // being used by an mcache
         state       uint8    // mspaninuse etc
@@ -145,7 +185,7 @@ type mspan struct {
  }
  
  func (s *mspan) base() uintptr {
-       return uintptr(s.start << _PageShift)
+       return s.startAddr
  }
  
  func (s *mspan) layout() (size, n, total uintptr) {
@@ -261,7 +301,7 @@ func mlookup(v uintptr, base *uintptr, size *uintptr, sp **mspan) int32 {
                 return 0
         }
  
-       p := uintptr(s.start) << _PageShift
+       p := s.base()
         if s.sizeclass == 0 {
                 // Large object.
                 if base != nil {
@@ -440,8 +480,7 @@ func (h *mheap) alloc_m(npage uintptr, sizeclass int32, large bool) *mspan {
                 // able to map interior pointer to containing span.
                 atomic.Store(&s.sweepgen, h.sweepgen)
                 s.state = _MSpanInUse
-               s.freelist = 0
-               s.ref = 0
+               s.allocCount = 0
                 s.sizeclass = uint8(sizeclass)
                 if sizeclass == 0 {
                         s.elemsize = s.npages << _PageShift
@@ -504,7 +543,7 @@ func (h *mheap) alloc(npage uintptr, sizeclass int32, large bool, needzero bool)
  
         if s != nil {
                 if needzero && s.needzero != 0 {
-                       memclr(unsafe.Pointer(s.start<<_PageShift), s.npages<<_PageShift)
+                       memclr(unsafe.Pointer(s.base()), s.npages<<_PageShift)
                 }
                 s.needzero = 0
         }
@@ -520,8 +559,8 @@ func (h *mheap) allocStack(npage uintptr) *mspan {
         s := h.allocSpanLocked(npage)
         if s != nil {
                 s.state = _MSpanStack
-               s.freelist = 0
-               s.ref = 0
+               s.stackfreelist = 0
+               s.allocCount = 0
                 memstats.stacks_inuse += uint64(s.npages << _PageShift)
         }
  
@@ -572,7 +611,7 @@ HaveSpan:
                 throw("still in list")
         }
         if s.npreleased > 0 {
-               sysUsed(unsafe.Pointer(s.start<<_PageShift), s.npages<<_PageShift)
+               sysUsed(unsafe.Pointer(s.base()), s.npages<<_PageShift)
                 memstats.heap_released -= uint64(s.npreleased << _PageShift)
                 s.npreleased = 0
         }
@@ -715,6 +754,12 @@ func (h *mheap) freeSpan(s *mspan, acct int32) {
                 mp.mcache.local_scan = 0
                 memstats.tinyallocs += uint64(mp.mcache.local_tinyallocs)
                 mp.mcache.local_tinyallocs = 0
+               if msanenabled {
+                       // Tell msan that this entire span is no longer in use.
+                       base := unsafe.Pointer(s.base())
+                       bytes := s.npages << _PageShift
+                       msanfree(base, bytes)
+               }
                 if acct != 0 {
                         memstats.heap_objects--
                 }
@@ -743,12 +788,12 @@ func (h *mheap) freeStack(s *mspan) {
  func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince int64) {
         switch s.state {
         case _MSpanStack:
-               if s.ref != 0 {
+               if s.allocCount != 0 {
                         throw("MHeap_FreeSpanLocked - invalid stack free")
                 }
         case _MSpanInUse:
-               if s.ref != 0 || s.sweepgen != h.sweepgen {
-                       print("MHeap_FreeSpanLocked - span ", s, " ptr ", hex(s.start<<_PageShift), " ref ", s.ref, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
+               if s.allocCount != 0 || s.sweepgen != h.sweepgen {
+                       print("MHeap_FreeSpanLocked - span ", s, " ptr ", hex(s.start<<_PageShift), " allocCount ", s.allocCount, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
                         throw("MHeap_FreeSpanLocked - invalid free")
                 }
                 h.pagesInUse -= uint64(s.npages)
@@ -782,6 +827,7 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i
                 t := h_spans[p-1]
                 if t != nil && t.state == _MSpanFree {
                         s.start = t.start
+                       s.startAddr = uintptr(s.start << _PageShift)
                         s.npages += t.npages
                         s.npreleased = t.npreleased // absorb released pages
                         s.needzero |= t.needzero
@@ -891,9 +937,9 @@ func (span *mspan) init(start pageID, npages uintptr) {
         span.prev = nil
         span.list = nil
         span.start = start
+       span.startAddr = uintptr(start << _PageShift)
         span.npages = npages
-       span.freelist = 0
-       span.ref = 0
+       span.allocCount = 0
         span.sizeclass = 0
         span.incache = false
         span.elemsize = 0
@@ -903,6 +949,17 @@ func (span *mspan) init(start pageID, npages uintptr) {
         span.speciallock.key = 0
         span.specials = nil
         span.needzero = 0
+       span.freeindex = 0
+       span.allocBits = &span.markbits1
+       span.gcmarkBits = &span.markbits2
+       // TODO: determine whether this clearing is actually needed. It
+       // happens once per span, so it isn't expensive. This is to be
+       // replaced by an arena-based system where things can be cleared
+       // all at once, so don't worry about optimizing this.
+       for i := 0; i < len(span.markbits1); i++ {
+               span.allocBits[i] = 0
+               span.gcmarkBits[i] = 0
+       }
  }
  
  func (span *mspan) inList() bool {
@@ -917,7 +974,7 @@ func (list *mSpanList) init() {
  
  func (list *mSpanList) remove(span *mspan) {
         if span.prev == nil || span.list != list {
-               println("failed MSpanList_Remove", span, span.prev, span.list, list)
+               println("runtime: failed MSpanList_Remove", span, span.prev, span.list, list)
                 throw("MSpanList_Remove")
         }
         if span.next != nil {
@@ -939,7 +996,7 @@ func (list *mSpanList) isEmpty() bool {
  
  func (list *mSpanList) insert(span *mspan) {
         if span.next != nil || span.prev != nil || span.list != nil {
-               println("failed MSpanList_Insert", span, span.next, span.prev, span.list)
+               println("runtime: failed MSpanList_Insert", span, span.next, span.prev, span.list)
                 throw("MSpanList_Insert")
         }
         span.next = list.first
diff --git a/src/runtime/msize.go b/src/runtime/msize.go

index 21fe2f4c615ca77c7f7e0482b732d62d812fc808..18577b309bda5961d411ea49d85f9f9c92db3315 100644 (file)
--- a/src/runtime/msize.go
+++ b/src/runtime/msize.go
@@ -55,7 +55,7 @@ var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8
  
  func sizeToClass(size int32) int32 {
         if size > _MaxSmallSize {
-               throw("SizeToClass - invalid size")
+               throw("invalid size")
         }
         if size > 1024-8 {
                 return int32(size_to_class128[(size-1024+127)>>7])
@@ -79,7 +79,7 @@ func initSizes() {
                         }
                 }
                 if align&(align-1) != 0 {
-                       throw("InitSizes - bug")
+                       throw("incorrect alignment")
                 }
  
                 // Make the allocnpages big enough that
@@ -106,10 +106,18 @@ func initSizes() {
                 sizeclass++
         }
         if sizeclass != _NumSizeClasses {
-               print("sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n")
-               throw("InitSizes - bad NumSizeClasses")
+               print("runtime: sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n")
+               throw("bad NumSizeClasses")
+       }
+       // Check maxObjsPerSpan => number of objects invariant.
+       for i, size := range class_to_size {
+               if size != 0 && class_to_allocnpages[i]*pageSize/size > maxObjsPerSpan {
+                       throw("span contains too many objects")
+               }
+               if size == 0 && i != 0 {
+                       throw("size is 0 but class is not 0")
+               }
         }
-
         // Initialize the size_to_class tables.
         nextsize := 0
         for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ {
@@ -128,12 +136,12 @@ func initSizes() {
                 for n := int32(0); n < _MaxSmallSize; n++ {
                         sizeclass := sizeToClass(n)
                         if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n {
-                               print("size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
+                               print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
                                 print("incorrect SizeToClass\n")
                                 goto dump
                         }
                         if sizeclass > 1 && class_to_size[sizeclass-1] >= n {
-                               print("size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
+                               print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
                                 print("SizeToClass too big\n")
                                 goto dump
                         }
@@ -155,18 +163,18 @@ func initSizes() {
  
  dump:
         if true {
-               print("NumSizeClasses=", _NumSizeClasses, "\n")
+               print("runtime: NumSizeClasses=", _NumSizeClasses, "\n")
                 print("runtime·class_to_size:")
                 for sizeclass = 0; sizeclass < _NumSizeClasses; sizeclass++ {
                         print(" ", class_to_size[sizeclass], "")
                 }
                 print("\n\n")
-               print("size_to_class8:")
+               print("runtime: size_to_class8:")
                 for i := 0; i < len(size_to_class8); i++ {
                         print(" ", i*8, "=>", size_to_class8[i], "(", class_to_size[size_to_class8[i]], ")\n")
                 }
                 print("\n")
-               print("size_to_class128:")
+               print("runtime: size_to_class128:")
                 for i := 0; i < len(size_to_class128); i++ {
                         print(" ", i*128, "=>", size_to_class128[i], "(", class_to_size[size_to_class128[i]], ")\n")
                 }
diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go

index 84a79e312cbc943bc2c62c5b09ab48a4fcbecad3..2d75d2fef120e039d9964367e2b7efadb011fd29 100644 (file)
--- a/src/runtime/mstats.go
+++ b/src/runtime/mstats.go
@@ -295,9 +295,9 @@ func updatememstats(stats *gcstats) {
                         memstats.nmalloc++
                         memstats.alloc += uint64(s.elemsize)
                 } else {
-                       memstats.nmalloc += uint64(s.ref)
-                       memstats.by_size[s.sizeclass].nmalloc += uint64(s.ref)
-                       memstats.alloc += uint64(s.ref) * uint64(s.elemsize)
+                       memstats.nmalloc += uint64(s.allocCount)
+                       memstats.by_size[s.sizeclass].nmalloc += uint64(s.allocCount)
+                       memstats.alloc += uint64(s.allocCount) * uint64(s.elemsize)
                 }
         }
         unlock(&mheap_.lock)
diff --git a/src/runtime/stack.go b/src/runtime/stack.go

index c4b1fb862e9ad0634c3a0a99d79ae22d71c4eedf..ac4efc114be89c992c678c57b8e4acceaad1d9a7 100644 (file)
--- a/src/runtime/stack.go
+++ b/src/runtime/stack.go
@@ -191,26 +191,26 @@ func stackpoolalloc(order uint8) gclinkptr {
                 if s == nil {
                         throw("out of memory")
                 }
-               if s.ref != 0 {
-                       throw("bad ref")
+               if s.allocCount != 0 {
+                       throw("bad allocCount")
                 }
-               if s.freelist.ptr() != nil {
-                       throw("bad freelist")
+               if s.stackfreelist.ptr() != nil {
+                       throw("bad stackfreelist")
                 }
                 for i := uintptr(0); i < _StackCacheSize; i += _FixedStack << order {
                         x := gclinkptr(uintptr(s.start)<<_PageShift + i)
-                       x.ptr().next = s.freelist
-                       s.freelist = x
+                       x.ptr().next = s.stackfreelist
+                       s.stackfreelist = x
                 }
                 list.insert(s)
         }
-       x := s.freelist
+       x := s.stackfreelist
         if x.ptr() == nil {
                 throw("span has no free stacks")
         }
-       s.freelist = x.ptr().next
-       s.ref++
-       if s.freelist.ptr() == nil {
+       s.stackfreelist = x.ptr().next
+       s.allocCount++
+       if s.stackfreelist.ptr() == nil {
                 // all stacks in s are allocated.
                 list.remove(s)
         }
@@ -223,14 +223,14 @@ func stackpoolfree(x gclinkptr, order uint8) {
         if s.state != _MSpanStack {
                 throw("freeing stack not in a stack span")
         }
-       if s.freelist.ptr() == nil {
+       if s.stackfreelist.ptr() == nil {
                 // s will now have a free stack
                 stackpool[order].insert(s)
         }
-       x.ptr().next = s.freelist
-       s.freelist = x
-       s.ref--
-       if gcphase == _GCoff && s.ref == 0 {
+       x.ptr().next = s.stackfreelist
+       s.stackfreelist = x
+       s.allocCount--
+       if gcphase == _GCoff && s.allocCount == 0 {
                 // Span is completely free. Return it to the heap
                 // immediately if we're sweeping.
                 //
@@ -247,7 +247,7 @@ func stackpoolfree(x gclinkptr, order uint8) {
                 //
                 // By not freeing, we prevent step #4 until GC is done.
                 stackpool[order].remove(s)
-               s.freelist = 0
+               s.stackfreelist = 0
                 mheap_.freeStack(s)
         }
  }
@@ -1136,9 +1136,9 @@ func freeStackSpans() {
                 list := &stackpool[order]
                 for s := list.first; s != nil; {
                         next := s.next
-                       if s.ref == 0 {
+                       if s.allocCount == 0 {
                                 list.remove(s)
-                               s.freelist = 0
+                               s.stackfreelist = 0
                                 mheap_.freeStack(s)
                         }
                         s = next
author	Rick Hudson <rlh@golang.org>
	Wed, 27 Apr 2016 22:19:16 +0000 (18:19 -0400)
committer	Rick Hudson <rlh@golang.org>
	Wed, 27 Apr 2016 22:46:52 +0000 (18:46 -0400)
dev.garbage	[new file with mode: 0644]	patch \| blob
src/runtime/heapdump.go		patch \| blob \| history
src/runtime/malloc.go		patch \| blob \| history
src/runtime/mbitmap.go		patch \| blob \| history
src/runtime/mcache.go		patch \| blob \| history
src/runtime/mcentral.go		patch \| blob \| history
src/runtime/mgcmark.go		patch \| blob \| history
src/runtime/mgcsweep.go		patch \| blob \| history
src/runtime/mgcwork.go		patch \| blob \| history
src/runtime/mheap.go		patch \| blob \| history
src/runtime/msize.go		patch \| blob \| history
src/runtime/mstats.go		patch \| blob \| history
src/runtime/stack.go		patch \| blob \| history