// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
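// The constants below are Go-side aliases for the shared allocator
// constants (the _-prefixed names) defined elsewhere in the runtime.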
flagNoScan = _FlagNoScan
flagNoZero = _FlagNoZero
maxTinySize = _TinySize
tinySizeClass = _TinySizeClass
maxSmallSize = _MaxSmallSize
pageShift = _PageShift
bitsPerPointer = _BitsPerPointer
pointersPerByte = _PointersPerByte
maxGCMask = _MaxGCMask
bitsPointer = _BitsPointer
bitsScalar = _BitsScalar
mSpanInUse = _MSpanInUse
concurrentSweep = _ConcurrentSweep
// Page number (address>>pageShift)
// base address for all 0-byte allocations
// Allocate an object of size bytes.
// Small objects are allocated from the per-P cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
return unsafe.Pointer(&zerobase)
if flags&flagNoScan == 0 && typ == nil {
gothrow("malloc missing type")
// This function must be atomic wrt GC, but for performance reasons
// we don't acquirem/releasem on fast path. The code below does not have
// split stack checks, so it can't be preempted by GC.
// Functions like roundup/add are inlined. And systemstack/racemalloc are nosplit.
// If debugMalloc = true, these assumptions are checked below.
if mp.mallocing != 0 {
gothrow("malloc deadlock")
mp.curg.stackguard0 = ^uintptr(0xfff) | 0xbad
if size <= maxSmallSize {
if flags&flagNoScan != 0 && size < maxTinySize {
// Tiny allocator combines several tiny allocation requests
// into a single memory block. The resulting memory block
// is freed when all subobjects are unreachable. The subobjects
// must be FlagNoScan (have no pointers); this ensures that
// the amount of potentially wasted memory is bounded.
// Size of the memory block used for combining (maxTinySize) is tunable.
// Current setting is 16 bytes, which gives at most 2x worst-case memory
// wastage (when all but one of the subobjects are unreachable).
// 8 bytes would result in no wastage at all, but provides fewer
// opportunities for combining.
// 32 bytes provides more opportunities for combining,
// but can lead to 4x worst-case wastage.
// The best-case saving is 8x regardless of block size.
// Objects obtained from the tiny allocator must not be freed explicitly.
// So when an object will be freed explicitly, we ensure that
// its size >= maxTinySize.
// SetFinalizer has a special case for objects potentially coming
// from the tiny allocator; in that case it allows finalizers to be set
// for an inner byte of a memory block.
// The main targets of the tiny allocator are small strings and
// standalone escaping variables. On a json benchmark
// the allocator reduces the number of allocations by ~12% and
// reduces the heap size by ~20%.
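// For example, with maxTinySize = 16, three consecutive noscan
// allocations of 5, 3 and 8 bytes can all be carved from one block:
// the 5-byte object sits at offset 0, the 3-byte object at offset 5
// (odd sizes need no alignment), and the 8-byte object is rounded up
// to offset 8, filling the block exactly.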
tinysize := uintptr(c.tinysize)
if size <= tinysize {
tiny := unsafe.Pointer(c.tiny)
// Align tiny pointer for required (conservative) alignment.
tiny = roundup(tiny, 8)
} else if size&3 == 0 {
tiny = roundup(tiny, 4)
} else if size&1 == 0 {
tiny = roundup(tiny, 2)
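// size1 is the requested size plus any alignment padding just applied
// to tiny; if it still fits in the current tiny block, allocate from it.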
size1 := size + (uintptr(tiny) - uintptr(unsafe.Pointer(c.tiny)))
if size1 <= tinysize {
// The object fits into existing tiny block.
c.tiny = (*byte)(add(x, size))
c.tinysize -= uintptr(size1)
if mp.mallocing == 0 {
gothrow("bad malloc")
mp.curg.stackguard0 = mp.curg.stack.lo + _StackGuard
// Note: one releasem for the acquirem just above.
// The other for the acquirem at start of malloc.
// Allocate a new maxTinySize block.
s = c.alloc[tinySizeClass]
mCache_Refill(c, tinySizeClass)
s = c.alloc[tinySizeClass]
//TODO: prefetch v.next
x = unsafe.Pointer(v)
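// Zero the fresh maxTinySize (16-byte) block so stale data from the
// free list never leaks into tiny objects.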
(*[2]uint64)(x)[0] = 0
(*[2]uint64)(x)[1] = 0
// See if we need to replace the existing tiny block with the new one
// based on amount of remaining free space.
if maxTinySize-size > tinysize {
c.tiny = (*byte)(add(x, size))
c.tinysize = uintptr(maxTinySize - size)
sizeclass = size_to_class8[(size+7)>>3]
sizeclass = size_to_class128[(size-1024+127)>>7]
size = uintptr(class_to_size[sizeclass])
s = c.alloc[sizeclass]
mCache_Refill(c, int32(sizeclass))
s = c.alloc[sizeclass]
x = unsafe.Pointer(v)
if flags&flagNoZero == 0 {
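// The first word of v held the freelist link. A non-zero second word
// means the block was recycled and still carries stale data, so it is
// cleared in full; blocks whose memory is already zero skip the memclr.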
if size > 2*ptrSize && ((*[2]uintptr)(x))[1] != 0 {
memclr(unsafe.Pointer(v), size)
c.local_cachealloc += intptr(size)
s = largeAlloc(size, uint32(flags))
x = unsafe.Pointer(uintptr(s.start << pageShift))
size = uintptr(s.elemsize)
if flags&flagNoScan != 0 {
// All objects are pre-marked as noscan.
// If allocating a defer+arg block, now that we've picked a malloc size
// large enough to hold everything, cut the "asked for" size down to
// just the defer header, so that the GC bitmap will record the arg block
// as containing nothing at all (as if it were unused space at the end of
// a malloc block caused by size rounding).
// The defer arg areas are scanned as part of scanstack.
if typ == deferType {
size0 = unsafe.Sizeof(_defer{})
// From here until the marked label we mark the object as allocated
// and store type info in the GC bitmap.
arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
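// The heap bitmap sits just below arena_start and grows toward lower
// addresses; each heap word is described by gcBits bits, with
// wordsPerBitmapByte words packed into one bitmap byte.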
off := (uintptr(x) - arena_start) / ptrSize
xbits := (*uint8)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
shift := (off % wordsPerBitmapByte) * gcBits
if debugMalloc && ((*xbits>>shift)&(bitMask|bitPtrMask)) != bitBoundary {
println("runtime: bits =", (*xbits>>shift)&(bitMask|bitPtrMask))
gothrow("bad bits in markallocated")
// It's one word and it has pointers, so it must be a pointer.
*xbits |= (bitsPointer << 2) << shift
if typ.kind&kindGCProg != 0 {
nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
masksize *= 2 // repeated
masksize = masksize * pointersPerByte / 8 // 4 bits per word
masksize++ // unroll flag in the beginning
if masksize > maxGCMask && typ.gc[1] != 0 {
// write barriers have not been updated to deal with this case yet.
gothrow("maxGCMask too small for now")
// If the mask is too large, unroll the program directly
// into the GC bitmap. It's 7 times slower than copying
// from the pre-unrolled mask, but saves 1/16 of type size
// memory for the mask.
unrollgcproginplace_m(x, typ, size, size0)
ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
// Check whether the program is already unrolled
// by checking if the unroll flag byte is set
maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask)))
if *(*uint8)(unsafe.Pointer(&maskword)) == 0 {
ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
if size == 2*ptrSize {
*xbits = *ptrmask | bitBoundary
te = uintptr(typ.size) / ptrSize
// If the type occupies an odd number of words, its mask is repeated.
// Copy pointer bitmask into the bitmap.
for i := uintptr(0); i < size0; i += 2 * ptrSize {
v := *(*uint8)(add(unsafe.Pointer(ptrmask), ti))
if i+ptrSize == size0 {
v &^= uint8(bitPtrMask << 4)
xbits = (*byte)(add(unsafe.Pointer(xbits), ^uintptr(0)))
if size0%(2*ptrSize) == 0 && size0 < size {
// Mark the word after the last object's word as bitsDead.
*xbits = bitsDead << 2
// GCmarktermination allocates black.
// All slots hold nil so no scanning is needed.
// This may be racing with GC so do it atomically if there can be
// a race marking the bit.
if gcphase == _GCmarktermination {
gcmarknewobject_m(uintptr(x))
if mp.mallocing == 0 {
gothrow("bad malloc")
mp.curg.stackguard0 = mp.curg.stack.lo + _StackGuard
// Note: one releasem for the acquirem just above.
// The other for the acquirem at start of malloc.
if debug.allocfreetrace != 0 {
tracealloc(x, size, typ)
if rate := MemProfileRate; rate > 0 {
if size < uintptr(rate) && int32(size) < c.next_sample {
c.next_sample -= int32(size)
profilealloc(mp, x, size)
if memstats.heap_alloc >= memstats.next_gc {
func loadPtrMask(typ *_type) []uint8 {
nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
if typ.kind&kindGCProg != 0 {
masksize *= 2 // repeated
masksize = masksize * pointersPerByte / 8 // 4 bits per word
masksize++ // unroll flag in the beginning
if masksize > maxGCMask && typ.gc[1] != 0 {
// write barriers have not been updated to deal with this case yet.
gothrow("maxGCMask too small for now")
ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
// Check whether the program is already unrolled
// by checking if the unroll flag byte is set
maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask)))
if *(*uint8)(unsafe.Pointer(&maskword)) == 0 {
ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
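// The unrolled mask stores 4 bits per word, so nptr words occupy
// (nptr+1)/2 bytes.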
return (*[1 << 30]byte)(unsafe.Pointer(ptrmask))[:(nptr+1)/2]
// implementation of new builtin
func newobject(typ *_type) unsafe.Pointer {
if typ.kind&kindNoPointers != 0 {
return mallocgc(uintptr(typ.size), typ, flags)
// implementation of make builtin for slices
func newarray(typ *_type, n uintptr) unsafe.Pointer {
if typ.kind&kindNoPointers != 0 {
if int(n) < 0 || (typ.size > 0 && n > _MaxMem/uintptr(typ.size)) {
panic("runtime: allocation size out of range")
return mallocgc(uintptr(typ.size)*n, typ, flags)
// rawmem returns a chunk of pointerless memory. It is
func rawmem(size uintptr) unsafe.Pointer {
return mallocgc(size, nil, flagNoScan|flagNoZero)
// round size up to next size class
func goroundupsize(size uintptr) uintptr {
if size < maxSmallSize {
return uintptr(class_to_size[size_to_class8[(size+7)>>3]])
return uintptr(class_to_size[size_to_class128[(size-1024+127)>>7]])
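// Large sizes are rounded up to a whole number of pages, guarding
// first against overflow of the addition below.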
if size+pageSize < size {
return (size + pageSize - 1) &^ pageMask
func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
rate := MemProfileRate
if size < uintptr(rate) {
// pick next profile time
// If you change this, also change allocmcache.
if rate > 0x3fffffff { // make 2*rate not overflow
next := int32(fastrand1()) % (2 * int32(rate))
// Subtract the "remainder" of the current allocation.
// Otherwise objects that are close in size to sampling rate
// will be under-sampled, because we consistently discard this remainder.
next -= (int32(size) - c.next_sample)
mProf_Malloc(x, size)
// force = 1 - do GC regardless of current heap usage
// force = 2 - do GC and eager sweep
func gogc(force int32) {
// The gc is turned off (via enablegc) until the bootstrap has completed.
// Also, malloc gets called in the guts of a number of libraries that might be
// holding locks. To avoid deadlocks during stoptheworld, don't bother
// trying to run gc while holding a lock. The next mallocgc without a lock
// will do the gc instead.
if gp := getg(); gp == mp.g0 || mp.locks > 1 || !memstats.enablegc || panicking != 0 || gcpercent < 0 {
semacquire(&worldsema, false)
if force == 0 && memstats.heap_alloc < memstats.next_gc {
// typically threads which lost the race to grab
// worldsema exit here when gc is done.
semrelease(&worldsema)
// Ok, we're doing it! Stop everybody else
startTime := nanotime()
systemstack(stoptheworld)
systemstack(finishsweep_m) // finish sweep before we start concurrent scan.
if false { // To turn on concurrent scan and mark set to true...
systemstack(starttheworld)
// Do a concurrent heap scan before we stop the world.
systemstack(gcscan_m)
systemstack(stoptheworld)
systemstack(gcinstallmarkwb_m)
systemstack(starttheworld)
systemstack(gcmark_m)
systemstack(stoptheworld)
systemstack(gcinstalloffwb_m)
if mp != acquirem() {
gothrow("gogc: rescheduled")
// Run gc on the g0 stack. We do this so that the g stack
// we're currently running on will no longer change. Cuts
// the root set down a bit (g0 stacks are not scanned, and
// we don't need to scan gc's internal state). We also
// need to switch to g0 so we can shrink the stack.
if debug.gctrace > 1 {
eagersweep := force >= 2
for i := 0; i < n; i++ {
startTime = nanotime()
// switch to g0, call gc, then switch back
gc_m(startTime, eagersweep)
gccheckmark_m(startTime, eagersweep)
semrelease(&worldsema)
systemstack(starttheworld)
// now that gc is done, kick off finalizer thread if needed
if !concurrentSweep {
// give the queued finalizers, if any, a chance to run
func GCcheckmarkenable() {
systemstack(gccheckmarkenable_m)
func GCcheckmarkdisable() {
systemstack(gccheckmarkdisable_m)
// GC runs a garbage collection.
var noptrdata struct{}
var enoptrbss struct{}
// SetFinalizer sets the finalizer associated with x to f.
// When the garbage collector finds an unreachable block
// with an associated finalizer, it clears the association and runs
// f(x) in a separate goroutine. This makes x reachable again, but
// now without an associated finalizer. Assuming that SetFinalizer
// is not called again, the next time the garbage collector sees
// that x is unreachable, it will free x.
// SetFinalizer(x, nil) clears any finalizer associated with x.
// The argument x must be a pointer to an object allocated by
// calling new or by taking the address of a composite literal.
// The argument f must be a function that takes a single argument
// to which x's type can be assigned, and can have arbitrary ignored return
// values. If either of these is not true, SetFinalizer aborts the
// Finalizers are run in dependency order: if A points at B, both have
// finalizers, and they are otherwise unreachable, only the finalizer
// for A runs; once A is freed, the finalizer for B can run.
// If a cyclic structure includes a block with a finalizer, that
// cycle is not guaranteed to be garbage collected and the finalizer
// is not guaranteed to run, because there is no ordering that
// respects the dependencies.
// The finalizer for x is scheduled to run at some arbitrary time after
// x becomes unreachable.
// There is no guarantee that finalizers will run before a program exits,
// so typically they are useful only for releasing non-memory resources
// associated with an object during a long-running program.
// For example, an os.File object could use a finalizer to close the
// associated operating system file descriptor when a program discards
// an os.File without calling Close, but it would be a mistake
// to depend on a finalizer to flush an in-memory I/O buffer such as a
// bufio.Writer, because the buffer would not be flushed at program exit.
// It is not guaranteed that a finalizer will run if the size of *x is
// It is not guaranteed that a finalizer will run for objects allocated
// in initializers for package-level variables. Such objects may be
// linker-allocated, not heap-allocated.
// A single goroutine runs all finalizers for a program, sequentially.
// If a finalizer must run for a long time, it should do so by starting
func SetFinalizer(obj interface{}, finalizer interface{}) {
e := (*eface)(unsafe.Pointer(&obj))
gothrow("runtime.SetFinalizer: first argument is nil")
if etyp.kind&kindMask != kindPtr {
gothrow("runtime.SetFinalizer: first argument is " + *etyp._string + ", not pointer")
ot := (*ptrtype)(unsafe.Pointer(etyp))
gothrow("nil elem type!")
// find the containing object
_, base, _ := findObject(e.data)
// 0-length objects are okay.
if e.data == unsafe.Pointer(&zerobase) {
// Global initializers might be linker-allocated.
// var Foo = &Object{}
// runtime.SetFinalizer(Foo, nil)
// The segments are, in order: text, rodata, noptrdata, data, bss, noptrbss.
if uintptr(unsafe.Pointer(&noptrdata)) <= uintptr(e.data) && uintptr(e.data) < uintptr(unsafe.Pointer(&enoptrbss)) {
gothrow("runtime.SetFinalizer: pointer not in allocated block")
// As an implementation detail we allow setting finalizers for an inner byte
// of an object if it could come from a tiny allocation (see mallocgc for details).
if ot.elem == nil || ot.elem.kind&kindNoPointers == 0 || ot.elem.size >= maxTinySize {
gothrow("runtime.SetFinalizer: pointer not at beginning of allocated block")
f := (*eface)(unsafe.Pointer(&finalizer))
// switch to system stack and remove finalizer
removefinalizer(e.data)
if ftyp.kind&kindMask != kindFunc {
gothrow("runtime.SetFinalizer: second argument is " + *ftyp._string + ", not a function")
ft := (*functype)(unsafe.Pointer(ftyp))
ins := *(*[]*_type)(unsafe.Pointer(&ft.in))
if ft.dotdotdot || len(ins) != 1 {
gothrow("runtime.SetFinalizer: cannot pass " + *etyp._string + " to finalizer " + *ftyp._string)
case fint.kind&kindMask == kindPtr:
if (fint.x == nil || fint.x.name == nil || etyp.x == nil || etyp.x.name == nil) && (*ptrtype)(unsafe.Pointer(fint)).elem == ot.elem {
// ok - not same type, but both pointers,
// one or the other is unnamed, and same element type, so assignable.
case fint.kind&kindMask == kindInterface:
ityp := (*interfacetype)(unsafe.Pointer(fint))
if len(ityp.mhdr) == 0 {
// ok - satisfies empty interface
if _, ok := assertE2I2(ityp, obj); ok {
gothrow("runtime.SetFinalizer: cannot pass " + *etyp._string + " to finalizer " + *ftyp._string)
// compute size needed for return parameters
for _, t := range *(*[]*_type)(unsafe.Pointer(&ft.out)) {
nret = round(nret, uintptr(t.align)) + uintptr(t.size)
nret = round(nret, ptrSize)
// make sure we have a finalizer goroutine
if !addfinalizer(e.data, (*funcval)(f.data), nret, fint, ot) {
gothrow("runtime.SetFinalizer: finalizer already set")
// round n up to a multiple of a. a must be a power of 2.
func round(n, a uintptr) uintptr {
return (n + a - 1) &^ (a - 1)
// Look up pointer v in heap. Return the span containing the object,
// the start of the object, and the size of the object. If the object
// does not exist, return nil, nil, 0.
func findObject(v unsafe.Pointer) (s *mspan, x unsafe.Pointer, n uintptr) {
if ptrSize == 4 && c.local_nlookup >= 1<<30 {
// purge cache stats to prevent overflow
arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
arena_used := uintptr(unsafe.Pointer(mheap_.arena_used))
if uintptr(v) < arena_start || uintptr(v) >= arena_used {
p := uintptr(v) >> pageShift
q := p - arena_start>>pageShift
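// mheap_.spans maps each arena page index to the *mspan that owns that page.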
s = *(**mspan)(add(unsafe.Pointer(mheap_.spans), q*ptrSize))
x = unsafe.Pointer(uintptr(s.start) << pageShift)
if uintptr(v) < uintptr(x) || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != mSpanInUse {
n = uintptr(s.elemsize)
if s.sizeclass != 0 {
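// Round v down to the start of the n-byte object that contains it.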
x = add(x, (uintptr(v)-uintptr(x))/n*n)
var fingCreate uint32
// start the finalizer goroutine exactly once
if fingCreate == 0 && cas(&fingCreate, 0, 1) {
// This is the goroutine that runs all of the finalizers
goparkunlock(&finlock, "finalizer wait")
for i := int32(0); i < fb.cnt; i++ {
f := (*finalizer)(add(unsafe.Pointer(&fb.fin), uintptr(i)*unsafe.Sizeof(finalizer{})))
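// The frame holds the finalizer's single argument (at most an interface
// value, two words) followed by f.nret bytes for its ignored results.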
framesz := unsafe.Sizeof((interface{})(nil)) + uintptr(f.nret)
if framecap < framesz {
// The frame does not contain pointers interesting for GC;
// all not-yet-finalized objects are stored in finq.
// If we do not mark it as FlagNoScan,
// the last finalized object is not collected.
frame = mallocgc(framesz, nil, flagNoScan)
gothrow("missing type in runfinq")
switch f.fint.kind & kindMask {
// direct use of pointer
*(*unsafe.Pointer)(frame) = f.arg
ityp := (*interfacetype)(unsafe.Pointer(f.fint))
// set up with empty interface
(*eface)(frame)._type = &f.ot.typ
(*eface)(frame).data = f.arg
if len(ityp.mhdr) != 0 {
// convert to interface with methods
// this conversion is guaranteed to succeed - we checked in SetFinalizer
*(*fInterface)(frame) = assertE2I(ityp, *(*interface{})(frame))
gothrow("bad kind in runfinq")
reflectcall(unsafe.Pointer(f.fn), frame, uint32(framesz), uint32(framesz))
// drop finalizer queue references to finalized object
var persistent struct {
// Wrapper around sysAlloc that can allocate small chunks.
// There is no associated free operation.
// Intended for things like function/type/debug-related persistent data.
// If align is 0, uses default align (currently 8).
func persistentalloc(size, align uintptr, stat *uint64) unsafe.Pointer {
maxBlock = 64 << 10 // VM reservation granularity is 64K on windows
if align&(align-1) != 0 {
gothrow("persistentalloc: align is not a power of 2")
if align > _PageSize {
gothrow("persistentalloc: align is too large")
if size >= maxBlock {
return sysAlloc(size, stat)
lock(&persistent.lock)
persistent.pos = roundup(persistent.pos, align)
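// If the current chunk does not have room for the request, allocate a
// fresh chunk from the OS.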
if uintptr(persistent.pos)+size > uintptr(persistent.end) {
persistent.pos = sysAlloc(chunk, &memstats.other_sys)
if persistent.pos == nil {
unlock(&persistent.lock)
gothrow("runtime: cannot allocate memory")
persistent.end = add(persistent.pos, chunk)
persistent.pos = add(persistent.pos, size)
unlock(&persistent.lock)
if stat != &memstats.other_sys {
xadd64(stat, int64(size))
xadd64(&memstats.other_sys, -int64(size))