// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
const (
	debugMalloc = false

	flagNoScan = _FlagNoScan
	flagNoZero = _FlagNoZero

	maxTinySize   = _TinySize
	tinySizeClass = _TinySizeClass
	maxSmallSize  = _MaxSmallSize

	pageShift = _PageShift
	pageSize  = _PageSize
	pageMask  = _PageMask

	bitsPerPointer  = _BitsPerPointer
	pointersPerByte = _PointersPerByte
	maxGCMask       = _MaxGCMask

	bitsDead    = _BitsDead
	bitsPointer = _BitsPointer

	mSpanInUse = _MSpanInUse

	concurrentSweep = _ConcurrentSweep != 0
)
// Page number (address>>pageShift)
type pageID uintptr

// base address for all 0-byte allocations
var zerobase uintptr
// Allocate an object of size bytes.
// Small objects are allocated from the per-P cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
	if size == 0 {
		return unsafe.Pointer(&zerobase)
	}
	if flags&flagNoScan == 0 && typ == nil {
		gothrow("malloc missing type")
	}
	// This function must be atomic wrt GC, but for performance reasons
	// we don't acquirem/releasem on the fast path. The code below does not have
	// split stack checks, so it can't be preempted by GC.
	// Functions like roundup/add are inlined, and onM/racemalloc are nosplit.
	// If debugMalloc is true, these assumptions are checked below.
	if mp.mallocing != 0 {
		gothrow("malloc deadlock")
	}
	mp.curg.stackguard0 = ^uintptr(0xfff) | 0xbad
	if size <= maxSmallSize {
		if flags&flagNoScan != 0 && size < maxTinySize {
			// Tiny allocator combines several tiny allocation requests
			// into a single memory block. The resulting memory block
			// is freed when all subobjects are unreachable. The subobjects
			// must be FlagNoScan (have no pointers), which ensures that
			// the amount of potentially wasted memory is bounded.
			//
			// The size of the memory block used for combining (maxTinySize) is tunable.
			// The current setting is 16 bytes, which gives at most 2x worst-case memory
			// wastage (when all but one subobject is unreachable).
			// 8 bytes would result in no wastage at all, but provides fewer
			// opportunities for combining.
			// 32 bytes provides more opportunities for combining,
			// but can lead to 4x worst-case wastage.
			// The best-case saving is 8x regardless of block size.
			//
			// Objects obtained from the tiny allocator must not be freed explicitly.
			// So when an object will be freed explicitly, we ensure that
			// its size >= maxTinySize.
			//
			// SetFinalizer has a special case for objects potentially coming
			// from the tiny allocator; in such a case it allows setting finalizers
			// for an inner byte of a memory block.
			//
			// The main targets of the tiny allocator are small strings and
			// standalone escaping variables. On a json benchmark
			// the allocator reduces the number of allocations by ~12% and
			// reduces the heap size by ~20%.
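			//
			// A worked example of these bounds (illustrative, not part of the
			// original comments): with a 16-byte block, the worst case is one
			// live 8-byte subobject pinning an otherwise dead block, i.e.
			// 16/8 = 2x wastage; a 32-byte block gives 32/8 = 4x. The best
			// case packs sixteen 1-byte objects into one 16-byte block instead
			// of sixteen separate 8-byte-class allocations: 128/16 = 8x.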
			tinysize := uintptr(c.tinysize)
			if size <= tinysize {
				tiny := unsafe.Pointer(c.tiny)
				// Align tiny pointer for required (conservative) alignment.
				if size&7 == 0 {
					tiny = roundup(tiny, 8)
				} else if size&3 == 0 {
					tiny = roundup(tiny, 4)
				} else if size&1 == 0 {
					tiny = roundup(tiny, 2)
				}
				size1 := size + (uintptr(tiny) - uintptr(unsafe.Pointer(c.tiny)))
				if size1 <= tinysize {
					// The object fits into the existing tiny block.
					x = tiny
					c.tiny = (*byte)(add(x, size))
					c.tinysize -= uintptr(size1)
					if mp.mallocing == 0 {
						gothrow("bad malloc")
					}
					mp.curg.stackguard0 = mp.curg.stack.lo + _StackGuard
					// Note: one releasem for the acquirem just above.
					// The other is for the acquirem at the start of malloc.
					releasem(mp)
					releasem(mp)
			// Allocate a new maxTinySize block.
			s = c.alloc[tinySizeClass]
			mp.scalararg[0] = tinySizeClass
			s = c.alloc[tinySizeClass]
			// TODO: prefetch v.next
			x = unsafe.Pointer(v)
			(*[2]uint64)(x)[0] = 0
			(*[2]uint64)(x)[1] = 0
			// See if we need to replace the existing tiny block with the new one
			// based on the amount of remaining free space.
			if maxTinySize-size > tinysize {
				c.tiny = (*byte)(add(x, size))
				c.tinysize = uintptr(maxTinySize - size)
			}
			size = maxTinySize
		} else {
			var sizeclass int8
			if size <= 1024-8 {
				sizeclass = size_to_class8[(size+7)>>3]
			} else {
				sizeclass = size_to_class128[(size-1024+127)>>7]
			}
			size = uintptr(class_to_size[sizeclass])
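			// Index arithmetic, spelled out (an explanatory note, not from
			// the original comments): (size+7)>>3 is ceil(size/8), indexing
			// the 8-byte-granularity class table used up to 1024-8 bytes;
			// (size-1024+127)>>7 is ceil((size-1024)/128), indexing the
			// 128-byte-granularity table for the larger small sizes.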
			s = c.alloc[sizeclass]
			mp.scalararg[0] = uintptr(sizeclass)
			s = c.alloc[sizeclass]
		}
		x = unsafe.Pointer(v)
		if flags&flagNoZero == 0 {
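			// Why only the second word is checked (an explanatory note, an
			// assumption spelled out here rather than taken from the original
			// comments): a block from freshly mapped memory is already zero,
			// and the freelist link occupies the first word, so a nonzero
			// second word is the cheap signal that the block was recycled
			// and must be cleared.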
			if size > 2*ptrSize && ((*[2]uintptr)(x))[1] != 0 {
				memclr(unsafe.Pointer(v), size)
			}
		}
		c.local_cachealloc += intptr(size)
	} else {
		mp.scalararg[0] = uintptr(size)
		mp.scalararg[1] = uintptr(flags)
		onM(largeAlloc_m)
		s = (*mspan)(mp.ptrarg[0])
		x = unsafe.Pointer(uintptr(s.start << pageShift))
		size = uintptr(s.elemsize)
	}
	if flags&flagNoScan != 0 {
		// All objects are pre-marked as noscan.
	// If allocating a defer+arg block, now that we've picked a malloc size
	// large enough to hold everything, cut the "asked for" size down to
	// just the defer header, so that the GC bitmap will record the arg block
	// as containing nothing at all (as if it were unused space at the end of
	// a malloc block caused by size rounding).
	// The defer arg areas are scanned as part of scanstack.
	if typ == deferType {
		size0 = unsafe.Sizeof(_defer{})
	}
	// From here until the marked label, we mark the object as allocated
	// and store type info in the GC bitmap.
	arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
	off := (uintptr(x) - arena_start) / ptrSize
	xbits := (*uint8)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
	shift := (off % wordsPerBitmapByte) * gcBits
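	// Layout recap (an explanatory note, not from the original comments):
	// the bitmap stores gcBits bits per heap word and is addressed backwards
	// from arena_start, so word number off lives in the byte at
	// arena_start - off/wordsPerBitmapByte - 1, at bit offset
	// (off % wordsPerBitmapByte) * gcBits within that byte.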
	if debugMalloc && ((*xbits>>shift)&(bitMask|bitPtrMask)) != bitBoundary {
		println("runtime: bits =", (*xbits>>shift)&(bitMask|bitPtrMask))
		gothrow("bad bits in markallocated")
	}
		// It's one word and it has pointers, so it must be a pointer.
		*xbits |= (bitsPointer << 2) << shift
		if typ.kind&kindGCProg != 0 {
			nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
			masksize := nptr
			if masksize%2 != 0 {
				masksize *= 2 // repeated
			}
			masksize = masksize * pointersPerByte / 8 // 4 bits per word
			masksize++ // unroll flag in the beginning
			if masksize > maxGCMask && typ.gc[1] != 0 {
				// write barriers have not been updated to deal with this case yet.
				gothrow("maxGCMask too small for now")
				// If the mask is too large, unroll the program directly
				// into the GC bitmap. It's 7 times slower than copying
				// from the pre-unrolled mask, but saves 1/16 of the type's size
				// in memory for the mask.
				mp.ptrarg[1] = unsafe.Pointer(typ)
				mp.scalararg[0] = uintptr(size)
				mp.scalararg[1] = uintptr(size0)
				onM(unrollgcproginplace_m)
			}
			ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
			// Check whether the program is already unrolled
			// by checking if the unroll flag byte is set.
			maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask)))
			if *(*uint8)(unsafe.Pointer(&maskword)) == 0 {
				mp.ptrarg[0] = unsafe.Pointer(typ)
				onM(unrollgcprog_m)
			}
			ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
		} else {
			ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
		}
		if size == 2*ptrSize {
			*xbits = *ptrmask | bitBoundary
		te = uintptr(typ.size) / ptrSize
		// If the type occupies an odd number of words, its mask is repeated.
		// Copy pointer bitmask into the bitmap.
		for i := uintptr(0); i < size0; i += 2 * ptrSize {
			v := *(*uint8)(add(unsafe.Pointer(ptrmask), ti))
			if i+ptrSize == size0 {
				v &^= uint8(bitPtrMask << 4)
			}
			*xbits = v
			xbits = (*byte)(add(unsafe.Pointer(xbits), ^uintptr(0)))
		}
		if size0%(2*ptrSize) == 0 && size0 < size {
			// Mark the word after the last object's word as bitsDead.
			*xbits = bitsDead << 2
		}
	}
	// GCmarktermination allocates black;
	// all slots hold nil so no scanning is needed.
	// This may be racing with GC so do it atomically if there can be
	// a race marking the bit.
	if gcphase == _GCmarktermination {
		mp.ptrarg[0] = x
		onM(gcmarknewobject_m)
	}
	if mp.mallocing == 0 {
		gothrow("bad malloc")
	}
	mp.curg.stackguard0 = mp.curg.stack.lo + _StackGuard
	// Note: one releasem for the acquirem just above.
	// The other is for the acquirem at the start of malloc.
	releasem(mp)
	releasem(mp)
	if debug.allocfreetrace != 0 {
		tracealloc(x, size, typ)
	}
	if rate := MemProfileRate; rate > 0 {
		if size < uintptr(rate) && int32(size) < c.next_sample {
			c.next_sample -= int32(size)
		} else {
			profilealloc(mp, x, size)
		}
	}
	if memstats.heap_alloc >= memstats.next_gc {
		gogc(0)
	}

	return x
}
func loadPtrMask(typ *_type) []uint8 {
	var ptrmask *uint8
	nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
	if typ.kind&kindGCProg != 0 {
		masksize := nptr
		if masksize%2 != 0 {
			masksize *= 2 // repeated
		}
		masksize = masksize * pointersPerByte / 8 // 4 bits per word
		masksize++ // unroll flag in the beginning
		if masksize > maxGCMask && typ.gc[1] != 0 {
			// write barriers have not been updated to deal with this case yet.
			gothrow("maxGCMask too small for now")
		}
		ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
		// Check whether the program is already unrolled
		// by checking if the unroll flag byte is set.
		maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask)))
		if *(*uint8)(unsafe.Pointer(&maskword)) == 0 {
			mp := acquirem()
			mp.ptrarg[0] = unsafe.Pointer(typ)
			onM(unrollgcprog_m)
			releasem(mp)
		}
		ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
	} else {
		ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
	}
	return (*[1 << 30]byte)(unsafe.Pointer(ptrmask))[:(nptr+1)/2]
}
// implementation of the new builtin
func newobject(typ *_type) unsafe.Pointer {
	flags := uint32(0)
	if typ.kind&kindNoPointers != 0 {
		flags |= flagNoScan
	}
	return mallocgc(uintptr(typ.size), typ, flags)
}
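// For illustration (a sketch, not part of the original source): a
// statement such as
//	p := new(T)
// is lowered by the compiler to a call of roughly the form
//	p := (*T)(newobject(typeOfT))
// where typeOfT stands for the compiler-generated *_type descriptor of T.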
// implementation of the make builtin for slices
func newarray(typ *_type, n uintptr) unsafe.Pointer {
	flags := uint32(0)
	if typ.kind&kindNoPointers != 0 {
		flags |= flagNoScan
	}
	if int(n) < 0 || (typ.size > 0 && n > maxmem/uintptr(typ.size)) {
		panic("runtime: allocation size out of range")
	}
	return mallocgc(uintptr(typ.size)*n, typ, flags)
}
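// Note on the bounds check above (explanatory, not from the original
// comments): comparing n against maxmem/typ.size rather than checking
// typ.size*n directly avoids overflowing the multiplication, which could
// otherwise let a huge n slip past the limit.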
// rawmem returns a chunk of pointerless memory. It is
// not zeroed.
func rawmem(size uintptr) unsafe.Pointer {
	return mallocgc(size, nil, flagNoScan|flagNoZero)
}
// round size up to the next size class
func goroundupsize(size uintptr) uintptr {
	if size < maxSmallSize {
		if size <= 1024-8 {
			return uintptr(class_to_size[size_to_class8[(size+7)>>3]])
		}
		return uintptr(class_to_size[size_to_class128[(size-1024+127)>>7]])
	}
	if size+pageSize < size {
		return size
	}
	return (size + pageSize - 1) &^ pageMask
}
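// A quick worked example (illustrative; the exact class sizes depend on
// the size class tables): goroundupsize(10) looks up index (10+7)>>3 = 2
// in size_to_class8 and returns that class's size (16 with the usual
// tables), while a large size such as 40000 is simply rounded up to a
// page boundary by (size + pageSize - 1) &^ pageMask.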
func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
	c := mp.mcache
	rate := MemProfileRate
	if size < uintptr(rate) {
		// pick next profile time
		// If you change this, also change allocmcache.
		if rate > 0x3fffffff { // make 2*rate not overflow
			rate = 0x3fffffff
		}
		next := int32(fastrand1()) % (2 * int32(rate))
		// Subtract the "remainder" of the current allocation.
		// Otherwise objects that are close in size to the sampling rate
		// will be under-sampled, because we consistently discard this remainder.
		next -= (int32(size) - c.next_sample)
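		// Why 2*rate (an explanatory note, not from the original comments):
		// drawing the next sampling point uniformly from [0, 2*rate) gives a
		// mean gap of rate bytes between samples, so on average one
		// allocation per MemProfileRate bytes of allocation is profiled.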
	mProf_Malloc(x, size)
}
// force = 1 - do GC regardless of current heap usage
// force = 2 - do GC and eager sweep
func gogc(force int32) {
	// The gc is turned off (via enablegc) until the bootstrap has completed.
	// Also, malloc gets called in the guts of a number of libraries that might be
	// holding locks. To avoid deadlocks during stoptheworld, don't bother
	// trying to run gc while holding a lock. The next mallocgc without a lock
	// will do the gc instead.
	mp := acquirem()
	if gp := getg(); gp == mp.g0 || mp.locks > 1 || !memstats.enablegc || panicking != 0 || gcpercent < 0 {
		releasem(mp)
		return
	}
	releasem(mp)
	semacquire(&worldsema, false)

	if force == 0 && memstats.heap_alloc < memstats.next_gc {
		// typically threads which lost the race to grab
		// worldsema exit here when gc is done.
		semrelease(&worldsema)
		return
	}
	// Ok, we're doing it! Stop everybody else.
	startTime := nanotime()
	onM(finishsweep_m) // finish sweep before we start concurrent scan.
	if false { // To turn on concurrent scan and mark, set to true...
		// Do a concurrent heap scan before we stop the world.
		onM(gcinstallmarkwb_m)
		onM(gcinstalloffwb_m)
	}
	if mp != acquirem() {
		gothrow("gogc: rescheduled")
	}
	// Run gc on the g0 stack. We do this so that the g stack
	// we're currently running on will no longer change. Cuts
	// the root set down a bit (g0 stacks are not scanned, and
	// we don't need to scan gc's internal state). We also
	// need to switch to g0 so we can shrink the stack.
	n := 1
	if debug.gctrace > 1 {
		n = 2
	}
	for i := 0; i < n; i++ {
		if i > 0 {
			startTime = nanotime()
		}
		// switch to g0, call gc, then switch back
		mp.scalararg[0] = uintptr(uint32(startTime)) // low 32 bits
		mp.scalararg[1] = uintptr(startTime >> 32)   // high 32 bits
		mp.scalararg[2] = 1 // eagersweep
		onM(gc_m)
	}
	semrelease(&worldsema)
	// now that gc is done, kick off finalizer thread if needed
	if !concurrentSweep {
		// give the queued finalizers, if any, a chance to run
		gosched()
	}
}
func GCcheckmarkenable() {
	onM(gccheckmarkenable_m)
}

func GCcheckmarkdisable() {
	onM(gccheckmarkdisable_m)
}
// GC runs a garbage collection.
func GC() {
	gogc(2)
}

// linker-provided
var noptrdata struct{}
var enoptrbss struct{}
// SetFinalizer sets the finalizer associated with x to f.
// When the garbage collector finds an unreachable block
// with an associated finalizer, it clears the association and runs
// f(x) in a separate goroutine. This makes x reachable again, but
// now without an associated finalizer. Assuming that SetFinalizer
// is not called again, the next time the garbage collector sees
// that x is unreachable, it will free x.
//
// SetFinalizer(x, nil) clears any finalizer associated with x.
//
// The argument x must be a pointer to an object allocated by
// calling new or by taking the address of a composite literal.
// The argument f must be a function that takes a single argument
// to which x's type can be assigned, and can have arbitrary ignored return
// values. If either of these is not true, SetFinalizer aborts the
// program.
//
// Finalizers are run in dependency order: if A points at B, both have
// finalizers, and they are otherwise unreachable, only the finalizer
// for A runs; once A is freed, the finalizer for B can run.
// If a cyclic structure includes a block with a finalizer, that
// cycle is not guaranteed to be garbage collected and the finalizer
// is not guaranteed to run, because there is no ordering that
// respects the dependencies.
//
// The finalizer for x is scheduled to run at some arbitrary time after
// x becomes unreachable.
// There is no guarantee that finalizers will run before a program exits,
// so typically they are useful only for releasing non-memory resources
// associated with an object during a long-running program.
// For example, an os.File object could use a finalizer to close the
// associated operating system file descriptor when a program discards
// an os.File without calling Close, but it would be a mistake
// to depend on a finalizer to flush an in-memory I/O buffer such as a
// bufio.Writer, because the buffer would not be flushed at program exit.
//
// It is not guaranteed that a finalizer will run if the size of *x is
// zero bytes.
//
// It is not guaranteed that a finalizer will run for objects allocated
// in initializers for package-level variables. Such objects may be
// linker-allocated, not heap-allocated.
//
// A single goroutine runs all finalizers for a program, sequentially.
// If a finalizer must run for a long time, it should do so by starting
// a new goroutine.
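//
// For example (an illustrative sketch, not part of the original doc
// comment), tying cleanup of a hypothetical wrapper to collection:
//
//	type handle struct{ fd int }
//	h := &handle{fd: openNative()}
//	runtime.SetFinalizer(h, func(h *handle) { closeNative(h.fd) })
//
// openNative and closeNative are hypothetical helpers, named here only
// to make the example concrete.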
func SetFinalizer(obj interface{}, finalizer interface{}) {
	e := (*eface)(unsafe.Pointer(&obj))
	etyp := e._type
	if etyp == nil {
		gothrow("runtime.SetFinalizer: first argument is nil")
	}
	if etyp.kind&kindMask != kindPtr {
		gothrow("runtime.SetFinalizer: first argument is " + *etyp._string + ", not pointer")
	}
	ot := (*ptrtype)(unsafe.Pointer(etyp))
	if ot.elem == nil {
		gothrow("nil elem type!")
	}
	// find the containing object
	_, base, _ := findObject(e.data)
	if base == nil {
		// 0-length objects are okay.
		if e.data == unsafe.Pointer(&zerobase) {
			return
		}
		// Global initializers might be linker-allocated.
		//	var Foo = &Object{}
		//	runtime.SetFinalizer(Foo, nil)
		// The segments are, in order: text, rodata, noptrdata, data, bss, noptrbss.
		if uintptr(unsafe.Pointer(&noptrdata)) <= uintptr(e.data) && uintptr(e.data) < uintptr(unsafe.Pointer(&enoptrbss)) {
			return
		}
		gothrow("runtime.SetFinalizer: pointer not in allocated block")
	}
	// As an implementation detail we allow setting finalizers for an inner byte
	// of an object if it could come from a tiny alloc (see mallocgc for details).
	if ot.elem == nil || ot.elem.kind&kindNoPointers == 0 || ot.elem.size >= maxTinySize {
		gothrow("runtime.SetFinalizer: pointer not at beginning of allocated block")
	}
	f := (*eface)(unsafe.Pointer(&finalizer))
	ftyp := f._type
	if ftyp == nil {
		// switch to M stack and remove finalizer
		mp := acquirem()
		mp.ptrarg[0] = e.data
		onM(removeFinalizer_m)
		releasem(mp)
		return
	}
	if ftyp.kind&kindMask != kindFunc {
		gothrow("runtime.SetFinalizer: second argument is " + *ftyp._string + ", not a function")
	}
	ft := (*functype)(unsafe.Pointer(ftyp))
	ins := *(*[]*_type)(unsafe.Pointer(&ft.in))
	if ft.dotdotdot || len(ins) != 1 {
		gothrow("runtime.SetFinalizer: cannot pass " + *etyp._string + " to finalizer " + *ftyp._string)
	}
	fint := ins[0]
	switch {
	case fint == etyp:
		// ok - same type
		goto okarg
	case fint.kind&kindMask == kindPtr:
		if (fint.x == nil || fint.x.name == nil || etyp.x == nil || etyp.x.name == nil) && (*ptrtype)(unsafe.Pointer(fint)).elem == ot.elem {
			// ok - not same type, but both pointers,
			// one or the other is unnamed, and same element type, so assignable.
			goto okarg
		}
	case fint.kind&kindMask == kindInterface:
		ityp := (*interfacetype)(unsafe.Pointer(fint))
		if len(ityp.mhdr) == 0 {
			// ok - satisfies empty interface
			goto okarg
		}
		if _, ok := assertE2I2(ityp, obj); ok {
			goto okarg
		}
	}
	gothrow("runtime.SetFinalizer: cannot pass " + *etyp._string + " to finalizer " + *ftyp._string)
okarg:
	// compute size needed for return parameters
	nret := uintptr(0)
	for _, t := range *(*[]*_type)(unsafe.Pointer(&ft.out)) {
		nret = round(nret, uintptr(t.align)) + uintptr(t.size)
	}
	nret = round(nret, ptrSize)
	// make sure we have a finalizer goroutine
	createfing()

	// switch to M stack to add finalizer record
	mp := acquirem()
	mp.ptrarg[0] = f.data
	mp.ptrarg[1] = e.data
	mp.scalararg[0] = nret
	mp.ptrarg[2] = unsafe.Pointer(fint)
	mp.ptrarg[3] = unsafe.Pointer(ot)
	onM(setFinalizer_m)
	if mp.scalararg[0] != 1 {
		gothrow("runtime.SetFinalizer: finalizer already set")
	}
	releasem(mp)
}
// round n up to a multiple of a. a must be a power of 2.
func round(n, a uintptr) uintptr {
	return (n + a - 1) &^ (a - 1)
}
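// A quick sanity check of the identity (explanatory, not in the original
// source): round(13, 8) computes (13+7)&^7 = 20&^7 = 16, and round(16, 8)
// computes (16+7)&^7 = 16, since the mask clears the low bits after the bump.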
// Look up pointer v in heap. Return the span containing the object,
// the start of the object, and the size of the object. If the object
// does not exist, return nil, nil, 0.
func findObject(v unsafe.Pointer) (s *mspan, x unsafe.Pointer, n uintptr) {
	c := gomcache()
	if ptrSize == 4 && c.local_nlookup >= 1<<30 {
		// purge cache stats to prevent overflow
		lock(&mheap_.lock)
		purgecachedstats(c)
		unlock(&mheap_.lock)
	}
	arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
	arena_used := uintptr(unsafe.Pointer(mheap_.arena_used))
	if uintptr(v) < arena_start || uintptr(v) >= arena_used {
		return
	}
	p := uintptr(v) >> pageShift
	q := p - arena_start>>pageShift
	s = *(**mspan)(add(unsafe.Pointer(mheap_.spans), q*ptrSize))
	if s == nil {
		return
	}
	x = unsafe.Pointer(uintptr(s.start) << pageShift)

	if uintptr(v) < uintptr(x) || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != mSpanInUse {
		s = nil
		x = nil
		return
	}

	n = uintptr(s.elemsize)
	if s.sizeclass != 0 {
		x = add(x, (uintptr(v)-uintptr(x))/n*n)
	}
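	// Note on the rounding above (explanatory, not from the original
	// comments): (v-x)/n*n truncates the byte offset of v within the span
	// to a multiple of the element size, so an interior pointer is mapped
	// back to the start of its object.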
var fingCreate uint32

func createfing() {
	// start the finalizer goroutine exactly once
	if fingCreate == 0 && cas(&fingCreate, 0, 1) {
		go runfinq()
	}
}
// This is the goroutine that runs all of the finalizers.
func runfinq() {
		goparkunlock(&finlock, "finalizer wait")
		for i := int32(0); i < fb.cnt; i++ {
			f := (*finalizer)(add(unsafe.Pointer(&fb.fin), uintptr(i)*unsafe.Sizeof(finalizer{})))
			framesz := unsafe.Sizeof((interface{})(nil)) + uintptr(f.nret)
			if framecap < framesz {
				// The frame does not contain pointers interesting for GC;
				// all not-yet-finalized objects are stored in finq.
				// If we do not mark it as FlagNoScan,
				// the last finalized object is not collected.
				frame = mallocgc(framesz, nil, flagNoScan)
				framecap = framesz
			}
			if f.fint == nil {
				gothrow("missing type in runfinq")
			}
			switch f.fint.kind & kindMask {
			case kindPtr:
				// direct use of pointer
				*(*unsafe.Pointer)(frame) = f.arg
			case kindInterface:
				ityp := (*interfacetype)(unsafe.Pointer(f.fint))
				// set up with empty interface
				(*eface)(frame)._type = &f.ot.typ
				(*eface)(frame).data = f.arg
				if len(ityp.mhdr) != 0 {
					// convert to interface with methods
					// this conversion is guaranteed to succeed - we checked in SetFinalizer
					*(*fInterface)(frame) = assertE2I(ityp, *(*interface{})(frame))
				}
			default:
				gothrow("bad kind in runfinq")
			}
			reflectcall(unsafe.Pointer(f.fn), frame, uint32(framesz), uint32(framesz))

			// drop finalizer queue references to finalized object
			f.fn = nil
			f.arg = nil
			f.ot = nil
var persistent struct {
	lock mutex
	pos  unsafe.Pointer
	end  unsafe.Pointer
}
// Wrapper around sysAlloc that can allocate small chunks.
// There is no associated free operation.
// Intended for things like function/type/debug-related persistent data.
// If align is 0, uses default align (currently 8).
func persistentalloc(size, align uintptr, stat *uint64) unsafe.Pointer {
	const (
		chunk    = 256 << 10
		maxBlock = 64 << 10 // VM reservation granularity is 64K on Windows
	)
	if align != 0 {
		if align&(align-1) != 0 {
			gothrow("persistentalloc: align is not a power of 2")
		}
		if align > _PageSize {
			gothrow("persistentalloc: align is too large")
		}
	} else {
		align = 8
	}

	if size >= maxBlock {
		return sysAlloc(size, stat)
	}
	lock(&persistent.lock)
	persistent.pos = roundup(persistent.pos, align)
	if uintptr(persistent.pos)+size > uintptr(persistent.end) {
		persistent.pos = sysAlloc(chunk, &memstats.other_sys)
		if persistent.pos == nil {
			unlock(&persistent.lock)
			gothrow("runtime: cannot allocate memory")
		}
		persistent.end = add(persistent.pos, chunk)
	}
	p := persistent.pos
	persistent.pos = add(persistent.pos, size)
	unlock(&persistent.lock)
	if stat != &memstats.other_sys {
		xadd64(stat, int64(size))
		xadd64(&memstats.other_sys, -int64(size))
	}
	return p
}