1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Garbage collector (GC).
9 // - mostly precise (with the exception of some C-allocated objects, assembly frames/arguments, etc)
10 // - parallel (up to MaxGcproc threads)
11 // - partially concurrent (mark is stop-the-world, while sweep is concurrent)
12 // - non-moving/non-compacting
13 // - full (non-partial)
16 // Next GC is after we've allocated an extra amount of memory proportional to
17 // the amount already in use. The proportion is controlled by the GOGC environment variable
18 // (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M
19 // (this mark is tracked in the next_gc variable). This keeps the GC cost in linear
20 // proportion to the allocation cost. Adjusting GOGC just changes the linear constant
21 // (and also the amount of extra memory used).
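// For example (illustrative arithmetic, matching the next_gc formula used in gc() below):
// with GOGC=100 and a 4M live heap, next_gc = 4M + 4M*100/100 = 8M;
// with GOGC=50 it would be 4M + 4M*50/100 = 6M.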
24 // The sweep phase proceeds concurrently with normal program execution.
25 // The heap is swept span-by-span both lazily (when a goroutine needs another span)
26 // and concurrently in a background goroutine (this helps programs that are not CPU bound).
27 // However, at the end of the stop-the-world GC phase we don't know the size of the live heap,
28 // and so next_gc calculation is tricky and happens as follows.
29 // At the end of the stop-the-world phase next_gc is conservatively set based on total
30 // heap size; all spans are marked as "needs sweeping".
31 // Whenever a span is swept, next_gc is decremented by GOGC*newly_freed_memory.
32 // The background sweeper goroutine simply sweeps spans one-by-one bringing next_gc
33 // closer to the target value. However, this is not enough to avoid over-allocating memory.
34 // Consider that a goroutine wants to allocate a new span for a large object and
35 // there are no free swept spans, but there are small-object unswept spans.
36 // If the goroutine naively allocates a new span, it can surpass the yet-unknown
37 // target next_gc value. In order to prevent such cases (1) when a goroutine needs
38 // to allocate a new small-object span, it sweeps small-object spans for the same
39 // object size until it frees at least one object; (2) when a goroutine needs to
40 // allocate a large-object span from the heap, it sweeps spans until it frees at least
41 // that many pages into the heap. Together these two measures ensure that we don't surpass
42 // target next_gc value by a large margin. There is an exception: if a goroutine sweeps
43 // and frees two nonadjacent one-page spans to the heap, it will allocate a new two-page span,
44 // but there can still be other one-page unswept spans which could be combined into a two-page span.
45 // It's critical to ensure that no operations proceed on unswept spans (that would corrupt
46 // mark bits in GC bitmap). During GC all mcaches are flushed into the central cache,
47 // so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
48 // When a goroutine explicitly frees an object or sets a finalizer, it ensures that
49 // the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish).
50 // The finalizer goroutine is kicked off only when all spans are swept.
51 // When the next GC starts, it sweeps all not-yet-swept spans (if any).
54 #include "arch_GOARCH.h"
67 DebugPtrs = 0, // if 1, print trace of every pointer load during GC
71 FinBlockSize = 4*1024,
80 // ptrmask for an allocation containing a single pointer.
81 static byte oneptr[] = {BitsPointer};
83 // Initialized from $GOGC. GOGC=off means no gc.
84 extern int32 runtime·gcpercent;
86 // Holding worldsema grants an M the right to try to stop the world.
89 // runtime·semacquire(&runtime·worldsema);
91 // runtime·stoptheworld();
96 // runtime·semrelease(&runtime·worldsema);
97 // runtime·starttheworld();
99 uint32 runtime·worldsema = 1;
101 typedef struct Workbuf Workbuf;
104 LFNode node; // must be first
106 byte* obj[(WorkbufSize-sizeof(LFNode)-sizeof(uintptr))/PtrSize];
109 extern byte runtime·data[];
110 extern byte runtime·edata[];
111 extern byte runtime·bss[];
112 extern byte runtime·ebss[];
114 extern byte runtime·gcdata[];
115 extern byte runtime·gcbss[];
117 Mutex runtime·finlock; // protects the following variables
118 G* runtime·fing; // goroutine that runs finalizers
119 FinBlock* runtime·finq; // list of finalizers that are to be executed
120 FinBlock* runtime·finc; // cache of free blocks
121 static byte finptrmask[FinBlockSize/PtrSize/PointersPerByte];
122 bool runtime·fingwait;
123 bool runtime·fingwake;
124 FinBlock *runtime·allfin; // list of all blocks
126 BitVector runtime·gcdatamask;
127 BitVector runtime·gcbssmask;
129 Mutex runtime·gclock;
131 static uintptr badblock[1024];
132 static int32 nbadblock;
134 static Workbuf* getempty(Workbuf*);
135 static Workbuf* getfull(Workbuf*);
136 static void putempty(Workbuf*);
137 static Workbuf* handoff(Workbuf*);
138 static void gchelperstart(void);
139 static void flushallmcaches(void);
140 static bool scanframe(Stkframe *frame, void *unused);
141 static void scanstack(G *gp);
142 static BitVector unrollglobgcprog(byte *prog, uintptr size);
144 void runtime·bgsweep(void);
145 static FuncVal bgsweepv = {runtime·bgsweep};
147 typedef struct WorkData WorkData;
149 uint64 full; // lock-free list of full blocks
150 uint64 empty; // lock-free list of empty blocks
151 byte pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
154 volatile uint32 nwait;
155 volatile uint32 ndone;
159 // Copy of mheap.allspans for marker or sweeper.
163 WorkData runtime·work;
165 // Is _cgo_allocate linked into the binary?
167 have_cgo_allocate(void)
169 extern byte go·weak·runtime·_cgo_allocate_internal[1];
170 return go·weak·runtime·_cgo_allocate_internal != nil;
173 // scanblock scans a block of n bytes starting at pointer b for references
174 // to other objects, scanning any it finds recursively until there are no
175 // unscanned objects left. Instead of using an explicit recursion, it keeps
176 // a work list in the Workbuf* structures and loops in the main function
176 // body. Keeping an explicit work list is easier on the stack allocator and more efficient.
180 scanblock(byte *b, uintptr n, byte *ptrmask)
182 byte *obj, *obj0, *p, *arena_start, *arena_used, **wp, *scanbuf[8], *ptrbitp, *bitp;
183 uintptr i, j, nobj, size, idx, x, off, scanbufpos, bits, xbits, shift;
192 // Cache memory arena parameters in local vars.
193 arena_start = runtime·mheap.arena_start;
194 arena_used = runtime·mheap.arena_used;
196 wbuf = getempty(nil);
198 wp = &wbuf->obj[nobj];
199 keepworking = b == nil;
201 for(i = 0; i < nelem(scanbuf); i++)
206 // ptrmask can have 2 possible values:
207 // 1. nil - obtain pointer mask from GC bitmap.
208 // 2. pointer to a compact mask (for stacks and data).
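// A compact mask packs BitsPerPointer (2) bits per word, 4 words per byte,
// so the bits for the word at offset i are extracted as
//   (ptrmask[(i/PtrSize)/4] >> (((i/PtrSize)%4)*BitsPerPointer)) & BitsMask
// exactly as done in the dense-mask branch below.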
213 // Out of work in workbuf.
214 // First, see if there is any work in scanbuf.
215 for(i = 0; i < nelem(scanbuf); i++) {
216 b = scanbuf[scanbufpos];
217 scanbuf[scanbufpos++] = nil;
218 scanbufpos %= nelem(scanbuf);
220 n = arena_used - b; // scan until bitBoundary or BitsDead
221 ptrmask = nil; // use GC bitmap for pointer info
229 // Refill workbuf from global queue.
230 wbuf = getfull(wbuf);
234 wp = &wbuf->obj[nobj];
237 // If another proc wants a pointer, give it some.
238 if(runtime·work.nwait > 0 && nobj > 4 && runtime·work.full == 0) {
240 wbuf = handoff(wbuf);
242 wp = &wbuf->obj[nobj];
248 n = arena_used - b; // scan until next bitBoundary or BitsDead
249 ptrmask = nil; // use GC bitmap for pointer info
253 runtime·printf("scanblock %p +%p %p\n", b, n, ptrmask);
254 // Find bits of the beginning of the object.
256 off = (uintptr*)b - (uintptr*)arena_start;
257 ptrbitp = arena_start - off/wordsPerBitmapByte - 1;
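// The heap bitmap sits immediately below arena_start and grows downward:
// each byte describes wordsPerBitmapByte (2) heap words, with gcBits (4) bits per word,
// so the byte for word offset off is arena_start - off/wordsPerBitmapByte - 1 and the
// nibble within it is selected by shift = (off % wordsPerBitmapByte) * gcBits.
// E.g. (illustrative, 64-bit): the word at arena_start+0x20 has off=4 and
// lives in the low nibble of the byte at arena_start-3.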
259 for(i = 0; i < n; i += PtrSize) {
261 // Find bits for this word.
263 // Check if we have reached the end of the span.
264 if((((uintptr)b+i)%PageSize) == 0 &&
265 runtime·mheap.spans[(b-arena_start)>>PageShift] != runtime·mheap.spans[(b+i-arena_start)>>PageShift])
267 // Consult GC bitmap.
270 if(wordsPerBitmapByte != 2)
271 runtime·throw("alg doesn't work for wordsPerBitmapByte != 2");
272 j = ((uintptr)b+i)/PtrSize & 1;
276 if((bits&bitBoundary) != 0 && i != 0)
277 break; // reached beginning of the next object
278 bits = (bits>>2)&BitsMask;
280 break; // reached no-scan part of the object
281 } else // dense mask (stack or data)
282 bits = (ptrmask[(i/PtrSize)/4]>>(((i/PtrSize)%4)*BitsPerPointer))&BitsMask;
284 if(bits <= BitsScalar) // BitsScalar || BitsDead
286 if(bits == BitsPointer) {
287 obj = *(byte**)(b+i);
292 // With those three out of the way, must be multi-word.
293 if(Debug && bits != BitsMultiWord)
294 runtime·throw("unexpected garbage collection bits");
295 // Find the next pair of bits.
298 j = ((uintptr)b+i+PtrSize)/PtrSize & 1;
301 bits = (bits>>2)&BitsMask;
303 bits = (ptrmask[((i+PtrSize)/PtrSize)/4]>>((((i+PtrSize)/PtrSize)%4)*BitsPerPointer))&BitsMask;
305 if(Debug && bits != BitsIface && bits != BitsEface)
306 runtime·throw("unexpected garbage collection bits");
308 if(bits == BitsIface) {
309 iface = (Iface*)(b+i);
310 if(iface->tab != nil) {
311 typ = iface->tab->type;
312 if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
316 eface = (Eface*)(b+i);
319 if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
328 // At this point we have extracted the next potential pointer.
329 // Check if it points into heap.
332 if(obj < arena_start || obj >= arena_used) {
333 if((uintptr)obj < PhysPageSize && runtime·invalidptr) {
340 obj = (byte*)((uintptr)obj & ~(PtrSize-1));
341 off = (uintptr*)obj - (uintptr*)arena_start;
342 bitp = arena_start - off/wordsPerBitmapByte - 1;
343 shift = (off % wordsPerBitmapByte) * gcBits;
345 bits = (xbits >> shift) & bitMask;
346 if((bits&bitBoundary) == 0) {
347 // Not a beginning of a block, consult span table to find the block beginning.
348 k = (uintptr)obj>>PageShift;
350 x -= (uintptr)arena_start>>PageShift;
351 s = runtime·mheap.spans[x];
352 if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse) {
353 // Stack pointers lie within the arena bounds but are not part of the GC heap.
355 if(s != nil && s->state == MSpanStack)
359 // If cgo_allocate is linked into the binary, it can allocate
360 // memory as []unsafe.Pointer that may not contain actual
361 // pointers and must be scanned conservatively.
362 // In this case alone, allow the bad pointer.
363 if(have_cgo_allocate() && ptrmask == nil)
366 // Anything else indicates a bug somewhere.
367 // If we're in the middle of chasing down a different bad pointer,
368 // don't confuse the trace by printing about this one.
372 runtime·printf("runtime: garbage collector found invalid heap pointer *(%p+%p)=%p", b, i, obj);
374 runtime·printf(" s=nil\n");
376 runtime·printf(" span=%p-%p-%p state=%d\n", (uintptr)s->start<<PageShift, s->limit, (uintptr)(s->start+s->npages)<<PageShift, s->state);
378 runtime·throw("invalid heap pointer");
379 // Add to badblock list, which will cause the garbage collection
380 // to keep repeating until it has traced the chain of pointers
381 // leading to obj all the way back to a root.
383 badblock[nbadblock++] = (uintptr)b;
386 p = (byte*)((uintptr)s->start<<PageShift);
387 if(s->sizeclass != 0) {
389 idx = ((byte*)obj - p)/size;
393 runtime·printf("runtime: failed to find block beginning for %p s=%p s->limit=%p\n",
394 p, s->start*PageSize, s->limit);
395 runtime·throw("failed to find block beginning");
401 runtime·printf("scan *%p = %p => base %p\n", b+i, obj0, obj);
403 if(nbadblock > 0 && (uintptr)obj == badblock[nbadblock-1]) {
404 // Running garbage collection again because
405 // we want to find the path from a root to a bad pointer.
406 // Found possible next step; extend or finish path.
407 for(j=0; j<nbadblock; j++)
408 if(badblock[j] == (uintptr)b)
410 runtime·printf("runtime: found *(%p+%p) = %p+%p\n", b, i, obj0, (uintptr)(obj-obj0));
412 runtime·throw("bad pointer");
413 if(nbadblock >= nelem(badblock))
414 runtime·throw("badblock trace too long");
415 badblock[nbadblock++] = (uintptr)b;
419 // Now we have bits, bitp, and shift correct for
420 // obj pointing at the base of the object.
421 // Only care about not marked objects.
422 if((bits&bitMarked) != 0)
424 // If the obj size is greater than 8, then each byte of the GC bitmap
425 // contains info for at most one object. In that case we use a
426 // non-atomic byte store to mark the object. This can lead
427 // to a double enqueue of the object for scanning, but scanning
428 // is an idempotent operation, so it is OK. This cannot lead
429 // to bitmap corruption because the single marked bit is the
430 // only thing that can change in the byte.
431 // For 8-byte objects we use a non-atomic store if the other
432 // quadruple is already marked. Otherwise we resort to a CAS loop.
434 if((xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) ||
435 runtime·work.nproc == 1)
436 *bitp = xbits | (bitMarked<<shift);
438 runtime·atomicor8(bitp, bitMarked<<shift);
440 if(((xbits>>(shift+2))&BitsMask) == BitsDead)
441 continue; // noscan object
443 // Queue the obj for scanning.
445 p = scanbuf[scanbufpos];
446 scanbuf[scanbufpos++] = obj;
447 scanbufpos %= nelem(scanbuf);
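// scanbuf is a small ring buffer: the newly found pointer displaces the
// oldest entry (p), and it is that older entry which gets queued into the
// work buffer, so each object spends nelem(scanbuf) iterations in scanbuf
// before being queued.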
451 // If workbuf is full, obtain an empty one.
452 if(nobj >= nelem(wbuf->obj)) {
454 wbuf = getempty(wbuf);
456 wp = &wbuf->obj[nobj];
463 runtime·printf("end scanblock %p +%p %p\n", b, n, ptrmask);
465 if(Debug && ptrmask == nil) {
466 // For heap objects ensure that we did not overscan.
469 if(!runtime·mlookup(b, &p, &n, nil) || b != p || i > n) {
470 runtime·printf("runtime: scanned (%p,%p), heap object (%p,%p)\n", b, i, p, n);
471 runtime·throw("scanblock: scanned invalid object");
478 markroot(ParFor *desc, uint32 i)
489 // Note: if you add a case here, please also update heapdump.c:dumproots.
492 scanblock(runtime·data, runtime·edata - runtime·data, runtime·gcdatamask.bytedata);
496 scanblock(runtime·bss, runtime·ebss - runtime·bss, runtime·gcbssmask.bytedata);
500 for(fb=runtime·allfin; fb; fb=fb->alllink)
501 scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), finptrmask);
505 // mark MSpan.specials
506 sg = runtime·mheap.sweepgen;
507 for(spanidx=0; spanidx<runtime·work.nspan; spanidx++) {
509 SpecialFinalizer *spf;
511 s = runtime·work.spans[spanidx];
512 if(s->state != MSpanInUse)
514 if(s->sweepgen != sg) {
515 runtime·printf("sweep %d %d\n", s->sweepgen, sg);
516 runtime·throw("gc: unswept span");
518 for(sp = s->specials; sp != nil; sp = sp->next) {
519 if(sp->kind != KindSpecialFinalizer)
521 // don't mark finalized object, but scan it so we
522 // retain everything it points to.
523 spf = (SpecialFinalizer*)sp;
524 // A finalizer can be set for an inner byte of an object, find object beginning.
525 p = (void*)((s->start << PageShift) + spf->special.offset/s->elemsize*s->elemsize);
526 scanblock(p, s->elemsize, nil);
527 scanblock((void*)&spf->fn, PtrSize, oneptr);
532 case RootFlushCaches:
537 // the rest is scanning goroutine stacks
538 if(i - RootCount >= runtime·allglen)
539 runtime·throw("markroot: bad index");
540 gp = runtime·allg[i - RootCount];
541 // remember when we first observed the G blocked;
542 // needed only for output in tracebacks
543 status = runtime·readgstatus(gp);
544 if((status == Gwaiting || status == Gsyscall) && gp->waitsince == 0)
545 gp->waitsince = runtime·work.tstart;
546 // Shrink a stack if not much of it is being used.
547 runtime·shrinkstack(gp);
548 if(runtime·readgstatus(gp) == Gdead)
549 gp->gcworkdone = true;
551 gp->gcworkdone = false;
552 restart = runtime·stopg(gp);
555 runtime·restartg(gp);
560 // Get an empty work buffer off the work.empty list,
561 // allocating new buffers as needed.
568 runtime·lfstackpush(&runtime·work.full, &b->node);
571 if(c->gcworkbuf != nil) {
576 b = (Workbuf*)runtime·lfstackpop(&runtime·work.empty);
578 b = runtime·persistentalloc(sizeof(*b), CacheLineSize, &mstats.gc_sys);
589 if(c->gcworkbuf == nil) {
593 runtime·lfstackpush(&runtime·work.empty, &b->node);
597 runtime·gcworkbuffree(void *b)
603 // Get a full work buffer off the work.full list, or return nil.
610 runtime·lfstackpush(&runtime·work.empty, &b->node);
611 b = (Workbuf*)runtime·lfstackpop(&runtime·work.full);
612 if(b != nil || runtime·work.nproc == 1)
615 runtime·xadd(&runtime·work.nwait, +1);
617 if(runtime·work.full != 0) {
618 runtime·xadd(&runtime·work.nwait, -1);
619 b = (Workbuf*)runtime·lfstackpop(&runtime·work.full);
622 runtime·xadd(&runtime·work.nwait, +1);
624 if(runtime·work.nwait == runtime·work.nproc)
627 g->m->gcstats.nprocyield++;
628 runtime·procyield(20);
630 g->m->gcstats.nosyield++;
633 g->m->gcstats.nsleep++;
645 // Make new buffer with half of b's pointers.
650 runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
651 g->m->gcstats.nhandoff++;
652 g->m->gcstats.nhandoffcnt += n;
654 // Put b on full list - let first half of b get stolen.
655 runtime·lfstackpush(&runtime·work.full, &b->node);
660 runtime·stackmapdata(StackMap *stackmap, int32 n)
662 if(n < 0 || n >= stackmap->n)
663 runtime·throw("stackmapdata: index out of range");
664 return (BitVector){stackmap->nbit, stackmap->bytedata + n*((stackmap->nbit+31)/32*4)};
667 // Scan a stack frame: local variables and function arguments/results.
669 scanframe(Stkframe *frame, void *unused)
674 uintptr size, minsize;
680 targetpc = frame->continpc;
686 runtime·printf("scanframe %s\n", runtime·funcname(f));
687 if(targetpc != f->entry)
689 pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, targetpc);
691 // We do not have a valid pcdata value but there might be a
692 // stackmap for this function. It is likely that we are looking
693 // at the function prologue, assume so and hope for the best.
697 // Scan local variables if stack frame has been allocated.
698 size = frame->varp - frame->sp;
699 if(thechar != '6' && thechar != '8')
700 minsize = sizeof(uintptr);
704 stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps);
705 if(stackmap == nil || stackmap->n <= 0) {
706 runtime·printf("runtime: frame %s untyped locals %p+%p\n", runtime·funcname(f), (byte*)(frame->varp-size), size);
707 runtime·throw("missing stackmap");
710 // Locals bitmap information, scan just the pointers in locals.
711 if(pcdata < 0 || pcdata >= stackmap->n) {
712 // don't know where we are
713 runtime·printf("runtime: pcdata is %d and %d locals stack map entries for %s (targetpc=%p)\n",
714 pcdata, stackmap->n, runtime·funcname(f), targetpc);
715 runtime·throw("scanframe: bad symbol table");
717 bv = runtime·stackmapdata(stackmap, pcdata);
718 size = (bv.n * PtrSize) / BitsPerPointer;
719 scanblock((byte*)(frame->varp - size), bv.n/BitsPerPointer*PtrSize, bv.bytedata);
723 if(frame->arglen > 0) {
724 if(frame->argmap != nil)
727 stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps);
728 if(stackmap == nil || stackmap->n <= 0) {
729 runtime·printf("runtime: frame %s untyped args %p+%p\n", runtime·funcname(f), frame->argp, (uintptr)frame->arglen);
730 runtime·throw("missing stackmap");
732 if(pcdata < 0 || pcdata >= stackmap->n) {
733 // don't know where we are
734 runtime·printf("runtime: pcdata is %d and %d args stack map entries for %s (targetpc=%p)\n",
735 pcdata, stackmap->n, runtime·funcname(f), targetpc);
736 runtime·throw("scanframe: bad symbol table");
738 bv = runtime·stackmapdata(stackmap, pcdata);
740 scanblock((byte*)frame->argp, bv.n/BitsPerPointer*PtrSize, bv.bytedata);
749 bool (*fn)(Stkframe*, void*);
751 if((runtime·readgstatus(gp)&Gscan) == 0) {
752 runtime·printf("runtime: gp=%p, goid=%D, gp->atomicstatus=%d\n", gp, gp->goid, runtime·readgstatus(gp));
753 runtime·throw("mark - bad status");
756 switch(runtime·readgstatus(gp)&~Gscan) {
758 runtime·printf("runtime: gp=%p, goid=%D, gp->atomicstatus=%d\n", gp, gp->goid, runtime·readgstatus(gp));
759 runtime·throw("mark - bad status");
763 runtime·printf("runtime: gp=%p, goid=%D, gp->atomicstatus=%d\n", gp, gp->goid, runtime·readgstatus(gp));
764 runtime·throw("mark - world not stopped");
772 runtime·throw("can't scan our own stack");
773 if((mp = gp->m) != nil && mp->helpgc)
774 runtime·throw("can't scan gchelper stack");
777 runtime·gentraceback(~(uintptr)0, ~(uintptr)0, 0, gp, 0, nil, 0x7fffffff, &fn, nil, false);
778 runtime·tracebackdefers(gp, &fn, nil);
781 // The gp has been moved to a gc safepoint. If there is gcphase-specific
782 // work, it is done here.
784 runtime·gcphasework(G *gp)
786 switch(runtime·gcphase) {
788 runtime·throw("gcphasework in bad gcphase");
796 // Disabled until concurrent GC is implemented
797 // but indicate the scan has been done.
801 gp->gcworkdone = true;
804 #pragma dataflag NOPTR
805 static byte finalizer1[] = {
806 // Each Finalizer is 5 words, ptr ptr uintptr ptr ptr.
807 // Each byte describes 4 words.
808 // Need 4 Finalizers described by 5 bytes before pattern repeats:
809 // ptr ptr uintptr ptr ptr
810 // ptr ptr uintptr ptr ptr
811 // ptr ptr uintptr ptr ptr
812 // ptr ptr uintptr ptr ptr
814 // ptr ptr uintptr ptr
815 // ptr ptr ptr uintptr
817 // uintptr ptr ptr ptr
818 // ptr uintptr ptr ptr
819 // Assumptions about Finalizer layout checked below.
820 BitsPointer | BitsPointer<<2 | BitsScalar<<4 | BitsPointer<<6,
821 BitsPointer | BitsPointer<<2 | BitsPointer<<4 | BitsScalar<<6,
822 BitsPointer | BitsPointer<<2 | BitsPointer<<4 | BitsPointer<<6,
823 BitsScalar | BitsPointer<<2 | BitsPointer<<4 | BitsPointer<<6,
824 BitsPointer | BitsScalar<<2 | BitsPointer<<4 | BitsPointer<<6,
828 runtime·queuefinalizer(byte *p, FuncVal *fn, uintptr nret, Type *fint, PtrType *ot)
834 runtime·lock(&runtime·finlock);
835 if(runtime·finq == nil || runtime·finq->cnt == runtime·finq->cap) {
836 if(runtime·finc == nil) {
837 runtime·finc = runtime·persistentalloc(FinBlockSize, 0, &mstats.gc_sys);
838 runtime·finc->cap = (FinBlockSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
839 runtime·finc->alllink = runtime·allfin;
840 runtime·allfin = runtime·finc;
841 if(finptrmask[0] == 0) {
842 // Build pointer mask for Finalizer array in block.
843 // Check assumptions made in finalizer1 array above.
844 if(sizeof(Finalizer) != 5*PtrSize ||
845 offsetof(Finalizer, fn) != 0 ||
846 offsetof(Finalizer, arg) != PtrSize ||
847 offsetof(Finalizer, nret) != 2*PtrSize ||
848 offsetof(Finalizer, fint) != 3*PtrSize ||
849 offsetof(Finalizer, ot) != 4*PtrSize ||
850 BitsPerPointer != 2) {
851 runtime·throw("finalizer out of sync");
853 for(i=0; i<nelem(finptrmask); i++)
854 finptrmask[i] = finalizer1[i%nelem(finalizer1)];
857 block = runtime·finc;
858 runtime·finc = block->next;
859 block->next = runtime·finq;
860 runtime·finq = block;
862 f = &runtime·finq->fin[runtime·finq->cnt];
869 runtime·fingwake = true;
870 runtime·unlock(&runtime·finlock);
874 runtime·iterate_finq(void (*callback)(FuncVal*, byte*, uintptr, Type*, PtrType*))
880 for(fb = runtime·allfin; fb; fb = fb->alllink) {
881 for(i = 0; i < fb->cnt; i++) {
883 callback(f->fn, f->arg, f->nret, f->fint, f->ot);
889 runtime·MSpan_EnsureSwept(MSpan *s)
893 // Caller must disable preemption.
894 // Otherwise when this function returns the span can become unswept again
895 // (if GC is triggered on another goroutine).
896 if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
897 runtime·throw("MSpan_EnsureSwept: m is not locked");
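// Sweep generation invariant (as used throughout the sweeper):
//   s->sweepgen == mheap.sweepgen-2  => span needs sweeping
//   s->sweepgen == mheap.sweepgen-1  => span is currently being swept
//   s->sweepgen == mheap.sweepgen    => span is swept and ready to use
// mheap.sweepgen is advanced by 2 at the start of each sweep cycle (see gc() below).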
899 sg = runtime·mheap.sweepgen;
900 if(runtime·atomicload(&s->sweepgen) == sg)
902 if(runtime·cas(&s->sweepgen, sg-2, sg-1)) {
903 runtime·MSpan_Sweep(s, false);
906 // unfortunate condition, and we don't have efficient means to wait
907 while(runtime·atomicload(&s->sweepgen) != sg)
911 // Sweep frees or collects finalizers for blocks not marked in the mark phase.
912 // It clears the mark bits in preparation for the next GC round.
913 // Returns true if the span was returned to heap.
914 // If preserve=true, don't return it to heap nor relink in MCentral lists;
915 // caller takes care of it.
917 runtime·MSpan_Sweep(MSpan *s, bool preserve)
919 int32 cl, n, npages, nfree;
920 uintptr size, off, step;
922 byte *p, *bitp, shift, xbits, bits;
925 MLink head, *end, *link;
926 Special *special, **specialp, *y;
927 bool res, sweepgenset;
929 // It's critical that we enter this function with preemption disabled;
930 // GC must not start while we are in the middle of this function.
931 if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
932 runtime·throw("MSpan_Sweep: m is not locked");
933 sweepgen = runtime·mheap.sweepgen;
934 if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
935 runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
936 s->state, s->sweepgen, sweepgen);
937 runtime·throw("MSpan_Sweep: bad span state");
939 arena_start = runtime·mheap.arena_start;
945 // Chunk full of small blocks.
946 npages = runtime·class_to_allocnpages[cl];
947 n = (npages << PageShift) / size;
955 // Mark any free objects in this span so we don't collect them.
956 for(link = s->freelist; link != nil; link = link->next) {
957 off = (uintptr*)link - (uintptr*)arena_start;
958 bitp = arena_start - off/wordsPerBitmapByte - 1;
959 shift = (off % wordsPerBitmapByte) * gcBits;
960 *bitp |= bitMarked<<shift;
963 // Unlink & free special records for any objects we're about to free.
964 specialp = &s->specials;
966 while(special != nil) {
967 // A finalizer can be set for an inner byte of an object, find object beginning.
968 p = (byte*)(s->start << PageShift) + special->offset/size*size;
969 off = (uintptr*)p - (uintptr*)arena_start;
970 bitp = arena_start - off/wordsPerBitmapByte - 1;
971 shift = (off % wordsPerBitmapByte) * gcBits;
972 bits = (*bitp>>shift) & bitMask;
973 if((bits&bitMarked) == 0) {
974 // Find the exact byte for which the special was set up
975 // (as opposed to object beginning).
976 p = (byte*)(s->start << PageShift) + special->offset;
977 // about to free object: splice out special record
979 special = special->next;
981 if(!runtime·freespecial(y, p, size, false)) {
982 // stop freeing of object if it has a finalizer
983 *bitp |= bitMarked << shift;
986 // object is still live: keep special record
987 specialp = &special->next;
992 // Sweep through n objects of given size starting at p.
993 // This thread owns the span now, so it can manipulate
994 // the block bitmap without atomic operations.
995 p = (byte*)(s->start << PageShift);
996 // Find bits for the beginning of the span.
997 off = (uintptr*)p - (uintptr*)arena_start;
998 bitp = arena_start - off/wordsPerBitmapByte - 1;
1000 step = size/(PtrSize*wordsPerBitmapByte);
1001 // Rewind to the previous quadruple as we move to the next
1002 // at the beginning of the loop.
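// Example (illustrative, 64-bit): for a 48-byte size class,
// step = 48/(8*2) = 3 bitmap bytes per object, which is how far the
// bitmap pointer moves for each object swept in the loop below.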
1009 for(; n > 0; n--, p += size) {
1014 shift = gcBits - shift;
1018 bits = (xbits>>shift) & bitMask;
1020 // Allocated and marked object, reset bits to allocated.
1021 if((bits&bitMarked) != 0) {
1022 *bitp &= ~(bitMarked<<shift);
1025 // At this point we know that we are looking at a garbage object
1026 // that needs to be collected.
1027 if(runtime·debug.allocfreetrace)
1028 runtime·tracefree(p, size);
1029 // Reset to allocated+noscan.
1030 *bitp = (xbits & ~((bitMarked|(BitsMask<<2))<<shift)) | ((uintptr)BitsDead<<(shift+2));
1034 runtime·throw("can't preserve large span");
1035 runtime·unmarkspan(p, s->npages<<PageShift);
1037 // important to set sweepgen before returning it to heap
1038 runtime·atomicstore(&s->sweepgen, sweepgen);
1040 // NOTE(rsc,dvyukov): The original implementation of efence
1041 // in CL 22060046 used SysFree instead of SysFault, so that
1042 // the operating system would eventually give the memory
1043 // back to us again, so that an efence program could run
1044 // longer without running out of memory. Unfortunately,
1045 // calling SysFree here without any kind of adjustment of the
1046 // heap data structures means that when the memory does
1047 // come back to us, we have the wrong metadata for it, either in
1048 // the MSpan structures or in the garbage collection bitmap.
1049 // Using SysFault here means that the program will run out of
1050 // memory fairly quickly in efence mode, but at least it won't
1051 // have mysterious crashes due to confused memory reuse.
1052 // It should be possible to switch back to SysFree if we also
1053 // implement and then call some kind of MHeap_DeleteSpan.
1054 if(runtime·debug.efence) {
1055 s->limit = nil; // prevent mlookup from finding this span
1056 runtime·SysFault(p, size);
1058 runtime·MHeap_Free(&runtime·mheap, s, 1);
1059 c->local_nlargefree++;
1060 c->local_largefree += size;
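// Freeing size bytes of garbage lowers the sweep target by the freed memory
// plus its GOGC-proportional share, i.e. size*(gcpercent+100)/100, mirroring
// how next_gc was computed from the (conservative) heap size at the end of
// the mark phase.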
1061 runtime·xadd64(&mstats.next_gc, -(uint64)(size * (runtime·gcpercent + 100)/100));
1064 // Free small object.
1065 if(size > 2*sizeof(uintptr))
1066 ((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll; // mark as "needs to be zeroed"
1067 else if(size > sizeof(uintptr))
1068 ((uintptr*)p)[1] = 0;
1070 end->next = (MLink*)p;
1076 // We need to set s->sweepgen = h->sweepgen only when all blocks are swept,
1077 // because of the potential for a concurrent free/SetFinalizer.
1078 // But we need to set it before we make the span available for allocation
1079 // (return it to heap or mcentral), because allocation code assumes that a
1080 // span is already swept if available for allocation.
1082 if(!sweepgenset && nfree == 0) {
1083 // The span must be in our exclusive ownership until we update sweepgen,
1084 // check for potential races.
1085 if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
1086 runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
1087 s->state, s->sweepgen, sweepgen);
1088 runtime·throw("MSpan_Sweep: bad span state after sweep");
1090 runtime·atomicstore(&s->sweepgen, sweepgen);
1093 c->local_nsmallfree[cl] += nfree;
1094 c->local_cachealloc -= nfree * size;
1095 runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (runtime·gcpercent + 100)/100));
1096 res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl].mcentral, s, nfree, head.next, end, preserve);
1097 // MCentral_FreeSpan updates sweepgen
1102 // State of background runtime·sweep.
1103 // Protected by runtime·gclock.
1104 typedef struct SweepData SweepData;
1110 uint32 spanidx; // background sweeper position
1115 SweepData runtime·sweep;
1118 // returns number of pages returned to heap, or -1 if there is nothing to sweep
1120 runtime·sweepone(void)
1126 // increment locks to ensure that the goroutine is not preempted
1127 // in the middle of the sweep, thus leaving the span in an inconsistent state for the next GC
1129 sg = runtime·mheap.sweepgen;
1131 idx = runtime·xadd(&runtime·sweep.spanidx, 1) - 1;
1132 if(idx >= runtime·work.nspan) {
1133 runtime·mheap.sweepdone = true;
1137 s = runtime·work.spans[idx];
1138 if(s->state != MSpanInUse) {
1142 if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1))
1145 if(!runtime·MSpan_Sweep(s, false))
1155 g->m->scalararg[0] = runtime·sweepone();
1158 #pragma textflag NOSPLIT
1160 runtime·gosweepone(void)
1166 return g->m->scalararg[0];
1169 #pragma textflag NOSPLIT
1171 runtime·gosweepdone(void)
1173 return runtime·mheap.sweepdone;
1177 runtime·gchelper(void)
1181 g->m->traceback = 2;
1184 // parallel mark over gc roots
1185 runtime·parfordo(runtime·work.markfor);
1187 // help other threads scan secondary blocks
1188 scanblock(nil, 0, nil);
1190 nproc = runtime·work.nproc; // runtime·work.nproc can change right after we increment runtime·work.ndone
1191 if(runtime·xadd(&runtime·work.ndone, +1) == nproc-1)
1192 runtime·notewakeup(&runtime·work.alldone);
1193 g->m->traceback = 0;
1202 for(pp=runtime·allp; p=*pp; pp++) {
1206 runtime·purgecachedstats(c);
1211 flushallmcaches(void)
1216 // Flush MCache's to MCentral.
1217 for(pp=runtime·allp; p=*pp; pp++) {
1221 runtime·MCache_ReleaseAll(c);
1222 runtime·stackcache_clear(c);
1227 flushallmcaches_m(G *gp)
1230 runtime·gogo(&gp->sched);
1234 runtime·updatememstats(GCStats *stats)
1244 runtime·memclr((byte*)stats, sizeof(*stats));
1245 for(mp=runtime·allm; mp; mp=mp->alllink) {
1247 src = (uint64*)&mp->gcstats;
1248 dst = (uint64*)stats;
1249 for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
1251 runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
1254 mstats.mcache_inuse = runtime·mheap.cachealloc.inuse;
1255 mstats.mspan_inuse = runtime·mheap.spanalloc.inuse;
1256 mstats.sys = mstats.heap_sys + mstats.stacks_sys + mstats.mspan_sys +
1257 mstats.mcache_sys + mstats.buckhash_sys + mstats.gc_sys + mstats.other_sys;
1259 // Calculate memory allocator stats.
1260 // During program execution we only count number of frees and amount of freed memory.
1261 // Current number of alive objects in the heap and amount of alive heap memory
1262 // are calculated by scanning all spans.
1263 // Total number of mallocs is calculated as number of frees plus number of alive objects.
1264 // Similarly, total amount of allocated memory is calculated as amount of freed memory
1265 // plus amount of alive heap memory.
1267 mstats.total_alloc = 0;
1270 for(i = 0; i < nelem(mstats.by_size); i++) {
1271 mstats.by_size[i].nmalloc = 0;
1272 mstats.by_size[i].nfree = 0;
1275 // Flush MCache's to MCentral.
1279 fn = flushallmcaches_m;
1283 // Aggregate local stats.
1286 // Scan all spans and count number of alive objects.
1287 runtime·lock(&runtime·mheap.lock);
1288 for(i = 0; i < runtime·mheap.nspan; i++) {
1289 s = runtime·mheap.allspans[i];
1290 if(s->state != MSpanInUse)
1292 if(s->sizeclass == 0) {
1294 mstats.alloc += s->elemsize;
1296 mstats.nmalloc += s->ref;
1297 mstats.by_size[s->sizeclass].nmalloc += s->ref;
1298 mstats.alloc += s->ref*s->elemsize;
1301 runtime·unlock(&runtime·mheap.lock);
1303 // Aggregate by size class.
1305 mstats.nfree = runtime·mheap.nlargefree;
1306 for(i = 0; i < nelem(mstats.by_size); i++) {
1307 mstats.nfree += runtime·mheap.nsmallfree[i];
1308 mstats.by_size[i].nfree = runtime·mheap.nsmallfree[i];
1309 mstats.by_size[i].nmalloc += runtime·mheap.nsmallfree[i];
1310 smallfree += runtime·mheap.nsmallfree[i] * runtime·class_to_size[i];
1312 mstats.nfree += mstats.tinyallocs;
1313 mstats.nmalloc += mstats.nfree;
1315 // Calculate derived stats.
1316 mstats.total_alloc = mstats.alloc + runtime·mheap.largefree + smallfree;
1317 mstats.heap_alloc = mstats.alloc;
1318 mstats.heap_objects = mstats.nmalloc - mstats.nfree;
1321 // Structure of arguments passed to function gc().
1322 // This allows the arguments to be passed via runtime·mcall.
1325 int64 start_time; // start time of GC in ns (just before stoptheworld)
1329 static void gc(struct gc_args *args);
1332 runtime·readgogc(void)
1336 p = runtime·getenv("GOGC");
1337 if(p == nil || p[0] == '\0')
1339 if(runtime·strcmp(p, (byte*)"off") == 0)
1341 return runtime·atoi(p);
1345 runtime·gcinit(void)
1347 if(sizeof(Workbuf) != WorkbufSize)
1348 runtime·throw("runtime: size of Workbuf is suboptimal");
1350 runtime·work.markfor = runtime·parforalloc(MaxGcproc);
1351 runtime·gcpercent = runtime·readgogc();
1352 runtime·gcdatamask = unrollglobgcprog(runtime·gcdata, runtime·edata - runtime·data);
1353 runtime·gcbssmask = unrollglobgcprog(runtime·gcbss, runtime·ebss - runtime·bss);
1363 runtime·casgstatus(gp, Grunning, Gwaiting);
1364 gp->waitreason = runtime·gostringnocopy((byte*)"garbage collection");
1366 a.start_time = (uint64)(g->m->scalararg[0]) | ((uint64)(g->m->scalararg[1]) << 32);
1367 a.eagersweep = g->m->scalararg[2];
1371 // Work out path from root to bad block.
1374 if(nbadblock >= nelem(badblock))
1375 runtime·throw("cannot find path to bad pointer");
1379 runtime·casgstatus(gp, Gwaiting, Grunning);
1383 gc(struct gc_args *args)
1385 int64 t0, t1, t2, t3, t4;
1386 uint64 heap0, heap1, obj;
1390 runtime·printf("GC start\n");
1392 if(runtime·debug.allocfreetrace)
1395 g->m->traceback = 2;
1396 t0 = args->start_time;
1397 runtime·work.tstart = args->start_time;
1400 if(runtime·debug.gctrace)
1401 t1 = runtime·nanotime();
1403 // Sweep what is not swept by bgsweep.
1404 while(runtime·sweepone() != -1)
1405 runtime·sweep.npausesweep++;
1407 // Cache runtime.mheap.allspans in work.spans to avoid conflicts with
1408 // resizing/freeing allspans.
1409 // New spans can be created while GC progresses, but they are not garbage for this GC:
1411 // - new stack spans can be created even while the world is stopped.
1412 // - new malloc spans can be created during the concurrent sweep
1414 // Even if this is stop-the-world, a concurrent exitsyscall can allocate a stack from heap.
1415 runtime·lock(&runtime·mheap.lock);
1416 // Free the old cached sweep array if necessary.
1417 if(runtime·work.spans != nil && runtime·work.spans != runtime·mheap.allspans)
1418 runtime·SysFree(runtime·work.spans, runtime·work.nspan*sizeof(runtime·work.spans[0]), &mstats.other_sys);
1419 // Cache the current array for marking.
1420 runtime·mheap.gcspans = runtime·mheap.allspans;
1421 runtime·work.spans = runtime·mheap.allspans;
1422 runtime·work.nspan = runtime·mheap.nspan;
1423 runtime·unlock(&runtime·mheap.lock);
1425 runtime·work.nwait = 0;
1426 runtime·work.ndone = 0;
1427 runtime·work.nproc = runtime·gcprocs();
1428 runtime·parforsetup(runtime·work.markfor, runtime·work.nproc, RootCount + runtime·allglen, nil, false, markroot);
1429 if(runtime·work.nproc > 1) {
1430 runtime·noteclear(&runtime·work.alldone);
1431 runtime·helpgc(runtime·work.nproc);
1435 if(runtime·debug.gctrace)
1436 t2 = runtime·nanotime();
1439 runtime·parfordo(runtime·work.markfor);
1440 scanblock(nil, 0, nil);
1443 if(runtime·debug.gctrace)
1444 t3 = runtime·nanotime();
1446 if(runtime·work.nproc > 1)
1447 runtime·notesleep(&runtime·work.alldone);
1449 runtime·shrinkfinish();
1452 // next_gc calculation is tricky with concurrent sweep since we don't know the size of the live heap
1453 // estimate what the live heap size was after the previous GC (for tracing only)
1454 heap0 = mstats.next_gc*100/(runtime·gcpercent+100);
1455 // conservatively set next_gc to high value assuming that everything is live
1456 // concurrent/lazy sweep will reduce this number while discovering new garbage
1457 mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*runtime·gcpercent/100;
1459 t4 = runtime·nanotime();
1460 runtime·atomicstore64(&mstats.last_gc, runtime·unixnanotime()); // must be Unix time to make sense to user
1461 mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
1462 mstats.pause_end[mstats.numgc%nelem(mstats.pause_end)] = t4;
1463 mstats.pause_total_ns += t4 - t0;
1466 runtime·printf("pause %D\n", t4-t0);
1468 if(runtime·debug.gctrace) {
1469 heap1 = mstats.heap_alloc;
1470 runtime·updatememstats(&stats);
1471 if(heap1 != mstats.heap_alloc) {
1472 runtime·printf("runtime: mstats skew: heap=%D/%D\n", heap1, mstats.heap_alloc);
1473 runtime·throw("mstats skew");
1475 obj = mstats.nmalloc - mstats.nfree;
1477 stats.nprocyield += runtime·work.markfor->nprocyield;
1478 stats.nosyield += runtime·work.markfor->nosyield;
1479 stats.nsleep += runtime·work.markfor->nsleep;
1481 runtime·printf("gc%d(%d): %D+%D+%D+%D us, %D -> %D MB, %D (%D-%D) objects,"
1484 " %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
1485 mstats.numgc, runtime·work.nproc, (t1-t0)/1000, (t2-t1)/1000, (t3-t2)/1000, (t4-t3)/1000,
1486 heap0>>20, heap1>>20, obj,
1487 mstats.nmalloc, mstats.nfree,
1489 runtime·work.nspan, runtime·sweep.nbgsweep, runtime·sweep.npausesweep,
1490 stats.nhandoff, stats.nhandoffcnt,
1491 runtime·work.markfor->nsteal, runtime·work.markfor->nstealcnt,
1492 stats.nprocyield, stats.nosyield, stats.nsleep);
1493 runtime·sweep.nbgsweep = runtime·sweep.npausesweep = 0;
1496 // See the comment at the beginning of this function as to why we need the following.
1497 // Even if this is still stop-the-world, a concurrent exitsyscall can allocate a stack from heap.
1498 runtime·lock(&runtime·mheap.lock);
1499 // Free the old cached mark array if necessary.
1500 if(runtime·work.spans != nil && runtime·work.spans != runtime·mheap.allspans)
1501 runtime·SysFree(runtime·work.spans, runtime·work.nspan*sizeof(runtime·work.spans[0]), &mstats.other_sys);
1502 // Cache the current array for sweeping.
1503 runtime·mheap.gcspans = runtime·mheap.allspans;
1504 runtime·mheap.sweepgen += 2;
1505 runtime·mheap.sweepdone = false;
1506 runtime·work.spans = runtime·mheap.allspans;
1507 runtime·work.nspan = runtime·mheap.nspan;
1508 runtime·sweep.spanidx = 0;
1509 runtime·unlock(&runtime·mheap.lock);
1511 if(ConcurrentSweep && !args->eagersweep) {
1512 runtime·lock(&runtime·gclock);
1513 if(runtime·sweep.g == nil)
1514 runtime·sweep.g = runtime·newproc1(&bgsweepv, nil, 0, 0, gc);
1515 else if(runtime·sweep.parked) {
1516 runtime·sweep.parked = false;
1517 runtime·ready(runtime·sweep.g);
1519 runtime·unlock(&runtime·gclock);
1521 // Sweep all spans eagerly.
1522 while(runtime·sweepone() != -1)
1523 runtime·sweep.npausesweep++;
1524 // Do an additional mProf_GC, because all 'free' events are now real as well.
1529 g->m->traceback = 0;
1532 runtime·printf("GC end\n");
1535 extern uintptr runtime·sizeof_C_MStats;
1537 static void readmemstats_m(void);
1540 runtime·readmemstats_m(void)
1544 stats = g->m->ptrarg[0];
1545 g->m->ptrarg[0] = nil;
1547 runtime·updatememstats(nil);
1548 // The size of the trailing by_size array differs between Go and C:
1549 // NumSizeClasses was changed, but we cannot change the Go struct because of backward compatibility.
1550 runtime·memmove(stats, &mstats, runtime·sizeof_C_MStats);
1552 // Stack numbers are part of the heap numbers, separate those out for user consumption
1553 stats->stacks_sys = stats->stacks_inuse;
1554 stats->heap_inuse -= stats->stacks_inuse;
1555 stats->heap_sys -= stats->stacks_inuse;
1558 static void readgcstats_m(void);
1560 #pragma textflag NOSPLIT
1562 runtime∕debug·readGCStats(Slice *pauses)
1566 g->m->ptrarg[0] = pauses;
1578 pauses = g->m->ptrarg[0];
1579 g->m->ptrarg[0] = nil;
1581 // Calling code in runtime/debug should make the slice large enough.
1582 if(pauses->cap < nelem(mstats.pause_ns)+3)
1583 runtime·throw("runtime: short slice passed to readGCStats");
1585 // Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
1586 p = (uint64*)pauses->array;
1587 runtime·lock(&runtime·mheap.lock);
1590 if(n > nelem(mstats.pause_ns))
1591 n = nelem(mstats.pause_ns);
1593 // The pause buffer is circular. The most recent pause is at
1594 // pause_ns[(numgc-1)%nelem(pause_ns)], and then backward
1595 // from there to go back farther in time. We deliver the times
1596 // most recent first (in p[0]).
1597 for(i=0; i<n; i++) {
1598 j = (mstats.numgc-1-i)%nelem(mstats.pause_ns);
1599 p[i] = mstats.pause_ns[j];
1600 p[n+i] = mstats.pause_end[j];
1603 p[n+n] = mstats.last_gc;
1604 p[n+n+1] = mstats.numgc;
1605 p[n+n+2] = mstats.pause_total_ns;
1606 runtime·unlock(&runtime·mheap.lock);
1607 pauses->len = n+n+3;
1611 runtime·setgcpercent_m(void)
1616 in = (int32)(intptr)g->m->scalararg[0];
1618 runtime·lock(&runtime·mheap.lock);
1619 out = runtime·gcpercent;
1622 runtime·gcpercent = in;
1623 runtime·unlock(&runtime·mheap.lock);
1625 g->m->scalararg[0] = (uintptr)(intptr)out;
1631 if(g->m->helpgc < 0 || g->m->helpgc >= MaxGcproc)
1632 runtime·throw("gchelperstart: bad m->helpgc");
1634 runtime·throw("gchelper not running on g0 stack");
1638 runtime·wakefing(void)
1643 runtime·lock(&runtime·finlock);
1644 if(runtime·fingwait && runtime·fingwake) {
1645 runtime·fingwait = false;
1646 runtime·fingwake = false;
1649 runtime·unlock(&runtime·finlock);
1653 // Recursively unrolls GC program in prog.
1654 // mask is where to store the result.
1655 // ppos is a pointer to position in mask, in bits.
1656 // sparse says to generate a 4-bit-per-word mask for the heap (a 2-bit-per-word mask for data/bss otherwise).
1658 unrollgcprog1(byte *mask, byte *prog, uintptr *ppos, bool inplace, bool sparse)
1660 uintptr pos, siz, i, off;
1661 byte *arena_start, *prog1, v, *bitp, shift;
1663 arena_start = runtime·mheap.arena_start;
1671 for(i = 0; i < siz; i++) {
1672 v = prog[i/PointersPerByte];
1673 v >>= (i%PointersPerByte)*BitsPerPointer;
1676 // Store directly into GC bitmap.
1677 off = (uintptr*)(mask+pos) - (uintptr*)arena_start;
1678 bitp = arena_start - off/wordsPerBitmapByte - 1;
1679 shift = (off % wordsPerBitmapByte) * gcBits;
1682 *bitp |= v<<(shift+2);
1693 pos += BitsPerPointer;
1696 prog += ROUND(siz*BitsPerPointer, 8)/8;
1701 for(i = 0; i < PtrSize; i++)
1702 siz = (siz<<8) + prog[PtrSize-i-1];
1705 for(i = 0; i < siz; i++)
1706 prog1 = unrollgcprog1(mask, prog, &pos, inplace, sparse);
1707 if(prog1[0] != insArrayEnd)
1708 runtime·throw("unrollgcprog: array does not end with insArrayEnd");
1716 runtime·throw("unrollgcprog: unknown instruction");
1721 // Unrolls GC program prog for data/bss, returns dense GC mask.
1723 unrollglobgcprog(byte *prog, uintptr size)
1726 uintptr pos, masksize;
1728 masksize = ROUND(ROUND(size, PtrSize)/PtrSize*BitsPerPointer, 8)/8;
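// Example (illustrative, 64-bit): for size=100 bytes, ROUND(100,8)/8 = 13 words,
// 13*BitsPerPointer = 26 bits, ROUND(26,8)/8 = 4 mask bytes.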
1729 mask = runtime·persistentalloc(masksize+1, 0, &mstats.gc_sys);
1730 mask[masksize] = 0xa1;
1732 prog = unrollgcprog1(mask, prog, &pos, false, false);
1733 if(pos != size/PtrSize*BitsPerPointer) {
1734 runtime·printf("unrollglobgcprog: bad program size, got %D, expect %D\n",
1735 (uint64)pos, (uint64)size/PtrSize*BitsPerPointer);
1736 runtime·throw("unrollglobgcprog: bad program size");
1738 if(prog[0] != insEnd)
1739 runtime·throw("unrollglobgcprog: program does not end with insEnd");
1740 if(mask[masksize] != 0xa1)
1741 runtime·throw("unrollglobgcprog: overflow");
1742 return (BitVector){masksize*8, mask};
1746 runtime·unrollgcproginplace_m(void)
1748 uintptr size, size0, pos, off;
1749 byte *arena_start, *prog, *bitp, shift;
1753 v = g->m->ptrarg[0];
1754 typ = g->m->ptrarg[1];
1755 size = g->m->scalararg[0];
1756 size0 = g->m->scalararg[1];
1757 g->m->ptrarg[0] = nil;
1758 g->m->ptrarg[1] = nil;
1761 prog = (byte*)typ->gc[1];
1763 unrollgcprog1(v, prog, &pos, true, true);
1764 // Mark first word as bitAllocated.
1765 arena_start = runtime·mheap.arena_start;
1766 off = (uintptr*)v - (uintptr*)arena_start;
1767 bitp = arena_start - off/wordsPerBitmapByte - 1;
1768 shift = (off % wordsPerBitmapByte) * gcBits;
1769 *bitp |= bitBoundary<<shift;
1770 // Mark word after last as BitsDead.
1772 off = (uintptr*)((byte*)v + size0) - (uintptr*)arena_start;
1773 bitp = arena_start - off/wordsPerBitmapByte - 1;
1774 shift = (off % wordsPerBitmapByte) * gcBits;
1775 *bitp &= ~(bitPtrMask<<shift) | ((uintptr)BitsDead<<(shift+2));
1779 // Unrolls GC program in typ->gc[1] into typ->gc[0]
1781 runtime·unrollgcprog_m(void)
1789 typ = g->m->ptrarg[0];
1790 g->m->ptrarg[0] = nil;
1792 runtime·lock(&lock);
1793 mask = (byte*)typ->gc[0];
1795 pos = 8; // skip the unroll flag
1796 prog = (byte*)typ->gc[1];
1797 prog = unrollgcprog1(mask, prog, &pos, false, true);
1798 if(prog[0] != insEnd)
1799 runtime·throw("unrollgcprog: program does not end with insEnd");
1800 if(((typ->size/PtrSize)%2) != 0) {
1801 // repeat the program twice
1802 prog = (byte*)typ->gc[1];
1803 unrollgcprog1(mask, prog, &pos, false, true);
1806 // atomic way to say mask[0] = 1
1807 x = *(uintptr*)mask;
1809 runtime·atomicstorep((void**)mask, (void*)x);
1811 runtime·unlock(&lock);
1814 // mark the span of memory at v as having n blocks of the given size.
1815 // if leftover is true, there is leftover space at the end of the span.
1817 runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
1819 uintptr i, off, step;
1822 if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
1823 runtime·throw("markspan: bad pointer");
1825 // Find bits of the beginning of the span.
1826 off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset
1827 b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
1828 if((off%wordsPerBitmapByte) != 0)
1829 runtime·throw("markspan: unaligned length");
1831 // Okay to use non-atomic ops here, because we control
1832 // the entire span, and each bitmap byte has bits for only
1833 // one span, so no other goroutines are changing these bitmap words.
1835 if(size == PtrSize) {
1836 // Possible only on 64-bit systems (the minimal size class is 8 bytes).
1837 // Poor man's memset(0x11).
1838 if(0x11 != ((bitBoundary+BitsDead)<<gcBits) + (bitBoundary+BitsDead))
1839 runtime·throw("markspan: bad bits");
1840 if((n%(wordsPerBitmapByte*PtrSize)) != 0)
1841 runtime·throw("markspan: unaligned length");
1842 b = b - n/wordsPerBitmapByte + 1; // find first byte
1843 if(((uintptr)b%PtrSize) != 0)
1844 runtime·throw("markspan: unaligned pointer");
1845 for(i = 0; i != n; i += wordsPerBitmapByte*PtrSize, b += PtrSize)
1846 *(uintptr*)b = (uintptr)0x1111111111111111ULL; // bitBoundary+BitsDead
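// Each byte of the 0x11 pattern describes two words: each nibble is
// bitBoundary+BitsDead, i.e. the boundary bit set, the mark bit clear, and
// BitsDead in the type bits (the check above guarantees this equals 0x11).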
1851 n++; // mark a boundary just past end of last block too
1852 step = size/(PtrSize*wordsPerBitmapByte);
1853 for(i = 0; i != n; i++, b -= step)
1854 *b = bitBoundary|(BitsDead<<2);
1857 // unmark the span of memory at v of length n bytes.
1859 runtime·unmarkspan(void *v, uintptr n)
1864 if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
1865 runtime·throw("unmarkspan: bad pointer");
1867 off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset
1868 if((off % (PtrSize*wordsPerBitmapByte)) != 0)
1869 runtime·throw("unmarkspan: unaligned pointer");
1870 b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
1872 if(n%(PtrSize*wordsPerBitmapByte) != 0)
1873 runtime·throw("unmarkspan: unaligned length");
1874 // Okay to use non-atomic ops here, because we control
1875 // the entire span, and each bitmap word has bits for only
1876 // one span, so no other goroutines are changing these bitmap words.
1878 n /= wordsPerBitmapByte;
1879 runtime·memclr(b - n + 1, n);
1883 runtime·MHeap_MapBits(MHeap *h)
1885 // Caller has added extra mappings to the arena.
1886 // Add extra mappings of bitmap words as needed.
1887 // We allocate extra bitmap pieces in chunks of bitmapChunk.
1893 n = (h->arena_used - h->arena_start) / (PtrSize*wordsPerBitmapByte);
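// On 64-bit this works out to one bitmap byte per 16 heap bytes
// (PtrSize*wordsPerBitmapByte = 8*2), so the bitmap is 1/16th the size of
// the mapped arena (illustrative; 1/8th on 32-bit).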
1894 n = ROUND(n, bitmapChunk);
1895 n = ROUND(n, PhysPageSize);
1896 if(h->bitmap_mapped >= n)
1899 runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped, h->arena_reserved, &mstats.gc_sys);
1900 h->bitmap_mapped = n;
1904 getgcmaskcb(Stkframe *frame, void *ctxt)
1909 if(frame->sp <= frame0->sp && frame0->sp < frame->varp) {
1916 // Returns GC type info for object p for testing.
1918 runtime·getgcmask(byte *p, Type *t, byte **mask, uintptr *len)
1922 byte *base, bits, shift, *b;
1923 bool (*cb)(Stkframe*, void*);
1929 if(p >= runtime·data && p < runtime·edata) {
1930 n = ((PtrType*)t)->elem->size;
1932 *mask = runtime·mallocgc(*len, nil, FlagNoScan);
1933 for(i = 0; i < n; i += PtrSize) {
1934 off = (p+i-runtime·data)/PtrSize;
1935 bits = (runtime·gcdatamask.bytedata[off/PointersPerByte] >> ((off%PointersPerByte)*BitsPerPointer))&BitsMask;
1936 (*mask)[i/PtrSize] = bits;
1941 if(p >= runtime·bss && p < runtime·ebss) {
1942 n = ((PtrType*)t)->elem->size;
1944 *mask = runtime·mallocgc(*len, nil, FlagNoScan);
1945 for(i = 0; i < n; i += PtrSize) {
1946 off = (p+i-runtime·bss)/PtrSize;
1947 bits = (runtime·gcbssmask.bytedata[off/PointersPerByte] >> ((off%PointersPerByte)*BitsPerPointer))&BitsMask;
1948 (*mask)[i/PtrSize] = bits;
1953 if(runtime·mlookup(p, &base, &n, nil)) {
1955 *mask = runtime·mallocgc(*len, nil, FlagNoScan);
1956 for(i = 0; i < n; i += PtrSize) {
1957 off = (uintptr*)(base+i) - (uintptr*)runtime·mheap.arena_start;
1958 b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
1959 shift = (off % wordsPerBitmapByte) * gcBits;
1960 bits = (*b >> (shift+2))&BitsMask;
1961 (*mask)[i/PtrSize] = bits;
1967 frame.sp = (uintptr)p;
1969 runtime·gentraceback(g->m->curg->sched.pc, g->m->curg->sched.sp, 0, g->m->curg, 0, nil, 1000, &cb, &frame, false);
1970 if(frame.fn != nil) {
1979 targetpc = frame.continpc;
1982 if(targetpc != f->entry)
1984 pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, targetpc);
1987 stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps);
1988 if(stackmap == nil || stackmap->n <= 0)
1990 bv = runtime·stackmapdata(stackmap, pcdata);
1991 size = bv.n/BitsPerPointer*PtrSize;
1992 n = ((PtrType*)t)->elem->size;
1994 *mask = runtime·mallocgc(*len, nil, FlagNoScan);
1995 for(i = 0; i < n; i += PtrSize) {
1996 off = (p+i-(byte*)frame.varp+size)/PtrSize;
1997 bits = (bv.bytedata[off*BitsPerPointer/8] >> ((off*BitsPerPointer)%8))&BitsMask;
1998 (*mask)[i/PtrSize] = bits;
2003 void runtime·gc_unixnanotime(int64 *now);
2006 runtime·unixnanotime(void)
2010 runtime·gc_unixnanotime(&now);