# allocator | |
### [ spans, bitmap, arena ] | |
schedinit | |
mallocinit | |
calculate arena, spans, bitmap size | |
arena = 512GB | |
spans = 512MB (arena / page * ptr, 512G / 8K * 8, map page => *span) | |
bitmap = 16GB (arena / (ptr * 8 / 2), 512G / 32, map 1 byte => 4 pointer | 2 bit => pointer) | |
reserve size
pSize = bitmapSize + spansSize + arenaSize + _PageSize | |
pSize = 0x400000000 + 0x20000000 + 0x8000000000 + 0x2000 | |
pSize = 0x0000008420002000 = 528.5GB | |
reserved address
p = 0x000000c000000000 | |
spansStart = 0xc000000000 | |
mheap_.bitmap = 0xc420000000 (same as arena start)
mheap_.arena_start = 0xc420000000 | |
mheap_.arena_end: = 0x14420002000 | |
use fixalloc to allocate _g_.m.mcache | |
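A minimal sketch (plain Go, not runtime code) reproducing the reservation arithmetic above for amd64; the 0xc000000000 hint is the one mallocinit uses:
    package main

    import "fmt"

    func main() {
        const (
            pageSize  = 8 << 10   // 8KB pages
            ptrSize   = 8         // 64-bit pointers
            arenaSize = 512 << 30 // 512GB arena
        )
        spansSize := arenaSize / pageSize * ptrSize // one *mspan per page  => 512MB
        bitmapSize := arenaSize / (ptrSize * 8 / 2) // 2 bits per heap word => 16GB
        pSize := bitmapSize + spansSize + arenaSize + pageSize

        p := uintptr(0xc000000000) // reservation hint used by mallocinit on amd64
        spansStart := p
        bitmap := p + uintptr(spansSize) + uintptr(bitmapSize) // bitmap is indexed backward from here
        arenaStart := bitmap
        arenaEnd := p + uintptr(pSize)

        fmt.Printf("pSize       = %#x (~528.5GB)\n", pSize)
        fmt.Printf("spansStart  = %#x\n", spansStart) // 0xc000000000
        fmt.Printf("bitmap      = %#x\n", bitmap)     // 0xc420000000
        fmt.Printf("arena_start = %#x\n", arenaStart) // 0xc420000000
        fmt.Printf("arena_end   = %#x\n", arenaEnd)   // 0x14420002000
    }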
### fixalloc | |
how fixalloc works:
memory is never freed back to the OS
system => persistentalloc
256KB each time
persistentalloc => fixalloc
16KB each time
fixalloc
f.size each time
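A toy sketch of the fixalloc idea (fixed-size chunks carved from larger blocks, recycled through a free list, never returned to the OS); the names here are illustrative, not the runtime's:
    // fixAllocSketch hands out fixed-size chunks. Freed chunks go on a free list;
    // when the list is empty it carves new chunks from a 16KB block (which would
    // come from persistentalloc in the runtime, here just make()).
    type fixAllocSketch struct {
        size  int
        list  [][]byte // recycled chunks
        chunk []byte   // remainder of the current 16KB block
    }

    func (f *fixAllocSketch) alloc() []byte {
        if n := len(f.list); n > 0 {
            v := f.list[n-1]
            f.list = f.list[:n-1]
            return v
        }
        if len(f.chunk) < f.size {
            f.chunk = make([]byte, 16<<10) // stand-in for persistentalloc(16K)
        }
        v := f.chunk[:f.size:f.size]
        f.chunk = f.chunk[f.size:]
        return v
    }

    func (f *fixAllocSketch) free(v []byte) { f.list = append(f.list, v) }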
### struct | |
mheap (global heap)
- allspans []*mspan (a slice of all mspans ever created) | |
- spans []*mspan (array used to index page(8K) => span pointer) | |
- bitmap (gc metadata) | |
- [arena_start, arena_used) metadata is mapped | |
- [arena_alloc, arena_end) allocate area | |
- mcentral[numSpanClasses(67*2=134)] | |
- spanalloc (fixed allocator for mspan) | |
- cachealloc (fixed allocator for mcache) | |
- treapalloc (fixed allocator for treapNodes) | |
- specialfinalizeralloc (fixed allocator for specialfinalizer) | |
- specialprofilealloc (fixed allocator for specialprofile) | |
- speciallock mutex | |
- sweepgen | |
- sweepSpans | |
- contains two mspan stacks: | |
- one of swept in-use spans, and one of unswept in-use spans | |
- swept spans are in sweepSpans[sweepgen/2%2] | |
- unswept spans are in sweepSpans[1-sweepgen/2%2] | |
- sweeping pops spans from the unswept stack and pushes spans | |
that are still in-use on the swept stack | |
- likewise, allocating an in-use span pushes it on the swept stack | |
- alloc => alloc_m => h.sweepSpans[h.sweepgen/2%2].push(s) | |
mcentral | |
- lock mutex | |
- spanclass spanClass (0, 1, 2, ..., 133) | |
- nonempty mSpanList (double linked list, contains free elements) | |
- empty mSpanList (double linked list, no free elements or cached in mcache) | |
mcache (per thread cache) | |
- tiny (base address of the current tiny alloc block)
- tinyoffset (offset of the free space within the current tiny block)
- alloc [numSpanClasses(67*2=134)]*mspan | |
mspan | |
- next *mspan | |
- prev *mspan | |
- startAddr | |
- limit | |
- npages (size / 8K) | |
- manualFreeList gclinkptr (list of free objects in _MSpanManual spans) | |
- freeindex (base index of allocCache) | |
- nelems (max elements this span can hold) | |
- allocCache (cached ~allocBits for the 64 elements starting at freeindex; a 1 bit means free)
- allocBits *gcBits (bitmap of allocated elements, replaced by gcmarkBits after sweep) | |
- gcmarkBits *gcBits (bitmap of marked elements, clear after sweep) | |
- sweepgen | |
- if span.sweepgen == heap.sweepgen - 2, the span needs sweeping | |
- if span.sweepgen == heap.sweepgen - 1, the span is sweeping | |
- if span.sweepgen == heap.sweepgen, the span is swept and ready to use | |
- heap.sweepgen is incremented by 2 after every gc | |
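The sweepgen rules above, written out as a small helper (a sketch; the runtime does these comparisons inline):
    // sweepState classifies a span against the heap's current sweepgen,
    // following the rules listed above.
    func sweepState(spanGen, heapGen uint32) string {
        switch spanGen {
        case heapGen - 2:
            return "needs sweeping"
        case heapGen - 1:
            return "being swept"
        case heapGen:
            return "swept, ready to use"
        }
        return "corrupted sweepgen"
    }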
### span size classes | |
see sizeclasses.go | |
dataSize: 8 size: 8 sizeclass: 1 noscan: false spc: 2 | |
dataSize: 16 size: 16 sizeclass: 2 noscan: false spc: 4 | |
dataSize: 17 size: 32 sizeclass: 3 noscan: true spc: 7 | |
dataSize: 40 size: 48 sizeclass: 4 noscan: false spc: 8 | |
alloc: [ | |
0: class 0 scan, | |
1: class 0 noscan, | |
2: class 1 scan, | |
3: class 1 noscan, | |
4: class 2 scan, | |
5: class 2 noscan, (tinySpanClass) | |
6: class 3 scan, | |
7: class 3 noscan | |
] | |
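The spc numbers above come from packing the size class together with the noscan bit; a sketch of the mapping (it mirrors makeSpanClass in mheap.go):
    type spanClass uint8

    // makeSpanClass packs sizeclass and the noscan bit into one value,
    // which is why c.alloc has 67*2 = 134 slots.
    func makeSpanClass(sizeclass uint8, noscan bool) spanClass {
        n := spanClass(0)
        if noscan {
            n = 1
        }
        return spanClass(sizeclass<<1) | n
    }

    func (sc spanClass) sizeclass() int8 { return int8(sc >> 1) }
    func (sc spanClass) noscan() bool    { return sc&1 != 0 }

    // makeSpanClass(2, true) == 5: the tinySpanClass used by the tiny allocator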
### span allocBits and gcmarkBits | |
- when a span is created, allocBits and gcmarkBits are allocated from gcBitsArenas.next
- when sweep termination finishes (finishsweep_m), it starts a new epoch via nextMarkBitArenaEpoch
- append gcBitsArenas.previous to gcBitsArenas.free (frees the old gcmarkBits)
- gcBitsArenas.previous = gcBitsArenas.current (these bits became allocBits when the last gc finished)
- gcBitsArenas.current = gcBitsArenas.next (the bits used during this gc)
- gcBitsArenas.next = nil (see tryAlloc: if b == nil then newArenaMayUnlock will be called)
### gc bitmap | |
// Garbage collector: type and heap bitmaps. | |
// | |
// Stack, data, and bss bitmaps | |
// | |
// Stack frames and global variables in the data and bss sections are described | |
// by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer | |
// to be visited during GC. The bits in each byte are consumed starting with | |
// the low bit: 1<<0, 1<<1, and so on. | |
// | |
// Heap bitmap | |
// | |
// The allocated heap comes from a subset of the memory in the range [start, used), | |
// where start == mheap_.arena_start and used == mheap_.arena_used. | |
// The heap bitmap comprises 2 bits for each pointer-sized word in that range, | |
// stored in bytes indexed backward in memory from start. | |
// That is, the byte at address start-1 holds the 2-bit entries for the four words | |
// start through start+3*ptrSize, the byte at start-2 holds the entries for | |
// start+4*ptrSize through start+7*ptrSize, and so on. | |
// | |
// In each 2-bit entry, the lower bit holds the same information as in the 1-bit | |
// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC. | |
// The meaning of the high bit depends on the position of the word being described | |
// in its allocated object. In all words *except* the second word, the | |
// high bit indicates that the object is still being described. In | |
// these words, if a bit pair with a high bit 0 is encountered, the | |
// low bit can also be assumed to be 0, and the object description is | |
// over. This 00 is called the ``dead'' encoding: it signals that the | |
// rest of the words in the object are uninteresting to the garbage | |
// collector. | |
// | |
// In the second word, the high bit is the GC ``checkmarked'' bit (see below). | |
// | |
// The 2-bit entries are split when written into the byte, so that the top half | |
// of the byte contains 4 high bits and the bottom half contains 4 low (pointer) | |
// bits. | |
// This form allows a copy from the 1-bit to the 4-bit form to keep the | |
// pointer bits contiguous, instead of having to space them out. | |
// | |
// The code makes use of the fact that the zero value for a heap bitmap | |
// has no live pointer bit set and is (depending on position), not used, | |
// not checkmarked, and is the dead encoding. | |
// These properties must be preserved when modifying the encoding. | |
// | |
// The bitmap for noscan spans is not maintained. Code must ensure | |
// that an object is scannable before consulting its bitmap by | |
// checking either the noscan bit in the span or by consulting its | |
// type's information. | |
// | |
// Checkmarks | |
// | |
// In a concurrent garbage collector, one worries about failing to mark | |
// a live object due to mutations without write barriers or bugs in the | |
// collector implementation. As a sanity check, the GC has a 'checkmark' | |
// mode that retraverses the object graph with the world stopped, to make | |
// sure that everything that should be marked is marked. | |
// In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry | |
// for the second word of the object holds the checkmark bit. | |
// When not in checkmark mode, this bit is set to 1. | |
// | |
// The smallest possible allocation is 8 bytes. On a 32-bit machine, that | |
// means every allocated object has two words, so there is room for the | |
// checkmark bit. On a 64-bit machine, however, the 8-byte allocation is | |
// just one word, so the second bit pair is not available for encoding the | |
// checkmark. However, because non-pointer allocations are combined | |
// into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation | |
// must be a pointer, so the type bit in the first word is not actually needed. | |
// It is still used in general, except in checkmark the type bit is repurposed | |
// as the checkmark bit and then reinitialized (to 1) as the type bit when | |
// finished. | |
example: | |
type MyStruct struct { | |
X int | |
P *int | |
P1 *int | |
P2 *int | |
X1 int | |
P3 *int | |
X2 int | |
X3 int | |
} | |
bitmap in type info | |
(lldb) me read typ.gcdata | |
0x004dc859: 2e 35 36 38 3c 42 55 58 72 8e d8 f5 ff 00 78 03 .568<BUXr.....x. | |
0x004dc869: 04 1f 0f 21 0f 38 19 40 22 49 12 50 01 55 01 55 ...!.8.@"I.P.U.U | |
0x2e => 0b00101110 => (low bit first) 0 1 1 1 0 1 0 0
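A small sketch decoding that first gcdata byte (0x2e) into one pointer bit per word, low bit first, which lines up with MyStruct's fields:
    package main

    import "fmt"

    func main() {
        fields := []string{"X", "P", "P1", "P2", "X1", "P3", "X2", "X3"}
        mask := byte(0x2e) // first byte of typ.gcdata for MyStruct
        for i, name := range fields {
            // bits are consumed starting with the low bit: 1<<0, 1<<1, ...
            fmt.Printf("word %d (%s): pointer=%v\n", i, name, mask>>uint(i)&1 == 1)
        }
    }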
when written to heap bitmap | |
(lldb) p x | |
(void *) x = 0x000000c42009a000 | |
>>> offset = (0x000000c42009a000 - 0xc420000000) / 8 | |
>>> hex(0xc420000000 - offset / 4 - 1) | |
'0xc41fffb2ff' | |
>>> offset & 3 | |
0 | |
// before | |
(lldb) me read 0xc41fffb2ff | |
0xc41fffb2ff: 00 00 00 33 ff fe fa fa fa fa ff ff fa fa fa fa ...3............ | |
0xc41fffb30f: fb ff fe fa fa fa fa ff ff fa fa fa fa fb ff fe ................ | |
// after | |
(lldb) me read 0xc41fffb2ff | |
0xc41fffb2ff: de 00 00 33 ff fe fa fa fa fa ff ff fa fa fa fa ...3............ | |
0xc41fffb30f: fb ff fe fa fa fa fa ff ff fa fa fa fa fb ff fe ................ | |
(lldb) me read 0xc41fffb2ff-2 | |
0xc41fffb2fd: 00 32 de 00 00 33 ff fe fa fa fa fa ff ff fa fa .2...3.......... | |
0xc41fffb30d: fa fa fb ff fe fa fa fa fa ff ff fa fa fa fa fb ................ | |
de => 0b1101 1110 => [scan, scan, checkmark, scan, ptr, ptr, ptr, noptr] | |
32 => 0b0011 0010 => [noscan, noscan, scan, scan, noptr, noptr, ptr, noptr] | |
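A sketch of reading one heap-bitmap byte in the split form used above: the low nibble holds the 4 pointer bits, the high nibble the 4 scan bits (the second word's scan position is the checkmark bit):
    package main

    import "fmt"

    // decodeHeapByte prints the (pointer, scan) pair for each of the 4 words
    // described by one heap-bitmap byte.
    func decodeHeapByte(b byte) {
        for word := 0; word < 4; word++ {
            ptr := b>>uint(word)&1 == 1    // low nibble: pointer bits
            scan := b>>uint(word+4)&1 == 1 // high nibble: scan bits (word 1: checkmark)
            fmt.Printf("word %d: pointer=%v scan=%v\n", word, ptr, scan)
        }
    }

    func main() {
        decodeHeapByte(0xde) // byte written for the first 4 words of MyStruct above
        decodeHeapByte(0x32) // the next byte, covering words 4..7
    }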
### newobject | |
newobject: | |
mallocgc(size, type, needzero) | |
if gcBlackenEnabled != 0 | |
assistG = getg() | |
if assistG.m.curg != nil | |
assistG = assistG.m.curg | |
assistG.gcAssistBytes -= int64(size) | |
if assistG.gcAssistBytes < 0 | |
gcAssistAlloc(assistG) | |
mp = acquirem() // getg.m, lock++ | |
c = gomcache() // getg.m.mcache | |
dataSize = size // original size | |
if size <= maxSmallSize (32K) | |
if no pointers and size < maxTinySize(16 byte) | |
// allocate from tiny | |
off = c.tinyoffset + align | |
if off+size <= maxTinySize and c.tiny != null | |
// fits into existing tiny block | |
c.tinyoffset = off + size | |
releasem(mp) | |
return c.tiny + off | |
span = c.alloc[tinySpanClass(5)] | |
v = nextFreeFast(span) | |
if v == 0 | |
v = c.nextFree(tinySpanClass) | |
x = unsafe.Pointer(v) | |
if size < c.tinyoffset || c.tiny == 0 | |
c.tiny = x | |
c.tinyoffset = size | |
size = maxTinySize(16) | |
else | |
// allocate from span | |
sizeclass = size_to_class[roundUp(size, 8)] | |
size = class_to_size[sizeclass] // real size | |
spc = makeSpanClass(sizeclass, noscan) // sizeclass * 2 + (noscan ? 1 : 0) | |
span = c.alloc[spc] | |
v = nextFreeFast(span) | |
if v == 0 | |
v = c.nextFree(spc)
x = unsafe.Pointer(v) | |
else | |
// allocate from heap | |
s = largeAlloc(size, needzero, noscan) | |
s.freeindex = 1 | |
s.allocCount = 1 | |
x = s.base() | |
size = s.elemsize // real size | |
if !noscan | |
heapBitsSetType(uintptr(x), size, dataSize, typ) | |
publicationBarrier() | |
if gcphase != _GCoff | |
gcmarknewobject(uintptr(x), size, scanSize) | |
mp.mallocing = 0 | |
releasem(mp) // lock-- | |
if shouldhelpgc | |
if t := (gcTrigger{kind: gcTriggerHeap}); t.test() | |
gcStart(gcBackgroundMode, t) | |
dataSize and size:
when dataSize == size, the request fits the size class exactly
when dataSize < size, a bigger size class was chosen (see the sketch below)
when dataSize > typ.size, that means it's an array
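A sketch of the dataSize => size rounding for small noscan objects, using the first few class_to_size entries from sizeclasses.go (table truncated here, function name is illustrative):
    // classSizes is the start of class_to_size; class 0 is reserved for large objects.
    var classSizes = []int{0, 8, 16, 32, 48, 64, 80, 96, 112, 128}

    // roundToClass scans the table for the smallest class that fits dataSize
    // (the runtime uses the precomputed size_to_class8/size_to_class128 tables).
    func roundToClass(dataSize int) (sizeclass, size int) {
        for c := 1; c < len(classSizes); c++ {
            if dataSize <= classSizes[c] {
                return c, classSizes[c]
            }
        }
        return -1, dataSize // beyond this truncated table
    }

    // roundToClass(17) == (3, 32): dataSize(17) < size(32), the object gets a bigger slot,
    // matching the "dataSize: 17 size: 32 sizeclass: 3" row above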
nextFreeFast: | |
theBit := first non-zero bit (counting from the low bit) of s.allocCache
if theBit < 64 | |
result = s.freeindex + theBit | |
if result < s.nelems | |
s.allocCache >>= theBit + 1 | |
s.freeindex = result + 1 | |
v = result * s.elemsize + s.base() | |
s.allocCount++ | |
return v | |
return 0 | |
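The "first non-zero bit" step in nextFreeFast is a count-trailing-zeros on allocCache; a sketch using math/bits.TrailingZeros64 in place of the runtime's sys.Ctz64 (function name is illustrative):
    // nextFreeFastSketch mirrors the fast path above: allocCache has a 1 bit for
    // every still-free slot, starting at slot freeindex.
    func nextFreeFastSketch(allocCache uint64, freeindex, nelems, elemsize, base uintptr) uintptr {
        theBit := uintptr(bits.TrailingZeros64(allocCache)) // 64 when allocCache == 0
        if theBit >= 64 {
            return 0
        }
        result := freeindex + theBit
        if result >= nelems {
            return 0
        }
        // the real code also does: allocCache >>= theBit + 1; freeindex = result + 1; allocCount++
        return base + result*elemsize
    }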
nextFree: | |
// malloc doesn't need to set allocBits, because freeindex always moves forward
s = c.alloc[spc] | |
freeIndex = s.nextFreeIndex() | |
if freeIndex == s.nelems | |
// the span is full | |
c.refill(spc) | |
s = c.alloc[spc] | |
freeIndex = s.nextFreeIndex()
v = freeIndex * s.elemsize + s.base() | |
s.allocCount++ | |
nextFreeIndex: | |
freeindex = s.freeindex | |
bitIndex = first non-zero bit (counting from the low bit) of s.allocCache
while bitIndex == 64 | |
freeindex = (freeindex + 64) &^ (64 - 1) | |
if freeindex >= s.nelems | |
s.freeindex = s.nelems | |
return s.nelems | |
s.refillAllocCache(freeindex / 8) | |
bitIndex = first non-zero bit (counting from the low bit) of s.allocCache
result = freeindex + bitIndex | |
if result >= s.nelems | |
s.freeindex = s.nelems | |
return s.nelems | |
s.allocCache >>= bitIndex + 1 | |
s.freeindex = result + 1 | |
return result | |
refillAllocCache: | |
bytes = s.allocBits.bytep(whichByte) | |
aCache = 0 | |
aCache |= bytes[0] | |
aCache |= bytes[1] << (1 * 8) | |
aCache |= bytes[2] << (2 * 8) | |
aCache |= bytes[3] << (3 * 8) | |
aCache |= bytes[4] << (4 * 8) | |
aCache |= bytes[5] << (5 * 8) | |
aCache |= bytes[6] << (6 * 8) | |
aCache |= bytes[7] << (7 * 8) | |
s.allocCache = ^aCache | |
refill: | |
s = c.alloc[spc] | |
if s != &emptymspan
s.incache = false | |
s = mheap_.central[spc].mcentral.cacheSpan() | |
c.alloc[spc] = s | |
return s | |
cacheSpan: | |
spanBytes = class_to_allocnpages[c.spanclass.sizeclass()] * _PageSize | |
sg = heap.sweepgen | |
retry: | |
for s in c.nonempty | |
if s.sweepgen == sg - 2 and cmpxchg(s.sweepgen, sg - 2, sg - 1) | |
c.nonempty.remove(s) | |
c.empty.insertBack(s) | |
s.sweep(true) | |
goto havespan | |
if s.sweepgen == sg - 1 | |
// span is sweeping | |
continue | |
c.nonempty.remove(s) | |
c.empty.insertBack(s) | |
goto havespan | |
for s in c.empty | |
if s.sweepgen == sg - 2 and cmpxchg(s.sweepgen, sg - 2, sg - 1) | |
c.empty.remove(s) | |
c.empty.insertBack(s) // put to back | |
s.sweep(true) | |
freeindex = s.nextFreeIndex() | |
if freeindex != s.nelems
s.freeindex = freeindex | |
goto havespan | |
// span still not contains free objects | |
goto retry | |
if s.sweepgen == sg - 1 | |
// span is sweeping | |
continue | |
break | |
nospan: | |
s = c.grow() | |
c.empty.insertBack(s) | |
havespan: | |
s.incache = true | |
s.refillAllocCache((s.freeindex &^ (64 - 1)) / 8) | |
s.allocCache >>= s.freeindex % 64 | |
memstats.heap_live += spanBytes - usedBytes | |
return s | |
grow: | |
npages := class_to_allocnpages[c.spanclass.sizeclass()] | |
size := class_to_size[c.spanclass.sizeclass()] | |
n := (npages << _PageShift) / size | |
s := mheap_.alloc(npages, c.spanclass, false, true) // => alloc_m | |
s.limit = s.base() + size*n | |
heapBitsForSpan(s.base()).initSpan(s) | |
largeAlloc: | |
npages = size / 8K | |
if size % 8K > 0 | |
npages++ | |
s = mheap_.alloc(npages, makeSpanClass(0, noscan), true, needzero) // => alloc_m | |
s.limit = s.base() + size | |
return s | |
heapBitsSetType: | |
// bitPointer = 1 << 0 | |
// bitScan = 1 << 4 | |
// 1 byte => 4 pointer => [scan 3, scan 2, scan 1, scan 0, ptr 3, ptr 2, ptr 1, ptr 0] | |
h := heapBitsForAddr(x) | |
ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below) | |
if size is 2 pointer-words holding 1 pointer-sized object (only possible on 32-bit)
*h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift
*h.bitp |= (bitPointer | bitScan) << h.shift
else if size is 2 pointer-words holding 2 pointer-sized objects
*h.bitp |= (bitPointer | bitScan | bitPointer<<heapBitsShift) << h.shift // 2nd word's scan bit is the checkmark, so only its pointer bit is set
else if size is 2 pointer-words holding 1 object of 2*pointer size
b := uint32(*ptrmask)
hb := (b & 3) | bitScan // 11 => [0001 0011]
*h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift
*h.bitp |= uint8(hb << h.shift)
else | |
see example above | |
heapBitsForAddr: | |
off := (addr - mheap_.arena_start) / sys.PtrSize | |
return heapBits { mheap_.bitmap - off/4 - 1, off & 3 } // which byte, which of the byte's 4 words (shift 0-3)
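The heapBitsForAddr arithmetic applied to the lldb example above, as a standalone sketch (addresses are the ones from that session):
    package main

    import "fmt"

    func main() {
        arenaStart := uintptr(0xc420000000) // mheap_.arena_start
        bitmap := uintptr(0xc420000000)     // mheap_.bitmap; bitmap bytes sit below this address
        x := uintptr(0xc42009a000)          // the object from the MyStruct example

        off := (x - arenaStart) / 8 // word offset of x in the arena
        bytep := bitmap - off/4 - 1 // which bitmap byte describes x
        shift := off & 3            // which of that byte's 4 words is x
        // the pointer bit for this word is 1<<shift, the scan bit is 1<<(shift+4)
        fmt.Printf("byte=%#x shift=%d\n", bytep, shift) // byte=0xc41fffb2ff shift=0
    }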
publicationBarrier: | |
// Stores are already ordered on x86, so this is just a compile barrier. | |
RET | |
gcmarknewobject: | |
markBitsForAddr(obj).setMarked() | |
gcw := &getg().m.p.ptr().gcw | |
gcw.bytesMarked += uint64(size) | |
gcw.scanWork += int64(scanSize) | |
if gcBlackenPromptly | |
gcw.dispose() | |
# collector | |
### gc | |
GC: | |
// We consider a cycle to be: | |
// sweep termination, mark, mark termination, and sweep | |
// sweep termination: | |
// - stop the world | |
// - sweep any unswept spans | |
// mark: | |
// - set gcphase to _GCmark | |
// - enable write barrier | |
// - enable mutator assist | |
// - enqueue root mark jobs | |
// - start the world | |
// - scanning all stacks and global (stop a goroutine and resume) | |
// - drain work queue | |
// - stop all works, disable local work queue cache | |
// - drain work queue again | |
// mark termination: | |
// - stop the world | |
// - set gcphase to _GCmarktermination | |
// - disable write barrier | |
// - disable mutator assist | |
// sweep: | |
// - set gcphase to _GCoff | |
// - start the world | |
// - concurrent sweeping in the background | |
n = work.cycles | |
wait until sweep termination, mark, and mark termination of cycle N complete | |
if (n + 1) - work.cycles > 0 | |
start new gc cycle | |
... | |
### global variables | |
// gcController implements the GC pacing controller that determines | |
// when to trigger concurrent garbage collection and how much marking | |
// work to do in mutator assists and background marking | |
// | |
// All fields of gcController are used only during a single mark cycle | |
var gcController gcControllerState | |
### gc trigger | |
gcTrigger.test: | |
if kind == gcTriggerAlways | |
// start gc unconditionally
return true | |
if gcphase != _GCoff | |
return false // already in gc | |
if kind == gcTriggerHeap | |
// allocated size is larger than the trigger threshold (see the sketch after this block)
// when heap_live increase | |
// - mcentral.cacheSpan from mcache.refill | |
// when heap_live decrease | |
// - mcentral.uncacheSpan from mcache.releaseAll | |
// when heap_live updated | |
// - gcMark (memstats.heap_live = work.bytesMarked) | |
// about gc_trigger see gcSetTriggerRatio | |
// - trigger ratio example: 0.875 => 0.935 => 0.950 | |
// - default gcpercent is 100 | |
// - endCycle computes the trigger ratio for the next cycle | |
return memstats.heap_live >= memstats.gc_trigger | |
if kind == gcTriggerTime | |
// trigger when too much time has passed since the last gc
return lastgc != 0 && now - lastgc > forcegcperiod | |
if kind == gcTriggerCycle | |
// used to ensure a new cycle is started, see the GC function
return n - work.cycles > 0 | |
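A sketch of the gcTriggerHeap condition together with the relation gcSetTriggerRatio uses to compute gc_trigger (function name and numbers in the comment are illustrative):
    // heapTriggered reports whether allocation has grown the live heap past the
    // trigger computed from the previous cycle's marked heap and the trigger ratio.
    func heapTriggered(heapLive, heapMarked uint64, triggerRatio float64) bool {
        gcTrigger := uint64(float64(heapMarked) * (1 + triggerRatio))
        return heapLive >= gcTrigger
    }

    // e.g. heapMarked = 10MB, triggerRatio = 0.875 => gc_trigger = 18.75MB;
    // triggerRatio starts at 0.875 and is adjusted each cycle by endCycle /
    // gcSetTriggerRatio, as in the 0.875 => 0.935 => 0.950 example above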
### entry point | |
gcStart: | |
if g == g.m.g0 || m.locks > 1 || mp.preemptoff != "" | |
// this thread is in a non-preemptible state, so don't start the gc
return | |
while trigger.test() && gosweepone() != ^0 | |
// sweep unswept spans | |
sweep.nbgsweep++ | |
if !trigger.test() | |
return | |
if mode == gcBackgroundMode | |
gcBgMarkStartWorkers() | |
gcResetMarkState() | |
// set work state | |
work.stwprocs, work.maxprocs = gcprocs(), gomaxprocs | |
work.heap0 = atomic.Load64(&memstats.heap_live) | |
work.pauseNS = 0 | |
work.mode = mode | |
now := nanotime() | |
work.tSweepTerm = now | |
work.pauseStart = now | |
// stw | |
systemstack(stopTheWorldWithSema) | |
// finish sweep before we start concurrent scan. | |
systemstack(func() { finishsweep_m() }) | |
// clearpools before we start the GC | |
clearpools() | |
// increase work cycle | |
work.cycles++ | |
if mode == gcBackgroundMode | |
gcController.startCycle() | |
setGCPhase(_GCmark) | |
gcBgMarkPrepare() | |
gcMarkRootPrepare() | |
gcMarkTinyAllocs() | |
// enable write barrier | |
atomic.Store(&gcBlackenEnabled, 1) | |
gcController.markStartTime = now | |
// concurrent mark | |
systemstack(startTheWorldWithSema) | |
now = nanotime() | |
work.pauseNS += now - work.pauseStart | |
work.tMark = now | |
else | |
t := nanotime() | |
work.tMark, work.tMarkTerm = t, t | |
work.heapGoal = work.heap0 | |
// perform mark termination. this will restart the world. | |
gcMarkTermination(memstats.triggerRatio) | |
### stw | |
stopTheWorldWithSema: | |
sched.stopwait = gomaxprocs | |
sched.gcwaiting = 1 // schedule() will stop m when seeing this | |
preemptall() | |
// stop current P | |
_g_.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic | |
sched.stopwait-- | |
// try to retake all P's in Psyscall status | |
for i := 0; i < int(gomaxprocs); i++ | |
if s == _Psyscall && atomic.Cas(&p.status, s, _Pgcstop) | |
p.syscalltick++ | |
sched.stopwait-- | |
// stop idle P's | |
while p := pidleget() | |
p.status = _Pgcstop | |
sched.stopwait-- | |
if sched.stopwait > 0 | |
while true | |
// wait for 100us, then try to re-preempt in case of any races | |
if notetsleep(&sched.stopnote, 100*1000) | |
noteclear(&sched.stopnote) | |
break | |
preemptall() | |
preemptall: | |
for i := int32(0); i < gomaxprocs; i++ | |
if _p_ == nil || _p_.status != _Prunning | |
continue | |
preemptone(_p_) | |
preemptone: | |
mp := _p_.m.ptr() | |
if mp == nil || mp == getg().m | |
return false | |
gp := mp.curg | |
if gp == nil || gp == mp.g0 | |
return false | |
gp.preempt = true | |
gp.stackguard0 = stackPreempt | |
startTheWorldWithSema: | |
// pull network events first | |
gp := netpoll(false) | |
injectglist(gp) | |
add := needaddgcproc() | |
// resize procs if different, p1 is all p with local works | |
procs := gomaxprocs | |
if newprocs != 0 | |
procs = newprocs | |
newprocs = 0 | |
p1 := procresize(procs) | |
// no gc waiting | |
sched.gcwaiting = 0 | |
// wake runnable p | |
for all p1 (p with local works) | |
if p.m != 0 | |
notewakeup(&mp.park) // start exist m | |
else | |
newm(nil, p) // start new m | |
// Wakeup an additional proc in case we have excessive runnable goroutines | |
if npidle != 0 and nmspinning == 0 | |
wakep() | |
// If GC could have used another helper proc, start one now, | |
// in the hope that it will be available next time. | |
if add | |
newm(mhelpgc, nil) | |
needaddgcproc: | |
// add one gc helper proc at a time until there are enough idle m's
return (min(gomaxprocs, ncpu, _MaxGcproc(32)) - (sched.nmidle + 1)) > 0
mhelpgc: | |
_g_ := getg() | |
_g_.m.helpgc = -1 | |
// will call gchelper later when it wakes up?
gchelper: | |
gchelperstart() // just check _g_.m.helpgc in [0, _MaxGcproc) and _g_ == _g_.m.g0 | |
if gcphase == _GCmarktermination | |
if work.helperDrainBlock | |
gcDrain(gcw, gcDrainBlock) // blocks in getfull | |
else | |
gcDrain(gcw, gcDrainNoBlock) | |
gcw.dispose() | |
nproc := atomic.Load(&work.nproc) // work.nproc can change right after we increment work.ndone | |
if atomic.Xadd(&work.ndone, +1) == nproc-1 | |
notewakeup(&work.alldone) | |
### write barrier | |
writebarrierptr(dst *uintptr, src uintptr): | |
if writeBarrier.cgo | |
cgoCheckWriteBarrier(dst, src) | |
if !writeBarrier.needed | |
*dst = src | |
return | |
writebarrierptr_prewrite1(dst, src) | |
*dst = src | |
writebarrierptr_prewrite1(dst *uintptr, src uintptr): | |
mp := acquirem() | |
if mp.inwb || mp.dying > 0 | |
releasem(mp) | |
return | |
systemstack(func() { | |
mp.inwb = true | |
gcmarkwb_m(dst, src) | |
}) | |
mp.inwb = false | |
releasem(mp) | |
gcmarkwb_m(slot *uintptr, ptr uintptr): | |
if writeBarrier.needed | |
// The reason for shading the old pointer:
// prevents a mutator from hiding an object by | |
// moving the sole pointer to it from the heap to its stack | |
// | |
// for example | |
// b = obj | |
// oldx = nil | |
// scan oldx... | |
// oldx = b.x | |
// b.x = ptr (barrier should mark old b.x here) | |
// scan b... | |
// without a write barrier, oldx would not be found by the gc
// | |
// loading a pointer into a register or local variable does not require a write barrier
// so the mutator may move a pointer to the stack without the gc noticing
// | |
if slot1 := uintptr(unsafe.Pointer(slot)); slot1 >= minPhysPageSize | |
if optr := *slot; optr != 0 | |
shade(optr) | |
// The reason for shading the new pointer:
// prevents a mutator from hiding an object by moving the sole pointer | |
// to it from its stack into a black object in the heap | |
// | |
// for example | |
// a = ptr | |
// b = obj | |
// scan b... | |
// b.x = a (barrier should mark new b.x here, if b is scanned) | |
// a = nil | |
// scan a... | |
// without a write barrier, ptr would not be found by the gc
// | |
// Make this conditional on the caller's stack color | |
// The insertion part of the barrier | |
// is necessary while the calling goroutine's stack is grey | |
if ptr != 0 && inheap(ptr) | |
shade(ptr) | |
### sweep termination | |
finishsweep_m: | |
while sweepone() != ^0 | |
sweep.npausesweep++ | |
nextMarkBitArenaEpoch() | |
clearpools: | |
// clear sync.Pools | |
if poolcleanup != nil | |
poolcleanup() | |
// Clear central sudog cache. | |
// Leave per-P caches alone, they have strictly bounded size. | |
// Disconnect cached list before dropping it on the floor, | |
// so that a dangling ref to one entry does not pin all of them. | |
clear sched.sudogcache and set their sg.next to nil | |
// Clear central defer pools. | |
// Leave per-P pools alone, they have strictly bounded size. | |
for i := range sched.deferpool | |
clear sched.deferpool[i] and set their d.link to nil | |
### mark | |
gcBgMarkStartWorkers: | |
for p in allp | |
if p.gcBgMarkWorker == 0 | |
go gcBgMarkWorker(p) // only start once normally | |
notetsleepg(&work.bgMarkReady, -1) | |
noteclear(&work.bgMarkReady) | |
gcBgMarkWorker: | |
gp = getg() // this goroutine | |
notewakeup(&work.bgMarkReady) // wakeup gcBgMarkStartWorkers | |
while true | |
// Go to sleep until woken by gcController.findRunnable. | |
// We can't releasem yet since even the call to gopark may be preempted. | |
gopark(func(g *g, parkp unsafe.Pointer) bool { | |
park := (*parkInfo)(parkp) | |
releasem(park.m.ptr()) // release m before g sleep | |
// If the worker isn't attached to its P, attach now | |
if park.attach != 0 | |
p := park.attach.ptr() | |
park.attach.set(nil) | |
if !p.gcBgMarkWorker.cas(0, guintptr(unsafe.Pointer(g))) | |
return false // exit this worker | |
return true // ok to park | |
}) | |
if _p_.gcBgMarkWorker.ptr() != gp | |
break | |
throw if gcBlackenEnabled == 0 // the bg mark worker should only be woken during gc
switch to system stack | |
// Mark our goroutine preemptible so its stack can be scanned | |
// This lets two mark workers scan each other | |
casgstatus(gp, _Grunning, _Gwaiting) | |
switch _p_.gcMarkWorkerMode | |
gcMarkWorkerDedicatedMode: | |
gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit) | |
move all runnable g's from p to the global queue
gcDrain(&_p_.gcw, gcDrainNoBlock|gcDrainFlushBgCredit) | |
gcMarkWorkerFractionalMode: | |
gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit) | |
gcMarkWorkerIdleMode: | |
gcDrain(&_p_.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit) | |
casgstatus(gp, _Gwaiting, _Grunning) | |
switch _p_.gcMarkWorkerMode | |
gcMarkWorkerDedicatedMode: | |
dedicatedMarkTime += nanotime() - startTime | |
dedicatedMarkWorkersNeeded++ | |
gcMarkWorkerFractionalMode: | |
fractionalMarkTime += nanotime() - startTime | |
fractionalMarkWorkersNeeded++ | |
gcMarkWorkerIdleMode: | |
idleMarkTime += nanotime() - startTime | |
incnwait = ++work.nwait | |
if incnwait == work.nproc && !gcMarkWorkAvailable(nil) | |
// all worker done | |
_p_.gcBgMarkWorker.set(nil) | |
gcMarkDone() | |
// disable preemption and prepare to reattach to the P | |
park.m.set(acquirem()) | |
park.attach.set(_p_) | |
startCycle: | |
// reset gcControllerState state | |
// for example, 4 cores need 1 dedicated and 0 fractional workers, 5 cores need 1 dedicated and 1 fractional (sketched below)
// dedicated and fractional workers are split this way so that roughly 25% of the cpu goes to gc
dedicatedMarkWorkersNeeded = int(gomaxprocs * gcGoalUtilization(0.25)) | |
fractionalUtilizationGoal = totalUtilizationGoal - dedicatedMarkWorkersNeeded // [0, 1) | |
fractionalMarkWorkersNeeded = fractionalUtilizationGoal ? 1 : 0 | |
// Compute initial values for controls that are updated throughout the cycle | |
c.revise() | |
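A simplified sketch of that dedicated/fractional split (the real startCycle rounds to the nearest worker and tolerates some utilization error; this follows the note above literally, and the function name is illustrative):
    // markWorkerSplit: how many full-time mark workers, plus the leftover
    // fraction that a part-time (fractional) worker should cover.
    func markWorkerSplit(gomaxprocs int) (dedicated int, fractionalGoal float64) {
        const gcGoalUtilization = 0.25 // aim to spend ~25% of CPU on marking
        total := float64(gomaxprocs) * gcGoalUtilization
        dedicated = int(total)
        fractionalGoal = total - float64(dedicated)
        return
    }

    // markWorkerSplit(4) => 1 dedicated, 0.00 fractional goal (no fractional worker)
    // markWorkerSplit(5) => 1 dedicated, 0.25 fractional goal (one fractional worker)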
findRunnableGCWorker: | |
// set p.gcMarkWorkerMode and return p.gcBgMarkWorker | |
if dedicatedMarkWorkersNeeded > 0 | |
dedicatedMarkWorkersNeeded-- | |
p.gcMarkWorkerMode = gcMarkWorkerDedicatedMode | |
else if fractionalMarkWorkersNeeded > 0 | |
fractionalMarkWorkersNeeded-- | |
if ((fractionalMarkTime + gcForcePreemptNS) / | |
(nanotime() - markStartTime + gcForcePreemptNS) > fractionalUtilizationGoal) | |
fractionalMarkWorkersNeeded++ // fractional worker use too much time | |
return nil | |
p.gcMarkWorkerMode = gcMarkWorkerFractionalMode
else | |
// no more workers are need right now | |
return nil | |
revise: | |
// scan work still expected = scannable heap bytes (heap_scan) - scan work already done
scanWorkExpected := int64(memstats.heap_scan) - c.scanWork | |
if scanWorkExpected < 1000 | |
scanWorkExpected = 1000 | |
// heap distance = heap goal for this cycle (next_gc) - bytes currently live
heapDistance := int64(memstats.next_gc) - int64(atomic.Load64(&memstats.heap_live)) | |
if heapDistance <= 0 | |
heapDistance = 1 | |
// this makes mallocgc help the gc when it has allocated too many bytes (see the sketch after this block)
//
// during gc, every mallocgc does `assistG.gcAssistBytes -= int64(size)`
// once `assistG.gcAssistBytes < 0`, mallocgc will execute gcAssistAlloc
// gcAssistAlloc will call gcDrainN with debtBytes * assistWorkPerByte | |
c.assistWorkPerByte = float64(scanWorkExpected) / float64(heapDistance) | |
c.assistBytesPerWork = float64(heapDistance) / float64(scanWorkExpected) | |
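A sketch of the assist arithmetic above, showing how one allocation becomes a scan-work debt (function name and the values in the comment are illustrative):
    // assistRatios mirrors revise(): expected remaining scan work divided by the
    // bytes that may still be allocated before reaching the heap goal.
    func assistRatios(heapScan, scanDone, nextGC, heapLive int64) (workPerByte, bytesPerWork float64) {
        scanWorkExpected := heapScan - scanDone
        if scanWorkExpected < 1000 {
            scanWorkExpected = 1000
        }
        heapDistance := nextGC - heapLive
        if heapDistance <= 0 {
            heapDistance = 1
        }
        return float64(scanWorkExpected) / float64(heapDistance),
            float64(heapDistance) / float64(scanWorkExpected)
    }

    // e.g. 8MB of scan work left and 16MB of headroom => assistWorkPerByte = 0.5;
    // a mallocgc of 4096 bytes adds 4096 to the debt, so the assist must do about
    // 2048 units of scan work (gcDrainN(debtBytes * assistWorkPerByte)) to pay it off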
gcResetMarkState: | |
for g in allgs | |
gp.gcscandone = false // set to true in gcphasework | |
gp.gcscanvalid = false // stack has not been scanned | |
gp.gcAssistBytes = 0 | |
work.bytesMarked = 0 | |
work.initialHeapLive = atomic.Load64(&memstats.heap_live) | |
work.markrootDone = false | |
gcDrain: | |
gp := getg().m.curg | |
// flags | |
preemptible := flags&gcDrainUntilPreempt != 0 | |
blocking := flags&(gcDrainUntilPreempt|gcDrainIdle|gcDrainNoBlock) == 0 | |
flushBgCredit := flags&gcDrainFlushBgCredit != 0 | |
idle := flags&gcDrainIdle != 0 | |
// idleCheck is the scan work at which to perform the next | |
// idle check with the scheduler. | |
initScanWork := gcw.scanWork | |
idleCheck := initScanWork + idleCheckThreshold(100000) | |
// Drain root marking jobs (this set from gcMarkRootPrepare) | |
if work.markrootNext < work.markrootJobs | |
while !(preemptible && gp.preempt) // if somebody preempt this g then break | |
job := atomic.Xadd(&work.markrootNext, +1) - 1 | |
if job >= work.markrootJobs | |
break | |
markroot(gcw, job) | |
if idle && pollWork() | |
goto done | |
// Drain heap marking jobs. | |
while !(preemptible && gp.preempt) // if somebody preempt this g then break | |
// Try to keep work available on the global queue | |
// if wbuf2 not empty then move wbuf2 to global queue(work.full) | |
// else if wbuf1 jobs > 4 then move half of wbuf1 to global queue | |
if work.full == 0 | |
gcw.balance() | |
// get pointer from work queue | |
if blocking | |
b = gcw.get() | |
else | |
b = gcw.tryGetFast() || gcw.tryGet() | |
// work barrier reached or tryGet failed. | |
if b == 0 | |
break | |
// scan pointer | |
scanobject(b, gcw) | |
// Flush background scan work credit to the global account | |
// if we've accumulated enough locally so mutator assists can draw on it | |
if gcw.scanWork >= gcCreditSlack(2000) | |
atomic.Xaddint64(&gcController.scanWork, gcw.scanWork) | |
if flushBgCredit | |
gcFlushBgCredit(gcw.scanWork - initScanWork) | |
initScanWork = 0 | |
idleCheck -= gcw.scanWork | |
gcw.scanWork = 0 | |
if idle && idleCheck <= 0 | |
idleCheck += idleCheckThreshold | |
if pollWork() | |
break | |
done: | |
// Flush remaining scan work credit. | |
if gcw.scanWork > 0 | |
atomic.Xaddint64(&gcController.scanWork, gcw.scanWork) | |
if flushBgCredit | |
gcFlushBgCredit(gcw.scanWork - initScanWork) | |
gcw.scanWork = 0 | |
markroot: | |
// calculate bases | |
// [ | |
// fixed roots, // +fixedRootCount(2) | |
// flush cache roots, // +nFlushCacheRoots | |
// data roots, // +nDataRoots | |
// BSS roots, // +nBSSRoots | |
// span roots, // +nSpanRoots | |
// stack roots, // +nStackRoots | |
// ] | |
baseFlushCache := uint32(fixedRootCount) | |
baseData := baseFlushCache + uint32(work.nFlushCacheRoots) | |
baseBSS := baseData + uint32(work.nDataRoots) | |
baseSpans := baseBSS + uint32(work.nBSSRoots) | |
baseStacks := baseSpans + uint32(work.nSpanRoots) | |
end := baseStacks + uint32(work.nStackRoots) | |
// find out which work type associate with index | |
switch | |
case baseFlushCache <= i && i < baseData | |
flushmcache(int(i - baseFlushCache)) | |
case baseData <= i && i < baseBSS | |
for _, datap := range activeModules() | |
markrootBlock(datap.data, | |
datap.edata-datap.data, datap.gcdatamask.bytedata, gcw, int(i-baseData)) | |
case baseBSS <= i && i < baseSpans | |
for _, datap := range activeModules() | |
markrootBlock(datap.bss, | |
datap.ebss-datap.bss, datap.gcbssmask.bytedata, gcw, int(i-baseBSS)) | |
case i == fixedRootFinalizers(0) | |
// Only do this once per GC cycle since we don't call queuefinalizer during marking | |
if work.markrootDone | |
break | |
for fb := allfin; fb != nil; fb = fb.alllink | |
cnt := uintptr(atomic.Load(&fb.cnt)) | |
scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), | |
cnt*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], gcw) | |
case i == fixedRootFreeGStacks(1) | |
// Only do this once per GC cycle; preferably concurrently | |
if !work.markrootDone | |
systemstack(markrootFreeGStacks) | |
case baseSpans <= i && i < baseStacks: | |
// mark MSpan.specials | |
markrootSpans(gcw, int(i-baseSpans)) | |
default: | |
// the rest is scanning goroutine stacks | |
gp = allgs[i-baseStacks] | |
// remember when we've first observed the G blocked | |
// needed only to output in traceback | |
status := readgstatus(gp) // We are not in a scan state | |
if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 | |
gp.waitsince = work.tstart | |
// scang must be done on the system stack in case | |
// we're trying to scan our own stack | |
systemstack(func() { | |
userG := getg().m.curg | |
selfScan := gp == userG && readgstatus(userG) == _Grunning | |
if selfScan | |
casgstatus(userG, _Grunning, _Gwaiting) | |
userG.waitreason = "garbage collection scan" | |
scang(gp, gcw) | |
if selfScan | |
casgstatus(userG, _Gwaiting, _Grunning) | |
}) | |
pollWork: | |
// pollWork returns true if there is non-background work this P could be doing | |
// This is a fairly lightweight check to be used for background work loops, like idle GC | |
// It checks a subset of the conditions checked by the actual scheduler | |
if sched.runqsize != 0 | |
return true // global queue | |
p := getg().m.p.ptr() | |
if !runqempty(p) | |
return true // local queue | |
if netpollinited() && atomic.Load(&netpollWaiters) > 0 && sched.lastpoll != 0 | |
if gp := netpoll(false); gp != nil | |
injectglist(gp) | |
return true // net poll | |
return false | |
gcFlushBgCredit: | |
if work.assistQueue.head == 0 | |
atomic.Xaddint64(&gcController.bgScanCredit, scanWork) | |
return | |
scanBytes := int64(float64(scanWork) * gcController.assistBytesPerWork) | |
lock(&work.assistQueue.lock) | |
gp := work.assistQueue.head.ptr() | |
for gp != nil && scanBytes > 0 | |
if scanBytes+gp.gcAssistBytes >= 0 | |
// Satisfy this entire assist debt | |
scanBytes += gp.gcAssistBytes | |
gp.gcAssistBytes = 0 | |
xgp := gp | |
gp = gp.schedlink.ptr() | |
ready(xgp, 0, false) | |
else | |
gp.gcAssistBytes += scanBytes | |
scanBytes = 0 | |
xgp := gp | |
gp = gp.schedlink.ptr() | |
if gp == nil | |
gp = xgp | |
else | |
xgp.schedlink = 0 | |
work.assistQueue.tail.ptr().schedlink.set(xgp) | |
work.assistQueue.tail.set(xgp) | |
break | |
work.assistQueue.head.set(gp) | |
if gp == nil | |
work.assistQueue.tail.set(nil) | |
if scanBytes > 0 | |
scanWork = int64(float64(scanBytes) * gcController.assistWorkPerByte) | |
atomic.Xaddint64(&gcController.bgScanCredit, scanWork) | |
unlock(&work.assistQueue.lock) | |
flushmcache: | |
// flushmcache flushes the mcache of allp[i] | |
// The world must be stopped (only mark termination will flush cache) | |
p := allp[i] | |
c := p.mcache | |
if c == nil | |
return | |
c.releaseAll() // call mcentral.uncacheSpan return all c.alloc[index] | |
stackcache_clear(c) // free c.stackcache[index] | |
markrootBlock: | |
b := b0 + uintptr(shard)*rootBlockBytes // which block should scan | |
if b >= b0+n0 | |
return | |
ptrmask := (*uint8)(add(unsafe.Pointer(ptrmask0), | |
uintptr(shard)*(rootBlockBytes/(8*sys.PtrSize)))) // ptrmask of that block | |
n := uintptr(rootBlockBytes) | |
if b+n > b0+n0 | |
n = b0 + n0 - b // how many bytes should scan | |
// Scan this shard | |
scanblock(b, n, ptrmask, gcw) | |
scanblock: | |
// scanblock scans b as scanobject would, but using an explicit | |
// pointer bitmap instead of the heap bitmap | |
// This is used to scan non-heap roots, so it does not update | |
// gcw.bytesMarked or gcw.scanWork | |
for i := uintptr(0); i < n; | |
bits := uint32(*addb(ptrmask, i/(sys.PtrSize*8))) | |
if bits == 0 | |
i += sys.PtrSize * 8 | |
for j := 0; j < 8 && i < n; j++ | |
if bits&1 != 0 | |
// Same work as in scanobject; see comments there | |
obj := *(*uintptr)(unsafe.Pointer(b + i)) | |
if obj != 0 && arena_start <= obj && obj < arena_used | |
if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i); obj != 0 | |
greyobject(obj, b, i, hbits, span, gcw, objIndex) | |
bits >>= 1 | |
i += sys.PtrSize | |
scanobject: | |
// scanobject scans the object starting at b, adding pointers to gcw | |
// b must point to the beginning of a heap object or an oblet | |
// scanobject consults the GC bitmap for the pointer mask and the | |
// spans for the size of the object | |
hbits := heapBitsForAddr(b) | |
s := spanOfUnchecked(b) | |
n := s.elemsize | |
if n > maxObletBytes // 128KB | |
// Large object. Break into oblets for better | |
// parallelism and lower latency | |
if b == s.base() | |
for oblet := b + maxObletBytes; oblet < s.base()+s.elemsize; oblet += maxObletBytes | |
if !gcw.putFast(oblet) | |
gcw.put(oblet) | |
n = s.base() + s.elemsize - b | |
if n > maxObletBytes | |
n = maxObletBytes | |
var i uintptr | |
for i = 0; i < n; i += sys.PtrSize | |
if i != 0 | |
hbits = hbits.next() | |
// Load bits once. See CL 22712 and issue 16973 for discussion | |
bits := hbits.bits() | |
// During checkmarking, 1-word objects store the checkmark | |
// in the type bit for the one word. The only one-word objects | |
// are pointers, or else they'd be merged with other non-pointer | |
// data into larger allocations. | |
if i != 1*sys.PtrSize && bits&bitScan == 0 | |
break // no more pointers in this object | |
if bits&bitPointer == 0 | |
continue // not a pointer | |
// Work here is duplicated in scanblock and above. | |
// If you make changes here, make changes there too | |
obj := *(*uintptr)(unsafe.Pointer(b + i)) | |
// At this point we have extracted the next potential pointer | |
// Check if it points into heap and not back at the current object | |
if obj != 0 && arena_start <= obj && obj < arena_used && obj-b >= n | |
if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i); obj != 0 | |
greyobject(obj, b, i, hbits, span, gcw, objIndex) | |
gcw.bytesMarked += uint64(n) | |
gcw.scanWork += int64(i) | |
markrootFreeGStacks: | |
// markrootFreeGStacks frees stacks of dead Gs. | |
// This does not free stacks of dead Gs cached on Ps, but having a few | |
// cached stacks around isn't a problem | |
// | |
// Take list of dead Gs with stacks | |
lock(&sched.gflock) | |
list := sched.gfreeStack | |
sched.gfreeStack = nil | |
unlock(&sched.gflock) | |
if list == nil | |
return | |
tail := list | |
for gp := list; gp != nil; gp = gp.schedlink.ptr() | |
shrinkstack(gp) // stackfree(gp.stack) | |
tail = gp | |
// Put Gs back on the free list. | |
lock(&sched.gflock) | |
tail.schedlink.set(sched.gfreeNoStack) | |
sched.gfreeNoStack = list | |
unlock(&sched.gflock) | |
markrootSpans: | |
// markrootSpans marks roots for one shard of work.spans | |
if work.markrootDone | |
throw("markrootSpans during second markroot") | |
sg := mheap_.sweepgen | |
// gcSweepBuf: [gcSweepBlock, gcSweepBlock, ...] | |
// gcSweepBlock: [mspan, mspan, ... | gcSweepBlockEntries(512) ] | |
// NOTICE: here isn't mark the span itself, just mark the special finalizers | |
// also see: func (b *gcSweepBuf) block(i int) []*mspan | |
spans := mheap_.sweepSpans[mheap_.sweepgen/2%2].block(shard) | |
for _, s := range spans | |
if s.state != mSpanInUse | |
continue | |
if s.specials == nil | |
continue | |
lock(&s.speciallock) | |
for sp := s.specials; sp != nil; sp = sp.next | |
if sp.kind != _KindSpecialFinalizer | |
continue | |
// don't mark finalized object, but scan it so we | |
// retain everything it points to. | |
spf := (*specialfinalizer)(unsafe.Pointer(sp)) | |
// A finalizer can be set for an inner byte of an object, find object beginning | |
p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize | |
// Mark everything that can be reached from | |
// the object (but *not* the object itself or | |
// we'll never collect it). | |
scanobject(p, gcw) | |
// The special itself is a root. | |
scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw) | |
unlock(&s.speciallock) | |
scang: | |
// scang blocks until gp's stack has been scanned | |
// It might be scanned by scang or it might be scanned by the goroutine itself | |
// Either way, the stack scan has completed when scang returns. | |
gp.gcscandone = false | |
const yieldDelay = 10 * 1000 | |
var nextYield int64 | |
// Endeavor to get gcscandone set to true, | |
// either by doing the stack scan ourselves or by coercing gp to scan itself. | |
// gp.gcscandone can transition from false to true when we're not looking | |
// (if we asked for preemption), so any time we lock the status using | |
// castogscanstatus we have to double-check that the scan is still not done. | |
loop: | |
for i := 0; !gp.gcscandone; i++ | |
switch s := readgstatus(gp); s | |
case _Gdead: | |
// No stack. | |
gp.gcscandone = true | |
break loop | |
case _Gcopystack: | |
// Stack being switched. Go around again. | |
case _Grunnable, _Gsyscall, _Gwaiting: | |
// Claim goroutine by setting scan bit. | |
// Racing with execution or readying of gp. | |
// The scan bit keeps them from running | |
// the goroutine until we're done. | |
if castogscanstatus(gp, s, s|_Gscan) | |
if !gp.gcscandone | |
scanstack(gp, gcw) | |
gp.gcscandone = true | |
restartg(gp) | |
break loop | |
case _Gscanwaiting: | |
// newstack is doing a scan for us right now. Wait. | |
case _Grunning: | |
// Goroutine running. Try to preempt execution so it can scan itself. | |
// The preemption handler (in newstack) does the actual scan. | |
// Optimization: if there is already a pending preemption request | |
// (from the previous loop iteration), don't bother with the atomics. | |
if gp.preemptscan && gp.preempt && gp.stackguard0 == stackPreempt | |
break | |
// Ask for preemption and self scan | |
// how g scan itself | |
// see stack.go:newstack | |
// it will call scanstack(gp, gcw) then set gp.gcscandone = true | |
if castogscanstatus(gp, _Grunning, _Gscanrunning) | |
if !gp.gcscandone | |
gp.preemptscan = true | |
gp.preempt = true | |
gp.stackguard0 = stackPreempt | |
casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning) | |
if i == 0 | |
nextYield = nanotime() + yieldDelay | |
if nanotime() < nextYield // first round: 10ms | |
procyield(10) | |
else | |
osyield() | |
nextYield = nanotime() + yieldDelay/2 // following round: 5ms | |
gp.preemptscan = false // cancel scan request if no longer needed | |
scanstack: | |
// scanstack scans gp's stack, greying all pointers found on the stack | |
// scanstack is marked go:systemstack because it must not be preempted while using a workbuf | |
if gp.gcscanvalid | |
return | |
switch readgstatus(gp) &^ _Gscan | |
case _Gdead: | |
return | |
case _Grunning: | |
throw("scanstack: goroutine not stopped") | |
case _Grunnable, _Gsyscall, _Gwaiting: | |
// ok | |
// Shrink the stack if not much of it is being used. During | |
// concurrent GC, we can do this during concurrent mark. | |
if !work.markrootDone | |
shrinkstack(gp) | |
// Scan the stack. | |
var cache pcvalueCache | |
scanframe := func(frame *stkframe, unused unsafe.Pointer) bool { | |
scanframeworker(frame, &cache, gcw) | |
return true | |
} | |
gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0) | |
tracebackdefers(gp, scanframe, nil) | |
gp.gcscanvalid = true | |
scanframeworker: | |
f := frame.fn | |
targetpc := frame.continpc | |
if targetpc == 0 | |
// Frame is dead. | |
return | |
if targetpc != f.entry | |
targetpc-- | |
pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc, cache) | |
if pcdata == -1 | |
// We do not have a valid pcdata value but there might be a | |
// stackmap for this function. It is likely that we are looking | |
// at the function prologue, assume so and hope for the best. | |
pcdata = 0 | |
// Scan local variables if stack frame has been allocated | |
size := frame.varp - frame.sp | |
minsize := ARM64 ? sys.SpAlign : MinFrameSize | |
if size > minsize | |
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps)) | |
bv := stackmapdata(stkmap, pcdata) | |
size = uintptr(bv.n) * sys.PtrSize | |
scanblock(frame.varp-size, size, bv.bytedata, gcw) | |
// Scan arguments | |
if frame.arglen > 0 | |
if frame.argmap != nil | |
bv = *frame.argmap | |
else | |
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps)) | |
bv = stackmapdata(stkmap, pcdata) | |
scanblock(frame.argp, uintptr(bv.n)*sys.PtrSize, bv.bytedata, gcw) | |
gentraceback: | |
// Generic traceback. Handles runtime stack prints (pcbuf == nil), | |
// the runtime.Callers function (pcbuf != nil), as well as the garbage | |
// collector (callback != nil). A little clunky to merge these, but avoids | |
// duplicating the code and all its subtlety | |
tracebackdefers: | |
// Traceback over the deferred function calls. | |
// Report them like calls that have been invoked but not started executing yet. | |
procyield: | |
MOVL cycles+0(FP), AX | |
again: | |
PAUSE | |
SUBL $1, AX | |
JNZ again | |
RET | |
osyield: | |
MOVL $24, AX // sched_yield | |
SYSCALL | |
RET | |
gcMarkDone: | |
// gcMarkDone transitions the GC from mark 1 to mark 2 and from mark 2 | |
// to mark termination | |
semacquire(&work.markDoneSema) | |
// Disallow starting new workers so that any remaining workers | |
// in the current mark phase will drain out | |
atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, -0xffffffff) | |
atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, -0xffffffff) | |
if !gcBlackenPromptly | |
// Transition from mark 1 to mark 2 | |
// | |
// The global work list is empty, but there can still be work | |
// sitting in the per-P work caches | |
// Flush and disable work caches | |
gcBlackenPromptly = true | |
// Prevent completion of mark 2 until we've flushed | |
// cached workbufs | |
atomic.Xadd(&work.nwait, -1) | |
// GC is set up for mark 2. Let Gs blocked on the | |
// transition lock go while we flush caches. | |
semrelease(&work.markDoneSema) | |
// Flush all currently cached workbufs and | |
// ensure all Ps see gcBlackenPromptly | |
systemstack(func() { forEachP(func(_p_ *p) { _p_.gcw.dispose() }) }) | |
// Check that roots are marked, for debugging | |
gcMarkRootCheck() | |
// Now we can start up mark 2 workers. | |
atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 0xffffffff) | |
atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, 0xffffffff) | |
incnwait := atomic.Xadd(&work.nwait, +1) | |
if incnwait == work.nproc && !gcMarkWorkAvailable(nil) | |
// This loop will make progress because | |
// gcBlackenPromptly is now true, so it won't | |
// take this same "if" branch. | |
goto top | |
else | |
// Transition to mark termination | |
now := nanotime() | |
work.tMarkTerm = now | |
work.pauseStart = now | |
getg().m.preemptoff = "gcing" | |
systemstack(stopTheWorldWithSema) | |
// The gcphase is _GCmark, it will transition to _GCmarktermination | |
// below. The important thing is that the wb remains active until | |
// all marking is complete. This includes writes made by the GC | |
// Record that one root marking pass has completed. | |
work.markrootDone = true | |
// Disable assists and background workers. We must do | |
// this before waking blocked assists. | |
atomic.Store(&gcBlackenEnabled, 0) | |
// Wake all blocked assists. These will run when we | |
// start the world again. | |
gcWakeAllAssists() | |
// Likewise, release the transition lock. Blocked | |
// workers and assists will run when we start the | |
// world again. | |
semrelease(&work.markDoneSema) | |
// endCycle depends on all gcWork cache stats being | |
// flushed. This is ensured by mark 2. | |
nextTriggerRatio := gcController.endCycle() | |
// Perform mark termination. This will restart the world. | |
gcMarkTermination(nextTriggerRatio) | |
gcBgMarkPrepare: | |
// Background marking will stop when the work queues are empty | |
// and there are no more workers (note that, since this is | |
// concurrent, this may be a transient state, but mark | |
// termination will clean it up). Between background workers | |
// and assists, we don't really know how many workers there | |
// will be, so we pretend to have an arbitrarily large number | |
// of workers, almost all of which are "waiting". While a | |
// worker is working it decrements nwait. If nproc == nwait, | |
// there are no workers. | |
work.nproc = ^0 | |
work.nwait = ^0 | |
gcMarkRootPrepare: | |
// gcMarkRootPrepare queues root scanning jobs (stacks, globals, and | |
// some miscellany) and initializes scanning-related state. | |
// The caller must have called gcCopySpans().
// The world must be stopped. | |
work.nFlushCacheRoots = gcphase == _GCmarktermination ? int(gomaxprocs) : 0 | |
// Compute how many data and BSS root blocks there are. | |
nBlocks = \bytes => int((bytes + rootBlockBytes(256KB) - 1) / rootBlockBytes) | |
if !work.markrootDone | |
// Only scan globals once per cycle | |
work.nDataRoots = max(map(activeModules, \a => nBlocks(datap.edata - datap.data))) | |
work.nBSSRoots = max(map(activeModules, \a => nBlocks(datap.ebss - datap.bss))) | |
if !work.markrootDone | |
// On the first markroot, we need to scan span roots. | |
work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks() | |
work.nStackRoots = int(atomic.Loaduintptr(&allglen)) | |
else | |
// We've already scanned span roots and kept the scan | |
// up-to-date during concurrent mark. | |
work.nSpanRoots = 0 | |
// The hybrid barrier ensures that stacks can't | |
// contain pointers to unmarked objects, so on the | |
// second markroot, there's no need to scan stacks. | |
work.nStackRoots = 0 | |
if debug.gcrescanstacks > 0 | |
work.nStackRoots = int(atomic.Loaduintptr(&allglen)) | |
work.markrootNext = 0 | |
work.markrootJobs = uint32(fixedRootCount + work.nFlushCacheRoots + | |
work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots) | |
gcMarkTinyAllocs: | |
// greys all active tiny alloc blocks | |
for _, p := range &allp | |
if p == nil || p.status == _Pdead | |
break | |
c := p.mcache | |
if c == nil || c.tiny == 0 | |
continue | |
_, hbits, span, objIndex := heapBitsForObject(c.tiny, 0, 0) | |
gcw := &p.gcw | |
greyobject(c.tiny, 0, 0, hbits, span, gcw, objIndex) // just mark, it contains no pointer | |
if gcBlackenPromptly // flush and disable work caches | |
gcw.dispose() // dispose returns any cached pointers to the global queue | |
heapBitsForObject: | |
// heapBitsForObject returns the base address for the heap object | |
// containing the address p, the heapBits for base, | |
// the object's span, and of the index of the object in s | |
// If p does not point into a heap object, | |
// return base == 0 | |
// otherwise return the base of the object | |
arenaStart := mheap_.arena_start | |
if p < arenaStart || p >= mheap_.arena_used | |
return | |
off := p - arenaStart | |
idx := off >> _PageShift // which page | |
// p points into the heap, but possibly to the middle of an object. | |
// Consult the span table to find the block beginning. | |
s = mheap_.spans[idx] | |
if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse | |
if s == nil || s.state == _MSpanManual | |
// If s is nil, the virtual address has never been part of the heap. | |
// This pointer may be to some mmap'd region, so we allow it. | |
// Pointers into stacks are also ok, the runtime manages these explicitly. | |
return | |
// If this span holds object of a power of 2 size, just mask off the bits to | |
// the interior of the object. Otherwise use the size to get the base. | |
if s.baseMask != 0 | |
// optimize for power of 2 sized objects | |
base = s.base() | |
base = base + (p-base)&uintptr(s.baseMask) | |
objIndex = (base - s.base()) >> s.divShift | |
else | |
base = s.base() | |
if p-base >= s.elemsize | |
objIndex = uintptr(p-base) >> s.divShift * uintptr(s.divMul) >> s.divShift2 | |
base += objIndex * s.elemsize | |
// Now that we know the actual base, compute heapBits to return to caller. | |
hbits = heapBitsForAddr(base) | |
return | |
greyobject: | |
// obj is the start of an object with mark mbits. | |
// If it isn't already marked, mark it and enqueue into gcw | |
// base and off are for debugging only and could be removed | |
mbits := span.markBitsForIndex(objIndex) | |
if useCheckmark | |
if hbits.isCheckmarked(span.elemsize) | |
return | |
hbits.setCheckmarked(span.elemsize) | |
else | |
// If marked we have nothing to do. | |
if mbits.isMarked() | |
return | |
// mbits.setMarked() // Avoid extra call overhead with manual inlining. | |
atomic.Or8(mbits.bytep, mbits.mask) | |
// If this is a noscan object, fast-track it to black instead of greying it | |
if span.spanclass.noscan() | |
gcw.bytesMarked += uint64(span.elemsize) | |
return | |
if !gcw.putFast(obj) | |
gcw.put(obj) | |
gcw.putFast: | |
wbuf := w.wbuf1 | |
if wbuf == nil | |
return false | |
else if wbuf.nobj == len(wbuf.obj) | |
return false | |
wbuf.obj[wbuf.nobj] = obj | |
wbuf.nobj++ | |
return true | |
gcw.put: | |
flushed := false | |
wbuf := w.wbuf1 | |
if wbuf == nil | |
w.init() | |
wbuf = w.wbuf1 | |
else if wbuf.nobj == len(wbuf.obj) | |
w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1 | |
wbuf = w.wbuf1 | |
if wbuf.nobj == len(wbuf.obj) | |
putfull(wbuf) | |
wbuf = getempty() | |
w.wbuf1 = wbuf | |
flushed = true | |
wbuf.obj[wbuf.nobj] = obj | |
wbuf.nobj++ | |
// If we put a buffer on full, let the GC controller know so | |
// it can encourage more workers to run. We delay this until | |
// the end of put so that w is in a consistent state, since | |
// enlistWorker may itself manipulate w. | |
if flushed && gcphase == _GCmark | |
gcController.enlistWorker() | |
gcw.tryGetFast: | |
wbuf := w.wbuf1 | |
if wbuf == nil { | |
return 0 | |
} | |
if wbuf.nobj == 0 { | |
return 0 | |
} | |
wbuf.nobj-- | |
return wbuf.obj[wbuf.nobj] | |
gcw.tryGet: | |
wbuf := w.wbuf1 | |
if wbuf == nil | |
w.init() | |
wbuf = w.wbuf1 | |
if wbuf.nobj == 0 | |
w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1 | |
wbuf = w.wbuf1 | |
if wbuf.nobj == 0 | |
owbuf := wbuf | |
wbuf = trygetfull() | |
if wbuf == nil | |
return 0 | |
putempty(owbuf) | |
w.wbuf1 = wbuf | |
wbuf.nobj-- | |
return wbuf.obj[wbuf.nobj] | |
gcw.get: | |
same as gcw.tryGet, but replace trygetfull with getfull | |
### mark termination | |
gcMarkTermination: | |
// World is stopped. | |
// Start marktermination which includes enabling the write barrier. | |
atomic.Store(&gcBlackenEnabled, 0) | |
gcBlackenPromptly = false | |
setGCPhase(_GCmarktermination) | |
work.heap1 = memstats.heap_live | |
startTime := nanotime() | |
mp := acquirem() | |
mp.preemptoff = "gcing" | |
_g_ := getg() | |
_g_.m.traceback = 2 | |
gp := _g_.m.curg | |
casgstatus(gp, _Grunning, _Gwaiting) | |
gp.waitreason = "garbage collection" | |
// Run gc on the g0 stack. We do this so that the g stack | |
// we're currently running on will no longer change | |
systemstack(func() { gcMark(startTime) }) | |
systemstack(func() { | |
work.heap2 = work.bytesMarked | |
if debug.gccheckmark > 0 | |
// Run a full stop-the-world mark using checkmark bits, | |
// to check that we didn't forget to mark anything during | |
// the concurrent mark process | |
gcResetMarkState() | |
initCheckmarks() | |
gcMark(startTime) | |
clearCheckmarks() | |
// marking is complete so we can turn the write barrier off | |
setGCPhase(_GCoff) | |
gcSweep(work.mode) | |
if debug.gctrace > 1 | |
startTime = nanotime() | |
// The g stacks have been scanned so | |
// they have gcscanvalid==true and gcworkdone==true | |
// Reset these so that all stacks will be rescanned | |
gcResetMarkState() | |
finishsweep_m() | |
// Still in STW but gcphase is _GCoff, reset to _GCmarktermination | |
// At this point all objects will be found during the gcMark which | |
// does a complete STW mark and object scan | |
setGCPhase(_GCmarktermination) | |
gcMark(startTime) | |
setGCPhase(_GCoff) // marking is done, turn off wb. | |
gcSweep(work.mode) | |
}) | |
_g_.m.traceback = 0 | |
casgstatus(gp, _Gwaiting, _Grunning) | |
if trace.enabled | |
traceGCDone() | |
// all done | |
mp.preemptoff = "" | |
// Update GC trigger and pacing for the next cycle | |
gcSetTriggerRatio(nextTriggerRatio) | |
// Update timing memstats | |
now := nanotime() | |
sec, nsec, _ := time_now() | |
unixNow := sec*1e9 + int64(nsec) | |
work.pauseNS += now - work.pauseStart | |
work.tEnd = now | |
atomic.Store64(&memstats.last_gc_unix, uint64(unixNow)) // must be Unix time to make sense to user | |
atomic.Store64(&memstats.last_gc_nanotime, uint64(now)) // monotonic time for us | |
memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(work.pauseNS) | |
memstats.pause_end[memstats.numgc%uint32(len(memstats.pause_end))] = uint64(unixNow) | |
memstats.pause_total_ns += uint64(work.pauseNS) | |
// Update work.totaltime. | |
sweepTermCpu := int64(work.stwprocs) * (work.tMark - work.tSweepTerm) | |
// We report idle marking time below, but omit it from the | |
// overall utilization here since it's "free". | |
markCpu := gcController.assistTime + gcController.dedicatedMarkTime + gcController.fractionalMarkTime | |
markTermCpu := int64(work.stwprocs) * (work.tEnd - work.tMarkTerm) | |
cycleCpu := sweepTermCpu + markCpu + markTermCpu | |
work.totaltime += cycleCpu | |
// Compute overall GC CPU utilization. | |
totalCpu := sched.totaltime + (now-sched.procresizetime)*int64(gomaxprocs) | |
memstats.gc_cpu_fraction = float64(work.totaltime) / float64(totalCpu) | |
// Reset sweep state. | |
sweep.nbgsweep = 0 | |
sweep.npausesweep = 0 | |
if work.userForced | |
memstats.numforcedgc++ | |
// Bump GC cycle count and wake goroutines waiting on sweep. | |
lock(&work.sweepWaiters.lock) | |
memstats.numgc++ | |
injectglist(work.sweepWaiters.head.ptr()) | |
work.sweepWaiters.head = 0 | |
unlock(&work.sweepWaiters.lock) | |
// Finish the current heap profiling cycle and start a new | |
// heap profiling cycle. We do this before starting the world | |
// so events don't leak into the wrong cycle. | |
mProf_NextCycle() | |
systemstack(startTheWorldWithSema) | |
// Flush the heap profile so we can start a new cycle next GC. | |
// This is relatively expensive, so we don't do it with the | |
// world stopped. | |
mProf_Flush() | |
// Prepare workbufs for freeing by the sweeper. We do this | |
// asynchronously because it can take non-trivial time. | |
prepareFreeWorkbufs() | |
// Free stack spans. This must be done between GC cycles. | |
systemstack(freeStackSpans) | |
semrelease(&worldsema) | |
// Careful: another GC cycle may start now | |
releasem(mp) | |
mp = nil | |
// now that gc is done, kick off finalizer thread if needed | |
if !concurrentSweep | |
// give the queued finalizers, if any, a chance to run | |
Gosched() | |
### sweep | |
gcSweep: | |
lock(&mheap_.lock) | |
mheap_.sweepgen += 2 | |
mheap_.sweepdone = 0 | |
mheap_.pagesSwept = 0 | |
unlock(&mheap_.lock) | |
// Background sweep. | |
lock(&sweep.lock) | |
if sweep.parked { | |
sweep.parked = false | |
ready(sweep.g, 0, true) // bgsweep | |
} | |
unlock(&sweep.lock) | |
bgsweep: | |
sweep.g = getg() | |
lock(&sweep.lock) | |
sweep.parked = true | |
c <- 1 | |
goparkunlock(&sweep.lock, "GC sweep wait", traceEvGoBlock, 1) | |
for | |
for gosweepone() != ^uintptr(0) | |
sweep.nbgsweep++ | |
Gosched() | |
for freeSomeWbufs(true) | |
Gosched() | |
lock(&sweep.lock) | |
if !gosweepdone() | |
// This can happen if a GC runs between | |
// gosweepone returning ^0 above | |
// and the lock being acquired | |
unlock(&sweep.lock) | |
continue | |
sweep.parked = true | |
goparkunlock(&sweep.lock, "GC sweep wait", traceEvGoBlock, 1) | |
sweepone: | |
npages := ^uintptr(0) | |
sg := mheap_.sweepgen | |
for | |
s := mheap_.sweepSpans[1-sg/2%2].pop() | |
if s == nil | |
atomic.Store(&mheap_.sweepdone, 1) | |
break | |
if s.sweepgen != sg-2 || !atomic.Cas(&s.sweepgen, sg-2, sg-1)
continue | |
npages = s.npages | |
if !s.sweep(false) | |
npages = 0 | |
return npages | |
sweep: | |
atomic.Xadd64(&mheap_.pagesSwept, int64(s.npages)) | |
freeToHeap := false | |
specialp := &s.specials | |
special := *specialp | |
for special != nil | |
objIndex := uintptr(special.offset) / size | |
p := s.base() + objIndex*size | |
mbits := s.markBitsForIndex(objIndex) | |
if !mbits.isMarked() | |
// queue finalizers of this object | |
nalloc := uint16(s.countAlloc()) | |
if spc.sizeclass() == 0 && nalloc == 0 | |
s.needzero = 1 | |
freeToHeap = true | |
nfreed := s.allocCount - nalloc | |
s.allocCount = nalloc | |
wasempty := s.nextFreeIndex() == s.nelems | |
s.freeindex = 0 // reset allocation index to start of span. | |
// gcmarkBits becomes the allocBits.
// get a fresh cleared gcmarkBits in preparation for next GC | |
s.allocBits = s.gcmarkBits | |
s.gcmarkBits = newMarkBits(s.nelems) | |
// Initialize alloc bits cache. | |
s.refillAllocCache(0) | |
if freeToHeap || nfreed == 0 | |
atomic.Store(&s.sweepgen, sweepgen) | |
if nfreed > 0 && spc.sizeclass() != 0 | |
c.local_nsmallfree[spc.sizeclass()] += uintptr(nfreed) | |
res = mheap_.central[spc].mcentral.freeSpan(s, preserve, wasempty) | |
// MCentral_FreeSpan updates sweepgen | |
else if freeToHeap | |
mheap_.freeSpan(s, 1) | |
c.local_nlargefree++ | |
c.local_largefree += size | |
res = true | |
if !res | |
// The span has been swept and is still in-use, so put | |
// it on the swept in-use list. | |
mheap_.sweepSpans[sweepgen/2%2].push(s) | |
return res |