Skip to content

Instantly share code, notes, and snippets.

@aclements
Created October 23, 2016 03:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aclements/5872f2b521ced4dd8d0b5e2fb4f88957 to your computer and use it in GitHub Desktop.
Save aclements/5872f2b521ced4dd8d0b5e2fb4f88957 to your computer and use it in GitHub Desktop.
GDB Python script for Go to find a path from a GC root to an object
# Compute the object graph of a Go heap and find the path from a root
# to a target address.
import gdb
import collections
class SliceValue:
"""Wrapper for slice values."""
def __init__(self, val):
self.val = val
@property
def len(self):
return int(self.val['len'])
@property
def cap(self):
return int(self.val['cap'])
def __getitem__(self, i):
if i < 0 or i >= self.len:
raise IndexError(i)
ptr = self.val["array"]
return (ptr + i).dereference()
def iterlist(x, link="next"):
while x != 0:
yield x
x = x[link]
_MSpanInUse = 0
_MSpanStack = 1
_Gdead = 6
_KindSpecialFinalizer = 1
ptrSize = 8
_PageShift = 13
def heapSpans():
"""Yield in-use heap spans."""
for span in SliceValue(gdb.parse_and_eval("'runtime.h_allspans'")):
if span['state'] == _MSpanInUse:
yield span
# def spanobjs(span):
# """Yield addresses of allocated objects in span."""
# base = span['startAddr']
# elemsize = span['elemsize']
# nelems = span['nelems']
# freeindex = span['freeindex']
# abits = span['allocBits']
# for i in range(0, nelems):
# if i < freeindex or abits[i/8]&(1<<(i%8)) != 0:
# yield base + i * elemsize
class AddrSpace(object):
def __init__(self):
# Map from base address to Obj.
self.objs = {}
self.arena = (long(gdb.parse_and_eval("'runtime.mheap_'.arena_start")),
long(gdb.parse_and_eval("'runtime.mheap_'.arena_used")))
self.uintptr_p = gdb.lookup_type("uintptr").pointer()
self.hspans = gdb.parse_and_eval("'runtime.h_spans'.array")
bytep = gdb.lookup_type("uint8").pointer()
self.bitmap = gdb.parse_and_eval("'runtime.mheap_'.bitmap").cast(bytep)
def obj(self, base, *args):
if base in self.objs:
return self.objs[base]
obj = Obj(base, *args)
self.objs[base] = obj
return obj
def spanOf(self, b):
if b < self.arena[0] or b >= self.arena[1]:
return None
s = self.hspans[(b - self.arena[0]) >> _PageShift]
if s == 0:
return None
return s
def inheap(self, b):
s = self.spanOf(b)
if s == None or b >= s["limit"] or s["state"] != _MSpanInUse:
return False
return True
# Bitmap-based object
class Obj(object):
def __init__(self, base, len, name, bitmap):
self.base, self.len, self.name, self.bitmap = base, len, name, bitmap
self.marked = False
self.parent = None
def __repr__(self):
return "Obj(%#x, %d, %s, %s)" % (self.base, self.len, self.name, self.bitmap)
def children(self, addrspace):
for word in range(self.len / ptrSize):
if self.bitmap[word]:
val = long(gdb.Value(self.base + word * ptrSize).cast(addrspace.uintptr_p).dereference())
if not addrspace.inheap(val):
continue
s = spanOf(val)
len = long(s["elemsize"])
yield addrspace.obj(val, len, "", HeapBitmap(addrspace, val, len))
class OneBitBitmap(object):
def __init__(self, bitmap, startBit):
self.bitmap, self.startBit = bitmap, startBit
def __getitem__(self, i):
bit = self.startBit + i
return bool(self.bitmap[bit / 8] & (1 << (bit % 8)))
class ConstBitmap(object):
def __init__(self, bits, len):
self.bits, self.len = bits, len
def repeat(self, n):
bits, len = self.bits, self.len
while len <= n * self.len:
bits |= bits << len
len *= 2
len = n * self.len
bits &= (1 << len) - 1
return ConstBitmap(bits, len)
def __getitem__(self, i):
if not (0 <= i < self.len):
raise IndexError()
return bool((self.bits >> i) & 1)
def __repr__(self):
return "ConstBitmap(%#x, %d)" % (self.bits, self.len)
def __str__(self):
return "0b" + bin(self.bits)[2:].rjust(self.len, "0")
class HeapBitmap(object):
def __init__(self, addrspace, base, len):
off = (base - addrspace.arena[0]) / ptrSize
self.bitp = addrspace.bitmap - off / 4 - 1
self.shift = off & 3
self.len = len
def __getitem__(self, i):
if not (0 <= i < self.len):
raise IndexError()
shift = self.shift + i
bitp = self.bitp - shift / 4
shift = shift % 4
return bool((bitp.dereference() >> shift) & 1)
def typeBitmap(typ):
typ = typ.strip_typedefs()
words = typ.sizeof / ptrSize
if typ.code == gdb.TYPE_CODE_PTR:
if words != 1:
raise ValueError("pointer %s has wrong size %d" % (typ, typ.sizeof))
return ConstBitmap(1, 1)
if typ.code == gdb.TYPE_CODE_FUNC:
# This has size 1 for some reason.
return ConstBitmap(1, 1)
if typ.code == gdb.TYPE_CODE_ARRAY:
lo, hi = typ.range()
return typeBitmap(typ.target()).repeat(hi - lo)
bitmap = 0
if typ.code == gdb.TYPE_CODE_STRUCT:
for f in typ.fields():
fbitmap = typeBitmap(f.type)
start = f.bitpos / 8 / ptrSize
bitmap |= fbitmap.bits << start
return ConstBitmap(bitmap, words)
if typ.code in (gdb.TYPE_CODE_INT, gdb.TYPE_CODE_FLT,
gdb.TYPE_CODE_CHAR, gdb.TYPE_CODE_BOOL):
return ConstBitmap(0, words)
raise ValueError("Unhandled type code %d (%s)" % (typ.code, typ))
# Type-based object
#
# Doesn't work because for global roots, we only know the DWARF types,
# but for interfaces, we only know the Go types.
# class Obj(object):
# def __init__(self, val, name):
# self.val, self.name = val, name
# self.marked, self.parent = False, None
# def __repr__(self):
# return "Obj(%r, %r)" % (self.val, self.name)
# def children(self, addrspace):
# children = []
# def rec(val, name):
# typ = val.type.strip_typedefs()
# # TODO: gdb seems confused by our func pointers. It thinks
# # they're 1 byte and dereference just seems to return the
# # same thing.
# if typ.code in (gdb.TYPE_CODE_PTR, gdb.TYPE_CODE_FUNC):
# print typ, self.val.type
# children.append(addrspace.obj(val.dereference(), "*" + name))
# elif typ.code == gdb.TYPE_CODE_ARRAY:
# for i in range(*typ.range()):
# rec(val[i], "%s[%d]" % (name, i))
# elif typ.code == gdb.TYPE_CODE_STRUCT:
# for f in typ.fields():
# rec(val[f.name], "%s.%s" % (name, f.name))
# elif typ.code in (gdb.TYPE_CODE_INT, gdb.TYPE_CODE_FLT,
# gdb.TYPE_CODE_CHAR, gdb.TYPE_CODE_BOOL):
# return
# else:
# raise ValueError("Unhandled type code %d (%s)" % (typ.code, typ))
# rec(self.val, self.name)
# return children
def roots(addrspace):
# Globals
gbitmaps = []
bytep = gdb.lookup_type("uint8").pointer()
for md in iterlist(gdb.parse_and_eval("&'runtime.firstmoduledata'")):
gbitmaps.append((long(md["data"]), long(md["edata"]), md["gcdata"].cast(bytep)))
gbitmaps.append((long(md["bss"]), long(md["ebss"]), md["gcbss"].cast(bytep)))
for sym in gdb.selected_frame().block().global_block:
if not sym.is_variable:
continue
addr = long(sym.value().address)
# Find sym's bitmap
bitmap = None
for sbase, send, sbitmap in gbitmaps:
if sbase <= addr < send:
bitmap = OneBitBitmap(sbitmap, (addr - sbase) / ptrSize)
break
if bitmap is None:
# Could be rodata, noptrdata, etc.
continue
yield addrspace.obj(addr, sym.type.sizeof, sym.name, bitmap)
#yield addrspace.obj(sym.value(), sym.name)
# Stacks of running Gs
tidToThr = {}
for thr in gdb.selected_inferior().threads():
pid, lwpid, tid = thr.ptid
tidToThr[lwpid] = thr
haveGs = set()
for mp in iterlist(gdb.parse_and_eval("'runtime.allm'"), "alllink"):
thr = tidToThr.get(long(mp["procid"]), None)
if thr is None:
continue
thr.switch()
curg = mp["curg"]
if curg == 0:
continue
sp = gdb.parse_and_eval("$sp")
if not (curg["stack"]["lo"] < sp <= curg["stack"]["hi"]):
# We're on the g0 and curg's state is in sched.
continue
for root in stackRoots(addrspace):
yield root
haveGs.add(curg["goid"])
# Stacks of non-running Gs
for g in SliceValue(gdb.parse_and_eval("'runtime.allgs'")):
if g["goid"] in haveGs or g["atomicstatus"] == _Gdead:
continue
if g['syscallsp'] != 0:
sp, pc = g['syscallsp'], g['syscallpc']
else:
sp, pc = g['sched']['sp'], g['sched']['pc']
gdb.execute('set $sp = %#x' % sp)
gdb.execute('set $pc = %#x' % pc)
for root in stackRoots(addrspace):
yield root
# Span specials
for s in heapSpans():
for sp in iterlist(s["specials"]):
if sp["kind"] != _KindSpecialFinalizer:
continue
obj = long(s["startAddr"] + sp["offset"]/s["elemsize"]*s["elemsize"])
yield addrspace.obj(obj, s["elemsize"], "finalized object", HeapBitmap(addrspace, obj, s["elemsize"]))
# TODO: Finalizer function
# TODO: Finalizer queue
def stackRoots(addrspace):
"""Yield the roots from the current stack."""
fr = gdb.newest_frame()
while fr:
block = fr.block()
while block.function != None:
for sym in block:
val = sym.value(fr)
name = "%s[%s]" % (fr.name(), sym.name)
base = long(val.address)
#print hex(base), name
#if addrspace.inheap(base):
yield addrspace.obj(base, sym.type.sizeof, name, typeBitmap(sym.type))
#yield addrspace.obj(val, name)
block = block.superblock
if fr.name() == "runtime.goexit":
# GDB thinks it can backtrace past here, but just gets
# confused.
break
fr = fr.older()
def computeParents():
addrspace = AddrSpace()
q = collections.deque(roots(addrspace))
for obj in q:
obj.marked = True
while len(q):
parent = q.popleft()
for obj in parent.children(addrspace):
if obj.parent == None:
# Record parents for roots to, to help find cycles.
obj.parent = parent
if obj.marked:
continue
obj.marked = True
q.append(obj)
return addrspace
class FindPath(gdb.Command):
"""find-path obj: print the path from a GC root to obj"""
def __init__(self):
super(FindPath, self).__init__("find-path", gdb.COMMAND_USER)
def invoke(self, arg, from_tty):
target = gdb.parse_and_eval(arg)
if target.type.code == gdb.TYPE_CODE_PTR:
target = long(target.dereference().address)
else:
target = long(target)
addrspace = computeParents()
o = addrspace.objs.get(target)
while o:
print o
o = o.parent
FindPath()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment