Skip to content

Instantly share code, notes, and snippets.

@aclements
Created November 17, 2015 14:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aclements/cd6cd1a93732099ff982 to your computer and use it in GitHub Desktop.
Save aclements/cd6cd1a93732099ff982 to your computer and use it in GitHub Desktop.
Symbolic bug finder for golang/go#13143
# PYTHONPATH=/home/austin/r/stp/build/bindings/python
import sys, collections
import sym
_PageSize = 8192
_PageShift = 13
_HeapAllocChunk = 1 << 20
_MaxArena32 = 2 << 30
ptrSize = 4
_PhysPageSize = 4096
PAGE_OFFSET = 0xC0000000 # Linux default 3G/1G split
UserTop = PAGE_OFFSET-1
TASK_SIZE = PAGE_OFFSET - 0x01000000
class H(object):
def __init__(self):
# mallocinit
arenaSize = 128 << 20
bitmapSize = _MaxArena32 / (ptrSize * 8 / 4)
spansSize = _MaxArena32 / _PageSize * ptrSize
spansSize = round(spansSize, _PageSize)
firstmoduledata_end = 0x1000
p = round(firstmoduledata_end + (1<<18), 1<<20)
pSize = bitmapSize + spansSize + arenaSize + _PageSize
p, pSize = sym.uintptr(p), sym.uintptr(pSize)
#p = sym.uintptr(0) # <- Quick way to the bug
# XXX Probably don't need arena_reserved
p, self.arena_reserved = sysReserve(p, pSize)
p1 = round(p, _PageSize)
self.arena_start = sym.uintptr(p1 + spansSize + bitmapSize)
self.arena_used = self.arena_start
self.arena_end = sym.uintptr(p + pSize)
self.spans_len = spansSize / ptrSize
def print_(self):
sym.print_("h.arena_start=%s h.arena_used=%s h.arena_end=%s",
self.arena_start, self.arena_used, self.arena_end)
def round(n, a):
return (n + (a - 1)) & ~sym.uintptr(a - 1)
def mHeap_Grow(npage):
sym.print_("mHeap_Grow %s", npage)
npage = round(npage, (64<<10)/_PageSize)
sym.print_("mHeap_Grow rounded %s", npage)
ask = npage << _PageShift
if ask < _HeapAllocChunk:
ask = _HeapAllocChunk
v = mHeap_SysAlloc(ask)
if v == 0:
if ask > npage<<_PageShift:
ask = npage << _PageShift
v = mHeap_SysAlloc(ask)
if v == 0:
print "out of memory"
return False
s_start, s_npages = v>>_PageShift, ask>>_PageShift
p = s_start
p -= h.arena_start >> _PageShift
min_i = p
max_i = p+s_npages-1
sym.print_("min_i=%s max_i=%s", min_i, max_i)
assert min_i >= 0
assert min_i < h.spans_len
assert max_i >= 0
assert max_i < h.spans_len
return True
def mHeap_SysAlloc(n):
# n uintptr
# returns uintptr
sym.print_("mHeap_SysAlloc %s", n)
if n > h.arena_end - h.arena_used:
sym.print_("mHeap_SysAlloc overflow arena %d", h.arena_end - h.arena_used)
p_size = round(n + _PageSize, 256<<20)
new_end = h.arena_end + p_size # BUG: This can wrap around
if False and new_end < h.arena_end: # NEW NEW NEW
print "out of memory (large allocation 2)"
return 0
#if new_end <= h.arena_start + _MaxArena32:
# NEW NEW NEW
if h.arena_end <= new_end and new_end <= h.arena_start + _MaxArena32:
sym.print_("mHeap_SysAlloc has space below _MaxArena32 %s <= %s", new_end, h.arena_start + _MaxArena32)
# BUG: sysReserve calls mmap *without* MAP_FIXED, so mmap
# can return an address *below* arena_start. Then we'll
# take the second path below (p+p_size <= ...), then we'll
# take the "has space" path below, return p, and then wrap
# around backwards when trying to compute the first index
# and use a huge index.
p, reserved = sysReserve(h.arena_end, p_size)
if p == 0:
return 0
if p == h.arena_end:
h.arena_end = new_end
h.arena_reserved = reserved
#elif p+p_size <= h.arena_start+_MaxArena32:
# NEW NEW NEW
elif h.arena_start <= p and p+p_size <= h.arena_start+_MaxArena32:
h.arena_end = p + p_size
used = p + (-p & (_PageSize - 1))
h.arena_used = used
h.arena_reserved = reserved
else:
sysFree(p, p_size)
if n <= h.arena_end - h.arena_used:
sym.print_("mHeap_SysAlloc has space")
p = h.arena_used
sysMap(p, n, h.arena_reserved)
h.arena_used = p + n
return p
sym.print_("mHeap_SysAlloc no space; sysAlloc %s", p_size)
p_size = round(n, _PageSize) + _PageSize
p = sysAlloc(p_size)
if p == 0:
return 0
if p < h.arena_start or p+p_size-h.arena_start >= _MaxArena32:
print "runtime: memory allocated by OS not in usable range"
sysFree(p, p_size)
return 0
p_end = p + p_size
p += (0-p) & (_PageSize - 1)
if p + n > h.arena_used:
h.arena_used = p + n
if p_end > h.arena_end:
h.arena_end = p_end
return p
def largeAloc(size):
# size uintptr
if size == 0:
return 0
if size + _PageSize < size:
print "out of memory (large allocation)"
return 0
npages = size >> _PageShift
if size & (_PageSize - 1) != 0:
npages += 1
return mHeap_Grow(npages)
def sysReserve(v, n):
p = mmap(v, n, False) # _PROT_NONE, _MAP_ANON|_MAP_PRIVATE
return p, True
def sysMap(v, n, reserved):
p = mmap(v, n, True) # _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE
if p == 0:
print "sysMap: out of memory"
sys.exit(0)
if p != v:
print "sysMap: cannot map pages in arena address space"
sys.exit(0)
def sysAlloc(n):
return mmap(0, n, False) # _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE
def sysFree(v, n):
munmap(v, n)
#
# Linux implementation
#
vmas = []
def find_vma_ends_after(addr):
for i, (start, end) in enumerate(vmas):
if end > addr:
return i
return len(vmas)
def remove_vma(start, end):
i = 0
while i < len(vmas):
vmaStart, vmaEnd = vmas[i]
if vmaStart < start and end < vmaEnd:
# Split this region
vmas.insert(i + 1, (end, vmaEnd))
assert end != 0
vmaEnd = start
elif start <= vmaStart and vmaEnd <= end:
# Remove region
vmas.pop(i)
continue
elif start <= vmaStart and end > vmaStart:
# Left truncate
vmaStart = end
elif start < vmaEnd and end >= vmaEnd:
# Right truncate
vmaEnd = start
vmas[i] = (vmaStart, vmaEnd)
assert vmaStart != 0
i += 1
def check_vmas():
return
lastEnd = 0
for (start, end) in vmas:
assert lastEnd <= start
assert start < end
assert end <= PAGE_OFFSET
lastEnd = end
def munmap(addr, n):
assert addr & (_PhysPageSize - 1) == 0
n = round(n, _PhysPageSize)
remove_vma(addr, addr + n)
def mmap(addr, n, fixed): # do_mmap_pgoff
sym.print_("mmap %s %s %s", addr, n, fixed)
if n == 0:
return 0 # EINVAL
n = round(n, _PhysPageSize)
if n == 0:
return 0 # ENOMEM
addr = get_unmapped_area(addr, n, fixed)
if addr == 0:
return 0 # error
remove_vma(addr, addr + n)
assert addr != 0
vmas.insert(find_vma_ends_after(addr), (addr, addr + n))
check_vmas()
return addr
def get_unmapped_area(addr, n, fixed):
if n > TASK_SIZE:
return 0 # ENOMEM
addr = arch_get_unmapped_area(addr, n, fixed)
if addr == 0:
return 0 # error
sym.print_("addr=%s n=%s TASK_SIZE=%s TASK_SIZE-n=%s", addr, n, TASK_SIZE, TASK_SIZE - n)
if addr > TASK_SIZE - n:
return 0 # ENOMEM
if addr & (_PhysPageSize - 1) != 0:
return 0 # EINVAL
return addr
def arch_get_unmapped_area(addr, n, fixed):
if fixed:
return addr
if n > TASK_SIZE:
return 0 # ENOMEM
# Linux uses the following to satisfy the request for addr if it
# can. However, returning an arbitrary unmapped region strictly
# generalizes this, so in order to cut down on paths, we only do
# that.
# if addr != 0:
# addr = round(addr, _PhysPageSize)
# if TASK_SIZE - n >= addr:
# vmai = find_vma_ends_after(addr)
# if vmai == len(vmas) or addr + n <= vmas[vmai][0]:
# # Non-fixed mmap is allowed to return addr, but also
# # allowed to return something else entirely. Do both.
# if sym.fork():
# return addr
return unmapped_area(n)
def unmapped_area(n):
lastEnd = _PhysPageSize # First page off limits
addr = sym.uintptr("unmapped_addr")
anyRoom = False
for (start, end) in vmas:
if start - lastEnd >= n:
anyRoom = True
if sym.fork():
sym.assume(addr + n <= start)
break
lastEnd = end
else:
# After last VMA
if PAGE_OFFSET - lastEnd < n:
# There isn't enough room after the last VMA.
if anyRoom:
# There was room earlier and we just didn't take it.
# Other branches took those spaces, so just kill off
# this branch.
sys.exit(0)
return 0
sym.assume(addr + n < PAGE_OFFSET)
sym.assume(addr >= lastEnd)
sym.assume(addr & (_PhysPageSize - 1) == 0)
sym.print_("picked base %s for %s byte alloc", addr, n)
return addr
h = H()
def run():
h.print_()
try:
# Do enough symbolic allocations to cover all code paths in a
# single trace.
alloc1, alloc2, alloc3 \
= sym.uintptr("alloc1"), sym.uintptr("alloc2"), sym.uintptr("alloc3")
if largeAloc(alloc1) == 0:
sym.print_("alloc1 failed")
h.print_()
# if largeAloc(alloc2) == 0:
# print "alloc2 failed"
# return
# h.print_()
# if largeAloc(alloc3) == 0:
# print "alloc3 failed"
# return
# h.print_()
if largeAloc(0x8 * _PageSize) == 0:
print "final alloc failed"
return
check_vmas()
print "success", [(sym.get(s), sym.get(e)) for s, e in vmas]
# print "success", [(sym.exprString(s), sym.exprString(e)) for s, e in vmas]
except AssertionError:
sym.print_("alloc=[%s,%s,%s]", alloc1, alloc2, alloc3)
h.print_()
sym.show_prints()
raise
run()
import os, sys
import stp
# The stp bindings are rather incomplete.
from stp.stp import _lib as _stplib
s = stp.Solver()
_id = 0
prints = []
def namegen(name):
global _id
i = _id
_id += 1
return "%s_%d" % (name, i)
def uintptr(name):
if isinstance(name, stp.Expr):
return name
if isinstance(name, basestring):
return s.bitvec(namegen(name), 32)
if isinstance(name, (int, long)):
return s.bitvecval(32, name)
raise TypeError("don't know how to uintptr %r" % name)
uintptrType = _stplib.vc_bvType(s.vc, 32)
uintptrArrayType = _stplib.vc_arrayType(s.vc, uintptrType, uintptrType)
def uintptrArray(name):
if isinstance(name, UintptrArray):
return name
if isinstance(name, basestring):
se = _stplib.vc_varExpr(s.vc, namegen(name), uintptrArrayType)
return UintptrArray(s, se)
raise TypeError("don't know how to uintptrArray %r" % name)
class UintptrArray(object):
def __init__(self, s, expr):
self.s = s
self.expr = expr
def __toExpr(self, x, width):
if isinstance(x, (int, long)):
return self.s.bitvecval(width, x)
return x
def __getitem__(self, idx):
idx = self.__toExpr(idx, 32)
return stp.Expr(self.s, 32, _stplib.vc_readExpr(self.s.vc, self.expr, idx.expr))
def write(self, idx, val):
idx = self.__toExpr(idx, 32)
val = self.__toExpr(val, 32)
return UintptrArray(self.s, _stplib.vc_writeExpr(self.s.vc, self.expr, idx.expr, val.expr))
def assume(expr):
s.add(expr)
def print_(fmt, *exprs):
prints.append((fmt, exprs))
def fork():
pid = os.fork()
if pid == 0:
return True
else:
_, status = os.waitpid(pid, 0)
if status != 0:
if os.WIFEXITED(status):
sys.exit(os.WEXITSTATUS(status))
print "exit status:", status
sys.exit(1)
return False
def _branch(expr):
canTrue, canFalse = s.check(expr), s.check(s.not_(expr))
if canTrue and canFalse:
if fork():
s.add(expr)
return True
else:
s.add(s.not_(expr))
return False
elif canTrue:
return True
elif canFalse:
return False
else:
raise RuntimeError("%s is neither satisfiable nor unsatisfiable" % expr)
stp.Expr.__nonzero__ = _branch
def get(expr):
if isinstance(expr, stp.Expr):
# XXX vc_getCounterExample is flaky. Sometimes it just returns
# constant zeros when given a full expression. Simplifying the
# expression first helps.
expr2 = _stplib.vc_simplify(s.vc, expr.expr)
value = _stplib.vc_getCounterExample(s.vc, expr2)
return _stplib.getBVUnsignedLongLong(value)
return expr
def show_prints():
# Force there to be a current counterexample.
s.push()
_stplib.vc_query(s.vc, s.false().expr)
for fmt, exprs in prints:
print fmt % tuple(map(get, exprs))
s.pop()
def exprString(expr):
return _stplib.exprString(expr.expr)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment