Skip to content

Instantly share code, notes, and snippets.

@pgoodman

pgoodman/cgc.py Secret

Created December 19, 2018 02:13
Show Gist options
  • Save pgoodman/f1eb082936d433d32c65a2596bce7c12 to your computer and use it in GitHub Desktop.
Save pgoodman/f1eb082936d433d32c65a2596bce7c12 to your computer and use it in GitHub Desktop.
# Copyright (c) 2013, Felipe Andres Manzano
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice,this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import sys
import time
import os
import tempfile
import copy
#import pickle
import cPickle as pickle
import random
import logging
import traceback
import signal
import argparse
import collections
import math
from cpu import Cpu, ConcretizeRegister, ConcretizeMemory, InvalidPCException, SymbolicPCException
from memory import MemoryWriteException
from decree import SDecree, ProcessExit, Deadlock
from smtlib import Symbol, Solver, BitVec, Array, Bool, Operators, SolverException
from visitor import StopExecutionException, StopAllExecutionException, EventVisitor
from coverage import EmptyCoverage
logger = logging.getLogger("EXECUTOR")
MAX_ICOUNT = 10000
STATE_COUNT = 0
START_TIME = int(time.time())
def next_state_id():
global STATE_COUNT
ni = STATE_COUNT
STATE_COUNT += 1
return ni
class State(object):
def __init__(self, programs, event_visitor):
logger.info("Loading CGC program %s", programs)
self.solver = Solver()
self.os = SDecree(self.solver, programs, event_visitor)
self.co = next_state_id()
self.visited = collections.defaultdict(int)
self.event_visitor = event_visitor
self.num_symbolic_branches = 0
self.icount = 0
event_visitor.init(self)
def __getstate__(self):
return self.__dict__
def __setstate__(self, d):
self.__dict__.update(d)
@property
def current(self):
return self.os.current
def processes(self):
return self.os.procs
@property
def mem(self):
return self.os.current.mem
def execute(self, execute_visitor):
return self.os.execute(execute_visitor)
def can_be_equal(self, A, B):
solver = self.solver
assert len(A) == len(B)
solver.push()
for i in range(len(A)):
solver.add(A[i] == B[i])
can_be_equal = 'sat' == solver.check()
solver.pop()
return can_be_equal
def can_be_different(self, A, B):
solver = self.solver
assert len(A) == len(B) and len(A)>0
solver.push()
cond = A[0] != B[0]
for i in range(1,len(A)):
cond = Operators.OR(cond, A[i] != B[i])
solver.add(cond)
can_be_different = 'sat' == solver.check()
solver.pop()
return can_be_different
def _try_simplify_eq(self, sym, val):
self.solver.push()
try:
self.solver.add(sym == val)
ret = self.solver.check() == 'sat'
finally:
self.solver.pop()
return ret
def _try_simplify(self):
#Try to do some symplifications to shrink symbolic footprint
#logger.info("Trying to simplify state {} before fork".format(self.co))
try:
bvals = []
for reg_name in ['ZF', 'CF', 'OF', 'SF']:
reg = self.current.read_register(reg_name)
if not isinstance(reg, Symbol):
continue
can_be_true = self._try_simplify_eq(reg, True)
can_be_false = self._try_simplify_eq(reg, False)
if can_be_true and not can_be_false:
bvals.append((reg_name, reg, True))
elif not can_be_true and can_be_false:
bvals.append((reg_name, reg, False))
# Concretize the flags.
for reg_name, reg, val in bvals:
self.current.write_register(reg_name, val)
self.solver.add(reg == val)
except Exception,e:
logger.debug("Exception trying to simplify %r",e)
def concretize(self, symbolic, policy):
if not isinstance(symbolic, Symbol):
return [symbolic]
vals = set()
if policy == 'MIN':
vals.add(self.solver.min(symbolic))
elif policy == 'MAX':
vals.add(self.solver.max(symbolic))
elif policy == 'MINMAX':
vals.update(self.solver.minmax(symbolic))
elif policy == 'SAMPLED' or policy == 'PAGE_SAMPLED' or policy == 'POSITIVE_SAMPLED':
m, M = self.solver.minmax(symbolic)
vals.add(m)
vals.add(M)
if M-m > 3:
if self.solver.can_be(symbolic, (m+M)/2):
vals.add((m+M)/2)
if M-m > 100:
vals.update(self.solver.getallvalues(symbolic, maxcnt=5))
if 'PAGE_SAMPLED' == policy:
page_vals = set()
new_vals = set()
vals = list(vals)
vals.sort(reverse=True)
for val in vals:
page = val & ~4095
if page in page_vals:
continue
page_vals.add(page)
new_vals.add(val)
vals = new_vals
elif 'POSITIVE_SAMPLED' == policy:
if 0 in vals:
vals.remove(0)
else:
assert policy == 'ALL'
vals.update(self.solver.getallvalues(symbolic, maxcnt=100))
return list(vals)
class MaxConsecutiveIntructions(Exception):
pass
def null_transition(state):
return state
class Executor(object):
def __init__(self, programs, workspace, event_visitor, coverage=None):
assert isinstance(event_visitor, EventVisitor)
self.programs = programs
self.coverage = coverage or EmptyCoverage()
#Make working directory
self.workspace = workspace
self.error_states = [] # First priority; crashes.
self.recovered_states = [] # Second priority; memory error recovery.
self.new_coverage_states = [] # Third priority; new global code coverage.
self.states = [] # Third priority; normal states.
self.test_number = 0
self.visited = collections.defaultdict(int)
meg = 1024 * 1024
gig = 1024 * meg
self.max_storage = 80 * gig
self.total_used_storage = collections.defaultdict(int)
self.state_storage_size = {}
self.current_state = State(self.programs, event_visitor)
def _state_filename(self, co):
return os.path.join(self.workspace, 'state_{}'.format(co))
def _do_save_state(self, state, co):
file_size = 0
source_name = self._state_filename(state.co)
# No state file, or stale state file; (re)create it.
if not os.path.exists(source_name) or int(os.path.getctime(source_name)) < START_TIME:
if 'sat' != state.solver.check():
raise SolverException("Can't sat before fork.")
state.event_visitor.visit_exe_fork()
with open(source_name, 'wb') as f:
pickle.dump(state, f, pickle.HIGHEST_PROTOCOL)
file_size = f.tell()
self.state_storage_size[state.co] = file_size
self.total_used_storage[file_size] += 1
else:
file_size = self.state_storage_size[state.co]
dest_name = self._state_filename(co)
if os.path.exists(dest_name):
os.remove(dest_name)
os.link(source_name, dest_name)
self.total_used_storage[file_size] += 1
self.state_storage_size[co] = file_size
return file_size, dest_name
def save_state(self, state, next_pc=None, transition_func=null_transition):
if not self._can_store_more():
logger.info("Maximum state storage budget exceeded. Ignoring state.")
return False
cpu = state.current
is_covered = True
branch_info = (0, 0, 0)
if next_pc is None:
next_pc = cpu.read_register('PC')
assert not isinstance(next_pc, Symbol)
else:
state.num_symbolic_branches += 1
last_branch_block_pc = state.concretize(cpu.branch_info[0], 'ALL')[0]
block_pc_of_branch = state.concretize(cpu.branch_info[1], 'ALL')[0]
branch_target_block_pc = next_pc
cpu.branch_info = (last_branch_block_pc, block_pc_of_branch, cpu.branch_info[2])
branch_info = (last_branch_block_pc, block_pc_of_branch, branch_target_block_pc)
logger.info("Saving state for path {:08x} -> {:08x} -> {:08x}".format(*branch_info))
is_covered = self.coverage.is_covered(*branch_info)
try:
co = next_state_id()
file_size, file_name = self._do_save_state(state, co)
except SolverException:
logger.info("Can't sat before fork.")
return True
except StopExecutionException:
return True
logger.info("Saved state {} (derived from {}) to file {}".format(co, state.co, file_name))
num_recoveries = 0
for process in state.processes():
if process:
num_recoveries += process.mem.num_recoveries
new_state = {
'num_received_bytes' : state.os.num_received_bytes,
'num_transmitted_bytes': state.os.num_transmitted_bytes,
'num_symbolic_branches': state.num_symbolic_branches,
'num_syscalls': state.os.num_syscalls,
'num_instructions': sum(state.visited.values()),
'num_unique_instructions': len(state.visited),
'file_size': file_size,
'co': co,
'source_co': state.co,
'transition_func': transition_func,
'next_pc': next_pc,
'num_recoveries': num_recoveries,
'event_score': state.event_visitor.visit_exe_score(),
}
if not state.current.mem.is_executable(next_pc):
self.error_states.append(new_state)
elif num_recoveries:
self.recovered_states.append(new_state)
elif not is_covered:
logger.info("!!! Found new coverage state {:08x} -> {:08x} -> {:08x}".format(
*branch_info))
self.new_coverage_states.append(new_state)
else:
self.states.append(new_state)
#logger.info("Adding state %s to processing list. State list size: %d", co, len(self.states))
return True
def _make_reg_transition(self, reg_name, symbol, val):
def transition(state):
logger.debug("Concretizing {} = 0x{:02x} in state {}".format(reg_name, val, state.co))
state.current.write_register(reg_name, val)
if not state.event_visitor.visit_exe_assert_reg(reg_name, symbol, val):
raise SolverException("Cannot concretize {} = {:02x} at PC {:08x}".format(
reg_name, val, state.current.read_register('PC')))
return state
return transition
MEMORY_SIZE_NAME = {8: "BYTE", 16: "WORD", 32: "DWORD", 64: "QWORD", 128: "XMMWORD", 256: "YMMWORD"}
def _make_mem_transition(self, addr, size, symbol, val):
def transition(state):
word = Executor.MEMORY_SIZE_NAME[size]
logger.debug("Concretizing {} PTR [{:08x}] = 0x{:02x} in state {}".format(word, addr, val, state.co))
state.solver.add(symbol == val)
state.current.store(addr, val, size)
if 'sat' != state.solver.check():
raise SolverException("Cannot concretize {} PTR [{:08x}] = {:02x} at PC {:08x}".format(
word, addr, val, state.current.read_register('PC')))
return state
return transition
def _score_state(self, state):
return (state['event_score'],
state['num_recoveries'],
int(state['next_pc'] not in self.visited),
int(state['num_received_bytes'] < 25),
int(state['num_transmitted_bytes'] < 25),
state['num_received_bytes'] / (state['num_transmitted_bytes']+1.0),
state['num_unique_instructions'] / (state['num_instructions']+1.0))
STATE_SELECTOR = 0
def load_saved_state(self):
logger.info("Selecting a new state; processing list size is {} / {} / {} / {}".format(
len(self.error_states), len(self.recovered_states),
len(self.new_coverage_states), len(self.states)))
# Static prioritization.
state_list = self.error_states or \
self.recovered_states or \
self.new_coverage_states or \
self.states
if not len(state_list):
return
# Dynamic prioritization
state_list.sort(key=self._score_state)
possible_states = []
indexes = range(len(state_list))
possible_states = list(set(indexes[-5:] + indexes[:5]))
random.shuffle(possible_states)
state_index = possible_states[0]
info = state_list.pop(state_index)
# possible_states.sort()
# state_index = Executor.STATE_SELECTOR % len(possible_states)
# info = state_list.pop(possible_states[state_index])
# Executor.STATE_SELECTOR += 1
# random.shuffle(possible_states)
#info = state_list.pop(possible_states.pop())
# Garbage collect the old state file if it still exists.
file_name = self._state_filename(info['source_co'])
if os.path.exists(file_name):
os.remove(file_name) # TODO(pag): Change later to allow time-travelling.
self.total_used_storage[info['file_size']] -= 1
del self.state_storage_size[info['source_co']]
# Deserialize the old state.
file_name = self._state_filename(info['co'])
with file(file_name, 'rb') as f:
source_state = pickle.load(f)
os.remove(file_name)
self.total_used_storage[info['file_size']] -= 1
# Convert the old state into a new state.
source_state.co = info['co'] # rename.
try:
if 'sat' != source_state.solver.check():
raise SolverException("Can't sat after resume at PC {:08x}".format(
source_state.current.read_register('PC')))
dest_state = info['transition_func'](source_state)
except StopExecutionException:
return None
except:
logger.error("Unable to transition state {} to {}: {}".format(
info['source_co'], info['co'], traceback.format_exc()))
return None
logger.info("Selected state: %s", dest_state.co)
# Try to add new flag constraints given the application of the state
# transition function.
dest_state._try_simplify()
dest_state.event_visitor.visit_exe_resume()
return dest_state
# Figure out if we have the storage budget to save another state file.
def _can_store_more(self):
total = 0
for size, yes in self.total_used_storage.items():
if yes:
total += size
return total < self.max_storage
def _visit_execute(self, cpu):
pc = cpu.read_register('PC')
self.current_state.visited[pc] += 1
self.visited[pc] += 1
self.current_state.event_visitor.visit_cpu_execute(cpu)
def _num_pending_states(self):
return len(self.error_states) + \
len(self.recovered_states) + \
len(self.new_coverage_states) + \
len(self.states)
def _try_execute_instructions(self):
if self.current_state is None:
self.current_state = self.load_saved_state()
while self.current_state:
self.current_state.execute(self._visit_execute)
self.current_state.icount += 1
PC = self.current_state.current.read_register('PC')
if self.current_state.icount >= MAX_ICOUNT \
and isinstance(PC, (int, long)):
self.current_state.icount = 0
raise MaxConsecutiveIntructions()
def _concretize_register(self, reg_name, policy):
set_next_pc = reg_name in ('PC', 'EIP', 'RIP')
current_state = self.current_state
self.current_state = None
symbol = current_state.current.read_register(reg_name)
get_vals = lambda: current_state.concretize(symbol, policy)
try:
vals = current_state.event_visitor.visit_exe_concretize_reg(
reg_name, symbol, get_vals)
except StopExecutionException:
logger.info("Not concretizing register {} with policy {}".format(
reg_name, policy))
return
if 1 == len(vals) and not len(self.states):
transition_func = self._make_reg_transition(
reg_name, symbol, vals[0])
self.current_state = transition_func(current_state)
else:
for val in vals:
next_pc = None
if set_next_pc:
next_pc = val
self.save_state(
current_state,
next_pc=next_pc,
transition_func=self._make_reg_transition(
reg_name, symbol, val))
def _concretize_memory(self, address, size, policy):
current_state = self.current_state
self.current_state = None
symbol = current_state.current.load(address, size)
get_vals = lambda: current_state.concretize(symbol, policy)
try:
vals = current_state.event_visitor.visit_exe_concretize_mem(
address, size, symbol, get_vals)
except StopExecutionException:
logger.info("Not concretizing memory {:08x} with policy {}".format(
address, policy))
return
if 1 == len(vals) and not len(self.states):
transition_func = self._make_mem_transition(
address, size, symbol, vals[0])
self.current_state = transition_func(current_state)
else:
for val in vals:
self.save_state(
current_state,
transition_func=self._make_mem_transition(
address, size, symbol, val))
def _segmentation_fault(self, addr_list):
current_state = self.current_state
self.current_state = None
cpu = current_state.current
logger.error("Segmentation fault at PC {:08x}".format(
cpu.read_register('PC')))
try:
current_state.event_visitor.visit_exe_error(
mem_write=addr_list)
except StopExecutionException:
pass
def _invalid_instruction(self, pc):
logger.error("Executing bad instruction at PC {:08x}".format(pc))
current_state = self.current_state
self.current_state = None
try:
current_state.event_visitor.visit_exe_error(pc=pc)
except StopExecutionException:
pass
def _run(self):
global MAX_ICOUNT
logger.info("Starting cgc-symbolic-emulator main loop.")
while self.current_state or self._num_pending_states():
try:
self._try_execute_instructions()
except ConcretizeRegister as e:
self._concretize_register(e.reg_name, e.policy)
except ConcretizeMemory, e:
logger.info(str(e))
self._concretize_memory(e.address, e.size, e.policy)
except MemoryWriteException as e:
self._segmentation_fault(e.addr_list)
except InvalidPCException as e:
self._invalid_instruction(e.pc)
except ProcessExit:
logger.info("Exiting process.")
current_state = self.current_state
self.current_state = None
try:
current_state.event_visitor.visit_exe_exit()
except StopExecutionException:
pass
except Deadlock:
logger.error("Deadlocked.")
current_state = self.current_state
self.current_state = None
try:
current_state.event_visitor.visit_exe_error()
except StopExecutionException:
pass
except SolverException:
logger.info("Solver exception in state {}: {}".format(
self.current_state.co, traceback.format_exc()))
self.current_state = None
except MaxConsecutiveIntructions:
logger.info("Executed {} instructions in state {}".format(
MAX_ICOUNT, self.current_state.co))
if len(self.states):
self.save_state(self.current_state)
self.current_state = None
else:
self.current_state.event_visitor.visit_exe_fork()
self.current_state.event_visitor.visit_exe_resume()
except StopExecutionException as e:
logger.info("Stopping current state execution.")
current_state = self.current_state
self.current_state = None
if e.report:
try:
current_state.event_visitor.visit_exe_stop()
except StopExecutionException:
pass
except StopAllExecutionException:
logger.info("Stopping ALL state executions.")
return
except Exception, e:
logger.error("THIS SHOULD NOT REACHABLE! Exception in user code: {}\n{}".format(
str(e), traceback.format_exc()) )
current_state = self.current_state
self.current_state = None
if current_state:
try:
current_state.event_visitor.visit_exe_error()
except StopExecutionException:
pass
def run(self):
while True:
try:
self._run()
logger.info("Returning from run loop.")
except StopExecutionException:
logger.info("Stopping execution of current state: {}".format(
traceback.format_exc()))
self.current_state = None
except StopAllExecutionException:
logger.info("Stopping ALL execution.")
return
except Exception as e:
logger.error("Caught unknown exception {}: {}".format(
e, traceback.format_exc()))
raise
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment