-
-
Save pgoodman/f1eb082936d433d32c65a2596bce7c12 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright (c) 2013, Felipe Andres Manzano | |
# All rights reserved. | |
# | |
# Redistribution and use in source and binary forms, with or without | |
# modification, are permitted provided that the following conditions are met: | |
# | |
# * Redistributions of source code must retain the above copyright notice, | |
# this list of conditions and the following disclaimer. | |
# * Redistributions in binary form must reproduce the above copyright | |
# notice,this list of conditions and the following disclaimer in the | |
# documentation and/or other materials provided with the distribution. | |
# * Neither the name of the copyright holder nor the names of its | |
# contributors may be used to endorse or promote products derived from | |
# this software without specific prior written permission. | |
# | |
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
# POSSIBILITY OF SUCH DAMAGE. | |
import sys | |
import time | |
import os | |
import tempfile | |
import copy | |
#import pickle | |
import cPickle as pickle | |
import random | |
import logging | |
import traceback | |
import signal | |
import argparse | |
import collections | |
import math | |
from cpu import Cpu, ConcretizeRegister, ConcretizeMemory, InvalidPCException, SymbolicPCException | |
from memory import MemoryWriteException | |
from decree import SDecree, ProcessExit, Deadlock | |
from smtlib import Symbol, Solver, BitVec, Array, Bool, Operators, SolverException | |
from visitor import StopExecutionException, StopAllExecutionException, EventVisitor | |
from coverage import EmptyCoverage | |
logger = logging.getLogger("EXECUTOR") | |
MAX_ICOUNT = 10000 | |
STATE_COUNT = 0 | |
START_TIME = int(time.time()) | |
def next_state_id(): | |
global STATE_COUNT | |
ni = STATE_COUNT | |
STATE_COUNT += 1 | |
return ni | |
class State(object): | |
def __init__(self, programs, event_visitor): | |
logger.info("Loading CGC program %s", programs) | |
self.solver = Solver() | |
self.os = SDecree(self.solver, programs, event_visitor) | |
self.co = next_state_id() | |
self.visited = collections.defaultdict(int) | |
self.event_visitor = event_visitor | |
self.num_symbolic_branches = 0 | |
self.icount = 0 | |
event_visitor.init(self) | |
def __getstate__(self): | |
return self.__dict__ | |
def __setstate__(self, d): | |
self.__dict__.update(d) | |
@property | |
def current(self): | |
return self.os.current | |
def processes(self): | |
return self.os.procs | |
@property | |
def mem(self): | |
return self.os.current.mem | |
def execute(self, execute_visitor): | |
return self.os.execute(execute_visitor) | |
def can_be_equal(self, A, B): | |
solver = self.solver | |
assert len(A) == len(B) | |
solver.push() | |
for i in range(len(A)): | |
solver.add(A[i] == B[i]) | |
can_be_equal = 'sat' == solver.check() | |
solver.pop() | |
return can_be_equal | |
def can_be_different(self, A, B): | |
solver = self.solver | |
assert len(A) == len(B) and len(A)>0 | |
solver.push() | |
cond = A[0] != B[0] | |
for i in range(1,len(A)): | |
cond = Operators.OR(cond, A[i] != B[i]) | |
solver.add(cond) | |
can_be_different = 'sat' == solver.check() | |
solver.pop() | |
return can_be_different | |
def _try_simplify_eq(self, sym, val): | |
self.solver.push() | |
try: | |
self.solver.add(sym == val) | |
ret = self.solver.check() == 'sat' | |
finally: | |
self.solver.pop() | |
return ret | |
def _try_simplify(self): | |
#Try to do some symplifications to shrink symbolic footprint | |
#logger.info("Trying to simplify state {} before fork".format(self.co)) | |
try: | |
bvals = [] | |
for reg_name in ['ZF', 'CF', 'OF', 'SF']: | |
reg = self.current.read_register(reg_name) | |
if not isinstance(reg, Symbol): | |
continue | |
can_be_true = self._try_simplify_eq(reg, True) | |
can_be_false = self._try_simplify_eq(reg, False) | |
if can_be_true and not can_be_false: | |
bvals.append((reg_name, reg, True)) | |
elif not can_be_true and can_be_false: | |
bvals.append((reg_name, reg, False)) | |
# Concretize the flags. | |
for reg_name, reg, val in bvals: | |
self.current.write_register(reg_name, val) | |
self.solver.add(reg == val) | |
except Exception,e: | |
logger.debug("Exception trying to simplify %r",e) | |
def concretize(self, symbolic, policy): | |
if not isinstance(symbolic, Symbol): | |
return [symbolic] | |
vals = set() | |
if policy == 'MIN': | |
vals.add(self.solver.min(symbolic)) | |
elif policy == 'MAX': | |
vals.add(self.solver.max(symbolic)) | |
elif policy == 'MINMAX': | |
vals.update(self.solver.minmax(symbolic)) | |
elif policy == 'SAMPLED' or policy == 'PAGE_SAMPLED' or policy == 'POSITIVE_SAMPLED': | |
m, M = self.solver.minmax(symbolic) | |
vals.add(m) | |
vals.add(M) | |
if M-m > 3: | |
if self.solver.can_be(symbolic, (m+M)/2): | |
vals.add((m+M)/2) | |
if M-m > 100: | |
vals.update(self.solver.getallvalues(symbolic, maxcnt=5)) | |
if 'PAGE_SAMPLED' == policy: | |
page_vals = set() | |
new_vals = set() | |
vals = list(vals) | |
vals.sort(reverse=True) | |
for val in vals: | |
page = val & ~4095 | |
if page in page_vals: | |
continue | |
page_vals.add(page) | |
new_vals.add(val) | |
vals = new_vals | |
elif 'POSITIVE_SAMPLED' == policy: | |
if 0 in vals: | |
vals.remove(0) | |
else: | |
assert policy == 'ALL' | |
vals.update(self.solver.getallvalues(symbolic, maxcnt=100)) | |
return list(vals) | |
class MaxConsecutiveIntructions(Exception): | |
pass | |
def null_transition(state): | |
return state | |
class Executor(object): | |
def __init__(self, programs, workspace, event_visitor, coverage=None): | |
assert isinstance(event_visitor, EventVisitor) | |
self.programs = programs | |
self.coverage = coverage or EmptyCoverage() | |
#Make working directory | |
self.workspace = workspace | |
self.error_states = [] # First priority; crashes. | |
self.recovered_states = [] # Second priority; memory error recovery. | |
self.new_coverage_states = [] # Third priority; new global code coverage. | |
self.states = [] # Third priority; normal states. | |
self.test_number = 0 | |
self.visited = collections.defaultdict(int) | |
meg = 1024 * 1024 | |
gig = 1024 * meg | |
self.max_storage = 80 * gig | |
self.total_used_storage = collections.defaultdict(int) | |
self.state_storage_size = {} | |
self.current_state = State(self.programs, event_visitor) | |
def _state_filename(self, co): | |
return os.path.join(self.workspace, 'state_{}'.format(co)) | |
def _do_save_state(self, state, co): | |
file_size = 0 | |
source_name = self._state_filename(state.co) | |
# No state file, or stale state file; (re)create it. | |
if not os.path.exists(source_name) or int(os.path.getctime(source_name)) < START_TIME: | |
if 'sat' != state.solver.check(): | |
raise SolverException("Can't sat before fork.") | |
state.event_visitor.visit_exe_fork() | |
with open(source_name, 'wb') as f: | |
pickle.dump(state, f, pickle.HIGHEST_PROTOCOL) | |
file_size = f.tell() | |
self.state_storage_size[state.co] = file_size | |
self.total_used_storage[file_size] += 1 | |
else: | |
file_size = self.state_storage_size[state.co] | |
dest_name = self._state_filename(co) | |
if os.path.exists(dest_name): | |
os.remove(dest_name) | |
os.link(source_name, dest_name) | |
self.total_used_storage[file_size] += 1 | |
self.state_storage_size[co] = file_size | |
return file_size, dest_name | |
def save_state(self, state, next_pc=None, transition_func=null_transition): | |
if not self._can_store_more(): | |
logger.info("Maximum state storage budget exceeded. Ignoring state.") | |
return False | |
cpu = state.current | |
is_covered = True | |
branch_info = (0, 0, 0) | |
if next_pc is None: | |
next_pc = cpu.read_register('PC') | |
assert not isinstance(next_pc, Symbol) | |
else: | |
state.num_symbolic_branches += 1 | |
last_branch_block_pc = state.concretize(cpu.branch_info[0], 'ALL')[0] | |
block_pc_of_branch = state.concretize(cpu.branch_info[1], 'ALL')[0] | |
branch_target_block_pc = next_pc | |
cpu.branch_info = (last_branch_block_pc, block_pc_of_branch, cpu.branch_info[2]) | |
branch_info = (last_branch_block_pc, block_pc_of_branch, branch_target_block_pc) | |
logger.info("Saving state for path {:08x} -> {:08x} -> {:08x}".format(*branch_info)) | |
is_covered = self.coverage.is_covered(*branch_info) | |
try: | |
co = next_state_id() | |
file_size, file_name = self._do_save_state(state, co) | |
except SolverException: | |
logger.info("Can't sat before fork.") | |
return True | |
except StopExecutionException: | |
return True | |
logger.info("Saved state {} (derived from {}) to file {}".format(co, state.co, file_name)) | |
num_recoveries = 0 | |
for process in state.processes(): | |
if process: | |
num_recoveries += process.mem.num_recoveries | |
new_state = { | |
'num_received_bytes' : state.os.num_received_bytes, | |
'num_transmitted_bytes': state.os.num_transmitted_bytes, | |
'num_symbolic_branches': state.num_symbolic_branches, | |
'num_syscalls': state.os.num_syscalls, | |
'num_instructions': sum(state.visited.values()), | |
'num_unique_instructions': len(state.visited), | |
'file_size': file_size, | |
'co': co, | |
'source_co': state.co, | |
'transition_func': transition_func, | |
'next_pc': next_pc, | |
'num_recoveries': num_recoveries, | |
'event_score': state.event_visitor.visit_exe_score(), | |
} | |
if not state.current.mem.is_executable(next_pc): | |
self.error_states.append(new_state) | |
elif num_recoveries: | |
self.recovered_states.append(new_state) | |
elif not is_covered: | |
logger.info("!!! Found new coverage state {:08x} -> {:08x} -> {:08x}".format( | |
*branch_info)) | |
self.new_coverage_states.append(new_state) | |
else: | |
self.states.append(new_state) | |
#logger.info("Adding state %s to processing list. State list size: %d", co, len(self.states)) | |
return True | |
def _make_reg_transition(self, reg_name, symbol, val): | |
def transition(state): | |
logger.debug("Concretizing {} = 0x{:02x} in state {}".format(reg_name, val, state.co)) | |
state.current.write_register(reg_name, val) | |
if not state.event_visitor.visit_exe_assert_reg(reg_name, symbol, val): | |
raise SolverException("Cannot concretize {} = {:02x} at PC {:08x}".format( | |
reg_name, val, state.current.read_register('PC'))) | |
return state | |
return transition | |
MEMORY_SIZE_NAME = {8: "BYTE", 16: "WORD", 32: "DWORD", 64: "QWORD", 128: "XMMWORD", 256: "YMMWORD"} | |
def _make_mem_transition(self, addr, size, symbol, val): | |
def transition(state): | |
word = Executor.MEMORY_SIZE_NAME[size] | |
logger.debug("Concretizing {} PTR [{:08x}] = 0x{:02x} in state {}".format(word, addr, val, state.co)) | |
state.solver.add(symbol == val) | |
state.current.store(addr, val, size) | |
if 'sat' != state.solver.check(): | |
raise SolverException("Cannot concretize {} PTR [{:08x}] = {:02x} at PC {:08x}".format( | |
word, addr, val, state.current.read_register('PC'))) | |
return state | |
return transition | |
def _score_state(self, state): | |
return (state['event_score'], | |
state['num_recoveries'], | |
int(state['next_pc'] not in self.visited), | |
int(state['num_received_bytes'] < 25), | |
int(state['num_transmitted_bytes'] < 25), | |
state['num_received_bytes'] / (state['num_transmitted_bytes']+1.0), | |
state['num_unique_instructions'] / (state['num_instructions']+1.0)) | |
STATE_SELECTOR = 0 | |
def load_saved_state(self): | |
logger.info("Selecting a new state; processing list size is {} / {} / {} / {}".format( | |
len(self.error_states), len(self.recovered_states), | |
len(self.new_coverage_states), len(self.states))) | |
# Static prioritization. | |
state_list = self.error_states or \ | |
self.recovered_states or \ | |
self.new_coverage_states or \ | |
self.states | |
if not len(state_list): | |
return | |
# Dynamic prioritization | |
state_list.sort(key=self._score_state) | |
possible_states = [] | |
indexes = range(len(state_list)) | |
possible_states = list(set(indexes[-5:] + indexes[:5])) | |
random.shuffle(possible_states) | |
state_index = possible_states[0] | |
info = state_list.pop(state_index) | |
# possible_states.sort() | |
# state_index = Executor.STATE_SELECTOR % len(possible_states) | |
# info = state_list.pop(possible_states[state_index]) | |
# Executor.STATE_SELECTOR += 1 | |
# random.shuffle(possible_states) | |
#info = state_list.pop(possible_states.pop()) | |
# Garbage collect the old state file if it still exists. | |
file_name = self._state_filename(info['source_co']) | |
if os.path.exists(file_name): | |
os.remove(file_name) # TODO(pag): Change later to allow time-travelling. | |
self.total_used_storage[info['file_size']] -= 1 | |
del self.state_storage_size[info['source_co']] | |
# Deserialize the old state. | |
file_name = self._state_filename(info['co']) | |
with file(file_name, 'rb') as f: | |
source_state = pickle.load(f) | |
os.remove(file_name) | |
self.total_used_storage[info['file_size']] -= 1 | |
# Convert the old state into a new state. | |
source_state.co = info['co'] # rename. | |
try: | |
if 'sat' != source_state.solver.check(): | |
raise SolverException("Can't sat after resume at PC {:08x}".format( | |
source_state.current.read_register('PC'))) | |
dest_state = info['transition_func'](source_state) | |
except StopExecutionException: | |
return None | |
except: | |
logger.error("Unable to transition state {} to {}: {}".format( | |
info['source_co'], info['co'], traceback.format_exc())) | |
return None | |
logger.info("Selected state: %s", dest_state.co) | |
# Try to add new flag constraints given the application of the state | |
# transition function. | |
dest_state._try_simplify() | |
dest_state.event_visitor.visit_exe_resume() | |
return dest_state | |
# Figure out if we have the storage budget to save another state file. | |
def _can_store_more(self): | |
total = 0 | |
for size, yes in self.total_used_storage.items(): | |
if yes: | |
total += size | |
return total < self.max_storage | |
def _visit_execute(self, cpu): | |
pc = cpu.read_register('PC') | |
self.current_state.visited[pc] += 1 | |
self.visited[pc] += 1 | |
self.current_state.event_visitor.visit_cpu_execute(cpu) | |
def _num_pending_states(self): | |
return len(self.error_states) + \ | |
len(self.recovered_states) + \ | |
len(self.new_coverage_states) + \ | |
len(self.states) | |
def _try_execute_instructions(self): | |
if self.current_state is None: | |
self.current_state = self.load_saved_state() | |
while self.current_state: | |
self.current_state.execute(self._visit_execute) | |
self.current_state.icount += 1 | |
PC = self.current_state.current.read_register('PC') | |
if self.current_state.icount >= MAX_ICOUNT \ | |
and isinstance(PC, (int, long)): | |
self.current_state.icount = 0 | |
raise MaxConsecutiveIntructions() | |
def _concretize_register(self, reg_name, policy): | |
set_next_pc = reg_name in ('PC', 'EIP', 'RIP') | |
current_state = self.current_state | |
self.current_state = None | |
symbol = current_state.current.read_register(reg_name) | |
get_vals = lambda: current_state.concretize(symbol, policy) | |
try: | |
vals = current_state.event_visitor.visit_exe_concretize_reg( | |
reg_name, symbol, get_vals) | |
except StopExecutionException: | |
logger.info("Not concretizing register {} with policy {}".format( | |
reg_name, policy)) | |
return | |
if 1 == len(vals) and not len(self.states): | |
transition_func = self._make_reg_transition( | |
reg_name, symbol, vals[0]) | |
self.current_state = transition_func(current_state) | |
else: | |
for val in vals: | |
next_pc = None | |
if set_next_pc: | |
next_pc = val | |
self.save_state( | |
current_state, | |
next_pc=next_pc, | |
transition_func=self._make_reg_transition( | |
reg_name, symbol, val)) | |
def _concretize_memory(self, address, size, policy): | |
current_state = self.current_state | |
self.current_state = None | |
symbol = current_state.current.load(address, size) | |
get_vals = lambda: current_state.concretize(symbol, policy) | |
try: | |
vals = current_state.event_visitor.visit_exe_concretize_mem( | |
address, size, symbol, get_vals) | |
except StopExecutionException: | |
logger.info("Not concretizing memory {:08x} with policy {}".format( | |
address, policy)) | |
return | |
if 1 == len(vals) and not len(self.states): | |
transition_func = self._make_mem_transition( | |
address, size, symbol, vals[0]) | |
self.current_state = transition_func(current_state) | |
else: | |
for val in vals: | |
self.save_state( | |
current_state, | |
transition_func=self._make_mem_transition( | |
address, size, symbol, val)) | |
def _segmentation_fault(self, addr_list): | |
current_state = self.current_state | |
self.current_state = None | |
cpu = current_state.current | |
logger.error("Segmentation fault at PC {:08x}".format( | |
cpu.read_register('PC'))) | |
try: | |
current_state.event_visitor.visit_exe_error( | |
mem_write=addr_list) | |
except StopExecutionException: | |
pass | |
def _invalid_instruction(self, pc): | |
logger.error("Executing bad instruction at PC {:08x}".format(pc)) | |
current_state = self.current_state | |
self.current_state = None | |
try: | |
current_state.event_visitor.visit_exe_error(pc=pc) | |
except StopExecutionException: | |
pass | |
def _run(self): | |
global MAX_ICOUNT | |
logger.info("Starting cgc-symbolic-emulator main loop.") | |
while self.current_state or self._num_pending_states(): | |
try: | |
self._try_execute_instructions() | |
except ConcretizeRegister as e: | |
self._concretize_register(e.reg_name, e.policy) | |
except ConcretizeMemory, e: | |
logger.info(str(e)) | |
self._concretize_memory(e.address, e.size, e.policy) | |
except MemoryWriteException as e: | |
self._segmentation_fault(e.addr_list) | |
except InvalidPCException as e: | |
self._invalid_instruction(e.pc) | |
except ProcessExit: | |
logger.info("Exiting process.") | |
current_state = self.current_state | |
self.current_state = None | |
try: | |
current_state.event_visitor.visit_exe_exit() | |
except StopExecutionException: | |
pass | |
except Deadlock: | |
logger.error("Deadlocked.") | |
current_state = self.current_state | |
self.current_state = None | |
try: | |
current_state.event_visitor.visit_exe_error() | |
except StopExecutionException: | |
pass | |
except SolverException: | |
logger.info("Solver exception in state {}: {}".format( | |
self.current_state.co, traceback.format_exc())) | |
self.current_state = None | |
except MaxConsecutiveIntructions: | |
logger.info("Executed {} instructions in state {}".format( | |
MAX_ICOUNT, self.current_state.co)) | |
if len(self.states): | |
self.save_state(self.current_state) | |
self.current_state = None | |
else: | |
self.current_state.event_visitor.visit_exe_fork() | |
self.current_state.event_visitor.visit_exe_resume() | |
except StopExecutionException as e: | |
logger.info("Stopping current state execution.") | |
current_state = self.current_state | |
self.current_state = None | |
if e.report: | |
try: | |
current_state.event_visitor.visit_exe_stop() | |
except StopExecutionException: | |
pass | |
except StopAllExecutionException: | |
logger.info("Stopping ALL state executions.") | |
return | |
except Exception, e: | |
logger.error("THIS SHOULD NOT REACHABLE! Exception in user code: {}\n{}".format( | |
str(e), traceback.format_exc()) ) | |
current_state = self.current_state | |
self.current_state = None | |
if current_state: | |
try: | |
current_state.event_visitor.visit_exe_error() | |
except StopExecutionException: | |
pass | |
def run(self): | |
while True: | |
try: | |
self._run() | |
logger.info("Returning from run loop.") | |
except StopExecutionException: | |
logger.info("Stopping execution of current state: {}".format( | |
traceback.format_exc())) | |
self.current_state = None | |
except StopAllExecutionException: | |
logger.info("Stopping ALL execution.") | |
return | |
except Exception as e: | |
logger.error("Caught unknown exception {}: {}".format( | |
e, traceback.format_exc())) | |
raise |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment