Skip to content

Instantly share code, notes, and snippets.

@xunge
Last active April 6, 2022 02:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xunge/1fc3681dff20771d7a8997fd893636e0 to your computer and use it in GitHub Desktop.
Save xunge/1fc3681dff20771d7a8997fd893636e0 to your computer and use it in GitHub Desktop.
vuzzer64 angr bb weight
# Copyright (C) 2017
#
# Written by Ashley Lesdalons <ashley.lesdalons@etu.univ-grenoble-alpes.fr>
#
# ========LICENCE========
# This script is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this script; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
# ========LICENCE========
import angr
import pickle
import ntpath
import sys
from capstone import *
from capstone.x86_const import X86_INS_CMP, X86_OP_IMM
import networkx as nx
from angrutils import plot_cfg
cmp_imm_operands = set()
fweight = dict()
def calculate_weight_edges(cfg):
for node in cfg.graph.nodes():
successors = cfg.get_successors(node, excluding_fakeret=True)
if len(successors) == 0: continue
weight = 1.0 / len(successors)
for s in successors: # an undirect jump/call represented by PathTerminator can have up to 250 successors
if s.name == "PathTerminator":
weight = 1.0
# assign weight to each outgoing edge
for s in successors:
cfg.graph.edge[node][s]['weight'] = weight
def calculate_weight_blocks(cfg):
global fweight
nodes_done = 0
total_nodes = len(cfg.graph.nodes())
stuck = 0 # this variable increases if the algorithm is stuck
while nodes_done < total_nodes:
stuck += 1
if stuck >= 2 * total_nodes:
algo_stuck(cfg)
for current_node in cfg.graph.nodes_iter():
if hasattr(current_node, 'weight'): continue
predecessors = cfg.get_predecessors(current_node, excluding_fakeret=True)
if len(predecessors) == 0: # if node has no predecessor, then it's a function call or an orphan
if current_node.addr in fweight:
current_node.weight = fweight[current_node.addr][0]
else:
current_node.weight = 1.0
nodes_done += 1
stuck = 0 # we made some progress, not stuck
continue
a = True
for pnode in predecessors:
a = a and hasattr(pnode, 'weight')
if a:
current_node.weight = 0.0
nodes_done += 1
stuck = 0 # we made some progress, not stuck
for pnode in predecessors:
current_node.weight += cfg.graph.edge[pnode][current_node]["weight"] * pnode.weight
def algo_stuck(cfg):
print(" The algorithm is stuck. There is probably at least one loop in the CFG.")
print(" A graphical view of the CFG will be generated in %s_cfg.png" % cfg.name)
try:
plot_cfg(cfg, "%s_cfg" % cfg.name, asminst=True, vexinst=False, debug_info=False, remove_imports=True,
remove_path_terminator=False)
print(" PNG generated sucessfully")
except:
print(" PNG generation failed. It is printed below:")
printCFG(cfg)
sys.exit(1)
def remove_cycles(cfg):
removed_edges = []
# we remove loops with only one node
for n1, n2 in cfg.graph.edges():
if n1 == n2:
cfg.graph.remove_edge(n1, n2)
removed_edges.append((n1, n2))
# it removes complex cycles with more than 2 nodes
while True:
try:
cycle = nx.find_cycle(cfg_f.graph)
for i in range(1, len(cycle) + 1):
n1, n2 = cycle[-i]
if len(cfg.get_predecessors(n2, excluding_fakeret=True)) > 1:
removed_edges.append((n1, n2))
cfg.graph.remove_edge(n1, n2)
break
# if no edge has been removed, by default we remove the last one
n1, n2 = cycle[-1]
removed_edges.append((n1, n2))
cfg.graph.remove_edge(n1, n2)
except:
break
# we remove simple loops with more than 2 nodes
while True:
try:
loop = next(nx.simple_cycles(cfg.graph))
except:
break
if len(loop) == 1: continue # WORKAROUND
for i in range(1, len(loop)):
n1 = loop[-i - 1]
n2 = loop[-i]
if cfg.graph.has_edge(n1, n2): # WORKAROUND
if len(cfg.get_predecessors(n2, excluding_fakeret=True)) > 1:
removed_edges.append((n1, n2))
cfg.graph.remove_edge(n1, n2)
break
# if no edge has been removed, by default we remove the last one
try:
n1 = loop[-2]
n2 = loop[-1]
removed_edges.append((n1, n2))
cfg.graph.remove_edge(n1, n2)
except:
pass
# print info
if len(removed_edges) == 0:
print(" ==> No cycles found")
else:
print(" ==> %d cycles found" % len(removed_edges))
# for n1,n2 in removed_edges:
# print " 0x%x (%s) --> 0x%x (%s)" % (n1.addr,n1.name,n2.addr,n2.name)
def remove_cycles_callgraph(callgraph):
while True:
try:
loop = next(nx.simple_cycles(callgraph.graph))
except:
break
if len(loop) == 1: continue # WORKAROUND
for i in range(1, len(loop)):
n1 = loop[-i - 1]
n2 = loop[-i]
if callgraph.graph.has_edge(n1, n2): # WORKAROUND
if len(callgraph.predecessors(n2)) > 1:
removed_edges.append((n1, n2))
callgraph.graph.remove_edge(n1, n2)
break
# if no edge has been removed, by default we remove the last one
try:
n1 = loop[-2]
n2 = loop[-1]
removed_edges.append((n1, n2))
callgraph.graph.remove_edge(n1, n2)
except:
pass
def remove_pathterminator(cfg):
for node in cfg.graph.nodes():
if node.name == "PathTerminator":
cfg.graph.remove_node(node)
for n1, n2 in cfg.graph.edges():
if n1.name == "PathTerminator" or n2.name == "PathTerminator":
cfg.graph.remove_edge(n1, n2)
def printCFG(cfg):
print("===== CFG of %s =====" % cfg.name)
print("== Nodes ==")
for n in cfg.graph.nodes():
if hasattr(n, "weight"):
print("%s (0x%x) weight=%.02f" % (n.name, n.addr, n.weight))
else:
print("%s (0x%x)" % (n.name, n.addr))
print("== Edges ==")
for n1, n2 in cfg.graph.edges():
if "weight" in cfg.graph.edge[n1][n2]:
print("0x%x (%s) --%.2f--> 0x%x (%s)" % (
n1.addr, n1.name, cfg.graph.edge[n1][n2]["weight"], n2.addr, n2.name))
else:
print("0x%x (%s) --> 0x%x (%s)" % (n1.addr, n1.name, n2.addr, n2.name))
print("")
def dump_analysis(filename):
global fweight, cmp_imm_operands
for addr in fweight:
a, b = fweight[addr]
fweight[addr] = (1.0 / a, b)
with open("%s.pkl" % filename, "wb") as f:
pickle.dump(fweight, f)
# dump cmp operands analysis
cmp_imm_operands_hex = []
cmp_imm_operands2 = list(cmp_imm_operands)
for op in cmp_imm_operands2:
cmp_imm_operands_hex += [op[i:i + 2] for i in range(2, len(op), 2)]
if len(op) == 3:
cmp_imm_operands.remove(op)
cmp_imm_operands_hex = set().union(cmp_imm_operands_hex)
with open("%s.names" % filename, 'wb') as f:
pickle.dump((cmp_imm_operands, cmp_imm_operands_hex), f)
def find_CMP_operands(proj, cfg, binName):
global cmp_imm_operands
for node in cfg.graph.nodes():
for inst in node.instruction_addrs:
insn = proj.factory.block(inst, num_inst=1).capstone.insns[0].insn
if insn.mnemonic == "cmp":
for op in insn.operands:
if op.type == X86_OP_IMM:
cmp_imm_operands.add("0x%X" % op.value.imm)
if __name__ == "__main__":
binaryPath = sys.argv[1]
binName = ntpath.basename(binaryPath)
print("[+] Opening binary %s" % binaryPath)
proj = angr.Project(binaryPath, load_options={'auto_load_libs': False, 'main_opts': {'custom_base_addr': 0x0}})
print("[+] Searching for all the functions (using CFGFast)")
full_cfg = proj.analyses.CFGFast(show_progressbar=True)
# we want to only keep functions defined in the main binary
functions = []
for addr, func in full_cfg.functions.items():
if proj.loader.main_object.contains_addr(addr) and addr not in proj.loader.main_object.reverse_plt:
functions.append((addr, func))
nb_functions = len(functions)
print(" ==> %d functions to process." % nb_functions)
callgraph = full_cfg.functions.callgraph
remove_cycles_callgraph(callgraph)
i = 1
stuck = 0
while len(functions) > 0:
for addr, func in functions:
stuck += 1
if stuck < 2 * len(functions): # Security precaution, in case the call graph still contains loops
if callgraph.has_node(addr): # sometimes the function isn't in the call graph
predecessors = callgraph.predecessors(addr)
if len([a for a in functions if a[0] in predecessors]) > 0:
continue
stuck = 0
print("[+] (%d/%d) Computing Accurate CFG for function %s (0x%x)" % (i, nb_functions, func.name, addr))
functions.remove((addr, func))
i += 1
cfg_f = proj.analyses.CFGEmulated(
# max_iterations=5,
starts=[addr],
context_sensitivity_level=1,
call_depth=0,
normalize=True,
enable_symbolic_back_traversal=True,
# enable_advanced_backward_slicing=True
)
cfg_f.remove_cycles() # works if max_iteration has a high value but then CFGEmulated becomes very slow
cfg_f.name = func.name
print(" [+] Removing cycles")
remove_cycles(cfg_f) # remove the loop-back edges
print(" [+] Searching for CMP operands")
find_CMP_operands(proj, cfg_f, binName)
print(" [+] Computing edges/vertices weight")
calculate_weight_edges(cfg_f)
calculate_weight_blocks(cfg_f)
# store data in export in fweight
for node in cfg_f.graph.nodes():
if node.addr in fweight:
w, e = fweight[node.addr]
fweight[node.addr] = (min(w, node.weight), e)
else:
if node.size is not None: # WORKAROUND: sometimes, the size is None for some reason :-(
fweight[node.addr] = (node.weight, node.addr + node.size)
else:
fweight[node.addr] = (node.weight, -1)
del cfg_f
break
print("[+] Dumping analysis to pickle files")
dump_analysis(binName)
print("[+] Done.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment