Skip to content

Instantly share code, notes, and snippets.

@HMPerson1
Created February 16, 2018 18:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save HMPerson1/52cc2eb8e240da1e3a031921c802c3be to your computer and use it in GitHub Desktop.
Save HMPerson1/52cc2eb8e240da1e3a031921c802c3be to your computer and use it in GitHub Desktop.
Janky Python CFG Generator
#!/usr/bin/env python3
import dis
import opcode
import argparse
import io
OP_RETURN_VALUE = opcode.opmap['RETURN_VALUE']
OP_JUMP_ABSOLUTE = opcode.opmap['JUMP_ABSOLUTE']
OP_JUMP_FORWARD = opcode.opmap['JUMP_FORWARD']
OP_FOR_ITER = opcode.opmap['FOR_ITER']
OP_CONTINUE_LOOP = opcode.opmap['CONTINUE_LOOP']
OP_BREAK_LOOP = opcode.opmap['BREAK_LOOP']
OP_SETUP_LOOP = opcode.opmap['SETUP_LOOP']
OPS_SETUP_BLOCK = [opcode.opmap[x] for x in
['SETUP_WITH', 'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY']]
OP_POP_BLOCK = opcode.opmap['POP_BLOCK']
class BasicBlock:
def __init__(self, n):
self.n = n
self.instrs = []
self.next = None
self.branch = None
class BlockMaker:
def __init__(self):
self.curblock = None
self.basicblocks = []
self.off_to_blkn = dict()
self.block_stack = []
def get_block(self, offset):
if offset in self.off_to_blkn:
return self.basicblocks[self.off_to_blkn[offset]]
ret = BasicBlock(len(self.basicblocks))
self.basicblocks.append(ret)
self.off_to_blkn[offset] = ret.n
return ret
def uncond_branch(self, to_offset):
self.curblock.next = self.get_block(to_offset).n
self.curblock = None
def cond_branch(self, next_offset, branch_offset):
self.curblock.next = self.get_block(next_offset).n
self.curblock.branch = branch_offset
self.curblock = None
def run(self, instrs):
for instr in instrs:
if self.curblock is None:
self.curblock = self.get_block(instr.offset)
if instr.is_jump_target and self.curblock.instrs:
nextblock = self.get_block(instr.offset)
self.curblock.next = nextblock.n
self.curblock = nextblock
self.curblock.instrs.append(instr)
if instr.opcode in OPS_SETUP_BLOCK:
self.block_stack.append(instr.offset + 2 + instr.arg)
elif instr.opcode == OP_POP_BLOCK:
del self.block_stack[-1]
elif instr.opcode == OP_RETURN_VALUE:
self.curblock = None
elif instr.opcode == OP_JUMP_ABSOLUTE or instr.opcode == OP_CONTINUE_LOOP:
self.uncond_branch(instr.arg)
elif instr.opcode in opcode.hasjabs:
self.cond_branch(instr.offset + 2, instr.arg)
elif instr.opcode == OP_JUMP_FORWARD:
self.uncond_branch(instr.offset + 2 + instr.arg)
elif instr.opcode == OP_FOR_ITER:
self.cond_branch(instr.offset + 2, instr.offset + 2 + instr.arg)
elif instr.opcode == OP_BREAK_LOOP:
self.uncond_branch(self.block_stack[-1])
else:
pass
for blk in self.basicblocks:
if blk.branch is not None:
blk.branch = self.off_to_blkn[blk.branch]
def main():
parser = argparse.ArgumentParser()
parser.add_argument("src", help="the python file you want to get a control flow graph of")
parser.add_argument("out", help="the file where the graphviz output will be written")
args = parser.parse_args()
with open(args.src, "r") as f:
src_str = f.read()
src_lines = src_str.splitlines()
instrs = dis.get_instructions(src_str)
bm = BlockMaker()
bm.run(instrs)
out_nodes = io.StringIO()
out_fedges = io.StringIO()
out_bedges = io.StringIO()
for blk in bm.basicblocks:
out_nodes.write("bb%d[label=\"%s\\l\"];\n" % (blk.n, '\n'.join(
[src_lines[i.starts_line-1] for i in blk.instrs if i.starts_line is not None]
).encode("unicode-escape").decode("ascii").replace('"', '\\"').replace("\\n", "\\l")))
# out_nodes.write("bb%d[label=\"%r\"];\n" % (blk.n, blk.n))
if blk.next is not None:
if blk.next > blk.n:
out_fedges.write("bb%d:s -> bb%d:n;\n" % (blk.n, blk.next))
else:
out_bedges.write("bb%d:s -> bb%d:n;\n" % (blk.n, blk.next))
if blk.branch is not None:
if blk.branch > blk.n:
out_fedges.write("bb%d:w -> bb%d:n;\n" % (blk.n, blk.branch))
else:
out_bedges.write("bb%d:e -> bb%d:n;\n" % (blk.n, blk.branch))
if blk.next is None and blk.branch is None:
out_nodes.write("{ rank=max; bb%d; }\n" % (blk.n))
with open(args.out, "w") as f:
f.write("digraph {\n")
f.write("node [shape=\"box\"];\n")
f.write("{ rank=min; bb0; }\n")
f.write(out_nodes.getvalue())
f.write(out_fedges.getvalue())
f.write(out_bedges.getvalue())
f.write("}\n")
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment