Created
February 16, 2018 18:54
-
-
Save HMPerson1/52cc2eb8e240da1e3a031921c802c3be to your computer and use it in GitHub Desktop.
Janky Python CFG Generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import dis | |
import opcode | |
import argparse | |
import io | |
OP_RETURN_VALUE = opcode.opmap['RETURN_VALUE'] | |
OP_JUMP_ABSOLUTE = opcode.opmap['JUMP_ABSOLUTE'] | |
OP_JUMP_FORWARD = opcode.opmap['JUMP_FORWARD'] | |
OP_FOR_ITER = opcode.opmap['FOR_ITER'] | |
OP_CONTINUE_LOOP = opcode.opmap['CONTINUE_LOOP'] | |
OP_BREAK_LOOP = opcode.opmap['BREAK_LOOP'] | |
OP_SETUP_LOOP = opcode.opmap['SETUP_LOOP'] | |
OPS_SETUP_BLOCK = [opcode.opmap[x] for x in | |
['SETUP_WITH', 'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY']] | |
OP_POP_BLOCK = opcode.opmap['POP_BLOCK'] | |
class BasicBlock: | |
def __init__(self, n): | |
self.n = n | |
self.instrs = [] | |
self.next = None | |
self.branch = None | |
class BlockMaker: | |
def __init__(self): | |
self.curblock = None | |
self.basicblocks = [] | |
self.off_to_blkn = dict() | |
self.block_stack = [] | |
def get_block(self, offset): | |
if offset in self.off_to_blkn: | |
return self.basicblocks[self.off_to_blkn[offset]] | |
ret = BasicBlock(len(self.basicblocks)) | |
self.basicblocks.append(ret) | |
self.off_to_blkn[offset] = ret.n | |
return ret | |
def uncond_branch(self, to_offset): | |
self.curblock.next = self.get_block(to_offset).n | |
self.curblock = None | |
def cond_branch(self, next_offset, branch_offset): | |
self.curblock.next = self.get_block(next_offset).n | |
self.curblock.branch = branch_offset | |
self.curblock = None | |
def run(self, instrs): | |
for instr in instrs: | |
if self.curblock is None: | |
self.curblock = self.get_block(instr.offset) | |
if instr.is_jump_target and self.curblock.instrs: | |
nextblock = self.get_block(instr.offset) | |
self.curblock.next = nextblock.n | |
self.curblock = nextblock | |
self.curblock.instrs.append(instr) | |
if instr.opcode in OPS_SETUP_BLOCK: | |
self.block_stack.append(instr.offset + 2 + instr.arg) | |
elif instr.opcode == OP_POP_BLOCK: | |
del self.block_stack[-1] | |
elif instr.opcode == OP_RETURN_VALUE: | |
self.curblock = None | |
elif instr.opcode == OP_JUMP_ABSOLUTE or instr.opcode == OP_CONTINUE_LOOP: | |
self.uncond_branch(instr.arg) | |
elif instr.opcode in opcode.hasjabs: | |
self.cond_branch(instr.offset + 2, instr.arg) | |
elif instr.opcode == OP_JUMP_FORWARD: | |
self.uncond_branch(instr.offset + 2 + instr.arg) | |
elif instr.opcode == OP_FOR_ITER: | |
self.cond_branch(instr.offset + 2, instr.offset + 2 + instr.arg) | |
elif instr.opcode == OP_BREAK_LOOP: | |
self.uncond_branch(self.block_stack[-1]) | |
else: | |
pass | |
for blk in self.basicblocks: | |
if blk.branch is not None: | |
blk.branch = self.off_to_blkn[blk.branch] | |
def main(): | |
parser = argparse.ArgumentParser() | |
parser.add_argument("src", help="the python file you want to get a control flow graph of") | |
parser.add_argument("out", help="the file where the graphviz output will be written") | |
args = parser.parse_args() | |
with open(args.src, "r") as f: | |
src_str = f.read() | |
src_lines = src_str.splitlines() | |
instrs = dis.get_instructions(src_str) | |
bm = BlockMaker() | |
bm.run(instrs) | |
out_nodes = io.StringIO() | |
out_fedges = io.StringIO() | |
out_bedges = io.StringIO() | |
for blk in bm.basicblocks: | |
out_nodes.write("bb%d[label=\"%s\\l\"];\n" % (blk.n, '\n'.join( | |
[src_lines[i.starts_line-1] for i in blk.instrs if i.starts_line is not None] | |
).encode("unicode-escape").decode("ascii").replace('"', '\\"').replace("\\n", "\\l"))) | |
# out_nodes.write("bb%d[label=\"%r\"];\n" % (blk.n, blk.n)) | |
if blk.next is not None: | |
if blk.next > blk.n: | |
out_fedges.write("bb%d:s -> bb%d:n;\n" % (blk.n, blk.next)) | |
else: | |
out_bedges.write("bb%d:s -> bb%d:n;\n" % (blk.n, blk.next)) | |
if blk.branch is not None: | |
if blk.branch > blk.n: | |
out_fedges.write("bb%d:w -> bb%d:n;\n" % (blk.n, blk.branch)) | |
else: | |
out_bedges.write("bb%d:e -> bb%d:n;\n" % (blk.n, blk.branch)) | |
if blk.next is None and blk.branch is None: | |
out_nodes.write("{ rank=max; bb%d; }\n" % (blk.n)) | |
with open(args.out, "w") as f: | |
f.write("digraph {\n") | |
f.write("node [shape=\"box\"];\n") | |
f.write("{ rank=min; bb0; }\n") | |
f.write(out_nodes.getvalue()) | |
f.write(out_fedges.getvalue()) | |
f.write(out_bedges.getvalue()) | |
f.write("}\n") | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment