Last active
August 16, 2024 09:33
-
-
Save bin2415/15028e78d5cf0c708fe1ab82fc252799 to your computer and use it in GitHub Desktop.
Python script of Ghidra to dump cfg
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#TODO write a description for this script | |
#@author Chengbin, MyriaCore | |
#@category Functions | |
#@keybinding | |
#@menupath | |
#@toolbar | |
#TODO Add User Code Here | |
# reference https://github.com/NationalSecurityAgency/ghidra/issues/826 | |
from __future__ import division | |
import logging | |
import site | |
import sys | |
import os | |
from ghidra.program.model.block import BasicBlockModel | |
from ghidra.program.model.block import CodeBlockIterator | |
from ghidra.program.model.block import CodeBlockReference | |
from ghidra.program.model.block import CodeBlockReferenceIterator | |
from ghidra.program.model.listing import CodeUnitIterator; | |
from ghidra.program.model.listing import Function; | |
from ghidra.program.model.listing import FunctionManager; | |
from ghidra.program.model.listing import Listing; | |
from ghidra.program.database.code import InstructionDB | |
def addBB(bb, G, bb_func_map):
    """Append the DOT node for one basic block to the graph source.

    The block is drawn as an HTML-like table with one row per instruction.
    Each row's address cell carries an ``insn_<addr>`` port so that edges can
    be attached to a specific instruction.

    Args:
        bb: Ghidra code block to render.
        G: DOT source accumulated so far (a string).
        bb_func_map: dict mapping a block's minimum address offset to the DOT
            endpoint that incoming edges should target. Updated in place.

    Returns:
        ``G`` with the node definition for ``bb`` appended.
    """
    listing = currentProgram.getListing()
    bbAddr = bb.getMinAddress().getOffset()

    rows = []
    firstInstStart = None
    codeUnits = listing.getCodeUnits(bb, True)
    while codeUnits.hasNext():
        codeUnit = codeUnits.next()
        # Only instructions get a row; other code units are skipped.
        if not isinstance(codeUnit, InstructionDB):
            continue
        instStart = codeUnit.getAddress().getOffset()
        if firstInstStart is None:
            firstInstStart = instStart
        # The text is embedded in an HTML-like label, so markup characters
        # in the disassembly must be escaped ('&' first, to avoid escaping
        # the entities produced for '<' and '>').
        instText = (str(codeUnit)
                    .replace('&', '&amp;')
                    .replace('<', '&lt;')
                    .replace('>', '&gt;'))
        rows.append('''
<TR>
<TD PORT="insn_%x" ALIGN="RIGHT"><FONT FACE="monospace">%x: </FONT></TD>
<TD ALIGN="LEFT"><FONT FACE="monospace">%s</FONT></TD>
<TD> </TD> // for spacing
</TR>''' % (instStart, instStart, instText))

    if firstInstStart is None:
        # Block without instructions: there is no instruction port to point
        # at, so edges target the node itself. A placeholder row is emitted
        # because a table needs at least one row.
        rows.append('''
<TR>
<TD ALIGN="LEFT"><FONT FACE="monospace">(no instructions)</FONT></TD>
</TR>''')
        bb_func_map[bbAddr] = 'bb_%x' % bbAddr
    else:
        bb_func_map[bbAddr] = 'bb_%x:insn_%x' % (bbAddr, firstInstStart)

    # add node
    return G + (''' bb_%x [shape=plaintext label=<
<TABLE BORDER="1" CELLBORDER="0" CELLSPACING="0">%s
</TABLE>>];\n''' % (bbAddr, ''.join(rows)))
def addSuccessors(bb_func_set, bb_func_map, G):
    """Append the DOT edges between the basic blocks of one function.

    For every block, only references that originate from the block's last
    instruction are considered, and at most one edge is emitted per
    destination block. Destinations that are not keys of ``bb_func_map``
    are ignored.

    Args:
        bb_func_set: iterable of the Ghidra code blocks of the function.
        bb_func_map: dict mapping a block's start offset to its DOT endpoint
            string (as filled in by ``addBB``).
        G: DOT source accumulated so far (a string).

    Returns:
        ``G`` with one edge statement appended per retained successor.
    """
    listing = currentProgram.getListing()
    edges = []
    for bb in bb_func_set:
        currBBAddr = bb.getMinAddress().getOffset()

        # Find the address range of the last instruction in the block.
        lastInstStart = 0x0
        lastInstEnd = 0x0
        codeUnits = listing.getCodeUnits(bb, True)
        while codeUnits.hasNext():
            codeUnit = codeUnits.next()
            if not isinstance(codeUnit, InstructionDB):
                continue
            lastInstStart = codeUnit.getAddress().getOffset()
            lastInstEnd = lastInstStart + codeUnit.getLength()

        sucSet = set()  # destination offsets already given an edge
        successors = bb.getDestinations(monitor)
        while successors.hasNext():
            sucBBRef = successors.next()
            currInsnAddr = sucBBRef.getReferent().getOffset()
            # the reference is not in the last instruction
            if not (lastInstStart <= currInsnAddr < lastInstEnd):
                continue
            sucBB = sucBBRef.getDestinationBlock()
            # NOTE(review): defensive guard in case the destination block
            # cannot be resolved - confirm whether Ghidra can return None here.
            if sucBB is None:
                continue
            sucOffset = sucBB.getFirstStartAddress().getOffset()
            if sucOffset in sucSet or sucOffset not in bb_func_map:
                continue
            edges.append(' bb_%x:insn_%x -> %s [%s];\n'
                         % (currBBAddr, currInsnAddr, bb_func_map[sucOffset],
                            _flowEdgeAttrs(sucBBRef.getFlowType())))
            sucSet.add(sucOffset)
    return G + ''.join(edges)


def _flowEdgeAttrs(flowType):
    """Return the DOT edge attributes used to draw the given flow type."""
    if (flowType.isJump() and flowType.isUnConditional()) or flowType.isFallthrough():
        edgeAttrs = 'color=gray style=dashed'
    elif flowType.isCall() and flowType.isUnConditional():
        edgeAttrs = 'color=cyan4 style=dashed'
    elif flowType.isJump() and flowType.isConditional():
        edgeAttrs = 'color=gray style=solid'
    elif flowType.isCall() and flowType.isConditional():
        edgeAttrs = 'color=cyan4 style=solid'
    else:
        edgeAttrs = 'color=gray style=dotted'
    return edgeAttrs + ' tooltip="%s"' % str(flowType)
def dumpBlocks(output_dir='/tmp/cfg'):
    """Write one Graphviz DOT file per non-external function of the program.

    Each file contains a flow-type legend, one table node per basic block
    (built by ``addBB``) and the edges between them (built by
    ``addSuccessors``).

    Args:
        output_dir: directory that receives the ``<function name>.dot``
            files. It is created if it does not exist.
    """
    graphHeader = '''digraph "func 0x%x" {
newrank=true;
// Flow Type Legend
subgraph cluster_01 {
rank=same;
node [shape=plaintext]
label = "Legend";
key [label=<<table border="0" cellpadding="2" cellspacing="0" cellborder="0">
<tr><td align="right" port="i1">Jump/Fallthrough</td></tr>
<tr><td align="right" port="i2">Call</td></tr>
<tr><td align="right" port="i3">Conditional Jump</td></tr>
<tr><td align="right" port="i4">Conditional Call</td></tr>
<tr><td align="right" port="i5">Other</td></tr>
</table>>];
key2 [label=<<table border="0" cellpadding="2" cellspacing="0" cellborder="0">
<tr><td port="i1"> </td></tr>
<tr><td port="i2"> </td></tr>
<tr><td port="i3"> </td></tr>
<tr><td port="i4"> </td></tr>
<tr><td port="i5"> </td></tr>
</table>>];
key:i1:e -> key2:i1:w [color=gray style=dashed];
key:i2:e -> key2:i2:w [color=cyan4 style=dashed];
key:i3:e -> key2:i3:w [color=gray];
key:i4:e -> key2:i4:w [color=cyan4];
key:i5:e -> key2:i5:w [color=gray style=dotted];
}
'''
    # Without this, open() below fails when the directory is missing.
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    bbModel = BasicBlockModel(currentProgram)
    functionManager = currentProgram.getFunctionManager()

    funcs_set = set()   # entry offsets of functions already dumped
    used_names = set()  # output file stems already written
    for func in functionManager.getFunctions(True):
        # we skip external functions
        if func.isExternal():
            continue
        func_va = func.getEntryPoint().getOffset()
        if func_va in funcs_set:
            continue
        funcs_set.add(func_va)

        G = graphHeader % func_va

        # iter over the basic blocks
        bb_func_map = dict()
        bb_func_set = set()
        codeBlockIterator = bbModel.getCodeBlocksContaining(func.getBody(), monitor)
        while codeBlockIterator.hasNext():
            bb = codeBlockIterator.next()
            bb_func_set.add(bb)
            G = addBB(bb, G, bb_func_map)
        G = addSuccessors(bb_func_set, bb_func_map, G)
        G += '}'

        # A path separator in the function name would make open() fail.
        fileStem = func.getName().replace('/', '_').replace(os.sep, '_')
        # Functions sharing a name would overwrite each other's file; later
        # ones get the entry address appended to stay distinct.
        if fileStem in used_names:
            fileStem = '%s_%x' % (fileStem, func_va)
        used_names.add(fileStem)

        with open(os.path.join(output_dir, '%s.dot' % fileStem), 'w') as dot_output:
            dot_output.write(G)
# Script entry point: dump the CFG of every function when the file is run
# directly rather than imported.
if __name__ == "__main__":
    dumpBlocks()
Hey, I've since made the formatting for this script a bit better. Please update the gist with the following patch applied:
View Patch
--- ghidraCFG.py.1 2020-11-18 17:05:39.162670627 -0500
+++ ghidraCFG.py 2020-11-18 17:05:32.006670627 -0500
@@ -32,23 +32,37 @@
lastInstStart = 0x0
lastInstEnd = 0x0
- cur_bb_str = ''
+ bb_tbl_rows = ''
+ i = 0
while codeUnits.hasNext():
codeUnit = codeUnits.next()
# check if the code unit is the instruction
if not isinstance(codeUnit, InstructionDB):
continue
+ # Record address of first instruction
+ if i == 0:
+ firstInstStart = codeUnit.getAddress().getOffset()
lastInstStart = codeUnit.getAddress().getOffset()
lastInstEnd = lastInstStart + codeUnit.getLength()
- cur_bb_str += ('%x: %s\n' % (lastInstStart, str(codeUnit)))
+ bb_tbl_rows += ('''
+ <TR>
+ <TD PORT="insn_%x" ALIGN="RIGHT"><FONT FACE="monospace">%x: </FONT></TD>
+ <TD ALIGN="LEFT"><FONT FACE="monospace">%s</FONT></TD>
+ <TD> </TD> // for spacing
+ </TR>''' % (lastInstStart, lastInstStart, str(codeUnit)))
+ i += 1 # Bump Counter
+
+ bb_tbl_node = (''' bb_%x [shape=plaintext label=<
+ <TABLE BORDER="1" CELLBORDER="0" CELLSPACING="0">%s
+ </TABLE>>];\n''' % (bb.getMinAddress().getOffset(), bb_tbl_rows))
-
- bb_func_map[bb.getMinAddress().getOffset()] = cur_bb_str
+ bb_func_map[bb.getMinAddress().getOffset()] = \
+ 'bb_%x:insn_%x' % (bb.getMinAddress().getOffset(), firstInstStart)
# add node
- G += ('"%s" [shape=square];\n' % cur_bb_str)
+ G += bb_tbl_node
return G
@@ -92,7 +106,25 @@
idx += 1
- G += (('"%s" -> "%s";\n') % (cur_bb_str, bb_func_map[sucOffset]))
+ currInsnAddr = sucBBRef.getReferent().getOffset()
+ currBBAddr = bb.getMinAddress().getOffset()
+ flowType = sucBBRef.getFlowType()
+
+ if (flowType.isJump() and flowType.isUnConditional()) or flowType.isFallthrough():
+ edgeAttrs = 'color=gray style=dashed'
+ elif flowType.isCall() and flowType.isUnConditional():
+ edgeAttrs = 'color=cyan4 style=dashed'
+ elif flowType.isJump() and flowType.isConditional():
+ edgeAttrs = 'color=gray style=solid'
+ elif flowType.isCall() and flowType.isConditional():
+ edgeAttrs = 'color=cyan4 style=solid'
+ else:
+ edgeAttrs = 'color=gray style=dotted'
+
+ edgeAttrs += ' tooltip="%s"' % str(flowType)
+ G += ((' bb_%x:insn_%x -> %s [%s];\n') \
+ % (currBBAddr, currInsnAddr, bb_func_map[sucOffset],
+ edgeAttrs))
sucSet.add(sucOffset)
@@ -115,7 +147,34 @@
if func_va in funcs_set:
continue
- G = ('strict digraph "func 0x%x" {' % func_va)
+ G = ('''digraph "func 0x%x" {
+ newrank=true;
+ // Flow Type Legend
+ subgraph cluster_01 {
+ rank=same;
+ node [shape=plaintext]
+ label = "Legend";
+ key [label=<<table border="0" cellpadding="2" cellspacing="0" cellborder="0">
+ <tr><td align="right" port="i1">Jump/Fallthrough</td></tr>
+ <tr><td align="right" port="i2">Call</td></tr>
+ <tr><td align="right" port="i3">Conditional Jump</td></tr>
+ <tr><td align="right" port="i4">Conditional Call</td></tr>
+ <tr><td align="right" port="i5">Other</td></tr>
+ </table>>];
+ key2 [label=<<table border="0" cellpadding="2" cellspacing="0" cellborder="0">
+ <tr><td port="i1"> </td></tr>
+ <tr><td port="i2"> </td></tr>
+ <tr><td port="i3"> </td></tr>
+ <tr><td port="i4"> </td></tr>
+ <tr><td port="i5"> </td></tr>
+ </table>>];
+ key:i1:e -> key2:i1:w [color=gray style=dashed];
+ key:i2:e -> key2:i2:w [color=cyan4 style=dashed];
+ key:i3:e -> key2:i3:w [color=gray];
+ key:i4:e -> key2:i4:w [color=cyan4];
+ key:i5:e -> key2:i5:w [color=gray style=dotted];
+ }
+''' % func_va)
funcs_set.add(func_va)
codeBlockIterator = bbModel.getCodeBlocksContaining(func.getBody(), monitor);
@@ -134,9 +193,9 @@
G += '}'
- with open('/tmp/cfg/func_%x.dot' % func_va, 'w') as dot_output:
+ with open('/tmp/cfg/%s.dot' % func.getName(), 'w') as dot_output:
dot_output.write(G)
if __name__ == "__main__":
- dumpBlocks()
\ No newline at end of file
+ dumpBlocks()
Hey, I've since made the formatting for this script a bit better. Please update the gist with the following patch applied:
View Patch
Great, thanks! The script has been updated.
Dope! For those who are interested in how I'm converting all these to images, I'm doing the following after running the script:
#!/bin/bash
cd /cfg/tmp
for file in *.dot; do dot -Tsvg ${file/.dot/.svg}
zip cfg.zip *.svg
mv cfg.zip ~/
rm *
cd ~
for file in *.dot; do dot -Tsvg ${file/.dot/.svg}
This did not work for me. Don't know if this is just with my system.
However, a simple change in the command got it working. The files will be named as filename.dot.svg.
Hope this helps someone!
cd /cfg/tmp
for file in *.dot; do dot -Tsvg $file -o $file.svg; done
zip cfg.zip *.svg
mv cfg.zip ~/
rm *.dot
rm *.svg
cd ~
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
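How to run:
1. cd to the folder that contains ghidraCFG.py.
2. mkdir /tmp/cfg
3. analyzeHeadless ~/ghidra/project tmp_pro -scriptPATH $PWD -postScript ghidraCFG.py -deleteProject -import /path/to/binary
4. The generated .dot files (one per function) are written to /tmp/cfg.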