Skip to content

Instantly share code, notes, and snippets.

@alexander-hanel
Created September 24, 2018 21:29
Show Gist options
  • Save alexander-hanel/7ee68959b80fb9d23acc10a8e583a3cc to your computer and use it in GitHub Desktop.
Save alexander-hanel/7ee68959b80fb9d23acc10a8e583a3cc to your computer and use it in GitHub Desktop.
A simple recursive-traversal disassembler built on capstone and pefile. It only follows code execution paths.
import sys
import re
import pefile
import string
import struct
from capstool import CapsTool
from capstone import *
from capstone.x86 import *
# Mnemonics handled as forks by disassemble(): the operand target is queued
# and the walk also continues with the next instruction (conditional jumps,
# loop instructions and calls).
BCC = [
    "je", "jne", "js", "jns", "jp", "jnp", "jo", "jno",
    "jl", "jle", "jg", "jge", "jb", "jbe", "ja", "jae",
    "jcxz", "jecxz", "jrcxz",
    "loop", "loopne", "loope",
    "call", "lcall",
]

# Mnemonics that terminate the path currently being walked.
END = [
    "ret", "retn", "retf", "iret", "int3",
]

# Unconditional jumps: the walk moves to the target, with no fall-through.
BNC = [
    "jmp", "jmpf", "ljmp",
]
def get_pe_data(_data):
    """Parse a PE image and locate the file offset of its entry point.

    Args:
        _data: Raw contents of the PE file.

    Returns:
        A ``(status, entry_point, bit)`` tuple. On success ``status`` is
        True, ``entry_point`` is the value ``get_offset_from_rva`` returns
        for the header's ``AddressOfEntryPoint`` and ``bit`` is 32 or 64.
        If pefile fails to parse the data, or the machine type is neither
        of the two recognised values, the result is ``(False, None, None)``.
    """
    try:
        pe = pefile.PE(data=_data)
        # AddressOfEntryPoint is already an RVA, so it is converted to a file
        # offset directly; no ImageBase arithmetic is needed.
        entry_point = pe.get_offset_from_rva(
            pe.OPTIONAL_HEADER.AddressOfEntryPoint)
    except Exception as e:
        # Best-effort parse: report the reason and signal failure instead of
        # propagating whatever pefile raised for malformed input.
        print(e)
        return False, None, None
    # 0x14c is IMAGE_FILE_MACHINE_I386, 0x8664 is IMAGE_FILE_MACHINE_AMD64.
    bit = {0x14c: 32, 0x8664: 64}.get(pe.FILE_HEADER.Machine)
    if bit is None:
        return False, None, None
    return True, entry_point, bit
def to_signed_32(n):
    """Reinterpret the low 32 bits of *n* as a two's-complement signed value."""
    low = n & 0xffffffff
    if low & 0x80000000:
        # Sign bit set: shift the value down into the negative range.
        return low - 0x100000000
    return low
def to_signed_64(n):
    """Reinterpret the low 64 bits of *n* as a two's-complement signed value."""
    low = n & 0xffffffffffffffff
    if low & 0x8000000000000000:
        # Sign bit set: shift the value down into the negative range.
        return low - 0x10000000000000000
    return low
def get_op_dist(bit, addr):
    """Return operand 0 of the instruction at *addr* as a signed integer.

    The operand is read through the module-level ``cs`` object, not through
    a parameter, so ``cs`` must exist before this is called.

    Args:
        bit: Operand width used for the signed conversion, 32 or 64.
        addr: Address of the instruction whose first operand is read.

    Returns:
        ``(True, value)`` with the operand converted to a signed integer of
        the given width, or ``(False, None)`` when the operand is not an
        integer (e.g. a register) or ``bit`` is not 32 or 64.
    """
    import numbers

    opp = cs.get_operand_value(addr, 0)
    # Registers and other non-immediate operands do not come back as
    # integers. numbers.Integral also accepts Python 2 ``long`` values, which
    # a plain ``int`` check would reject.
    if not isinstance(opp, numbers.Integral):
        return False, None
    # Convert to a signed value of the requested width.
    if bit == 32:
        return True, to_signed_32(opp)
    if bit == 64:
        return True, to_signed_64(opp)
    # Unsupported width: report failure instead of using an unbound result.
    return False, None
def get_false_key(addr_bcc):
    """Find the first key of *addr_bcc* whose value is exactly ``False``.

    Returns ``(True, key)`` for the first such key in the dict's iteration
    order, or ``(False, None)`` when no value is the ``False`` object.
    """
    matches = ((True, key) for key, state in addr_bcc.items() if state is False)
    return next(matches, (False, None))
def disassemble(addr, cs, debug=False):
    """Walk the code reachable from *addr* by recursive traversal.

    Instructions are followed one after another. Targets of the mnemonics in
    ``BCC`` are queued in a work table and resumed once the current path
    ends (a mnemonic in ``END``, an undecodable address, or four zero bytes).
    Targets of the mnemonics in ``BNC`` are followed directly.

    The operand width comes from the module-level ``bit`` variable, which is
    not a parameter of this function; it must be set before calling.

    NOTE(review): the indentation of this function was reconstructed from a
    whitespace-stripped copy; confirm the nesting of the ``BNC`` and ``BCC``
    branches against the original file.

    Args:
        addr: Address at which the walk starts.
        cs: Disassembly helper providing ``get_mnem``, ``dword``, ``word``,
            ``byte``, ``get_many_bytes`` and ``next_head``.
        debug: When true, print the address, mnemonic and work table for
            every step.

    Returns:
        ``(visited, strings)``: the list of visited addresses in first-visit
        order, and a dict mapping an instruction address to the printable
        bytes found between that instruction and its branch target.
    """
    visited = []    # first-visit order, returned to the caller
    seen = set()    # same addresses as ``visited``, for O(1) membership tests
    addr_bcc = {}   # branch target -> False (pending) or True (started)
    strings = {}
    while True:
        instr = cs.get_mnem(addr)
        if debug:
            print("%s %s %s" % (hex(addr), instr, addr_bcc))
        if instr is None or cs.dword(addr) == 0x0:
            # Dead end. Retire this address if it is a pending target,
            # otherwise get_false_key() would return it again forever.
            if addr_bcc.get(addr) is False:
                addr_bcc[addr] = True
            status, t_addr = get_false_key(addr_bcc)
            if status:
                addr = t_addr
                continue
            break
        if addr in addr_bcc:
            if addr_bcc[addr] is False:
                # Pending target reached: mark it started and walk it.
                addr_bcc[addr] = True
            else:
                # Target already walked: switch to another pending one.
                status, t_addr = get_false_key(addr_bcc)
                if status:
                    addr = t_addr
                    continue
                break
        if addr not in seen:
            seen.add(addr)
            visited.append(addr)
        if instr in BNC:
            status, op_dist = get_op_dist(bit, addr)
            if status:
                addr = addr + op_dist
                if addr in seen:
                    if addr in addr_bcc:
                        if addr_bcc[addr] is False:
                            addr_bcc[addr] = True
                    else:
                        addr_bcc[addr] = False
                    status, t_addr = get_false_key(addr_bcc)
                    if status:
                        addr = t_addr
                        continue
                # Continue the walk at the jump target.
                continue
        elif instr in BCC:
            # 0x15ff is skipped; presumably the little-endian bytes FF 15 of
            # an indirect ``call [disp32]`` -- TODO confirm cs.word() order.
            if cs.word(addr) != 0x15ff:
                status, op_dist = get_op_dist(bit, addr)
                if status:
                    cal_addr = addr + op_dist
                    if cal_addr not in addr_bcc:
                        if cal_addr not in seen:
                            addr_bcc[cal_addr] = False
                    # A zero byte right before the target with printable
                    # bytes in between is recorded as an inline string. The
                    # +5/-6 offsets presumably assume a 5-byte instruction
                    # followed by a NUL-terminated string -- TODO confirm.
                    if cs.byte(cal_addr - 1) == 0x00:
                        temp_data = cs.get_many_bytes(addr + 5, op_dist - 6)
                        if temp_data:
                            if all(c in string.printable for c in temp_data):
                                strings[addr] = temp_data
                                # Skip the string bytes: resume at a pending
                                # target instead of falling through.
                                status, t_addr = get_false_key(addr_bcc)
                                if status:
                                    addr = t_addr
                                    continue
        elif instr in END:
            status, t_addr = get_false_key(addr_bcc)
            if status:
                addr = t_addr
                continue
            break
        addr = cs.next_head(addr)
    return visited, strings
# Script body: disassemble the PE file named on the command line and print
# every visited address with its disassembly, then the recovered strings.
# ``bit`` and ``cs`` stay module-level names because disassemble() and
# get_op_dist() read them as globals.
if len(sys.argv) < 2:
    sys.stderr.write("usage: %s <pe-file>\n" % sys.argv[0])
    sys.exit(1)
with open(sys.argv[1], "rb") as infile:
    data = infile.read()
status, addr, bit = get_pe_data(data)
if not status:
    # get_pe_data() returned (False, None, None); continuing would pass None
    # values on to CapsTool and disassemble().
    sys.stderr.write("unable to parse %s as a 32/64-bit PE file\n" % sys.argv[1])
    sys.exit(1)
# The first two bytes of the image are replaced with NULs before it is handed
# to CapsTool. NOTE(review): the reason is not evident from this file.
cs = CapsTool("\x00\x00" + data[2:], bit)
yy, ss = disassemble(addr, cs)
for x in yy:
    print("%s %s" % (hex(x), cs.get_disasm(x)))
print(ss)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment