Skip to content

Instantly share code, notes, and snippets.

@tomrittervg
Last active May 22, 2018 13:32
Show Gist options
  • Save tomrittervg/c5e05ae1d1f470c081cfe8af4c12d4e5 to your computer and use it in GitHub Desktop.
Save tomrittervg/c5e05ae1d1f470c081cfe8af4c12d4e5 to your computer and use it in GitHub Desktop.
Really rough and ugly code to parse a PE File and DWARF Information. Doesn't support a ton of stuff, only supports enough to do the one thing I needed it to do.
#!/usr/bin/env python
# Values of the "has children" byte in an abbreviation declaration.
DW_CHILDREN_no = 0x00
DW_CHILDREN_yes = 0x01

# DW_AT_language codes this tool knows how to name (code -> name).
DW_LANGs = {
    0x0004: 'DW_LANG_C_plus_plus',
    0x000C: 'DW_LANG_C99',
}
def lookup(val, db):
    """Reverse-lookup: return the first key of *db* whose value equals *val*.

    Returns False when no value in *db* matches.
    """
    return next((key for key in db if db[key] == val), False)
def DW_FORM(val):
    """Return the DW_FORM_* name for the numeric form code *val*.

    Raises Exception when the code is not present in DW_FORMs.
    """
    name = lookup(val, DW_FORMs)
    if name:
        return name
    raise Exception("Could not locate the value " + hex(val) + " in the table")
# Attribute form encodings (name -> code), searched in reverse by DW_FORM().
DW_FORMs = {
    'DW_FORM_addr': 0x01,
    'DW_FORM_block2': 0x03,
    'DW_FORM_block4': 0x04,
    'DW_FORM_data2': 0x05,
    'DW_FORM_data4': 0x06,
    'DW_FORM_data8': 0x07,
    'DW_FORM_string': 0x08,
    'DW_FORM_block': 0x09,
    'DW_FORM_block1': 0x0a,
    'DW_FORM_data1': 0x0b,
    'DW_FORM_flag': 0x0c,
    'DW_FORM_sdata': 0x0d,
    'DW_FORM_strp': 0x0e,
    'DW_FORM_udata': 0x0f,
    'DW_FORM_ref_addr': 0x10,
    'DW_FORM_ref1': 0x11,
    'DW_FORM_ref2': 0x12,
    'DW_FORM_ref4': 0x13,
    'DW_FORM_ref8': 0x14,
    'DW_FORM_ref_udata': 0x15,
    'DW_FORM_indirect': 0x16,
    'DW_FORM_sec_offset': 0x17,
    'DW_FORM_exprloc': 0x18,
    'DW_FORM_flag_present': 0x19,
    'DW_FORM_ref_sig8': 0x20,
}
def DW_TAG(val):
    """Return the DW_TAG_* name for the numeric tag code *val*.

    Codes missing from DW_TAGs but inside 0x4080..0xffff are rendered as
    'DW_TAG_Unknown_<hex>'; any other unknown code raises Exception.
    """
    name = lookup(val, DW_TAGs)
    if name:
        return name
    if 0x4080 <= val <= 0xffff:
        return 'DW_TAG_Unknown_' + format(val, "x")
    raise Exception("Could not locate the value " + hex(val) + " in the table")
# DIE tag encodings (name -> code), searched in reverse by DW_TAG().
# Fixes the misspelled 'DW_TAG_DWarf_procedure' (the DWARF name is
# 'DW_TAG_dwarf_procedure') and adds the DWARF 5 tags 0x44-0x4b so the table
# covers the same DWARF versions as DW_ATs.
DW_TAGs = {
    'DW_TAG_array_type': 0x01,
    'DW_TAG_class_type': 0x02,
    'DW_TAG_entry_point': 0x03,
    'DW_TAG_enumeration_type': 0x04,
    'DW_TAG_formal_parameter': 0x05,
    'DW_TAG_imported_declaration': 0x08,
    'DW_TAG_label': 0x0a,
    'DW_TAG_lexical_block': 0x0b,
    'DW_TAG_member': 0x0d,
    'DW_TAG_pointer_type': 0x0f,
    'DW_TAG_reference_type': 0x10,
    'DW_TAG_compile_unit': 0x11,
    'DW_TAG_string_type': 0x12,
    'DW_TAG_structure_type': 0x13,
    'DW_TAG_subroutine_type': 0x15,
    'DW_TAG_typedef': 0x16,
    'DW_TAG_union_type': 0x17,
    'DW_TAG_unspecified_parameters': 0x18,
    'DW_TAG_variant': 0x19,
    'DW_TAG_common_block': 0x1a,
    'DW_TAG_common_inclusion': 0x1b,
    'DW_TAG_inheritance': 0x1c,
    'DW_TAG_inlined_subroutine': 0x1d,
    'DW_TAG_module': 0x1e,
    'DW_TAG_ptr_to_member_type': 0x1f,
    'DW_TAG_set_type': 0x20,
    'DW_TAG_subrange_type': 0x21,
    'DW_TAG_with_stmt': 0x22,
    'DW_TAG_access_declaration': 0x23,
    'DW_TAG_base_type': 0x24,
    'DW_TAG_catch_block': 0x25,
    'DW_TAG_const_type': 0x26,
    'DW_TAG_constant': 0x27,
    'DW_TAG_enumerator': 0x28,
    'DW_TAG_file_type': 0x29,
    'DW_TAG_friend': 0x2a,
    'DW_TAG_namelist': 0x2b,
    'DW_TAG_namelist_item': 0x2c,
    'DW_TAG_packed_type': 0x2d,
    'DW_TAG_subprogram': 0x2e,
    'DW_TAG_template_type_parameter': 0x2f,
    'DW_TAG_template_value_parameter': 0x30,
    'DW_TAG_thrown_type': 0x31,
    'DW_TAG_try_block': 0x32,
    'DW_TAG_variant_part': 0x33,
    'DW_TAG_variable': 0x34,
    'DW_TAG_volatile_type': 0x35,
    'DW_TAG_dwarf_procedure': 0x36,
    'DW_TAG_restrict_type': 0x37,
    'DW_TAG_interface_type': 0x38,
    'DW_TAG_namespace': 0x39,
    'DW_TAG_imported_module': 0x3a,
    'DW_TAG_unspecified_type': 0x3b,
    'DW_TAG_partial_unit': 0x3c,
    'DW_TAG_imported_unit': 0x3d,
    'DW_TAG_condition': 0x3f,
    'DW_TAG_shared_type': 0x40,
    'DW_TAG_type_unit': 0x41,
    'DW_TAG_rvalue_reference_type': 0x42,
    'DW_TAG_template_alias': 0x43,
    # DWARF 5 additions
    'DW_TAG_coarray_type': 0x44,
    'DW_TAG_generic_subrange': 0x45,
    'DW_TAG_dynamic_type': 0x46,
    'DW_TAG_atomic_type': 0x47,
    'DW_TAG_call_site': 0x48,
    'DW_TAG_call_site_parameter': 0x49,
    'DW_TAG_skeleton_unit': 0x4a,
    'DW_TAG_immutable_type': 0x4b,
}
def DW_AT(val):
    """Return the DW_AT_* name for the numeric attribute code *val*.

    Codes missing from DW_ATs but inside 0x2000..0x3fff are rendered as
    'DW_AT_Unknown_<hex>'; any other unknown code raises Exception.
    """
    name = lookup(val, DW_ATs)
    if name:
        return name
    if 0x2000 <= val <= 0x3fff:
        return 'DW_AT_Unknown_' + format(val, "x")
    raise Exception("Could not locate the value " + hex(val) + " in the table")
# Attribute name encodings (name -> code), searched in reverse by DW_AT().
DW_ATs = {
    'DW_AT_sibling': 0x01,
    'DW_AT_location': 0x02,
    'DW_AT_name': 0x03,
    'DW_AT_ordering': 0x09,
    'DW_AT_byte_size': 0x0b,
    'DW_AT_bit_offset': 0x0c,
    'DW_AT_bit_size': 0x0d,
    'DW_AT_stmt_list': 0x10,
    'DW_AT_low_pc': 0x11,
    'DW_AT_high_pc': 0x12,
    'DW_AT_language': 0x13,
    'DW_AT_discr': 0x15,
    'DW_AT_discr_value': 0x16,
    'DW_AT_visibility': 0x17,
    'DW_AT_import': 0x18,
    'DW_AT_string_length': 0x19,
    'DW_AT_common_reference': 0x1a,
    'DW_AT_comp_dir': 0x1b,
    'DW_AT_const_value': 0x1c,
    'DW_AT_containing_type': 0x1d,
    'DW_AT_default_value': 0x1e,
    'DW_AT_inline': 0x20,
    'DW_AT_is_optional': 0x21,
    'DW_AT_lower_bound': 0x22,
    'DW_AT_producer': 0x25,
    'DW_AT_prototyped': 0x27,
    'DW_AT_return_addr': 0x2a,
    'DW_AT_start_scope': 0x2c,
    'DW_AT_bit_stride': 0x2e,
    'DW_AT_upper_bound': 0x2f,
    'DW_AT_abstract_origin': 0x31,
    'DW_AT_accessibility': 0x32,
    'DW_AT_address_class': 0x33,
    'DW_AT_artificial': 0x34,
    'DW_AT_base_types': 0x35,
    'DW_AT_calling_convention': 0x36,
    'DW_AT_count': 0x37,
    'DW_AT_data_member_location': 0x38,
    'DW_AT_decl_column': 0x39,
    'DW_AT_decl_file': 0x3a,
    'DW_AT_decl_line': 0x3b,
    'DW_AT_declaration': 0x3c,
    'DW_AT_discr_list': 0x3d,
    'DW_AT_encoding': 0x3e,
    'DW_AT_external': 0x3f,
    'DW_AT_frame_base': 0x40,
    'DW_AT_friend': 0x41,
    'DW_AT_identifier_case': 0x42,
    'DW_AT_macro_info': 0x43,
    'DW_AT_namelist_item': 0x44,
    'DW_AT_priority': 0x45,
    'DW_AT_segment': 0x46,
    'DW_AT_specification': 0x47,
    'DW_AT_static_link': 0x48,
    'DW_AT_type': 0x49,
    'DW_AT_use_location': 0x4a,
    'DW_AT_variable_parameter': 0x4b,
    'DW_AT_virtuality': 0x4c,
    'DW_AT_vtable_elem_location': 0x4d,
    'DW_AT_allocated': 0x4e,
    'DW_AT_associated': 0x4f,
    'DW_AT_data_location': 0x50,
    'DW_AT_byte_stride': 0x51,
    'DW_AT_entry_pc': 0x52,
    'DW_AT_use_UTF8': 0x53,
    'DW_AT_extension': 0x54,
    'DW_AT_ranges': 0x55,
    'DW_AT_trampoline': 0x56,
    'DW_AT_call_column': 0x57,
    'DW_AT_call_file': 0x58,
    'DW_AT_call_line': 0x59,
    'DW_AT_description': 0x5a,
    'DW_AT_binary_scale': 0x5b,
    'DW_AT_decimal_scale': 0x5c,
    'DW_AT_small': 0x5d,
    'DW_AT_decimal_sign': 0x5e,
    'DW_AT_digit_count': 0x5f,
    'DW_AT_picture_string': 0x60,
    'DW_AT_mutable': 0x61,
    'DW_AT_threads_scaled': 0x62,
    'DW_AT_explicit': 0x63,
    'DW_AT_object_pointer': 0x64,
    'DW_AT_endianity': 0x65,
    'DW_AT_elemental': 0x66,
    'DW_AT_pure': 0x67,
    'DW_AT_recursive': 0x68,
    'DW_AT_signature': 0x69,
    'DW_AT_main_subprogram': 0x6a,
    'DW_AT_data_bit_offset': 0x6b,
    'DW_AT_const_expr': 0x6c,
    'DW_AT_enum_class': 0x6d,
    'DW_AT_linkage_name': 0x6e,
    'DW_AT_string_length_bit_size': 0x6f,
    'DW_AT_string_length_byte_size': 0x70,
    'DW_AT_rank': 0x71,
    'DW_AT_str_offsets_base': 0x72,
    'DW_AT_addr_base': 0x73,
    'DW_AT_rnglists_base': 0x74,
    'DW_AT_dwo_name': 0x76,
    'DW_AT_reference': 0x77,
    'DW_AT_rvalue_reference': 0x78,
    'DW_AT_macros': 0x79,
    'DW_AT_call_all_calls': 0x7a,
    'DW_AT_call_all_source_calls': 0x7b,
    'DW_AT_call_all_tail_calls': 0x7c,
    'DW_AT_call_return_pc': 0x7d,
    'DW_AT_call_value': 0x7e,
    'DW_AT_call_origin': 0x7f,
    'DW_AT_call_parameter': 0x80,
    'DW_AT_call_pc': 0x81,
    'DW_AT_call_tail_call': 0x82,
    'DW_AT_call_target': 0x83,
    'DW_AT_call_target_clobbered': 0x84,
    'DW_AT_call_data_location': 0x85,
    'DW_AT_call_data_value': 0x86,
    'DW_AT_noreturn': 0x87,
    'DW_AT_alignment': 0x88,
    'DW_AT_export_symbols': 0x89,
    'DW_AT_deleted': 0x8a,
    'DW_AT_defaulted': 0x8b,
    'DW_AT_loclists_base': 0x8c,
}
# Base type encodings for DW_AT_encoding (code -> name).
DW_ATEs = {
    0x01: 'DW_ATE_address',
    0x02: 'DW_ATE_boolean',
    0x03: 'DW_ATE_complex_float',
    0x04: 'DW_ATE_float',
    0x05: 'DW_ATE_signed',
    0x06: 'DW_ATE_signed_char',
    0x07: 'DW_ATE_unsigned',
    0x08: 'DW_ATE_unsigned_char',
    0x09: 'DW_ATE_imaginary_float',
    0x0a: 'DW_ATE_packed_decimal',
    0x0b: 'DW_ATE_numeric_string',
    0x0c: 'DW_ATE_edited',
    0x0d: 'DW_ATE_signed_fixed',
    0x0e: 'DW_ATE_unsigned_fixed',
    0x0f: 'DW_ATE_decimal_float',
    0x10: 'DW_ATE_UTF',
    0x11: 'DW_ATE_UCS',
    0x12: 'DW_ATE_ASCII',
}
#!/usr/bin/env python
import os
import sys
import struct
import binascii
import argparse
from collections import OrderedDict
from dwarf_constants import *
class DebugAbbrev:
    """Read cursor over the raw bytes of a .debug_abbrev section.

    The class remembers, for every abbreviation code it has come across, the
    index inside ``data`` at which that abbreviation declaration starts, so a
    DIE's abbreviation code can later be resolved to its declaration.
    """

    def __init__(self, f, originalOffset, data):
        """Store the section bytes.

        f              -- file object the section was read from (only used
                          for diagnostics)
        originalOffset -- file offset at which the section starts
        data           -- indexable byte buffer holding the section contents
        """
        self.fileRef = f
        self.originalOffset = originalOffset
        self.data = data
        self.index = 0
        self.offset_index = 0
        # abbreviation code -> index into self.data of its declaration
        self.offsets = {1 : 0}

    def translateToFileOffset(self, entryIndx):
        """Return the file offset of the declaration for code *entryIndx*.

        Raises Exception when that code has not been recorded yet.
        """
        if entryIndx not in self.offsets:
            raise Exception("Got a translation request for an offset (" + str(entryIndx) + ") I haven't seen and stored.")
        return self.originalOffset + self.offsets[entryIndx]

    def resetToOffset(self, indx):
        """Forget all recorded codes and move the cursor to index *indx*."""
        # NOTE(review): code 1 is re-seeded at index 0 rather than at indx;
        # checkOffset() overwrites it as soon as it reads a declaration whose
        # code is 1 -- confirm this is intended for non-zero offsets.
        self.offsets = {1 : 0}
        self.index = indx

    def checkOffset(self, entryIndx):
        """Position the cursor just after the code of declaration *entryIndx*.

        The declaration starting at the current cursor is recorded first, so
        the table of known codes grows as the caller walks the section.
        Raises Exception when *entryIndx* is still unknown afterwards.
        """
        # Read the next entry and store its offset no matter what
        saveIndex = self.index
        nextIndx = self.read_uleb128()
        self.offsets[nextIndx] = saveIndex
        if entryIndx in self.offsets:
            self.index = self.offsets[entryIndx]
        else:
            # Report the position of the file this section came from; the
            # previous code reached for a global named ``f`` here.
            raise Exception("Got a request for an offset (" + str(entryIndx) + ") I haven't seen and stored." +
                            " File Offset " + str(self.fileRef.tell()) + " .debug_abbrev max offset " +
                            str(max(self.offsets.keys())) + " file offset ")
        # Skip over the abbreviation code itself; the tag follows it.
        self.read_uleb128()

    def read_uleb128(self):
        """Decode one ULEB128 value at the cursor and advance past it."""
        # Hand the decoder a short window instead of copying the whole rest
        # of the section on every call; 10 bytes covers any 64-bit value.
        advance, value = read_uleb128(self.data[self.index:self.index + 10])
        self.index += advance
        return value
def readBytes(f, numBytes, convert=True):
    """Read *numBytes* bytes from the file object *f*.

    With convert=False the raw bytes are returned as a bytearray.
    With convert=True the bytes are decoded as one little-endian unsigned
    integer; only widths of 1, 2, 4 and 8 bytes are supported.

    Raises ValueError for any other width (the bytes have already been
    consumed from *f* at that point) and struct.error when *f* yields fewer
    bytes than requested.
    """
    bString = f.read(numBytes)
    if not convert:
        return bytearray(bString)
    formats = {1: "<B", 2: "<H", 4: "<I", 8: "<Q"}
    if numBytes not in formats:
        # A bare string cannot be raised; use a real exception type.
        raise ValueError("Unknown length for conversion")
    return struct.unpack(formats[numBytes], bString)[0]
def read_uleb128(f):
    """Decode an unsigned LEB128 number of at most five bytes.

    *f* is either a file-like object (anything with a ``read`` method), in
    which case bytes are consumed from it one at a time, or an indexable
    buffer of integers such as a bytearray, which is decoded from index 0.

    Returns a tuple (bytes_consumed, value).  Prints a message and exits the
    process when the fifth byte has both its continuation bit and any of its
    upper payload bits set.
    """
    # Duck-type instead of comparing against the builtin ``file`` type so
    # that file subclasses and in-memory streams are accepted as well.
    is_stream = hasattr(f, "read")
    value = 0
    for i in range(5):
        if is_stream:
            byte_value = readBytes(f, 1)
        else:
            byte_value = f[i]
        tmp = byte_value & 0x7f
        value = tmp << (i * 7) | value
        if (byte_value & 0x80) != 0x80:
            break
        # NOTE(review): a fifth byte with the continuation bit set but a small
        # payload falls through here and the value is returned truncated.
        if i == 4 and (tmp & 0xf0) != 0:
            print("parse error on uleb128 number")
            sys.exit(1)
    return (i+1, value)
def read_leb128_(f):
    """Decode a signed LEB128 number of at most five bytes into 32 bits.

    *f* is either a file-like object (anything with a ``read`` method) or an
    indexable buffer of integers decoded from index 0.  Every byte consumed
    is echoed to stdout in hex.

    Returns a tuple (bytes_consumed, value).  Prints a message and exits the
    process when the fifth byte has both its continuation bit and any of its
    upper payload bits set.
    """
    # mask[i] sign-extends a value that ended on byte i; bitmask[i] is the
    # sign bit to test within that final byte's payload.
    mask = [0xffffff80, 0xffffc000, 0xffe00000, 0xf0000000, 0]
    bitmask = [0x40, 0x40, 0x40, 0x40, 0x8]
    # Duck-type instead of comparing against the builtin ``file`` type so
    # that file subclasses and in-memory streams are accepted as well.
    is_stream = hasattr(f, "read")
    value = 0
    for i in range(5):
        if is_stream:
            byte_value = readBytes(f, 1)
        else:
            byte_value = f[i]
        print(hex(byte_value))
        tmp = byte_value & 0x7f
        value = tmp << (i * 7) | value
        if (byte_value & 0x80) != 0x80:
            if bitmask[i] & tmp:
                value |= mask[i]
            break
        if i == 4 and (tmp & 0xf0) != 0:
            where = f.tell() - 4 if is_stream else "(not a file)"
            print("parse error on sleb128 number at file offset  %s" % (where,))
            sys.exit(1)
    # Reinterpret the accumulated unsigned 32-bit pattern as a signed int.
    buffer = struct.pack("I", value)
    value, = struct.unpack("i", buffer)
    return (i+1, value)
def read_leb128( f ):
    """Decode a signed LEB128 number of arbitrary length.

    *f* is either a file-like object (anything with a ``read`` method), in
    which case bytes are consumed from it one at a time, or an indexable
    buffer of integers such as a bytearray, which is decoded from index 0.

    Returns a tuple (bytes_consumed, value).
    """
    # Duck-type instead of comparing against the builtin ``file`` type so
    # that file subclasses and in-memory streams are accepted as well.
    is_stream = hasattr(f, "read")
    ret_val = 0
    bytes_used = 0
    while True:
        if is_stream:
            byte_value = readBytes(f, 1)
        else:
            byte_value = f[bytes_used]
        val = byte_value & 0x7F
        ret_val |= val << (7 * bytes_used)
        bytes_used += 1
        if (byte_value & 0x80) == 0:
            break
    # Bit 6 of the final payload is the sign bit: extend it upwards.
    if val & 0x40:
        ret_val |= (-1 << (7 * bytes_used))
    return (bytes_used, ret_val)
def readCString(f):
    """Read a NUL-terminated string from the file object *f*.

    Bytes are consumed up to and including the terminating NUL, which is not
    part of the returned string.

    Raises EOFError when the file ends before a NUL byte is found; previously
    an empty read never compared equal to the terminator, so the loop never
    ended.
    """
    chars = []
    while True:
        b = f.read(1)
        if not b:
            raise EOFError("Reached end of file while reading a NUL-terminated string")
        code = ord(b)
        if code == 0:
            break
        chars.append(chr(code))
    # Join once at the end instead of growing a string byte by byte.
    return "".join(chars)
def expect(f, expected, message=""):
    """Consume len(expected) bytes from *f* and require them to equal *expected*.

    On a mismatch a diagnostic (including the file position the bytes were
    read from and both values in hex) is printed, followed by *message*, and
    the process exits with status 1.
    """
    width = len(expected)
    got = readBytes(f, width, False)
    if expected != got:
        report = [
            "Parsing",
            message + ": from file position",
            f.tell() - width,
            "I expected to get",
            binascii.hexlify(expected),
            "but got",
            binascii.hexlify(got),
        ]
        print(" ".join(str(part) for part in report))
        print(message)
        sys.exit(1)
def discard(f, size, type=""):
    """Consume *size* bytes from *f* and throw them away.

    *type* is a free-form label for the skipped field; it is not used.
    """
    readBytes(f, size, False)
def camel(varName):
    """Turn a space-separated label into a camelCase identifier.

    Each word is title-cased, the spaces are dropped and the first character
    is lower-cased, e.g. "Size Of Raw Data" -> "sizeOfRawData".
    """
    squashed = "".join(varName.title().split(" "))
    return squashed[0].lower() + squashed[1:]
def uncamel(varName):
    """Turn a camelCase identifier back into a spaced label.

    The first character is upper-cased and a space is inserted before every
    later upper-case character, e.g. "sizeOfRawData" -> "Size Of Raw Data".
    """
    pieces = []
    for pos, ch in enumerate(varName):
        if pos == 0:
            pieces.append(ch.upper())
            continue
        if ch.isupper():
            pieces.append(" ")
        pieces.append(ch)
    return "".join(pieces)
def readNoPrint(strName, f, numBytes, convert=True):
    """Read a field from *f* and publish it as a module-level global.

    The global's name is the camelCase form of *strName*; its value is
    whatever readBytes(f, numBytes, convert) returns.  Returns that name.
    """
    key = camel(strName)
    value = readBytes(f, numBytes, convert)
    globals()[key] = value
    return key
def readPrint(strName, f, numBytes, convert=True):
    """Read a field exactly like readNoPrint and echo "<label> <value>"."""
    key = readNoPrint(strName, f, numBytes, convert)
    print("%s %s" % (strName, globals()[key]))
def printAll(d):
    """Print a separator line, then one "<Spaced Key> <value>" line per item of *d*."""
    print("---------------")
    for key in d:
        label = uncamel(key)
        print("%s %s" % (label, d[key]))
# ================================================
def dumpStringTable(dbIsh):
    """Print every NUL-terminated string held in the byte buffer *dbIsh*.

    The first four bytes are skipped.  Characters left over after the last
    NUL are reported on a final "Leftovers" line.
    """
    pending = []
    for code in dbIsh[4:]:
        ch = chr(code)
        if ch == "\x00":
            print("".join(pending))
            pending = []
        else:
            pending.append(ch)
    if pending:
        print("Leftovers in String Table: %s" % (pending,))
# Human-readable descriptions for well-known PE and DWARF section names.
_SECTION_DESCRIPTIONS = {
    ".text": "Executable Code",
    ".data": "global initialized data".title(),
    ".rdata": "global read-only data".title(),
    ".edata": "export tables".title(),
    ".idata": "import tables".title(),
    ".pdata": "exception handling information".title(),
    ".xdata": "exception information, free format".title(),
    ".reloc": "information for relocation of library files".title(),
    ".rsrc": "resources of the executable".title(),
    # The linker-directive section is spelled ".drectve" in PE files; the
    # earlier ".drective" spelling could never match a real section.
    ".drectve": "linker options".title(),
    ".bss": "uninitialized data, free format".title(),
    ".debug_aranges": "Lookup table for mapping addresses to compilation units",
    ".debug_frame": "Call frame information",
    ".debug_info": "Core DWARF information section",
    ".debug_line": "Line number information",
    ".debug_loc": "Location lists used in the DW_AT_location attributes",
    ".debug_macinfo": "Macro information",
    ".debug_pubnames": "Lookup table for global objects and functions",
    ".debug_pubtypes": "Lookup table for global types",
    ".debug_ranges": "Address ranges used in the DW_AT_ranges attributes",
    ".debug_str": "String table used in .debug_info",
    ".debug_types": "Type descriptions ",
}


def sectionNameStr(name):
    """Return *name* annotated with a short description when it is well known.

    Leading and trailing NUL characters are stripped from *name* first.
    Known sections come back as "<name> (<description>)"; anything else is
    returned as just the stripped name.
    """
    name = name.strip("\x00")
    for known, description in _SECTION_DESCRIPTIONS.items():
        # Compare with == (rather than hashing) so non-str name types that
        # compare equal to a str are matched exactly as before.
        if name == known:
            return name + " (" + description + ")"
    return name
def resourceTypeStr(type):
    """Return the title-cased name of a numeric resource type.

    Unrecognised type numbers yield "Unknown".
    """
    known_types = (
        (1, "cursor"),
        (2, "bitmap"),
        (3, "icon"),
        (4, "menu"),
        (5, "dialog box"),
        (6, "string table entry"),
        (7, "font directory"),
        (8, "font"),
        (9, "accelerator table"),
        (10, "application defined resource (raw data)"),
        (11, "message table entry"),
        (12, "group cursor"),
        (14, "group icon"),
        (16, "version information"),
        (17, "dlginclude"),
        (19, "plug and play resource"),
        (20, "VXD"),
        (21, "animated cursor"),
        (22, "animated icon"),
        (23, "HTML"),
        (24, "side-by-side assembly manifest"),
    )
    for code, label in known_types:
        if type == code:
            return label.title()
    return "Unknown"
# Readers for attribute values, keyed by DW_FORM_* name.  Each entry takes the
# open file positioned at the value and returns the decoded value, consuming
# exactly the bytes that belong to it.
#
# Several readers depend on per-compilation-unit state that the main script
# stores in module globals while parsing the unit header: dwarfFormat (32 or
# 64), addressSize and version.  addressSize and version fall back to 4 when
# they have not been set yet.

def _readSectionOffset(f):
    # Section offsets are 4 bytes in 32-bit DWARF and 8 bytes in 64-bit DWARF.
    return readBytes(f, 4) if dwarfFormat == 32 else readBytes(f, 8)

def _readAddress(f):
    # Target addresses are as wide as the unit header's address_size field.
    return readBytes(f, globals().get('addressSize', 4))

def _readRefAddr(f):
    # DWARF 2 stores DW_FORM_ref_addr as an address; later versions store it
    # as a section offset.
    if globals().get('version', 4) == 2:
        return _readAddress(f)
    return _readSectionOffset(f)

def _readIndirect(f):
    # The real form is given inline as a ULEB128, followed by the value.
    actual_form = read_uleb128(f)[1]
    return DW_FORM_FUNCs[DW_FORM(actual_form)](f)

DW_FORM_FUNCs = {}
DW_FORM_FUNCs['DW_FORM_addr'] = _readAddress
DW_FORM_FUNCs['DW_FORM_block2'] = lambda f : readBytes(f, readBytes(f, 2), False)
DW_FORM_FUNCs['DW_FORM_block4'] = lambda f : readBytes(f, readBytes(f, 4), False)
DW_FORM_FUNCs['DW_FORM_data2'] = lambda f : readBytes(f, 2)
DW_FORM_FUNCs['DW_FORM_data4'] = lambda f : readBytes(f, 4)
DW_FORM_FUNCs['DW_FORM_data8'] = lambda f : readBytes(f, 8)
DW_FORM_FUNCs['DW_FORM_string'] = lambda f : readCString(f)
DW_FORM_FUNCs['DW_FORM_block'] = lambda f : readBytes(f, read_uleb128(f)[1], False)
DW_FORM_FUNCs['DW_FORM_block1'] = lambda f : readBytes(f, readBytes(f, 1), False)
DW_FORM_FUNCs['DW_FORM_data1'] = lambda f : readBytes(f, 1)
DW_FORM_FUNCs['DW_FORM_flag'] = lambda f : readBytes(f, 1)
DW_FORM_FUNCs['DW_FORM_sdata'] = lambda f : read_leb128(f)[1]
DW_FORM_FUNCs['DW_FORM_strp'] = _readSectionOffset
DW_FORM_FUNCs['DW_FORM_udata'] = lambda f : read_uleb128(f)[1]
DW_FORM_FUNCs['DW_FORM_ref_addr'] = _readRefAddr
DW_FORM_FUNCs['DW_FORM_ref1'] = lambda f : readBytes(f, 1)
DW_FORM_FUNCs['DW_FORM_ref2'] = lambda f : readBytes(f, 2)
DW_FORM_FUNCs['DW_FORM_ref4'] = lambda f : readBytes(f, 4)
DW_FORM_FUNCs['DW_FORM_ref8'] = lambda f : readBytes(f, 8)
DW_FORM_FUNCs['DW_FORM_ref_udata'] = lambda f : read_uleb128(f)[1]
DW_FORM_FUNCs['DW_FORM_indirect'] = _readIndirect
DW_FORM_FUNCs['DW_FORM_sec_offset'] = _readSectionOffset
DW_FORM_FUNCs['DW_FORM_exprloc'] = lambda f : readBytes(f, read_uleb128(f)[1], False)
# The attribute's mere presence is the value; nothing is stored in the DIE.
DW_FORM_FUNCs['DW_FORM_flag_present'] = lambda f : 1
# A type signature is always eight bytes.
DW_FORM_FUNCs['DW_FORM_ref_sig8'] = lambda f : readBytes(f, 8)
# Pretty-printers for decoded attribute values, keyed by DW_AT_* name.
# Attributes without an entry are printed as-is by the caller.
DW_ATTRIBUTE_FUNCs = {}
# Unknown language / encoding codes raise KeyError.
DW_ATTRIBUTE_FUNCs['DW_AT_language'] = lambda s : DW_LANGs[s]
DW_ATTRIBUTE_FUNCs['DW_AT_low_pc'] = lambda s : hex(s)
DW_ATTRIBUTE_FUNCs['DW_AT_high_pc'] = lambda s : hex(s)
DW_ATTRIBUTE_FUNCs['DW_AT_encoding'] = lambda s : DW_ATEs[s]
# A location is either a raw block of bytes or a number.  Test for the byte
# buffer rather than for ``int`` so that every integer type is shown as hex.
DW_ATTRIBUTE_FUNCs['DW_AT_location'] = lambda s : binascii.hexlify(s) if isinstance(s, (bytes, bytearray)) else hex(s)
# ================================================
if __name__ == "__main__":
    # Script entry point.  For every PE file named on the command line this
    # walks the DOS header, PE header, optional header, data directories and
    # section table, then locates .debug_abbrev and .debug_info and walks the
    # DWARF compilation units, optionally printing each DIE and attribute.
    #
    # Fields read through readPrint()/readNoPrint() are stored as module
    # globals under their camelCased label (e.g. "Number of Sections" becomes
    # numberOfSections); that is where names such as machine,
    # symbolTablePointer, compileUnitLength and version below come from.
    parser = argparse.ArgumentParser(description='Process some integers.')
    parser.add_argument('pefiles', metavar='F', type=argparse.FileType('rb', 0), nargs='+',
                        help='pe files to parse')
    parser.add_argument('-d', '--dwarf', required=False, action="store_true", help='parse dwarf sections')
    parser.add_argument('-s', '--dwarf-skip', required=False, action="store_true", help='skip through dwarf compilation units')
    parser.add_argument('--dwarf-force', required=False, action="store_true", help='Force-parses the first few units in an errored CU')
    parser.add_argument('--dwarf-ciu', required=False, type=int, action="append", help='parse this specific dwarf compilation unit')
    args = parser.parse_args()
    # --dwarf-ciu is exclusive with the other modes and implies skipping every
    # unit that was not explicitly requested.
    if args.dwarf_ciu:
        if args.dwarf_force or args.dwarf_skip:
            print "--dwarf-ciu cannot be used with other dwarf arguments"
            sys.exit(1)
        args.dwarf_skip = True
    for f in args.pefiles:
        print "==============================================="
        print "Parsing", f.name
        # Determine the file size so section bounds can be validated later.
        fileSize = 0
        f.seek(0, os.SEEK_END)
        fileSize = f.tell()
        f.seek(0, os.SEEK_SET)
        print "--------------------------------"
        print "DOS Header"
        expect(f, 'MZ')
        # Skip ahead so that the next four bytes end at offset 0x40.
        discard(f, 0x40 - 6)
        # Only files whose PE header offset field is exactly 0x80 are
        # accepted; expect() exits the process otherwise.
        expect(f, bytearray([0x80, 0x00, 0x00, 0x00]), "PE Header Offset")
        # Skip over the "This program cannot be run..."
        discard(f, 0x80 - f.tell())
        print "--------------------------------"
        print "PE Header"
        expect(f, "PE\x00\x00")
        readPrint("Machine", f, 2)
        if machine == 0x14c:
            print "\tIMAGE_FILE_MACHINE_I386"
        elif machine == 0x8664:
            print "\tIMAGE_FILE_MACHINE_AMD64"
        else:
            print "\tUnknown Machine Type"
        readPrint("Number of Sections", f, 2)
        discard(f, 4, "Time Date Stamp")
        readPrint("Symbol Table Pointer", f, 4)
        readPrint("Number Of Symbols", f, 4)
        readPrint("Size Of Optional Header", f, 2)
        readPrint("Characteristics", f, 2)
        # Go Parse the Symbol Table
        print "--------------------------------"
        print "Taking a detour and parsing the string table...."
        origLocation = f.tell()
        f.seek(symbolTablePointer, os.SEEK_SET)
        print symbolTablePointer, "+ (", numberOfSymbols, "* 18)"
        # Step over the symbol records (18 bytes each) to reach the string
        # table that follows them.
        discard(f, numberOfSymbols * 18)
        readPrint("String Table Size", f, 4)
        # Rewind over the size field so the captured table includes it; the
        # "/<n>" section-name indexes below are applied to this buffer.
        f.seek(f.tell() - 4, os.SEEK_SET)
        stringDatabaseIsh = readBytes(f, stringTableSize, False)
        f.seek(origLocation, os.SEEK_SET)
        #dumpStringTable(stringDatabaseIsh)
        print "--------------------------------"
        print "Optional Header"
        readPrint("PE Format", f, 2)
        # The magic selects the width of the address-sized fields below.
        if peFormat == 0x10b:
            peFormat = "PE32"
            variableAddressSize = 4
        elif peFormat == 0x20b:
            peFormat = "PE32+"
            variableAddressSize = 8
        else:
            raise Exception("Unknown PE Format")
        print "PE Format", peFormat
        discard(f, 1, "Major Linker Version")
        discard(f, 1, "Minor Linker Version")
        discard(f, 4, "Size of Code")
        discard(f, 4, "Size of Init Data")
        discard(f, 4, "Size of UnInit Data")
        discard(f, 4, "Address of Entry Point")
        discard(f, 4, "Base of Code")
        # This field is only read for the PE32 layout.
        if peFormat == "PE32":
            discard(f, 4, "Base of Data")
        discard(f, variableAddressSize, "Image Base")
        discard(f, 4, "Section Alignment")
        discard(f, 4, "File Alignment")
        discard(f, 2, "Major OS Version")
        discard(f, 2, "Minor OS Version")
        discard(f, 2, "Major Image Version")
        discard(f, 2, "Minor Image Version")
        discard(f, 2, "Major SubSystem Version")
        discard(f, 2, "Minor SubSystem Version")
        discard(f, 4, "Win32 Version Value")
        readPrint("Size Of Image", f, 4)
        readPrint("Size Of Headers", f, 4)
        discard(f, 4, "checksum")
        discard(f, 2, "Subsystem")
        discard(f, 2, "DLL Characteristics")
        discard(f, variableAddressSize, "Size of Stack Reserve")
        discard(f, variableAddressSize, "Size of Stack Commit")
        discard(f, variableAddressSize, "Size of Heap Reserve")
        discard(f, variableAddressSize, "Size of Heap Commit")
        discard(f, 4, "Loader Flags")
        # NOTE(review): the directory count is discarded and 16 entries are
        # always read below -- confirm the inputs always carry 16.
        discard(f, 4, "Number of RVA and Sizes")
        print "--------------------------------"
        print "Data Directories"
        directoryNames = ["Export", "Import", "Resource", "Exception", "Security", "BaseRelocationTable",
                          "DebugDirectory", "CopyrightOrArchitectureSpecificData", "GobalPtr", "TLSDirectory",
                          "LoadConfigurationDiectory", "BoundImportDirectory", "ImportAddressTable",
                          "DelayLoadImportDescriptors", "COMRuntimedescriptor", "Reserved"]
        print len(directoryNames), len(directoryNames) * 8
        dataDirectories = {}
        # Each directory entry is a 4-byte address followed by a 4-byte size;
        # only entries with a non-zero size are printed.
        for d in directoryNames:
            dataDirectories[d] = OrderedDict([
                ('name', d),
                ('virtualAddress', readBytes(f, 4)),
                ('size', readBytes(f, 4))
            ])
            if dataDirectories[d]['size']:
                printAll(dataDirectories[d])
        print "--------------------------------"
        print "Sections"
        sections = []
        for i in range(numberOfSections):
            readNoPrint("Section Name", f, 8, False)
            readNoPrint("Virtual Size", f, 4)
            readNoPrint("Virtual Address", f, 4)
            readNoPrint("Size Of Raw Data", f, 4)
            readNoPrint("Pointer To Raw Data", f, 4)
            readNoPrint("Pointer To Relocations", f, 4)
            readNoPrint("Pointer To Line Numbers", f, 4)
            readNoPrint("Number Of Relocations", f, 2)
            readNoPrint("Number Of Line Numbers", f, 2)
            readNoPrint("Characteristics", f, 4)
            realSectionName = str(sectionName).strip("\x00")
            # A name of the form "/<n>" is an index into the string table
            # captured above; the real name runs from there to the next NUL.
            if realSectionName and realSectionName[0] == "/":
                indx = int(realSectionName[1:])
                s = stringDatabaseIsh[indx:]
                s_end = s.find("\x00")
                realSectionName = s[:s_end]
            section = OrderedDict([
                ('sectionName', realSectionName),
                ('sectionNameOriginal', str(sectionName)),
                ('sectionNameDetailed', sectionNameStr(realSectionName)),
                ('virtualSize', virtualSize),
                ('virtualAddress', virtualAddress),
                ('sizeOfRawData', sizeOfRawData),
                ('pointerToRawData', pointerToRawData),
                ('pointerToRelocations', pointerToRelocations),
                ('pointerToLineNumbers', pointerToLineNumbers),
                ('numberOfRelocations', numberOfRelocations),
                ('numberOfLineNumbers', numberOfLineNumbers),
                ('characteristics', characteristics)
            ])
            printAll(section)
            sections.append(section)
            # Refuse sections whose raw data lies outside the file.
            if pointerToRawData > fileSize:
                print "Error: Pointer To Raw Data for this section is greater than the filesize!"
                sys.exit(1)
            elif pointerToRawData + sizeOfRawData > fileSize:
                print "Error: Size of Raw Data for this section is greater than the filesize!"
                sys.exit(1)
        print "--------------------------------"
        print "Individual Sections"
        # First pass: load the whole .debug_abbrev section into memory.
        # NOTE(review): debug_abbrev and debug_abbrev_size are only assigned
        # when such a section exists, yet the .debug_info pass uses them
        # unconditionally.
        for s in sections:
            if s['sectionName'] == ".debug_abbrev":
                print "--------------------------------"
                print ".debug_abbrev"
                f.seek(s['pointerToRawData'], os.SEEK_SET)
                debug_abbrev = DebugAbbrev(f, s['pointerToRawData'], readBytes(f, s['sizeOfRawData'], False))
                debug_abbrev_size = s['sizeOfRawData']
        # Second pass: walk the compilation units in .debug_info.
        for s in sections:
            if s['sectionName'] == ".debug_info":
                print "--------------------------------"
                print ".debug_info"
                f.seek(s['pointerToRawData'], os.SEEK_SET)
                while f.tell() < s['pointerToRawData'] + s['sizeOfRawData']:
                    compileUnitBegin = f.tell()
                    # A 4-byte length of 0xFFFFFFFF switches to the 64-bit
                    # format, where the real length follows in 8 bytes.
                    # dwarfFormat is read by the DW_FORM_FUNCs readers.
                    dwarfFormat = 32
                    readNoPrint("Compile Unit Length", f, 4)
                    if compileUnitLength == 0xFFFFFFFF:
                        dwarfFormat = 64
                        readNoPrint("Compile Unit Length", f, 8)
                    # A zero length is treated as the start of trailing
                    # padding, which must be zero all the way to the end.
                    if compileUnitLength == 0:
                        print "I am", (s['pointerToRawData'] + s['sizeOfRawData']) - f.tell(), "bytes away from the end of the section."
                        print "Reading zero's until I reach it."
                        while f.tell() < s['pointerToRawData'] + s['sizeOfRawData']:
                            b = readBytes(f, 1)
                            if b != 0:
                                raise Exception("I was reading padding but then I found a non-zero byte at file offset " + str(f.tell()-1))
                        continue
                    readNoPrint("Version", f, 2)
                    if version not in (2, 4):
                        raise Exception("I can't parse DWARF Version " + str(version))
                    if dwarfFormat == 32:
                        readNoPrint("Debug Abbrev Offset", f, 4)
                    else:
                        readNoPrint("Debug Abbrev Offset", f, 8)
                    debug_abbrev.resetToOffset(debugAbbrevOffset)
                    readNoPrint("Address Size", f, 1)
                    # NOTE(review): the +4 accounts for a 4-byte length field;
                    # in the 64-bit branch above 12 bytes were consumed before
                    # the unit body -- confirm against the DWARF spec.
                    nextUnitAt = compileUnitBegin + compileUnitLength + 4
                    # A unit is flagged as broken when its abbreviation offset
                    # is larger than the .debug_abbrev section.
                    thisUnitErrors = debugAbbrevOffset > debug_abbrev_size
                    print "0x" + format(compileUnitBegin - s['pointerToRawData'], "08x") + ": Compile Unit: length =",
                    print "0x" + format(compileUnitLength, "08x"), "version = 0x" + format(version, "04x"),
                    print "abbr_offset = 0x" + format(debugAbbrevOffset, "04x"), "addr_size = 0x" + format(addressSize, "02x"),
                    print "(next unit at 0x" + format(nextUnitAt - s['pointerToRawData'], "08x") + ")", "file position =", compileUnitBegin,
                    print "__ERROR__" if thisUnitErrors else ""
                    # Skip mode jumps over every unit except those listed via
                    # --dwarf-ciu (matched against the unit's file position).
                    # Force mode jumps over every unit that is NOT flagged.
                    if args.dwarf_skip:
                        if not args.dwarf_ciu or compileUnitBegin not in args.dwarf_ciu:
                            f.seek(nextUnitAt, os.SEEK_SET)
                    elif args.dwarf_force and not thisUnitErrors:
                        f.seek(nextUnitAt, os.SEEK_SET)
                    indent = ""
                    # After a seek to nextUnitAt this loop body never runs.
                    while f.tell() < nextUnitAt:
                        if not thisUnitErrors:
                            # Each DIE starts with its abbreviation code; a
                            # code of 0 ends a sibling chain (one level up).
                            _, tagOffset = read_uleb128(f)
                            if tagOffset == 0:
                                if len(indent) >= 2:
                                    indent = indent[:-2]
                                continue
                            # Position the abbreviation cursor on this code's
                            # declaration, then read its tag and child flag.
                            debug_abbrev.checkOffset(tagOffset)
                            top_tag = DW_TAG(debug_abbrev.read_uleb128())
                            children = debug_abbrev.read_uleb128() == DW_CHILDREN_yes
                            if args.dwarf or args.dwarf_ciu:
                                print indent, top_tag, "[" + str(tagOffset) + "]", "*" if children else ""
                            # NOTE(review): indent grows by the literal below
                            # but is trimmed two characters at a time.
                            indent += " "
                            # Walk the (name, form) pairs of the declaration,
                            # reading each value from .debug_info; a (0, 0)
                            # pair ends the list.
                            while True:
                                attribute_name = debug_abbrev.read_uleb128()
                                #print indent, "Attribute Name", hex(attribute_name)
                                attribute_form = debug_abbrev.read_uleb128()
                                #print indent, "Attribute Form", hex(attribute_form)
                                if attribute_name == 0 and attribute_form == 0:
                                    if not children:
                                        indent = indent[:-2]
                                    break
                                try:
                                    value = DW_FORM_FUNCs[DW_FORM(attribute_form)](f)
                                    if args.dwarf or args.dwarf_ciu:
                                        print indent, (DW_AT(attribute_name) + " [" + hex(attribute_name) + "]").ljust(30),
                                        print (DW_FORM(attribute_form) + " [" + hex(attribute_form) + "]").ljust(30),
                                        print DW_ATTRIBUTE_FUNCs.get(DW_AT(attribute_name), lambda s:s)(value)
                                # NOTE(review): any failure is only reported;
                                # the loop then carries on with the next
                                # attribute.  The DW_AT()/DW_FORM() calls in
                                # this handler can raise again themselves.
                                except:
                                    print "Exception raised."
                                    print "\t File position:", f.tell()
                                    print "\t Top Tag", top_tag, "[" + str(tagOffset) + "]", "*" if children else ""
                                    print "\t Debug Abbrev Offset", debug_abbrev.translateToFileOffset(tagOffset)
                                    print "\t Attribute Name", hex(attribute_name)
                                    print "\t Attribute Form", hex(attribute_form)
                                    print "\t Attribute Name", DW_AT(attribute_name)
                                    print "\t Attribute Form", DW_FORM(attribute_form)
                            if args.dwarf:
                                print ""
                        else: # thisUnitErrors
                            # The abbreviation table is unusable, so the unit
                            # contents are guessed at instead of decoded.
                            _, tagOffset = read_uleb128(f)
                            if tagOffset == 0:
                                if len(indent) >= 2:
                                    indent = indent[:-2]
                                continue
                            # Guesses
                            # Type to parse based on common first tags, using DW_AT_language as an
                            # indicator if this was successfull or not
                            def langBased(f):
                                # Assume a compile-unit DIE with the four
                                # attributes listed below.  Returns False as
                                # soon as one of them fails to decode,
                                # otherwise prints what was recovered and
                                # returns True.
                                output = ""
                                indent = ""
                                top_tag = 'DW_TAG_compile_unit'
                                children = True
                                output += indent + " " + top_tag + " [" + str(tagOffset) + "] " + ("*\n" if children else "\n")
                                indent += " "
                                children = [
                                    (0x25, 0x08), #('DW_AT_producer', 'DW_FORM_string'),
                                    (0x13, 0x0b), #('DW_AT_language', 'DW_FORM_data1'),
                                    (0x03, 0x08), #('DW_AT_name', 'DW_FORM_string'),
                                    (0x1b, 0x08), #('DW_AT_comp_dir', 'DW_FORM_string')
                                ]
                                for c in children:
                                    attribute_name = c[0]
                                    attribute_form = c[1]
                                    try:
                                        value = DW_FORM_FUNCs[DW_FORM(attribute_form)](f)
                                        output += indent + " Recovered: " + (DW_AT(attribute_name) + " [" + hex(attribute_name) + "]").ljust(30)
                                        output += (DW_FORM(attribute_form) + " [" + hex(attribute_form) + "]").ljust(30)
                                        output += DW_ATTRIBUTE_FUNCs.get(DW_AT(attribute_name), lambda s:s)(value) + "\n"
                                    except:
                                        return False
                                print output
                                return True
                            def stringScan(f):
                                # Fallback: scan forward for runs of printable
                                # characters ended by a NUL and print those.
                                # Always returns True.
                                # NOTE(review): f is not rewound after a
                                # failed langBased attempt, so the scan starts
                                # wherever that attempt stopped reading.
                                byteLimit = 250
                                bytesRead = 0
                                output = ""
                                this_str = ""
                                # NOTE(review): the loop also keeps going past
                                # byteLimit for as long as this_str is empty
                                # -- confirm that is the intended condition.
                                while bytesRead < byteLimit or not this_str:
                                    b = readBytes(f, 1)
                                    if 0x20 <= b and b <= 0x7E:
                                        this_str += chr(b)
                                    else:
                                        if this_str:
                                            # If a string ends in a null byte we believe it is a string
                                            if b == 0 and len(this_str) > 1:
                                                output += " Recovered: " + this_str + "\n"
                                                this_str = ""
                                            # If it didn't end in a null byte, we assume it was a fluke
                                            else:
                                                this_str = ""
                                    bytesRead += 1
                                print output
                                return True
                            # Try each strategy in order and stop at the first
                            # one that reports success.
                            guessTypes = [langBased, stringScan]
                            for g in guessTypes:
                                if not g(f):
                                    continue
                                else:
                                    break
                            # Whatever was recovered, resume at the next unit.
                            f.seek(nextUnitAt, os.SEEK_SET)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment