Skip to content

Instantly share code, notes, and snippets.

@jquirke
Forked from joevt/joedwarftohpt.py
Last active October 30, 2022 19:18
Show Gist options
  • Save jquirke/132d18b7bba9e3f96368bbc43230b755 to your computer and use it in GitHub Desktop.
Save jquirke/132d18b7bba9e3f96368bbc43230b755 to your computer and use it in GitHub Desktop.
Python script to process dSYM information from dwarfdump output
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import tempfile
import subprocess
import re
import uuid
import pprint
#import lldb
#=========================================================================================
# Collects the Hopper type records that the Add* methods below build from DWARF DIEs.
class Hopper(dict):
# Table of known base types. AddBaseType first looks for an entry whose "name", "size" and
# "encoding" all equal the DIE's AT_name / AT_byte_size / AT_encoding; failing that it picks
# the first entry with "preferred" True whose size and encoding match and emits a typedef to it.
# NOTE(review): the "uuid" strings are presumably Hopper's built-in type identifiers - confirm.
BaseTypes = [
# Base types that exist in Hopper:
{"uuid":"054086d7b17b4685971643925db72c00", "name":"void" , "size":None , "preferred":False, "encoding":None },
{"uuid":"054086d7b17b4685971643925db72c01", "name":"int8_t" , "size":1 , "preferred":True , "encoding":"DW_ATE_signed" },
{"uuid":"054086d7b17b4685971643925db72c02", "name":"uint8_t" , "size":1 , "preferred":True , "encoding":"DW_ATE_unsigned" },
{"uuid":"054086d7b17b4685971643925db72c03", "name":"int16_t" , "size":2 , "preferred":True , "encoding":"DW_ATE_signed" },
{"uuid":"054086d7b17b4685971643925db72c04", "name":"uint16_t" , "size":2 , "preferred":True , "encoding":"DW_ATE_unsigned" },
{"uuid":"054086d7b17b4685971643925db72c05", "name":"int32_t" , "size":4 , "preferred":True , "encoding":"DW_ATE_signed" },
{"uuid":"054086d7b17b4685971643925db72c06", "name":"uint32_t" , "size":4 , "preferred":True , "encoding":"DW_ATE_unsigned" },
{"uuid":"054086d7b17b4685971643925db72c07", "name":"int64_t" , "size":8 , "preferred":True , "encoding":"DW_ATE_signed" },
{"uuid":"054086d7b17b4685971643925db72c08", "name":"uint64_t" , "size":8 , "preferred":True , "encoding":"DW_ATE_unsigned" },
{"uuid":"054086d7b17b4685971643925db72c09", "name":"float" , "size":4 , "preferred":True , "encoding":"DW_ATE_float" },
{"uuid":"054086d7b17b4685971643925db72c0a", "name":"double" , "size":8 , "preferred":True , "encoding":"DW_ATE_float" },
{"uuid":"054086d7b17b4685971643925db72c0b", "name":"int" , "size":None , "preferred":False, "encoding":"DW_ATE_signed" },
{"uuid":"054086d7b17b4685971643925db72c0c", "name":"unsigned int" , "size":None , "preferred":False, "encoding":"DW_ATE_unsigned" },
{"uuid":"054086d7b17b4685971643925db72c0d", "name":"long" , "size":8 , "preferred":False, "encoding":"DW_ATE_signed" },
{"uuid":"054086d7b17b4685971643925db72c0e", "name":"unsigned long" , "size":8 , "preferred":False, "encoding":"DW_ATE_unsigned" },
{"uuid":"054086d7b17b4685971643925db72c0f", "name":"long long" , "size":8 , "preferred":False, "encoding":"DW_ATE_signed" },
{"uuid":"054086d7b17b4685971643925db72c10", "name":"unsigned long long" , "size":8 , "preferred":False, "encoding":"DW_ATE_unsigned" },
{"uuid":"054086d7b17b4685971643925db72c11", "name":"char" , "size":1 , "preferred":True , "encoding":"DW_ATE_signed_char" },
{"uuid":"054086d7b17b4685971643925db72c12", "name":"short" , "size":2 , "preferred":False, "encoding":"DW_ATE_signed" },
{"uuid":"054086d7b17b4685971643925db72c13", "name":"unsigned char" , "size":1 , "preferred":True , "encoding":"DW_ATE_unsigned_char"},
{"uuid":"054086d7b17b4685971643925db72c14", "name":"unsigned short" , "size":2 , "preferred":False, "encoding":"DW_ATE_unsigned" },
{"uuid":"054086d7b17b4685971643925db72c15", "name":"bool" , "size":1 , "preferred":True , "encoding":"DW_ATE_boolean" },
# Base types that don't exist in Hopper with substitutes that exist in Hopper:
# (these two deliberately reuse the uuid of uint16_t / uint32_t listed above)
{"uuid":"054086d7b17b4685971643925db72c04", "name":"char16_t" , "size":2 , "preferred":True , "encoding":"DW_ATE_UTF" },
{"uuid":"054086d7b17b4685971643925db72c06", "name":"char32_t" , "size":4 , "preferred":True , "encoding":"DW_ATE_UTF" },
# Base types that don't exist in Hopper:
{"uuid":"054086d7b17b4685971643925db72e00", "name":"long double" , "size":16 , "preferred":True , "encoding":"DW_ATE_float" },
]
# One generated type record. AddType creates an instance and stores "type" (one of the TYPE_*
# codes below), "DIE" and "name" on it as attributes; the other Add* methods add more fields.
class Type(dict):
# Type : 16 byte type uuid, 4 byte len + name, 2 byte type
# TYPE_* are the 2 byte type codes; the trailing comment on each line describes the payload
# that follows the common header for that kind of type.
TYPE_pointer = 0x0011 # type uuid
TYPE_struct = 0x0012 # 4 byte numfields * { 4 byte len + name, type uuid, byte format, 4 byte len + comment } null
TYPE_union = 0x0013 # 4 byte numUnions * { 4 byte len + name, type uuid, byte format, 4 byte null }
TYPE_array = 0x0014 # 4 byte count, type uuid
TYPE_typedef = 0x0015 # 4 byte len + name, type uuid
TYPE_function = 0x001b # flag1 (ff), return type uuid, 2 byte numParams * {4 byte len + name, type uuid, byte format? }, ff=variadic, extra1 (6 null bytes), ff=no return, extra2 (0700=user input, or 0100=header import)
TYPE_enumeration = 0x001c # extra1 (00=user enum, 04=built-in enums), 4 byte numEnums * { 4 byte len + name, 8 byte signed value }
# NOTE(review): FORMAT_* are presumably the values of the "byte format" field mentioned in the
# struct/union/function layouts above - confirm; nothing in this part of the file reads them.
FORMAT_DEFAULT = 0
FORMAT_HEXADECIMAL = 1
FORMAT_DECIMAL = 2
FORMAT_OCTAL = 3
FORMAT_CHARACTER = 4
FORMAT_STACKVARIABLE = 5
FORMAT_OFFSET = 6
FORMAT_ADDRESS = 7
FORMAT_FLOAT = 8
FORMAT_BINARY = 9
FORMAT_STRUCTURED = 10
FORMAT_ENUM = 11
FORMAT_ADDRESS_DIFF=12
# NOTE(review): the next three look like flag bits meant to be OR-ed with a base format - confirm.
FORMAT_NEGATE = 0x20
FORMAT_LEADINGZEROES = 0x40
FORMAT_SIGNED = 0x80
# Every Type record created by AddType, in creation order.
# Both containers are class attributes, so they are shared by all Hopper instances.
Types = []
# Maps each uuid generated by NewUUID to the DIE it was generated for (used to detect collisions).
UUIDs = {}
def NewUUID(self, DIE):
    """Give DIE a deterministic uuid and register it in self.UUIDs.

    The uuid is a uuid5 (URL namespace) of the compile unit's source path
    followed by ":0x<DIE address>". The compile directory is prepended to
    the unit name unless the name already contains it. Problems (uuid
    already present, or collision with a registered uuid) are only printed.
    """
    if hasattr(DIE, 'uuid'):
        print ("Error: uuid already created :0x%08x:" % DIE.address)
        return
    unit = DIE.compile_unit
    source_path = unit.AT_name
    if unit.AT_comp_dir not in source_path:
        source_path = unit.AT_comp_dir + source_path
    DIE.uuid = uuid.uuid5(uuid.NAMESPACE_URL, source_path + (":0x%08x" % DIE.address))
    if DIE.uuid in self.UUIDs:
        print ("Error: uuid collision :0x%08x:" % DIE.address)
    else:
        self.UUIDs[DIE.uuid] = DIE
def AddType(self, name, typetype, DIE):
    """Create a Type record for DIE, link the two, and register it.

    The record gets "type" (a Type.TYPE_* code), "DIE" and "name"; the DIE
    gets a back reference in DIE.type and a uuid via NewUUID. The record is
    appended to self.Types and returned.
    """
    record = self.Type()
    record.name = name
    record.type = typetype
    record.DIE = DIE
    DIE.type = record
    self.NewUUID(DIE)
    self.Types.append(record)
    return record
def AddFunctionPointer(self, DIE, name, attype):
    """Register a function type record for DIE described by the DIE attype.

    The record's returntype is attype's type (None when it has none).
    Each TAG_formal_parameter child with a type becomes a
    {"name", "attype"} entry in params; an unnamed artificial parameter is
    called "this", any other unnamed parameter gets name None. A
    TAG_unspecified_parameters child marks the record variadic. Anything
    else is reported with a printed error.
    """
    # Find artificial
    # ^0x\w+: +TAG_formal_parameter.*\n( +AT_.*\n)* +AT_artificial.*\n( +AT_.*\n)*
    record = self.AddType(name, self.Type.TYPE_function, DIE)
    record.returntype = attype.GetType() if attype.HasType() else None
    record.variadic = False
    record.params = []
    for child in attype.children:
        if child.tag == "TAG_unspecified_parameters":
            record.variadic = True
            continue
        if child.tag != "TAG_formal_parameter" or not child.HasType():
            print("Error: unknown parameter type :0x%08x:" % child.address)
            continue
        if hasattr(child, "AT_name"):
            param_name = child.AT_name
        elif hasattr(child, "AT_artificial"):
            param_name = "this"
        else:
            param_name = None
        record.params.append({"name":param_name, "attype":child.GetType()})
def AddPointerToMember(self, DIE, name, attype):
    """Register a pointer-to-member DIE as a two-field struct record.

    Two synthetic TAG_pointer_type DIEs are created at DIE.address+1 (the
    function pointer) and DIE.address+2 (pointer to the containing type),
    entered in the dSYM lookup table, given their own type records, and
    used as the two unnamed fields of the struct record for DIE.

    Fixes over the previous version: AddFunctionPointer/AddPointer are now
    called through self (they were bare names and raised NameError), and
    the function pointer's target falls back to the attype argument when
    DIE carries no "attype" attribute.
    """
    # Find TAG_ptr_to_member_type
    # ^0x\w+: +TAG_ptr_to_member_type.*\n( +AT_.*\n)*
    record = self.AddType(name, self.Type.TYPE_struct, DIE)

    DIEf = DIEDict()
    DIEf.dSYM = DIE.dSYM
    DIEf.address = DIE.address+1
    DIEf.tag = "TAG_pointer_type"
    DIEf.attype = getattr(DIE, "attype", attype)
    DIEf.compile_unit = DIE.compile_unit
    DIEf.dSYM.DIELookup[DIEf.address] = DIEf

    DIEc = DIEDict()
    DIEc.dSYM = DIE.dSYM
    DIEc.address = DIE.address+2
    DIEc.tag = "TAG_pointer_type"
    DIEc.attype = DIE.GetContainerType()
    DIEc.compile_unit = DIE.compile_unit
    DIEc.dSYM.DIELookup[DIEc.address] = DIEc

    self.AddFunctionPointer(DIEf, None, attype)
    self.AddPointer(DIEc, None, DIEc.attype)
    record.fields = [{"name":None, "attype":DIEf}, {"name":None, "attype":DIEc}]
def AddPointer(self, DIE, name, attype):
    """Register a pointer type record for DIE whose target is attype."""
    pointer_record = self.AddType(name, self.Type.TYPE_pointer, DIE)
    pointer_record.attype = attype
def AddStruct(self, DIE, name, child):
    """Register a struct type record for DIE with an empty field list.

    The child argument is accepted but not used here.
    """
    # Find multiple inheritance
    # 0x\w+: +TAG_inheritance.*\n( +AT_.*\n)*\n0x(\w+): +TAG_inheritance.*
    # Find bit fields
    # 0x\w+: +TAG_.*\n( +AT_.*\n)* +AT_\w*bit_offset.*\n( +AT_.*\n)*
    struct_record = self.AddType(name, self.Type.TYPE_struct, DIE)
    struct_record.fields = list()
def AddClass(self, DIE, name, child):
    """Register a class as a struct type record with an empty field list.

    The child argument is accepted but not used here.
    """
    class_record = self.AddType(name, self.Type.TYPE_struct, DIE)
    class_record.fields = list()
def AddTypedef(self, DIE, name, attype, atuuid):
    """Register a typedef record for DIE.

    attype is the target DIE, or None when the target is a Hopper base
    type, in which case atuuid carries that base type's uuid instead.

    Fix: the record's atuuid is now the atuuid argument; previously the
    uuid *module* was stored by mistake.
    """
    # attype is None for base type, use uuid instead
    record = self.AddType(name, self.Type.TYPE_typedef, DIE)
    record.attype = attype
    record.atuuid = atuuid
def AddBaseType(self, DIE):
    """Map a DWARF base type DIE onto an entry of self.BaseTypes.

    An entry matching name, size and encoding gives the DIE that entry's
    uuid directly and marks it with baseHopperType = True. Otherwise the
    first "preferred" entry with the same size and encoding becomes the
    target of a typedef named after the DIE. If neither exists an error
    is printed.
    """
    for entry in self.BaseTypes:
        #print(entry)
        if entry['name'] == DIE.AT_name and entry['size'] == DIE.AT_byte_size and entry['encoding'] == DIE.AT_encoding:
            DIE.uuid = entry['uuid']
            DIE.baseHopperType = True
            return
    for entry in self.BaseTypes:
        if entry['preferred'] == True and entry['size'] == DIE.AT_byte_size and entry['encoding'] == DIE.AT_encoding:
            self.AddTypedef(DIE, DIE.AT_name, None, entry['uuid'])
            return
    print("Error: cannot find a base type :0x%08x:" % DIE.address)
def AddArray(self, DIE, name, attype):
    """Register array type records for every dimension of an array DIE.

    DIE's children are its TAG_subrange_type dimensions, outermost first.
    They are walked innermost first: each inner dimension becomes an
    unnamed array record attached to its subrange DIE, and the outermost
    dimension (child 0) becomes the named record attached to DIE itself.
    Every record's element type (attype) is the DIE of the next inner
    dimension, or the attype argument for the innermost one.

    Fixes over the previous version:
      * the loop now reaches child 0, so the outermost (or only)
        dimension is actually created;
      * Type.TYPE_array is spelled correctly (was TYPE_aray);
      * the count comes from the subrange child, not from DIE;
      * the "not a subrange" error reports DIE.address (self has none);
      * the element type only advances past a dimension that actually
        produced a record.
    """
    # Find multiple sub ranges examples:
    # ^0x\w+:([ ]+)TAG_subrange_type.*\n( +AT_.*\n)*\n0x\w+:\1TAG
    curType = attype
    for i in range(len(DIE.children) - 1, -1, -1):
        child = DIE.children[i]
        if child.tag != "TAG_subrange_type":
            print ("Error getting count :0x%08x:" % DIE.address)
            continue
        if not hasattr(child, "AT_count"):
            print ("Error getting count :0x%08x:" % child.address)
            continue
        if i == 0:
            record = self.AddType(name, self.Type.TYPE_array, DIE)
        else:
            record = self.AddType(None, self.Type.TYPE_array, child)
        record.attype = curType
        record.count = child.AT_count
        curType = child
def AddEnumeration(self, DIE, name):
    """Register an enumeration type record for DIE.

    The record gets "size" (DIE.AT_byte_size) and "enumerations", a list of
    {"name", "value"} dicts built from the TAG_enumerator children. Other
    children are reported with a printed error.

    Fix: enumerators are appended to the record's own list; the previous
    code referenced an undefined bare name and raised NameError.
    """
    # Find multiple enumerations examples:
    # ^0x\w+:([ ]+)TAG_enumerator.*\n( +AT_.*\n)*\n0x\w+:\1TAG
    record = self.AddType(name, self.Type.TYPE_enumeration, DIE)
    record.size = DIE.AT_byte_size
    record.enumerations = []
    for child in DIE.children:
        if child.tag == "TAG_enumerator":
            record.enumerations.append({"name":child.AT_name, "value":child.AT_const_value})
        else:
            print ("Error getting enumeration :0x%08x:" % child.address)
def DumpHex(self):
    """Placeholder for the output stage; currently does nothing.

    Planned steps, as noted by the author:
      - create types for unknown base types such as "long double"
      - go through all Types and delete duplicates
      - go through all pointers, if pointer to hopper base type then
        replace pointer uuid with base type uuid
      - pointer with no type void *
    """
    return None
#=========================================================================================
class lldb(dict):
    """Stand-in for the real lldb module (its import is commented out).

    Only the three type-class constants that DIEDict.GetClass returns are
    provided, using arbitrary distinct negative values.
    """
    eTypeClassClass, eTypeClassUnion, eTypeClassStruct = -1, -2, -3
#=========================================================================================
# One DWARF debugging information entry (DIE). The parser stores everything as plain instance
# attributes (address, tag, indent, children, AT_* values, ...), which is why the methods test
# for optional data with hasattr(). The same class is also used for the root dSYM object.
class DIEDict(dict):
def GetOffsetInBytes(self):
    """Return this member's byte offset, or None when it has no location.

    AT_data_member_location is used when present (a non-int value is
    reported and treated as 0); otherwise AT_data_bit_offset is converted
    to whole bytes.
    """
    if hasattr(self, "AT_data_member_location"):
        location = self.AT_data_member_location
        if location.__class__.__name__ == "int":
            return location
        print("Error in tag :%08x: %s (AT_data_member_location) containing value (%s)" % (self.address, self.tag, location))
        return 0
    if hasattr(self, "AT_data_bit_offset"):
        return self.AT_data_bit_offset >> 3
    return None
def GetOffsetInBits(self):
    """Return the bit offset of this member (0 when it has none).

    AT_bit_offset is returned as is; AT_data_bit_offset is reduced to the
    offset within its byte.
    """
    if hasattr(self, "AT_bit_offset"):
        bits = self.AT_bit_offset
    elif hasattr(self, "AT_data_bit_offset"):
        bits = self.AT_data_bit_offset & 7
    else:
        bits = 0
    return bits
def GetClass(self):
    """Return the lldb.eTypeClass* constant for a class/union/struct DIE.

    Any other tag yields None.
    """
    tag = self.tag
    if tag == "TAG_class_type":
        return lldb.eTypeClassClass
    if tag == "TAG_union_type":
        return lldb.eTypeClassUnion
    if tag == "TAG_structure_type":
        return lldb.eTypeClassStruct
    return None
def GetNumberOfDirectBaseClasses(self):
    """Return how many non-virtual TAG_inheritance DIEs were recorded (0 if none)."""
    return len(self.DirectBaseClasses) if hasattr(self, "DirectBaseClasses") else 0
def GetDirectBaseClassAtIndex(self, i):
    """Return the i-th entry of DirectBaseClasses (the inheritance DIE)."""
    bases = self.DirectBaseClasses
    return bases[i]
def GetNumberOfVirtualBaseClasses(self):
    """Return how many virtual TAG_inheritance DIEs were recorded (0 if none)."""
    return len(self.VirtualBaseClasses) if hasattr(self, "VirtualBaseClasses") else 0
def GetVirtualBaseClassAtIndex(self, i):
    """Return the i-th entry of VirtualBaseClasses (the inheritance DIE)."""
    virtual_bases = self.VirtualBaseClasses
    return virtual_bases[i]
def GetNumberOfFields(self):
    """Return how many data member DIEs were recorded in Fields (0 if none)."""
    return len(self.Fields) if hasattr(self, "Fields") else 0
def GetFieldAtIndex(self, i):
    """Return the i-th entry of Fields (a data member DIE)."""
    fields = self.Fields
    return fields[i]
def HasType(self):
    """Return True when this DIE carries an AT_type attribute."""
    return hasattr(self, "AT_type")
def GetType(self):
    """Return the DIE that AT_type refers to, looked up by address.

    Prints an error and returns None when this DIE has no AT_type.
    """
    if not self.HasType():
        print("Error getting type :0x%08x:" % self.address)
        return None
    return self.dSYM.DIELookup[self.AT_type]
def GetBaseType(self):
    """Return this DIE's type with any chain of typedefs resolved.

    Prints an error and returns None when this DIE (or a typedef along
    the chain) has no AT_type.
    """
    if not self.HasType():
        print("Error getting base type :0x%08x:" % self.address)
        return None
    target = self.GetType()
    if target.tag == "TAG_typedef":
        # keep unwrapping: a typedef may itself name another typedef
        return target.GetBaseType()
    return target
def GetContainerType(self):
    """Return the DIE that AT_containing_type refers to, looked up by address.

    Prints an error and returns None when the attribute is missing.
    """
    if not hasattr(self, "AT_containing_type"):
        print("Error getting conter type :0x%08x:" % self.address)
        return None
    return self.dSYM.DIELookup[self.AT_containing_type]
# Build a C-like display name for this DIE.
#   forType: when false and the DIE has an AT_name, that name is returned unchanged; otherwise
#            (or when forType is true) a name is synthesized from the tag as handled below.
# Qualifier, pointer, array and function names are assembled recursively through GetName() of
# the referenced type DIE.
# NOTE(review): the original indentation of this file was lost, so it is unclear whether the
# final "else" pairs with the TAG_member union test or with the whole tag chain; callers compare
# the result with None (see MakeVTables), which suggests some path falls through without a
# return - confirm against the original source.
def GetNameForType(self, forType):
if not forType and hasattr(self, "AT_name"):
return self.AT_name
elif self.tag == "TAG_structure_type":
return "(anonymous struct)"
elif self.tag == "TAG_union_type":
return "(anonymous union)"
elif self.tag == "TAG_class_type":
return "(anonymous class)"
elif self.tag == "TAG_enumeration_type":
return "(anonymous enum)"
elif self.tag == "TAG_const_type":
# a qualifier without AT_type qualifies void
if self.HasType():
return "const " + self.GetType().GetName()
else:
return "const void"
elif self.tag == "TAG_volatile_type":
if self.HasType():
return "volatile " + self.GetType().GetName()
else:
return "volatile void"
elif self.tag == "TAG_pointer_type":
if self.HasType():
result = self.GetType().GetName()
# "T *" for the first level of indirection, then "T **" etc. without further spaces
if result[-1:] == "*":
return result + "*"
else:
return result + " *"
else:
return "void *"
elif self.tag == "TAG_reference_type":
if self.HasType():
return "&" + self.GetType().GetName()
else:
return "& void"
elif self.tag == "TAG_ptr_to_member_type":
# named like a plain pointer to the member's type; the containing class is not shown
if self.HasType():
result = self.GetType().GetName()
else:
print("Error getting type :0x%08x:" % self.address)
result = "void"
if result[-1:] == "*":
return result + "*"
else:
return result + " *"
elif self.tag == "TAG_array_type":
# element type name followed by one "[n]" per dimension ("[]" when the count is unknown)
counts = self.GetCounts()
if self.HasType():
arrtype = self.GetType().GetName()
else:
print("Error getting type :0x%08x:" % self.address)
arrtype = "void"
countstr = ""
for count in counts:
if count == None:
countstr += "[]"
else:
countstr += "[%d]" % count
return arrtype + countstr
elif self.tag == "TAG_subroutine_type" or self.tag == "TAG_subprogram":
# "<return type> ()(<parameter types>)"; artificial parameters are shown inside /* */
if self.HasType():
returntype = self.GetType().GetName()
else:
returntype = "void"
i = 0
result = returntype + " ()("
for child in self.children:
i += 1
if child.HasType():
if hasattr(child, "AT_artificial"):
if i < len(self.children):
result += "/* " + child.GetType().GetName() + ", */ "
else:
result += "/* " + child.GetType().GetName() + " */"
else:
result += child.GetType().GetName()
if i < len(self.children):
result += ", "
elif child.tag == "TAG_unspecified_parameters":
result += "..."
else:
print("Error: unknown parameter type :0x%08x:" % self.address)
result += ")"
return result
elif self.tag == "TAG_member":
if self.GetType().GetClass() == lldb.eTypeClassUnion:
return "" # unnamed union member
else:
print("Error getting name for :0x%08x: %s" % (self.address, self.tag))
return ""
def GetName(self):
    """Return the display name of this DIE, preferring its own AT_name."""
    display_name = self.GetNameForType(False)
    return display_name
def GetByteSizeForAlign(self, forAlign, class_depth=0):
    """Return the byte size of this DIE, or an alignment hint for GetAlign.

    forAlign False: the storage size in bytes.
    forAlign True:  for class/struct/union DIEs the largest alignment of
                    any direct base, field and (at class_depth 0 only)
                    recorded virtual base; for everything else a size
                    that GetAlign turns into an alignment.
    class_depth is 0 for the outermost class and grows by one per level
    of direct base class.

    Prints an error and returns 0 when no size can be determined.

    Fixes over the previous version:
      * the bit-field fallback computed `offset & (7 + bit_size)` because
        `+` binds tighter than `&`; it now returns the number of bytes
        the bit field touches, rounded up like GetCompactSize does;
      * the error message no longer raises AttributeError for DIEs
        without AT_name.
    """
    tag = self.tag
    is_record = tag == "TAG_class_type" or tag == "TAG_structure_type"

    if forAlign and (is_record or tag == "TAG_union_type"):
        max_align = 1
        for i in range(self.GetNumberOfDirectBaseClasses()):
            base_type = self.GetDirectBaseClassAtIndex(i).GetBaseType()
            max_align = max(max_align, base_type.GetAlign(class_depth+1))
        for i in range(self.GetNumberOfFields()):
            field_type = self.GetFieldAtIndex(i).GetBaseType()
            max_align = max(max_align, field_type.GetAlign())
        # virtual bases are laid out once, by the most derived class only
        if class_depth == 0 and hasattr(self, "AllVirtualBaseClasses"):
            for virtualbaseclassinfo in self.AllVirtualBaseClasses:
                virtual_type = virtualbaseclassinfo.member.GetBaseType()
                max_align = max(max_align, virtual_type.GetAlign())
        return max_align

    if hasattr(self, "AT_byte_size"):
        if self.AT_byte_size == 1 and is_record and not hasattr(self, "Fields"):
            # classes have size 1 when they don't contain any fields
            return 0
        #print("byte size for :0x%08x: class_depth:%d forAlign:%d" % (self.address, class_depth, forAlign))
        return self.AT_byte_size

    if tag in ("TAG_const_type", "TAG_volatile_type", "TAG_typedef"):
        # qualifiers and typedefs are transparent
        return self.GetType().GetByteSizeForAlign(forAlign)
    if tag == "TAG_pointer_type":
        return self.compile_unit.addr_size
    if tag == "TAG_ptr_to_member_type":
        return self.compile_unit.addr_size * 2
    if tag == "TAG_array_type":
        if forAlign:
            return self.GetType().GetAlign()
        size = self.GetType().GetByteSize()
        for count in self.GetCounts():
            # a dimension of unknown length counts as one element
            size *= 1 if count is None else count
        return size

    if self.HasType():
        if forAlign:
            return self.GetType().GetAlign()
        return self.GetType().GetByteSize()

    if hasattr(self, "AT_bit_size") and hasattr(self, "AT_data_bit_offset"):
        return ((self.AT_data_bit_offset & 7) + self.AT_bit_size + 7) >> 3

    print("Error getting byte size for :0x%08x: %s %s class_depth:%d forAlign:%d" % (self.address, tag, getattr(self, "AT_name", None), class_depth, forAlign))
    return 0
def GetByteSize(self):
    """Return the storage size of this DIE in bytes."""
    size = self.GetByteSizeForAlign(False)
    return size
def GetCompactSize(self, class_depth=0):
    """Return the size of a class/struct without trailing virtual bases.

    For a class or struct this is the end of its last field; when it has
    no fields, the end of its last direct base class (using that base's
    own compact size); and 0 when it has neither. Any other DIE simply
    reports GetByteSize().

    Fixes over the previous version:
      * the bit-field test looked for AT_bit_size/AT_data_bit_offset on
        the class itself instead of on the last field, so a trailing bit
        field was never sized from its bit position;
      * the base-class fallback resolved the base's type but then asked
        the TAG_inheritance DIE for its compact size, which degenerates
        to the base's full byte size; it now recurses on the base type.
    """
    # doesn't include virtual classes
    if self.tag != "TAG_class_type" and self.tag != "TAG_structure_type":
        return self.GetByteSize()

    m_offset = None
    numFields = self.GetNumberOfFields()
    if numFields > 0:
        member = self.GetFieldAtIndex(numFields-1)
        if hasattr(member, "AT_bit_size") and hasattr(member, "AT_data_bit_offset"):
            # round the end of the bit field up to a whole byte
            m_offset = (member.AT_data_bit_offset + member.AT_bit_size + 7) >> 3
        else:
            m_offset = member.GetOffsetInBytes() + member.GetByteSize()
    if m_offset is None:
        numClasses = self.GetNumberOfDirectBaseClasses()
        if numClasses > 0:
            member = self.GetDirectBaseClassAtIndex(numClasses - 1)
            m_type = member.GetBaseType()
            m_offset = member.GetOffsetInBytes() + m_type.GetCompactSize(class_depth + 1)
    if m_offset is None:
        m_offset = 0
    return m_offset
def GetAlign(self, class_detph=0):
    """Return the alignment (8, 4, 2 or 1) implied by this DIE's size.

    The size comes from GetByteSizeForAlign(True, class_detph) and is
    clamped down to the largest of 8/4/2/1 that does not exceed it.
    (The parameter keeps its historical spelling for compatibility.)
    """
    size = self.GetByteSizeForAlign(True, class_detph)
    for boundary in (8, 4, 2):
        if size >= boundary:
            return boundary
    return 1
def GetCounts(self):
    """Return the element count of every array dimension, outermost first.

    One entry is produced per TAG_subrange_type child: its AT_count, or
    None when the count is unknown. Children of any other kind are
    reported with a printed error and skipped.
    """
    counts = []
    for child in self.children:
        if child.tag != "TAG_subrange_type":
            print ("Error getting count :0x%08x:" % self.address)
            continue
        counts.append(child.AT_count if hasattr(child, "AT_count") else None)
    return counts
def GetBitSize(self):
    """Return the size in bits: AT_bit_size if present, else 8 * byte size."""
    if not hasattr(self, "AT_bit_size"):
        return self.GetByteSize() * 8
    return self.AT_bit_size
#=========================================================================================
# Attribute bag describing one vtable. DSYM_Reader.MakeVTables sets vPtrOffset,
# mergedVTableEntries, derivationPathText and max on its instances.
class VTableInfo(dict):
pass
# Attribute bag for one vtable slot. DSYM_Reader.MakeVTables sets DIE (the function occupying
# the slot) and, when the slot is replaced by a thunk, ThunkType.
class VTableItem(dict):
pass
# Attribute bag for one step of a derivation path. DSYM_Reader sets DIE, index, isvirtual,
# VTables and derivationPathText on its instances.
class DerivationItem(dict):
pass
# Attribute bag for one virtual base class placed by DSYM_Reader.MakeVTables: member (the
# inheritance DIE) and offset (the byte offset assigned to it).
class VirtualBaseClassInfo(dict):
pass
#=========================================================================================
class DSYM_Reader:
    """Parser for the text output of dwarfdump.

    The class attributes below are the line patterns used by ReadNextDIE.
    All patterns are raw strings: the previous plain strings relied on
    invalid escape sequences such as "\\w" and "\\(", which current Python
    versions warn about. The patterns themselves match exactly as before.
    """
    # "0x...: Compile Unit: ... addr_size = 0x.. ..." header line; group 2 is the address size
    compileUnitRE = re.compile(r'(0x[0-9a-f]+): Compile Unit: .* addr_size = (0x[0-9a-f]+) .*\n')
    # first line of a DIE: address, indentation (nesting depth) and tag name
    tagRE = re.compile(r'(0x[0-9a-f]+):( +)(?:Unknown )?(?:DW_)?(TAG(?:_\w+| constant: 0x[0-9a-f]+)).*\n')
    # "NULL" entry that terminates a list of sibling DIEs
    nullRE = re.compile(r'(0x[0-9a-f]+): +(NULL)\n')
    # empty line that terminates the attribute list of a DIE
    blankRE = re.compile(r'\n')
    # continuation lines of the multi-line AT_location / AT_ranges / AT_byte_size attributes
    AT_locationRE = re.compile(r' +(.*?) *(\))?\n')
    AT_rangesRE = re.compile(r' +(?:\[(0x[0-9a-f]+)(?: - |, )(0x[0-9a-f]+)\)(\)?))| *(End \))\n')
    AT_byte_sizeRE = re.compile(r' +(?:\[(0x[0-9a-f]+)(?: - |, )(0x[0-9a-f]+)\)(?::?[^)\n]*)(\)?))| *(End \))\n')
    # Attribute line patterns, tried in order; the first match wins. Patterns with exactly two
    # groups are handled generically (name, value); the others get special processing.
    AT_REList = [
        re.compile(r' +(?:DW_)?(AT_location)\t?\( *(0x[0-9a-f]+)(\n)'), # loop until ')\n' is found
        re.compile(r' +(?:DW_)?(AT_ranges)\t?\( *(0x[0-9a-f]+)(\n)'), # loop until 'End )\n' is found
        re.compile(r' +(?:DW_)?(AT_byte_size)\t?\( *(0x[0-9a-f]+):? *(\n)'), # loop until 'End )\n' is found
        re.compile(r' +(?:DW_)?(AT_type)\t?\( *\{(0x[0-9a-f]+)\} \( .*? *\)\n'),
        re.compile(r' +(?:DW_)?(AT_vtable_elem_location)\t?\( *<(0x[0-9a-f]+)> ([0-9a-f]{2}) ([0-9a-f]{2})(?: ([0-9a-f]{2})?)? *\)\n'),
        re.compile(r' +(?:DW_)?(AT_vtable_elem_location)\t?\( *(?:DW_)?(OP_constu) (0x[0-9a-f]+) *\)\n'),
        re.compile(r' +(?:DW_)?(AT_data_member_location)\t?\( *(?:DW_)?(OP_plus_uconst) (0x[0-9a-f]+) *\)\n'), # found this in 10.10.5_14F2511 kernel
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *\{(0x[0-9a-f]+)\}".*" *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *\[(.*)\] *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *\{(.*)\} *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *"(.*)" *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *(0x\w+) ".*" *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *(.*) *\)\n'),
    ]
    # value classifiers: 64 bit hex with the top bit set, any hex number, decimal number
    neghexRE = re.compile(r'^0x[8-9a-f][0-9a-f]{15} *$')
    hexRE = re.compile(r'^(0x[0-9a-f]+):? *$')
    decRE = re.compile(r'^[-+]?\d+ *$')
# Read DIEs from f with ReadNextDIE and attach them to the tree below parent until the list
# ends (ReadNextDIE returns None) or a DIE with a smaller indentation is seen.
#   f      : iterable of dwarfdump output lines
#   parent : DIE whose children list receives the DIEs of this nesting level
#   dSYM   : root object, passed through to ReadNextDIE
# Nesting is derived from the indentation of the tag lines: a DIE indented deeper than the
# current level becomes the first child of the most recently added DIE, and the rest of that
# deeper list is then read by a recursive call.
# Besides "children", each DIE is also filed on its parent by kind: VirtualBaseClasses /
# DirectBaseClasses (TAG_inheritance with / without AT_virtuality), Friends, Fields (anything
# with a data member location or data bit offset) and VTableEntries (keyed by
# AT_vtable_elem_location).
def ReadDIEList(self, f, parent, dSYM):
indent = None
unexpectedlist = False
# On a recursive call the parent already holds the first child of this level; its
# indentation defines the level.
if len(parent.children) == 1:
indent = parent.children[0].indent
elif len(parent.children) != 0:
print("Error: unexpected list:0x%08x %s" % (parent.address, parent.tag))
for child in parent.children:
print(" :0x%08x %s" % (child.address, child.tag))
unexpectedlist = True
while True:
DIE = self.ReadNextDIE(f, dSYM)
if DIE == None:
break
if unexpectedlist:
print("Error: first item of unexpected list %s :0x%08x" % (DIE.tag, DIE.address))
unexpectedlist = False
if indent == None:
indent = DIE.indent
if DIE.indent > indent:
# indent increased, this record is the first child of the last added record
#print("{ %d" % DIE.indent)
DIE.parent = parent.children[-1]
DIE.parent.children.append(DIE)
elif DIE.indent < indent:
print("Error: indentation")
break
else:
DIE.parent = parent
parent.children.append(DIE)
if DIE.tag == "TAG_inheritance":
if hasattr(DIE, "AT_virtuality"):
if not hasattr(DIE.parent, "VirtualBaseClasses"):
DIE.parent.VirtualBaseClasses = []
DIE.parent.VirtualBaseClasses.append(DIE)
#print("Adding virtual base class :0x%08x:" % DIE.address)
else:
if not hasattr(DIE.parent, "DirectBaseClasses"):
DIE.parent.DirectBaseClasses = []
DIE.parent.DirectBaseClasses.append(DIE)
elif DIE.tag == "TAG_friend":
if not hasattr(DIE.parent, "Friends"):
DIE.parent.Friends = []
DIE.parent.Friends.append(DIE)
elif hasattr(DIE, "AT_data_member_location") or hasattr(DIE, "AT_data_bit_offset"):
if not hasattr(DIE.parent, "Fields"):
DIE.parent.Fields = []
DIE.parent.Fields.append(DIE)
elif hasattr(DIE, "AT_vtable_elem_location"):
if not hasattr(DIE.parent, "VTableEntries"):
DIE.parent.VTableEntries = {}
if DIE.AT_vtable_elem_location in DIE.parent.VTableEntries:
nameold = DIE.parent.VTableEntries[DIE.AT_vtable_elem_location].GetName()
namenew = DIE.GetName()
print("Error: duplicate VTableEntries 0x%x %s %s" % (DIE.AT_vtable_elem_location * DIE.compile_unit.addr_size, nameold, namenew))
# workaround problem for Mammal and WingedAnimal examples
# (a destructor that collides with a non-destructor in slot 0 is moved to the free slot 1)
if DIE.AT_vtable_elem_location == 0 and namenew[:1] == "~" and not nameold[:1] == "~" and not 1 in DIE.parent.VTableEntries:
DIE.parent.VTableEntries[1] = DIE
else:
DIE.parent.VTableEntries[DIE.AT_vtable_elem_location] = DIE
# the DIE just added opened a deeper level: read the rest of that level recursively
if DIE.indent > indent:
self.ReadDIEList(f, DIE.parent, dSYM)
#print("} %d" % DIE.indent)
# Read lines from f up to and including the next complete DIE and return it as a DIEDict.
# Returns None when a NULL entry (end of a sibling list) or the end of the input is reached
# first. "Compile Unit:" header lines are consumed on the way to remember the address size.
# The new DIE gets dSYM, address, indent, tag, children and one attribute per parsed AT_ line;
# numeric attribute values (hex or decimal) are stored as int, everything else as str.
# The DIE is registered in dSYM.DIELookup by address; a TAG_compile_unit additionally becomes
# dSYM.currentCompileUnit, every other DIE gets compile_unit set to the current one.
# NOTE(review): addr_size is a local that is only assigned when a "Compile Unit:" header line
# was seen during this same call; a TAG_compile_unit without that header right before it would
# hit an unbound local - confirm that dwarfdump always emits the header first.
def ReadNextDIE(self, f, dSYM):
DIE = None
for line in f:
#print (line)
if self.nullRE.match(line):
break
c = self.compileUnitRE.match(line)
if c:
addr_size = int(c.group(2), 16)
continue
t = self.tagRE.match(line)
if t:
DIE = DIEDict()
DIE.dSYM = dSYM
DIE.address = int(t.group(1), 16)
DIE.indent = len(t.group(2))
#print("indent: %d" % DIE.indent)
DIE.tag = t.group(3)
DIE.children = []
#print("Added DIE :%08x:" % DIE.address)
# attribute lines follow the tag line until the first blank line
for line in f:
if self.blankRE.match(line):
break
for atRE in self.AT_REList:
m = atRE.match(line)
if m:
if atRE.groups == 2:
# AT_bit_offset may be printed as a 64 bit two's complement value; convert it back to a negative int
if m.group(1) == "AT_bit_offset" and self.neghexRE.match(m.group(2)):
DIE.AT_bit_offset = -int(2**64 - int(m.group(2),16))
else:
m2 = self.hexRE.match(m.group(2))
if m2:
setattr(DIE, m.group(1), int(m2.group(1), 16))
elif self.decRE.match(m.group(2)):
setattr(DIE, m.group(1), int(m.group(2), 10))
else:
setattr(DIE, m.group(1), m.group(2))
# the rest of these have more than 2 capture groups (sometimes the third capture group
# is the linefeed just so we can do the following special processing)
elif m.group(1) == "AT_data_member_location":
if m.group(2) == "OP_plus_uconst":
thenum = int(m.group(3),16)
DIE.AT_data_member_location = thenum
elif m.group(1) == "AT_vtable_elem_location":
#print ("AT_vtable_elem_location «%s•%s•%s»" % (m.group(1), m.group(2), m.group(3)))
if m.group(2) == "OP_constu":
thenum = int(m.group(3),16)
else:
# raw block form: group 2 is the byte count, group 3 the opcode byte, the remaining
# groups the operand bytes, combined 7 bits at a time starting from the last byte
numbytes = int(m.group(2),16)
thenum = 0
for i in range(numbytes + 2, 3, -1):
part = int(m.group(i),16)
if (i == numbytes + 2) == (part & 128 != 0):
print("Error: unexpected high bit of elem location byte (%s) :%08x:" % (m.group(3), DIE.address))
thenum = thenum * 128 + (part & 127)
if m.group(3) != "10":
print("Error: unexpected elem location type (%s) :%08x:" % (m.group(3), DIE.address))
DIE.AT_vtable_elem_location = thenum
elif m.group(1) == "AT_location":
# multi-line attribute: collect the following lines until one ends with ')'
setattr(DIE, m.group(1), int(m.group(2), 16))
lines = []
for line in f:
m = self.AT_locationRE.match(line)
if m:
lines.append(m.group(1))
#print ("AT_location «%s•%s»" % (m.group(1), m.group(2)))
if m.group(2) == ")":
break # AT_location_list finished with error
else:
print("Error in tag :%08x: (AT_location) with line %s" % (DIE.address, line))
break # AT_location_list finished with error
DIE.AT_location_list = lines
elif m.group(1) == "AT_ranges":
# multi-line attribute: collect [start, end] pairs until 'End )' or a closing ')'
DIE.AT_ranges = int(m.group(2), 16)
lines = []
for line in f:
m = self.AT_rangesRE.match(line)
if m:
#print ("«%s•%s•%s•%s»" % (m.group(1), m.group(2), m.group(3), m.group(4)))
if m.group(4) == 'End )':
break # AT_ranges_list finished
lines.append([m.group(1), m.group(2)])
if m.group(3) == ')':
break # AT_ranges_list finished
else:
print("Error in tag :%08x: (AT_ranges_list) with line %s" % (DIE.address, line))
break # AT_ranges_list finished with error
DIE.AT_ranges_list = lines
elif m.group(1) == "AT_byte_size":
# multi-line form of AT_byte_size, parsed like AT_ranges
DIE.AT_byte_size = int(m.group(2), 16)
lines = []
for line in f:
m = self.AT_byte_sizeRE.match(line)
if m:
#print ("«%s•%s•%s•%s»" % (m.group(1), m.group(2), m.group(3), m.group(4)))
if m.group(4) == 'End )':
break # AT_byte_size_list finished
lines.append([m.group(1), m.group(2)])
if m.group(3) == ')':
break # AT_byte_size_list finished
else:
print("Error in tag :%08x: (AT_byte_size_list) with line %s" % (DIE.address, line))
break # AT_byte_size_list finished with error
DIE.AT_byte_size_list = lines
else:
print("Error in tag :%08x: with line %s" % (DIE.address, line))
break # AT_ created
dSYM.DIELookup[DIE.address] = DIE
if DIE.tag == "TAG_compile_unit":
dSYM.CompileUnits.append(DIE)
dSYM.currentCompileUnit = DIE
DIE.addr_size = addr_size
else:
DIE.compile_unit = dSYM.currentCompileUnit
break # DIE created
return DIE
def CheckVTables(self, msg, parent):
    """Debug helper: print every virtual base class found below parent.

    Walks the whole DIE tree under parent; for each DIE, each recorded
    virtual base class is printed together with msg and its type is
    resolved (which reports unresolvable types).
    """
    # for every class or structure, make a list of vtables (more than one vtable exists for multiple inheritance)
    for child in parent.children:
        for index in range(child.GetNumberOfVirtualBaseClasses()):
            virtual_base = child.GetVirtualBaseClassAtIndex(index)
            print("%d :0x%08x: Got virtual base class :0x%08x: %s" % (index, child.address, virtual_base.address, msg))
            virtual_base.GetBaseType()
        self.CheckVTables(msg, child)
def dumpderivationpath(self, derivationPath):
    """Format a derivation path as "index:[virtual ]ClassName,..." for log output."""
    parts = []
    for item in derivationPath:
        qualifier = "virtual " if item.isvirtual else ""
        parts.append("%d:%s%s" % (item.index, qualifier, item.DIE.GetName()))
    return ",".join(parts)
# Build the vtable descriptions for the class at the head of derivationPath.
#   derivationPath : list of DerivationItem; [0] is the class being processed ("thefirst"),
#                    [-1] is the (base) class currently being visited ("thelast")
#   begin_offset   : byte offset of thelast inside thefirst
# If thelast has an artificial field whose name contains "vptr", a VTableInfo is created (or
# reused) for the resulting offset and filled by merging the VTableEntries of the classes on the
# path. Otherwise the direct base classes are visited recursively. Virtual base classes are
# visited at the offset recorded in thefirst.IncludedVirtualBaseClasses, or are placed at
# thefirst.VirtualBaseClassOffset (rounded up to their alignment) when seen for the first time.
# Progress is logged with print(), indented by the depth of the path.
# NOTE(review): dict.iteritems() only exists on Python 2; under Python 3 the merge loops below
# raise AttributeError - .items() would work on both.
# NOTE(review): the local name "max" shadows the builtin inside this method.
def MakeVTables(self, derivationPath, begin_offset=0):
prefix = "%*s" %(4 * len(derivationPath), "")
thefirst = derivationPath[0].DIE
thelast = derivationPath[-1].DIE
# Does this class contain a vPtr?
numFields = thelast.GetNumberOfFields()
vPtrOffset = None
for i in range(numFields):
member = thelast.GetFieldAtIndex(i)
thename = member.GetName()
if thename != None and "vptr" in thename and hasattr(member, "AT_artificial"):
vPtrOffset = member.GetOffsetInBytes() + begin_offset
print("%sfound vptr (%s) classoffset:0x%x vptroffset:0x%x" % (prefix, self.dumpderivationpath(derivationPath), begin_offset, vPtrOffset))
break
if vPtrOffset == None:
# No vPtr exists, follow base classes
numClasses = thelast.GetNumberOfDirectBaseClasses()
for i in range(numClasses):
member = thelast.GetDirectBaseClassAtIndex(i)
m_offset = member.GetOffsetInBytes() + begin_offset
m_type = member.GetBaseType()
derivationItem = DerivationItem()
derivationItem.DIE = m_type
derivationItem.index = i
derivationItem.isvirtual = False
derivationItem.VTables = thefirst.VTables
# path text: two digits per step; direct bases use 01.., virtual bases 31.. (already placed) or 61.. (first seen)
derivationItem.derivationPathText = derivationPath[-1].derivationPathText + "%02d" % (i + 1)
derivationPath.append(derivationItem)
print("%s[ derived (%s) classoffset:0x%x baseclassoffset:0x%x" % (prefix, self.dumpderivationpath(derivationPath), begin_offset, m_offset))
self.MakeVTables(derivationPath, m_offset)
print("%s] derived (%s) classoffset:0x%x baseclassoffset:0x%x" % (prefix, self.dumpderivationpath(derivationPath), begin_offset, m_offset))
derivationPath.pop()
numClasses = thelast.GetNumberOfVirtualBaseClasses()
for i in range(numClasses):
member = thelast.GetVirtualBaseClassAtIndex(i)
print("%s%d Checking virtual base class :0x%08x: numv:%d" % (prefix, i, member.address, len(thefirst.AllVirtualBaseClasses)))
m_type = member.GetBaseType()
if m_type.address in thefirst.IncludedVirtualBaseClasses:
# this virtual base was already placed: revisit it at the recorded offset
virtualbaseclassinfo = thefirst.IncludedVirtualBaseClasses[m_type.address]
m_offset = virtualbaseclassinfo.offset
derivationItem = DerivationItem()
derivationItem.DIE = m_type
derivationItem.index = 0
derivationItem.isvirtual = True
derivationItem.VTables = thefirst.VTablesVirtual
derivationItem.derivationPathText = derivationPath[-1].derivationPathText + "%02d" % (i + 31)
derivationPath.append(derivationItem)
print("%s[ virtual 2nd derived (%s) classoffset:0x%x baseclassoffset:0x%x member:%08x: type:%08x:" % (prefix, self.dumpderivationpath(derivationPath), begin_offset, m_offset, member.address, m_type.address))
self.MakeVTables(derivationPath, m_offset)
print("%s] virtual 2nd derived (%s) classoffset:0x%x baseclassoffset:0x%x member:%08x: type:%08x:" % (prefix, self.dumpderivationpath(derivationPath), begin_offset, m_offset, member.address, m_type.address))
derivationPath.pop()
else:
# first time this virtual base is seen: place it at the next aligned offset
m_offset = thefirst.VirtualBaseClassOffset
m_align = m_type.GetAlign()
m_offset = ((m_offset + m_align-1) & -m_align)
thefirst.VirtualBaseClassOffset += m_type.GetByteSize()
derivationItem = DerivationItem()
derivationItem.DIE = m_type
derivationItem.index = 0
derivationItem.isvirtual = True
derivationItem.VTables = thefirst.VTablesVirtual
derivationItem.derivationPathText = derivationPath[-1].derivationPathText + "%02d" % (i + 61)
virtualbaseclassinfo = VirtualBaseClassInfo()
virtualbaseclassinfo.member = member
virtualbaseclassinfo.offset = m_offset
thefirst.AllVirtualBaseClasses.append(virtualbaseclassinfo)
thefirst.IncludedVirtualBaseClasses[m_type.address] = virtualbaseclassinfo
derivationPath.append(derivationItem)
print("%s[ virtual 1st derived (%s) classoffset:0x%x baseclassoffset:0x%x member:%08x: type:%08x:" % (prefix, self.dumpderivationpath(derivationPath), begin_offset, m_offset, member.address, m_type.address))
self.MakeVTables(derivationPath, m_offset)
print("%s] virtual 1st derived (%s) classoffset:0x%x baseclassoffset:0x%x member:%08x: type:%08x:" % (prefix, self.dumpderivationpath(derivationPath), begin_offset, m_offset, member.address, m_type.address))
derivationPath.pop()
if vPtrOffset != None:
# A vPtr exists, make a vtable for it.
if vPtrOffset in thefirst.VTablesByOffset:
vtableinfo = thefirst.VTablesByOffset[vPtrOffset]
max = vtableinfo.max
else:
vtableinfo = VTableInfo()
vtableinfo.vPtrOffset = vPtrOffset
vtableinfo.mergedVTableEntries = {}
vtableinfo.derivationPathText = None
derivationPath[-1].VTables.append(vtableinfo)
thefirst.VTablesByOffset[vPtrOffset] = vtableinfo
max = -1
mergedVTableEntries = vtableinfo.mergedVTableEntries
gotmultiinherit = False
gotvirtual = False
# walk the path from the class owning the vptr back towards the most derived class
for derivationItem in reversed(derivationPath):
baseClass = derivationItem.DIE
if hasattr(baseClass, "VTableEntries"):
print("%sProcessing %s path:%s previouspath:%s" % (prefix, baseClass.GetName(), derivationItem.derivationPathText, vtableinfo.derivationPathText))
if vtableinfo.derivationPathText == None or len(derivationItem.derivationPathText) < len(vtableinfo.derivationPathText):
if gotvirtual or gotmultiinherit:
# Trying to build vtable of multiple inheritance is hard.
# This is probably wrong - maybe check virtuality, and parameters, but then I might as well try to code a C++ compiler.
# We'll just check the name.
for k,w in baseClass.VTableEntries.iteritems():
namesuper = w.GetName()
print("%s Looking for vtableitem %s" % (prefix, namesuper))
for j,v1 in mergedVTableEntries.iteritems():
v = v1.DIE
namebase = v.GetName()
if (namesuper == namebase or (namesuper[:1] == "~" and namebase[:1] == "~")):
# a "non-virtual thunk" to w is what this probably is:
v1.DIE = w
if gotvirtual:
v1.ThunkType = "virtual thunk to "
else:
v1.ThunkType = "non-virtual thunk to "
print("%s Changed vtableitem 0x%x %s%s" % (prefix, v1.DIE.compile_unit.addr_size * j, v1.ThunkType, namesuper))
break
else:
for j,v in baseClass.VTableEntries.iteritems():
namesuper = v.GetName()
print("%s Adding vtableitem 0x%x %s" % (prefix, v.compile_unit.addr_size * j, namesuper))
if j in mergedVTableEntries:
namebase = mergedVTableEntries[j].DIE.GetName()
if namesuper != namebase and not (namesuper[:1] == "~" and namebase[:1] == "~"):
if namesuper[:1] == "~" and j == 0 and 1 in mergedVTableEntries and mergedVTableEntries[1].DIE.GetName()[:1] == "~":
vtableitem = VTableItem()
vtableitem.DIE = v
mergedVTableEntries[1] = vtableitem
print("Error: performed workaround for vtable entry function name :%08x:%s differing from super :%08x:%s" % (v.address, namebase, mergedVTableEntries[j].DIE.address, namesuper))
else:
print("Error: vtable entry function name :%08x:%s differs from super :%08x:%s" % (v.address, namebase, mergedVTableEntries[j].DIE.address, namesuper))
else:
vtableitem = VTableItem()
vtableitem.DIE = v
mergedVTableEntries[j] = vtableitem
else:
if j > max:
max = j
vtableitem = VTableItem()
vtableitem.DIE = v
mergedVTableEntries[j] = vtableitem
else:
print("%sSkipping" % prefix)
if derivationItem.index > 0 and not gotmultiinherit:
# index is > 0 for non primary base class of multiple inheritance class. These requires a different method to build vtable.
print("%sgotmultiinherit max:0x%x" % (prefix, max * baseClass.compile_unit.addr_size))
gotmultiinherit = True
if derivationItem.isvirtual and not gotvirtual:
print("%sgotvirtual max:0x%x" % (prefix, max * baseClass.compile_unit.addr_size))
gotvirtual = True
vtableinfo.max = max
if vtableinfo.derivationPathText == None:
vtableinfo.derivationPathText = derivationPath[-1].derivationPathText
print("%sadded vtable (%s) classoffset:0x%x vptroffset:0x%x max:%d numvtables:%d numvirtualvtables:%d" % (prefix, self.dumpderivationpath(derivationPath), begin_offset, vPtrOffset, max, len(thefirst.VTables), len(thefirst.VTablesVirtual)))
def MakeAllVTables(self, parent):
    """Build the vtable lists for every class/struct DIE below *parent*.

    Each DIE tagged TAG_class_type or TAG_structure_type gets fresh vtable
    bookkeeping attributes and is handed to MakeVTables as the root of a
    one-element derivation path (a class can end up with more than one
    vtable when multiple inheritance is involved).  Every child, whatever
    its tag, is then descended into so nested definitions are reached.
    """
    for node in parent.children:
        if node.tag in ("TAG_class_type", "TAG_structure_type"):
            # Start from empty containers; MakeVTables is called right after.
            node.VTables = []
            node.VTablesByOffset = {}
            node.VTablesVirtual = []
            node.AllVirtualBaseClasses = []
            node.IncludedVirtualBaseClasses = {}
            node.VirtualBaseClassOffset = node.GetCompactSize()

            # The class itself is the first (index 0, non-virtual) path element.
            root = DerivationItem()
            root.DIE = node
            root.index = 0
            root.isvirtual = False
            root.VTables = node.VTables
            root.derivationPathText = "%02d" % 1
            path = [root]

            print("[ starting (parent:0x%08x: child:0x%08x: %s)" % (parent.address, node.address, self.dumpderivationpath(path)))
            self.MakeVTables(path)
            print("]")
        self.MakeAllVTables(node)
def Process_dSYM(self, filename):
    """Parse DWARF information for *filename* and return the root DIEDict.

    filename -- either a file whose name ends in ".txt" holding saved
                dwarfdump output, or anything dwarfdump itself can read
                (its output is captured in a temporary file).

    The returned object carries DIELookup, CompileUnits, filename and
    children; vtables have already been built via MakeAllVTables.
    """
    # Only a real ".txt" suffix selects the saved-dump path.  The previous
    # unanchored pattern also matched names such as "foo.txt.dSYM".
    is_text_dump = filename.endswith(".txt")
    if is_text_dump:
        f = open(filename, "r")
    else:
        f = tempfile.NamedTemporaryFile()
        #print("Created temp file: %s" % f.name)
    try:
        if not is_text_dump:
            status = subprocess.call(["dwarfdump", filename], stdout=f)
            if status != 0:
                # Keep going (best effort) but do not hide the failure.
                print("Error: dwarfdump exited with status %d for %s" % (status, filename))
            f.seek(0)
        #print("Processing file: %s" % f.name)
        dSYM = DIEDict()
        dSYM.currentCompileUnit = None
        dSYM.DIELookup = {}
        dSYM.CompileUnits = []
        dSYM.filename = filename
        dSYM.children = []
        self.ReadDIEList(f, dSYM, dSYM)
        # currentCompileUnit is parser scratch state only.
        del dSYM.currentCompileUnit
    finally:
        # Always release the input (and delete the temp file), even when
        # dwarfdump is missing or parsing raises.
        f.close()
    self.MakeAllVTables(dSYM)
    return dSYM
#=========================================================================================
def MakeOffsetStr(offset):
    """Return *offset* as lowercase hex ("0x..."), right-aligned to at least 6 columns."""
    hex_text = "0x%x" % offset
    return hex_text.rjust(6)
# from /Library/Developer/KDKs/KDK_10.11.5_15F34.kdk/System/Library/Kernels/kernel.dSYM/Contents/Resources/Python/lldbmacros/structanalyze.py
# Render the memory layout of one class/struct/union type as indented text.
#
# Parameters (as used below):
#   symbol       - the type being rendered; queried through GetClass(), GetName(),
#                  GetByteSize(), the direct-base-class and field accessors, and the
#                  optional Friends / AllVirtualBaseClasses attributes
#   typename     - display name; when None, symbol.GetName() or "_anon_<address>" is used
#   fieldname    - member name shown in the header line, or None
#   prefix       - string prepended to each line emitted at this level
#   depth        - nesting depth; only forwarded (depth + 1) to recursive calls
#   class_depth  - incremented when recursing into a direct base class, reset to 0 when
#                  recursing into a field or a virtual base; virtual base classes are
#                  only emitted when it is 0
#   begin_offset - added to every member's GetOffsetInBytes()
#
# Returns a tuple (text, compact_size, max_align):
#   text         - the rendered block, ending in prefix + "}"
#   compact_size - _compact_offset - begin_offset, i.e. the size accumulated by this function
#   max_align    - largest m_align seen among direct base classes and fields
#
# NOTE(review): "import lldb" is commented out at the top of the file, so the lldb.eTypeClass*
# names used here presumably come from a stand-in defined elsewhere in this file - confirm.
def _showStructPacking(symbol, typename, fieldname, prefix, depth, class_depth, begin_offset=0):
classType = symbol.GetClass()
# Map the type class to the keyword shown in the header line.
if classType == lldb.eTypeClassClass :
ctype = "class"
elif classType == lldb.eTypeClassUnion :
ctype = "union"
elif classType == lldb.eTypeClassStruct :
ctype = "struct"
else:
ctype = "_unknown_%x" % (classType or 0)
if typename == None:
typename = symbol.GetName() or "_anon_%x" % symbol.address
# Header line: "[byte size] (kind) typename [fieldname] {"
if fieldname != None:
outstr = "[%4d] (%s) %s %s {" % (symbol.GetByteSize(), ctype, typename, fieldname) + "\n"
else:
outstr = "[%4d] (%s) %s {" % (symbol.GetByteSize(), ctype, typename) + "\n"
# List friend declarations. The friend's type is found either through the entry's own
# type or through its AT_friend reference (see the two encodings in the note below).
if hasattr(symbol, "Friends"):
for friend in symbol.Friends:
friendType = None
friendClass = None
if friend.HasType():
friendType = friend.GetType()
elif hasattr(friend, "AT_friend"):
friendType = friend.dSYM.DIELookup[friend.AT_friend]
if friendType != None:
friendClass = friendType.GetClass()
# ctype is reused here; the header line above has already been formatted with it.
if friendClass == lldb.eTypeClassClass :
ctype = "class"
elif friendClass == lldb.eTypeClassUnion :
ctype = "union"
elif friendClass == lldb.eTypeClassStruct :
ctype = "struct"
else:
# NOTE(review): this formats classType (the enclosing type), not friendClass - looks like a copy/paste slip; confirm.
ctype = "_unknown_%x" % (classType or 0)
outstr = outstr + prefix + " friend %s %s\n" % (ctype, friendType.GetName() or "_anon_%x" % friendType.address)
# The string below is an inert note (never assigned or printed) showing sample dwarfdump
# output for DW_TAG_friend: one sample uses DW_AT_type, the other DW_AT_friend.
"""
Mac OS X 10.8:
0x009fcff0: DW_TAG_structure_type
DW_AT_name ("IOStatistics")
DW_AT_declaration (0x01)
0x009fd4d4: DW_TAG_friend
DW_AT_type (0x009fcff0 "IOStatistics")
DW_AT_data_member_location (DW_OP_plus_uconst 0x0)
DW_AT_accessibility (DW_ACCESS_public)
Mac OS X 10.9:
0x00b67148: DW_TAG_class_type
DW_AT_name ("IOStatistics")
DW_AT_declaration (0x01)
0x00b675ca: DW_TAG_friend
DW_AT_friend (0x00b67148)
"""
# _compact_offset is the running offset at which the next member is expected; it is
# compared with each member's reported offset to flag overlaps and holes.
_compact_offset = begin_offset
max_union_member_size = 0
max_align = 1
m_align = 1
# ---- direct base classes ----
numClasses = symbol.GetNumberOfDirectBaseClasses()
for i in range(numClasses):
member = symbol.GetDirectBaseClassAtIndex(i)
m_offset = member.GetOffsetInBytes() + begin_offset
m_type = member.GetType()
membertypename = m_type.GetName()
m_type = member.GetBaseType()
m_size = m_type.GetByteSize()
warningstr = ""
debugstr = "" # + str((begin_offset, m_offset, _compact_offset, m_size))
#print(prefix, "V", membertypename, debugstr)
if _compact_offset > m_offset:
warningstr = " *** Possible memory overlap ***"
elif _compact_offset < m_offset:
# NOTE(review): m_align here is still the value from the previous base class (or the
# initial 1); this base's alignment is only known after the recursive call below.
align_offset = ((_compact_offset + m_align-1) & -m_align)
if align_offset != m_offset :
# ignore memory holes that may be caused by field alignment
#_has_memory_hole = True
warningstr = " *** Possible memory hole (msize:%d align:%d calc:%d calcaligned:%d actual:%d) ***" % (m_size, m_align, _compact_offset, align_offset, m_offset)
_compact_offset = m_offset
# Recurse into the base class; class_depth + 1 suppresses its virtual-base section.
s, compact_size, m_align = _showStructPacking(m_type, membertypename, None, prefix+" ", depth + 1, class_depth + 1, m_offset)
if m_align > max_align:
max_align = m_align
#print(prefix, "V", membertypename, "m_align:%d max_align:%d compact_size:%d" % (m_align, max_align, compact_size))
outstr += prefix + ("*%s," % MakeOffsetStr(m_offset)) + s + warningstr + debugstr + "\n"
_compact_offset += compact_size
# ---- fields ----
numFields = symbol.GetNumberOfFields()
#_has_memory_hole = False
# Bit-field state: a run of consecutive bit-field members shares one storage unit that
# starts at bitFieldStartByte and spans bitFieldTotalBytes; used_bits is the mask of bits
# already claimed in that unit. The next_* values carry bits over to the following unit
# when a member reports a negative bit offset.
inBitField = False
totalBits = 0
bitFieldStartByte = -1
bitFieldTotalBytes = -1
used_bits = 0
next_used_bits = 0
next_totalBits = 0
for i in range(numFields):
member = symbol.GetFieldAtIndex(i)
m_offset = member.GetOffsetInBytes() + begin_offset
m_size_bits = member.GetBitSize()
m_offset_bits = member.GetOffsetInBits()
# A member counts as a bit field when it carries an AT_bit_size attribute.
isBitField = hasattr(member, "AT_bit_size")
m_name = member.GetName()
m_type = member.GetType()
membertypename = m_type.GetName()
m_type = member.GetBaseType()
membertypeclass = m_type.GetClass()
m_size = m_type.GetByteSize()
if inBitField:
# continuing previously started bit fields?
if (not isBitField) or classType == lldb.eTypeClassUnion or (m_offset >= (bitFieldStartByte + bitFieldTotalBytes)):
# no, finish previously started bit field
if classType != lldb.eTypeClassUnion:
_compact_offset += bitFieldTotalBytes
inBitField = False
bitFieldStartByte = m_offset
used_bits = 0
totalBits = 0
if isBitField:
if not inBitField:
# new set of bit fields started
inBitField = True
bitFieldStartByte = m_offset
bitFieldTotalBytes = m_size
used_bits = next_used_bits
totalBits = next_totalBits
next_used_bits = 0
next_totalBits = 0
if next_used_bits:
print("Error: have carry over bits but not in new bit field next_used_bits:0x%08x at :%08x:" % (next_used_bits, member.address))
next_used_bits = 0
next_totalBits = 0
totalBits += m_size_bits
# Re-express the bit offset relative to the start of the current storage unit.
m_offset_bits += (m_offset - bitFieldStartByte) * 8
m_offset = bitFieldStartByte
# the type of a bitfield does not always mean the total size of all bit fields because you can mix sized types in a sequence of bit fields
while bitFieldTotalBytes * 8 < m_offset_bits + m_size_bits:
#print("[ bitFieldTotalBytes %d" % bitFieldTotalBytes)
bitFieldTotalBytes *= 2
#print("] bitFieldTotalBytes %d" % bitFieldTotalBytes)
# Build the " : width // first..last" annotation; a negative bit offset is shown as a
# range split across the end of the unit and the start (or as "nothing" in this unit).
if m_offset_bits >= 0:
bitfield = " : %d // %d..%d" % (m_size_bits, m_offset_bits, m_offset_bits + m_size_bits - 1)
elif m_size_bits + m_offset_bits > 0:
bitfield = " : %d // %d..%d,%d..%d" % (m_size_bits, bitFieldTotalBytes * 8 + m_offset_bits, bitFieldTotalBytes * 8 - 1, 0, m_size_bits + m_offset_bits - 1)
else:
bitfield = " : %d // %d..%d,nothing" % (m_size_bits, bitFieldTotalBytes * 8 + m_offset_bits, bitFieldTotalBytes * 8 - 1)
else:
bitFieldStartByte = m_offset
bitFieldTotalBytes = m_size
used_bits = 0
totalBits = 0
bitfield = ""
if next_used_bits:
print("Error: have carry over bits but not in bit field next_used_bits:0x%08x at :%08x:" % (next_used_bits, member.address))
next_used_bits = 0
next_totalBits = 0
warningstr = ""
# Compute the mask of bits this member occupies (thebits) so it can be checked against
# the bits already claimed in the current unit.
try:
if m_offset_bits >= 0:
thebits = (~(-1 << m_size_bits)) << m_offset_bits
else:
next_totalBits = -m_offset_bits
next_used_bits = (~(-1 << next_totalBits)) << (bitFieldTotalBytes * 8 + m_offset_bits)
thebits = (~(-1 << (m_size_bits + m_offset_bits))) << 0
except:
# negative bit offset means something like bit field overlaps next member... complicated
# NOTE(review): thebits is first assigned inside the try above; if the very first field
# lands here the print below would raise on the unbound name. The bare except also hides
# the actual exception type - confirm intended.
print("Error with bits used_bits:0x%08x thebits(previous):0x%08x size:%d offset:%d type:%s at :%08x:" % (used_bits, thebits, m_size_bits, m_offset_bits, m_offset_bits.__class__.__name__, member.address))
thebits = 0
if ((thebits & used_bits) != 0) or m_size_bits < 0 or m_size_bits + m_offset_bits > bitFieldTotalBytes * 8:
warningstr = " *** Possible bit field error ***"
used_bits = 0
used_bits |= thebits
debugstr = "" # + str((begin_offset, m_offset, _compact_offset, m_offset_bits, m_size, m_size_bits, thebits, used_bits))
# Aggregate members are rendered recursively (class_depth reset to 0); everything else
# becomes a single "+offset,[size] (type) name" line.
if membertypeclass == lldb.eTypeClassStruct or membertypeclass == lldb.eTypeClassUnion or membertypeclass == lldb.eTypeClassClass :
s, compact_size, m_align = _showStructPacking(m_type, membertypename, m_name, prefix+" ", depth + 1, 0, m_offset)
outstr += prefix + ("*%s," % MakeOffsetStr(m_offset)) + s
else:
outstr += prefix + ("+%s,[%4d] (%s) %s%s" % (MakeOffsetStr(m_offset), m_size, membertypename, m_name, bitfield))
compact_size = m_size
m_align = m_type.GetAlign()
if m_align > max_align:
max_align = m_align
#print(prefix, membertypename, m_name, "calcoff:0x%x actualoff:0x%x calcsize:%d actualsize:%d m_align:%d max_align:%d" % (_compact_offset, m_offset, compact_size, m_size, m_align, max_align))
# Compare the expected offset with the reported one; a gap that alignment alone
# explains is not reported.
if _compact_offset > m_offset:
warningstr = " *** Possible memory overlap (msize:%d align:%d calc:%d actual:%d) ***" % (m_size, m_align, _compact_offset, m_offset)
elif _compact_offset < m_offset:
align_offset = ((_compact_offset + m_align-1) & -m_align)
if align_offset != m_offset :
# ignore memory holes that may be caused by field alignment
#_has_memory_hole = True
warningstr = " *** Possible memory hole (msize:%d align:%d calcoff:0x%x calcaligned:0x%x actualoff:0x%x) ***" % (m_size, m_align, _compact_offset, align_offset, m_offset)
_compact_offset = m_offset
# Union members do not advance the offset; the largest member size is added once at the
# end. Bit-field members advance it only when their storage unit is closed.
if classType == lldb.eTypeClassUnion:
if m_size > max_union_member_size:
max_union_member_size = m_size
elif inBitField == False:
_compact_offset += m_size
outstr += warningstr + debugstr + "\n"
if next_used_bits:
print("Error: have carry over bits after fields next_used_bits:0x%08x at :%08x:" % (next_used_bits, member.address))
next_used_bits = 0
next_totalBits = 0
# Close a bit-field unit that was still open when the fields ran out.
if classType != lldb.eTypeClassUnion and inBitField:
_compact_offset += bitFieldTotalBytes
inBitField = False
# ---- virtual base classes (only when class_depth == 0) ----
# Their offsets are not read from the debug info: each one is placed at the next offset
# aligned to its own GetAlign(). The alignment returned by the recursive call is ignored.
if class_depth == 0 and hasattr(symbol, "AllVirtualBaseClasses"):
for virtualbaseclassinfo in symbol.AllVirtualBaseClasses:
member = virtualbaseclassinfo.member
m_type = member.GetType()
membertypename = m_type.GetName()
m_type = member.GetBaseType()
m_size = m_type.GetByteSize()
m_align = m_type.GetAlign()
m_offset = ((_compact_offset + m_align-1) & -m_align)
warningstr = " virtual"
debugstr = "" # + str((begin_offset, m_offset, _compact_offset, m_size))
#print(prefix, "V", membertypename, debugstr)
_compact_offset = m_offset
s, compact_size, a = _showStructPacking(m_type, membertypename, None, prefix+" ", depth + 1, 0, m_offset)
outstr += prefix + ("*%s," % MakeOffsetStr(m_offset)) + s + warningstr + debugstr + "\n"
_compact_offset += compact_size
outstr += prefix + "}"
# A union occupies as much as its largest member.
if classType == lldb.eTypeClassUnion:
_compact_offset += max_union_member_size
#if _has_memory_hole == True :
# outstr += " *** Warning: Struct layout leaves memory hole ***"
return outstr, _compact_offset - begin_offset, max_align
# Splits the type text of a virtual member function (as matched in doOneVTable) of the form
#   RETURN ()(/* [const] CLASS *[,] */ PARAMS)
# into its parts.  Written as a raw string so the regex escapes (\( \* ...) are not
# treated as (invalid) string escapes; the compiled pattern is unchanged.
vtableFunctionRE = re.compile(r"(.*?) \(\)\(/\*( const)? (.*?) \*,? \*/ ?(.*)\)")
# group(1) = function return type
# group(2) = ' const' (None when the method is not const)
# group(3) = class from artificial parameter
# group(4) = parameters
def doOneVTable(symbol, prefix, vtableinfo):
    """Render one vtable of *symbol* as text; return "" when it has no slots.

    symbol     -- the class the vtable belongs to (supplies the name and the
                  pointer size via compile_unit.addr_size)
    prefix     -- string prepended to every emitted line
    vtableinfo -- provides max (highest slot index), vPtrOffset and
                  mergedVTableEntries (slot index -> vtable item)

    Slots without an entry (or whose function has no name) are shown as a
    bare offset/size line.  Functions whose type text parses and whose
    container matches the artificial "this" parameter are shown in
    "ret Class::name(params) const" form; anything else falls back to the
    raw "(type) Class::name" form.
    """
    slot_count = vtableinfo.max + 1
    if slot_count <= 0:
        return ""

    pieces = []
    if vtableinfo.vPtrOffset == 0:
        pieces.append(prefix + "vtable for %s {\n" % (symbol.GetName()))
    else:
        pieces.append(prefix + "vtable for %s 0x%x {\n" % (symbol.GetName(), vtableinfo.vPtrOffset))

    entries = vtableinfo.mergedVTableEntries
    for slot in range(slot_count):
        func_name = None
        if slot in entries:
            item = entries[slot]
            die = item.DIE
            func_name = die.GetName()
            type_text = die.GetNameForType(True)
            owner = die.GetContainerType()
            if owner is None:
                owner_name = ""
                owner_qualifier = ""
            else:
                owner_name = owner.GetName()
                owner_qualifier = owner_name + "::"
            # ThunkType is only present on some items; it is placed before the class name.
            thunk_text = item.ThunkType if hasattr(item, "ThunkType") else ""

        slot_size = symbol.compile_unit.addr_size
        offset_text = MakeOffsetStr(slot * slot_size)

        if func_name is None:
            # Empty slot: offset and size only.
            pieces.append(prefix + ("+%s,[%4d]\n" % (offset_text, slot_size)))
            continue

        parsed = vtableFunctionRE.match(type_text)
        if parsed and owner_name == parsed.group(3):
            #print("vtablefunc", parsed.group(0), parsed.group(1), parsed.group(2), parsed.group(3), parsed.group(4))
            const_text = parsed.group(2) or ""
            return_text = "" if parsed.group(1) == "void" else parsed.group(1)
            pieces.append(prefix + ("+%s,[%4d] %s %s%s::%s(%s)%s\n" % (offset_text, slot_size, return_text, thunk_text, owner_name, func_name, parsed.group(4), const_text)))
        else:
            # Raw form, used both for unparsable type text and for a class mismatch.
            pieces.append(prefix + ("+%s,[%4d] (%s) %s%s\n" % (offset_text, slot_size, type_text, owner_qualifier, func_name)))
            if parsed:
                print("Error: containertype '%s' doesn't match artifical parameter '%s'" % (owner_name, parsed.group(3)))

    pieces.append(prefix + "}")
    return "".join(pieces)
def _showVTablePacking(symbol, prefix):
outstr = ""
if hasattr(symbol, "VTables"):
for vtableinfo in symbol.VTables:
vstr = doOneVTable(symbol, prefix, vtableinfo)
if len(vstr) > 0:
if len(outstr) > 0:
outstr += "\n\n"
outstr += "%s" % vstr
if hasattr(symbol, "VTablesVirtual"):
for vtableinfo in symbol.VTablesVirtual:
vstr = doOneVTable(symbol, prefix, vtableinfo)
if len(vstr) > 0:
if len(outstr) > 0:
outstr += "\n\n"
outstr += "%s" % vstr
return outstr
# Walk the DIE tree below parent and print, for each named type that has a class kind and
# children, its memory layout (_showStructPacking) followed by its vtables (_showVTablePacking).
#   parent - node whose .children are visited; every child is also recursed into
#   names  - None to dump everything, otherwise a container of type names to include
# Output goes to stdout; nothing is returned.
def DumpAllStructs(parent,names):
for child in parent.children:
if hasattr(child, "AT_name"):
# only dump named types
#print("address :%08x:" % member.address)
# also dump typedef'd structs too
member = child
membertypename = member.GetName()
if member.tag == "TAG_typedef":
# Resolve the typedef chain to the underlying type; the typedef's own name is kept as the display name.
while member.tag == "TAG_typedef" and member.HasType():
member = member.GetType() # follow typedefs
if member.GetName() == membertypename:
# don't do typedef if struct has same name, we'll do the struct when we get there
member = None
# GetClass() != None and a non-empty children list select the types that get dumped.
if member != None and member.GetClass() != None and len(member.children) > 0:
if names == None or member.GetName() in names:
print("==========================================================================================")
# The address (and declaration file, when present) shown is that of child, not of the resolved type.
if hasattr(child, "AT_decl_file"):
print('0x%08x: "%s"\n' % (child.address, child.AT_decl_file))
else:
print("0x%08x:\n" % child.address)
# Only the text is used; the returned size and alignment (n, a) are ignored.
s, n, a = _showStructPacking(member, membertypename, None, "", 0, 0, 0)
print(s)
# NOTE(review): the bare "print" lines are Python 2 statements that emit a blank line;
# under Python 3 they are expressions with no effect.
print
s = _showVTablePacking(member, "")
if s != "":
print(s)
print
print
#if member.GetName() = "_lck_grp_": break
# Recurse so that types nested inside this child are visited as well.
DumpAllStructs(child, names)
def DumpAllTypes(Hopper, parent):
    """Register every type DIE below *parent* with a Hopper type collector.

    Hopper -- object exposing the Add* methods called below (the parameter
              name shadows the Hopper class inside this function)
    parent -- node whose .children are visited; every child is recursed
              into regardless of its tag

    Tags in ignored_tags need no entry of their own.  An unrecognised tag is
    reported with an "Error:" line on stdout.
    """
    # Tags that are either irrelevant here or consumed while handling their parent.
    ignored_tags = (
        "TAG_compile_unit",
        "TAG_variable",
        "TAG_inheritance",                  # handled by TAG_class_type
        "TAG_member",                       # handled by TAG_class_type, TAG_structure_type, TAG_union_type (AT_data_member_location)
        "TAG_subprogram",                   # handled by TAG_class_type (AT_data_member_location)
        "TAG_formal_parameter",
        "TAG_subroutine_type",              # handled by TAG_pointer_type
        "TAG_subrange_type",                # handled by TAG_array_type
        "TAG_unspecified_parameters",       # handled by TAG_subroutine_type
        "TAG_enumerator",                   # handled by TAG_enumeration_type
        "TAG_lexical_block",
        "TAG_inlined_subroutine",
        "TAG_GNU_template_parameter_pack",  # template
        "TAG_imported_declaration",
        "TAG_imported_module",              # points to TAG_namespace
        "TAG_namespace",
        "TAG_template_type_parameter",
        "TAG_template_value_parameter",
        "TAG_unspecified_type",
    )
    for child in parent.children:
        name = child.AT_name if hasattr(child, "AT_name") else None
        attype = child.GetType() if child.HasType() else None
        tag = child.tag

        if tag in ignored_tags:
            pass
        elif tag == "TAG_pointer_type":
            if attype is not None and attype.tag == "TAG_subroutine_type":
                Hopper.AddFunctionPointer(child, name, attype)
            else:
                Hopper.AddPointer(child, name, attype)
        elif tag in ("TAG_reference_type", "TAG_rvalue_reference_type"):
            # References are stored as pointers.  An unnamed one gets a synthetic
            # name; "&_" is a made-up marker for rvalue references.
            if name is None:
                if tag == "TAG_reference_type":
                    bare, suffix = "&", " &"
                else:
                    bare, suffix = "&_", " &_"
                if attype is None:
                    name = bare
                else:
                    name = child.GetType().GetName() + suffix
            Hopper.AddPointer(child, name, attype)
        elif tag in ("TAG_const_type", "TAG_volatile_type"):
            # cv-qualified types are stored as typedefs named "<qualifier> <target>".
            if name is None:
                if tag == "TAG_const_type":
                    bare, with_space = 'const', 'const '
                else:
                    bare, with_space = 'volatile', 'volatile '
                if attype is None:
                    name = bare
                else:
                    name = with_space + child.GetType().GetName()
            Hopper.AddTypedef(child, name, attype, None)
        elif tag == "TAG_class_type":
            # might just be a declaration AT_declaration( true )
            # or it might contain children with AT_data_member_location
            # Direct inheritance TAG_inheritance DirectBaseClasses
            Hopper.AddClass(child, name, child)
        elif tag in ("TAG_structure_type", "TAG_union_type"):
            # might be a declaration - replace with define if it exists in same compileunit
            Hopper.AddStruct(child, name, child)
        elif tag == "TAG_typedef":
            Hopper.AddTypedef(child, name, attype, None)
        elif tag == "TAG_base_type":
            Hopper.AddBaseType(child)
        elif tag == "TAG_ptr_to_member_type":
            if attype is not None and attype.tag == "TAG_subroutine_type":
                Hopper.AddPointerToMember(child, name, attype)
            else:
                print("Error: unexpected tag :%08x:" % child.address)
        elif tag == "TAG_array_type":
            Hopper.AddArray(child, name, attype)
        elif tag == "TAG_enumeration_type":
            Hopper.AddEnumeration(child, name)
            #bytesize
        else:
            print("Error: unknown tag '%s':" % child.tag)

        DumpAllTypes(Hopper, child)
if __name__ == '__main__':
    # Command-line entry point: every argument is a dSYM (or a saved dwarfdump
    # .txt file) whose struct layouts and vtables are dumped to stdout.
    if not sys.argv[1:]:
        print('Expected usage: {0} <dsym>'.format(sys.argv[0]))
        sys.exit(1)

    # Only consumed by the disabled DumpAllTypes call below.
    H = Hopper()

    for path in sys.argv[1:]:
        print("==========================================================================================")
        print("The file: %s\n" % path)
        dSYMr = DSYM_Reader()
        dSYM = dSYMr.Process_dSYM(path)
        DumpAllStructs(dSYM, None)
        #••••••• TO DO: Finish DumpAllTypes
        #DumpAllTypes(H, dSYM)

    # Disabled debug dump of the collected Hopper types, kept as an inert string literal.
    '''
pp = pprint.PrettyPrinter(indent=4, depth=10)
pp.pprint(H.Types)
pp.pprint(H.UUIDs)
for k,v in H.UUIDs.items():
pp.pprint(k)
for attr, value in v.__dict__.iteritems():
print attr, value
print
'''
# (GitHub page footer, not part of the script:) Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment