Skip to content

Instantly share code, notes, and snippets.

@joevt
Last active May 18, 2023 14:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save joevt/da0500cd574f00042f0db61f9af5512f to your computer and use it in GitHub Desktop.
Save joevt/da0500cd574f00042f0db61f9af5512f to your computer and use it in GitHub Desktop.
Process DWARF and stabs debug information
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import tempfile
import subprocess
import re
import uuid
import pprint
#import lldb
# Non-zero enables the "if debug:" trace output used throughout this file.
debug = 0
# Current nesting depth used by dbgprint(); each level is four spaces.
dbgindent = 0


def dbgprint(thestring):
    """Print a trace line indented by the current brace-nesting depth.

    A line starting with '}' is printed one level shallower than the lines
    before it; a line starting with '{' makes the lines after it one level
    deeper.

    thestring -- text to print.  An empty string prints an empty line
                 (it used to raise IndexError).
    """
    global dbgindent
    # Slice instead of index so that an empty string is harmless.
    first = thestring[:1]
    if first == '}' and dbgindent > 0:
        dbgindent -= 1
    print("%*s%s" % (dbgindent * 4, "", thestring))
    if first == '{':
        if dbgindent < 100:
            dbgindent += 1
        else:
            # Depth limit reached: drop back to 48 levels rather than
            # growing the indentation without bound.
            dbgindent = 48
#=========================================================================================
class Hopper(dict):
    """Collects type records, in the layout described on Hopper.Type, from DIEs.

    Each Add* method creates one Hopper.Type record for a DIE, links the two
    together (record.DIE / DIE.type) and gives the DIE a uuid.

    NOTE(review): Types and UUIDs are class attributes, so every Hopper
    instance shares the same list and dict.
    """

    BaseTypes = [
        # Base types that exist in Hopper:
        {"uuid": "054086d7b17b4685971643925db72c00", "name": "void", "size": None, "preferred": False, "encoding": None},
        {"uuid": "054086d7b17b4685971643925db72c01", "name": "int8_t", "size": 1, "preferred": True, "encoding": "ATE_signed"},
        {"uuid": "054086d7b17b4685971643925db72c02", "name": "uint8_t", "size": 1, "preferred": True, "encoding": "ATE_unsigned"},
        {"uuid": "054086d7b17b4685971643925db72c03", "name": "int16_t", "size": 2, "preferred": True, "encoding": "ATE_signed"},
        {"uuid": "054086d7b17b4685971643925db72c04", "name": "uint16_t", "size": 2, "preferred": True, "encoding": "ATE_unsigned"},
        {"uuid": "054086d7b17b4685971643925db72c05", "name": "int32_t", "size": 4, "preferred": True, "encoding": "ATE_signed"},
        {"uuid": "054086d7b17b4685971643925db72c06", "name": "uint32_t", "size": 4, "preferred": True, "encoding": "ATE_unsigned"},
        {"uuid": "054086d7b17b4685971643925db72c07", "name": "int64_t", "size": 8, "preferred": True, "encoding": "ATE_signed"},
        {"uuid": "054086d7b17b4685971643925db72c08", "name": "uint64_t", "size": 8, "preferred": True, "encoding": "ATE_unsigned"},
        {"uuid": "054086d7b17b4685971643925db72c09", "name": "float", "size": 4, "preferred": True, "encoding": "ATE_float"},
        {"uuid": "054086d7b17b4685971643925db72c0a", "name": "double", "size": 8, "preferred": True, "encoding": "ATE_float"},
        {"uuid": "054086d7b17b4685971643925db72c0b", "name": "int", "size": None, "preferred": False, "encoding": "ATE_signed"},
        {"uuid": "054086d7b17b4685971643925db72c0c", "name": "unsigned int", "size": None, "preferred": False, "encoding": "ATE_unsigned"},
        {"uuid": "054086d7b17b4685971643925db72c0d", "name": "long", "size": 8, "preferred": False, "encoding": "ATE_signed"},
        {"uuid": "054086d7b17b4685971643925db72c0e", "name": "unsigned long", "size": 8, "preferred": False, "encoding": "ATE_unsigned"},
        {"uuid": "054086d7b17b4685971643925db72c0f", "name": "long long", "size": 8, "preferred": False, "encoding": "ATE_signed"},
        {"uuid": "054086d7b17b4685971643925db72c10", "name": "unsigned long long", "size": 8, "preferred": False, "encoding": "ATE_unsigned"},
        {"uuid": "054086d7b17b4685971643925db72c11", "name": "char", "size": 1, "preferred": True, "encoding": "ATE_signed_char"},
        {"uuid": "054086d7b17b4685971643925db72c12", "name": "short", "size": 2, "preferred": False, "encoding": "ATE_signed"},
        {"uuid": "054086d7b17b4685971643925db72c13", "name": "unsigned char", "size": 1, "preferred": True, "encoding": "ATE_unsigned_char"},
        {"uuid": "054086d7b17b4685971643925db72c14", "name": "unsigned short", "size": 2, "preferred": False, "encoding": "ATE_unsigned"},
        {"uuid": "054086d7b17b4685971643925db72c15", "name": "bool", "size": 1, "preferred": True, "encoding": "ATE_boolean"},
        # Base types that don't exist in Hopper with substitutes that exist in Hopper:
        {"uuid": "054086d7b17b4685971643925db72c04", "name": "char16_t", "size": 2, "preferred": True, "encoding": "ATE_UTF"},
        {"uuid": "054086d7b17b4685971643925db72c06", "name": "char32_t", "size": 4, "preferred": True, "encoding": "ATE_UTF"},
        # Base types that don't exist in Hopper:
        {"uuid": "054086d7b17b4685971643925db72e00", "name": "long double", "size": 16, "preferred": True, "encoding": "ATE_float"},
    ]

    class Type(dict):
        """One type record; its fields are stored as instance attributes."""
        # Type : 16 byte type uuid, 4 byte len + name, 2 byte type
        TYPE_pointer = 0x0011  # type uuid
        TYPE_struct = 0x0012  # 4 byte numfields * { 4 byte len + name, type uuid, byte format, 4 byte len + comment } null
        TYPE_union = 0x0013  # 4 byte numUnions * { 4 byte len + name, type uuid, byte format, 4 byte null }
        TYPE_array = 0x0014  # 4 byte count, type uuid
        TYPE_typedef = 0x0015  # 4 byte len + name, type uuid
        TYPE_function = 0x001b  # flag1 (ff), return type uuid, 2 byte numParams * {4 byte len + name, type uuid, byte format? }, ff=variadic, extra1 (6 null bytes), ff=no return, extra2 (0700=user input, or 0100=header import)
        TYPE_enumeration = 0x001c  # extra1 (00=user enum, 04=built-in enums), 4 byte numEnums * { 4 byte len + name, 8 byte signed value }

        FORMAT_DEFAULT = 0
        FORMAT_HEXADECIMAL = 1
        FORMAT_DECIMAL = 2
        FORMAT_OCTAL = 3
        FORMAT_CHARACTER = 4
        FORMAT_STACKVARIABLE = 5
        FORMAT_OFFSET = 6
        FORMAT_ADDRESS = 7
        FORMAT_FLOAT = 8
        FORMAT_BINARY = 9
        FORMAT_STRUCTURED = 10
        FORMAT_ENUM = 11
        FORMAT_ADDRESS_DIFF = 12
        FORMAT_NEGATE = 0x20
        FORMAT_LEADINGZEROES = 0x40
        FORMAT_SIGNED = 0x80

    Types = []  # every record created by AddType, in creation order
    UUIDs = {}  # uuid -> DIE, used to detect collisions

    def NewUUID(self, DIE):
        """Give DIE a uuid derived from its compile unit path and address.

        Prints an error and leaves the DIE alone if it already has a uuid;
        prints an error if the new uuid is already registered.
        """
        if hasattr(DIE, 'uuid'):
            print ("Error: uuid already created :%s:" % DIE.GetAddress())
            return
        unit = DIE.compile_unit
        # Only prepend the compile directory when the unit name does not
        # already contain it.
        if unit.AT_comp_dir in unit.AT_name:
            seed = unit.AT_name
        else:
            seed = unit.AT_comp_dir + unit.AT_name
        DIE.uuid = uuid.uuid5(uuid.NAMESPACE_URL, seed + ("%s" % DIE.GetAddress()))
        if DIE.uuid in self.UUIDs:
            print ("Error: uuid collision %s" % DIE.GetAddress())
        else:
            self.UUIDs[DIE.uuid] = DIE

    def AddType(self, name, typetype, DIE):
        """Create, register and return a Type record of kind typetype for DIE."""
        entry = self.Type()
        entry.type = typetype
        entry.DIE = DIE
        entry.name = name
        DIE.type = entry
        self.NewUUID(DIE)
        self.Types.append(entry)
        return entry

    def AddFunctionPointer(self, DIE, name, attype):
        """Record a function type for DIE, described by the DIE attype.

        attype supplies the return type (None when it has no type) and, via
        its children, the parameters.  An unnamed artificial parameter is
        called "this"; other unnamed parameters get the name None.
        """
        # Find artificial
        # ^0x\w+: +TAG_formal_parameter.*\n( +AT_.*\n)* +AT_artificial.*\n( +AT_.*\n)*
        entry = self.AddType(name, self.Type.TYPE_function, DIE)
        entry.returntype = attype.GetType() if attype.HasType() else None
        entry.variadic = False
        entry.params = []
        # A function without parameters has no "children" attribute at all.
        for child in getattr(attype, "children", []):
            if child.tag == "TAG_formal_parameter":
                if not child.HasType():
                    print("Error: unknown parameter type %s" % child.GetAddress())
                    continue
                if hasattr(child, "AT_name"):
                    paramname = child.AT_name
                elif hasattr(child, "AT_artificial"):
                    paramname = "this"
                else:
                    paramname = None
                entry.params.append({"name": paramname, "attype": child.GetType()})
            elif child.tag == "TAG_unspecified_parameters":
                entry.variadic = True
            else:
                print("Error: unknown parameter type %s" % child.GetAddress())

    def _SyntheticPointerDIE(self, DIE, suffix, attype):
        """Create and register a TAG_pointer_type DIE derived from DIE."""
        synthetic = DIEDict()
        synthetic.dSYM = DIE.dSYM
        synthetic.address = ("%s%s" % (DIE.GetAddress(), suffix))
        synthetic.tag = "TAG_pointer_type"
        synthetic.attype = attype
        synthetic.compile_unit = DIE.compile_unit
        synthetic.dSYM.DIELookup[synthetic.address] = synthetic
        return synthetic

    def AddPointerToMember(self, DIE, name, attype):
        """Record a pointer-to-member as a struct of two synthetic pointers.

        The first field is a function pointer described by attype, the
        second a pointer to DIE's containing type.

        NOTE(review): the synthetic DIEs get a *string* address, which
        DIEDict.GetAddress() cannot format with "%08x"/"%d" - confirm how
        these are meant to be addressed.
        """
        # Find TAG_ptr_to_member_type
        # ^0x\w+: +TAG_ptr_to_member_type.*\n( +AT_.*\n)*
        entry = self.AddType(name, self.Type.TYPE_struct, DIE)
        DIEf = self._SyntheticPointerDIE(DIE, "_1", DIE.attype)
        DIEc = self._SyntheticPointerDIE(DIE, "_2", DIE.GetContainingType())
        # These are methods; calling them as bare names raised NameError.
        self.AddFunctionPointer(DIEf, None, attype)
        self.AddPointer(DIEc, None, DIEc.attype)
        entry.fields = [{"name": None, "attype": DIEf}, {"name": None, "attype": DIEc}]

    def AddPointer(self, DIE, name, attype):
        """Record a pointer type for DIE that points at attype."""
        entry = self.AddType(name, self.Type.TYPE_pointer, DIE)
        entry.attype = attype

    def AddStruct(self, DIE, name, child):
        """Record a struct for DIE with an (as yet) empty field list."""
        # Find multiple inheritance
        # 0x\w+: +TAG_inheritance.*\n( +AT_.*\n)*\n0x(\w+): +TAG_inheritance.*
        # Find bit fields
        # 0x\w+: +TAG_.*\n( +AT_.*\n)* +AT_\w*bit_offset.*\n( +AT_.*\n)*
        entry = self.AddType(name, self.Type.TYPE_struct, DIE)
        entry.fields = []

    def AddClass(self, DIE, name, child):
        """Record a class for DIE as a struct with an empty field list."""
        entry = self.AddType(name, self.Type.TYPE_struct, DIE)
        entry.fields = []

    def AddTypedef(self, DIE, name, attype, atuuid):
        """Record a typedef for DIE.

        attype -- DIE of the aliased type, or None for a base type.
        atuuid -- uuid of the aliased base type when attype is None.
        """
        # attype is None for base type, use uuid instead
        entry = self.AddType(name, self.Type.TYPE_typedef, DIE)
        entry.attype = attype
        # Store the argument; the uuid *module* used to be stored here.
        entry.atuuid = atuuid

    def AddBaseType(self, DIE):
        """Map a base-type DIE onto an entry of BaseTypes.

        An exact match on name, size and encoding reuses that entry's uuid.
        Otherwise the DIE becomes a typedef of the first preferred entry
        with the same size and encoding.  Prints an error if neither exists.
        """
        for basetype in self.BaseTypes:
            if (basetype['name'] == DIE.AT_name
                    and basetype['size'] == DIE.AT_byte_size
                    and basetype['encoding'] == DIE.AT_encoding):
                DIE.uuid = basetype['uuid']
                DIE.baseHopperType = True
                return
        for basetype in self.BaseTypes:
            if (basetype['preferred'] == True
                    and basetype['size'] == DIE.AT_byte_size
                    and basetype['encoding'] == DIE.AT_encoding):
                self.AddTypedef(DIE, DIE.AT_name, None, basetype['uuid'])
                return
        print("Error: cannot find a base type %s" % DIE.GetAddress())

    def AddArray(self, DIE, name, attype):
        """Record one array type per subrange child of the array DIE.

        Subranges are walked from last to first.  Each subrange's array has
        the previously processed subrange (or attype for the last one) as
        element type; the first subrange's array is the named one attached
        to DIE itself.
        """
        # Find multiple sub ranges examples:
        # ^0x\w+:([ ]+)TAG_subrange_type.*\n( +AT_.*\n)*\n0x\w+:\1TAG
        curType = attype
        children = getattr(DIE, "children", [])
        # Stop value is -1 so that index 0 (the named, outermost array) is
        # reached; the old stop value of 0 skipped it.
        for i in range(len(children) - 1, -1, -1):
            child = children[i]
            if child.tag == "TAG_subrange_type":
                if hasattr(child, "AT_count"):
                    if i == 0:
                        entry = self.AddType(name, self.Type.TYPE_array, DIE)
                    else:
                        entry = self.AddType(None, self.Type.TYPE_array, child)
                    entry.attype = curType
                    # The count lives on the subrange, not on the array DIE.
                    entry.count = child.AT_count
                else:
                    print ("Error getting count %s" % child.GetAddress())
            else:
                print ("Error getting count %s" % DIE.GetAddress())
            curType = child

    def AddEnumeration(self, DIE, name):
        """Record an enumeration for DIE with one entry per TAG_enumerator child."""
        # Find multiple enumerations examples:
        # ^0x\w+:([ ]+)TAG_enumerator.*\n( +AT_.*\n)*\n0x\w+:\1TAG
        entry = self.AddType(name, self.Type.TYPE_enumeration, DIE)
        entry.size = DIE.AT_byte_size
        entry.enumerations = []
        for child in DIE.children:
            if child.tag == "TAG_enumerator":
                entry.enumerations.append({"name": child.AT_name, "value": child.AT_const_value})
            else:
                print ("Error getting enumeration %s" % child.GetAddress())

    def DumpHex(self):
        """Not implemented yet; the planned steps are listed below."""
        # create types for unknown base types such as "long double"
        # go through all Types and delete duplicates
        # go through all pointers, if pointer to hopper base type then replace pointer uuid with base type uuid
        # pointer with no type void *
        return
#=========================================================================================
class lldb(dict):
    """Local stand-in providing the eTypeClass* constants used by DIEDict.GetClass().

    The real "import lldb" is commented out at the top of the file.
    """
    eTypeClassClass, eTypeClassUnion, eTypeClassStruct = -1, -2, -3
#=========================================================================================
# Splits a type name into (everything before, trailing "[...]" array suffix).
# Raw string: "\[" and "\d" are invalid escapes in a plain string literal.
arrtypeRE = re.compile(r'^(.*?)((?:\[[\]\[\d]*)?)$')
#=========================================================================================
class DIEDict(dict):
    """One debug information entry (DIE).

    Although this derives from dict, all data is kept in plain instance
    attributes: tag, address, dSYM, compile_unit, parent, children and one
    AT_* attribute per parsed attribute.  Optional attributes are probed
    with hasattr() throughout.
    """

    _RECORD_TAGS = ("TAG_class_type", "TAG_structure_type", "TAG_union_type")
    _ANONYMOUS_NAMES = {
        "TAG_structure_type": "(anonymous struct)",
        "TAG_union_type": "(anonymous union)",
        "TAG_class_type": "(anonymous class)",
        "TAG_enumeration_type": "(anonymous enum)",
    }

    def GetOffsetInBytes(self):
        """Return the member's byte offset, or None when no offset is recorded.

        A non-integer AT_data_member_location is reported and treated as 0.
        """
        if hasattr(self, "AT_data_member_location"):
            location = self.AT_data_member_location
            if location.__class__.__name__ != "int":
                print("Error in tag %s %s (AT_data_member_location) containing value (%s)" % (self.GetAddress(), self.tag, location))
                return 0
            return location
        if hasattr(self, "AT_data_bit_offset"):
            return self.AT_data_bit_offset >> 3
        return None

    def GetOffsetInBits(self):
        """Return the bit offset: AT_bit_offset, else the low 3 bits of AT_data_bit_offset, else 0."""
        if hasattr(self, "AT_bit_offset"):
            return self.AT_bit_offset
        if hasattr(self, "AT_data_bit_offset"):
            return self.AT_data_bit_offset & 7
        return 0

    def GetClass(self):
        """Return the lldb.eTypeClass* constant for class/union/struct tags, else None."""
        if self.tag == "TAG_class_type":
            return lldb.eTypeClassClass
        if self.tag == "TAG_union_type":
            return lldb.eTypeClassUnion
        if self.tag == "TAG_structure_type":
            return lldb.eTypeClassStruct
        return None

    def GetNumberOfDirectBaseClasses(self):
        """Return the length of DirectBaseClasses (0 when the list does not exist)."""
        return len(getattr(self, "DirectBaseClasses", ()))

    def GetDirectBaseClassAtIndex(self, i):
        """Return the i-th non-virtual inheritance DIE."""
        return self.DirectBaseClasses[i]

    def GetNumberOfVirtualBaseClasses(self):
        """Return the length of VirtualBaseClasses (0 when the list does not exist)."""
        return len(getattr(self, "VirtualBaseClasses", ()))

    def GetVirtualBaseClassAtIndex(self, i):
        """Return the i-th virtual inheritance DIE."""
        return self.VirtualBaseClasses[i]

    def GetNumberOfFields(self):
        """Return the length of Fields (0 when the list does not exist)."""
        return len(getattr(self, "Fields", ()))

    def GetFieldAtIndex(self, i):
        """Return the i-th entry of Fields."""
        return self.Fields[i]

    def HasType(self):
        """Return True when this DIE has an AT_type attribute."""
        return hasattr(self, "AT_type")

    def GetType(self):
        """Return the DIE that AT_type refers to; print an error and return None if there is none."""
        if self.HasType():
            return self.dSYM.DIELookup[self.AT_type]
        print("Error getting type %s" % self.GetAddress())
        return None

    def GetBaseType(self):
        """Return the referenced type with any chain of typedefs resolved.

        Prints an error and returns None when this DIE has no type.
        """
        if not self.HasType():
            print("Error getting base type %s" % self.GetAddress())
            return None
        result = self.GetType()
        if result.tag == "TAG_typedef":
            return result.GetBaseType()
        return result

    def GetContainingType(self):
        """Return the DIE that AT_containing_type refers to, or None (with an error printed)."""
        if hasattr(self, "AT_containing_type"):
            return self.dSYM.DIELookup[self.AT_containing_type]
        print("Error getting conter type %s" % self.GetAddress())
        return None

    @staticmethod
    def _AppendStar(typename):
        """Append a pointer star, adding a space unless the name already ends in '*'."""
        if typename[-1:] == "*":
            return typename + "*"
        return typename + " *"

    def _ArrayTypeName(self):
        """Build the name of an array type, e.g. "int [2][3]"."""
        counts = self.GetCounts()
        if self.HasType():
            arrtype = self.GetType().GetName()
        else:
            print("Error getting type %s" % self.GetAddress())
            arrtype = "void"
        countstr = ""
        for count in counts:
            if count is None:
                countstr += "[]"
            else:
                countstr += "[%d]" % count
        p = arrtypeRE.match(arrtype)
        if p:
            # handles array of array: new dimensions go before existing ones
            return p.group(1) + countstr + p.group(2)
        print("Error parsing type name for array %s %s" % (self.GetAddress(), arrtype))
        return arrtype + countstr

    def _FunctionTypeName(self):
        """Build "<return type> ()(<parameters>)"; artificial parameters are commented out."""
        if self.HasType():
            returntype = self.GetType().GetName()
        else:
            returntype = "void"
        result = returntype + " ()("
        if hasattr(self, "children"):
            total = len(self.children)
            for i, child in enumerate(self.children, 1):
                notlast = i < total
                if child.HasType():
                    paramtype = child.GetType().GetName()
                    if hasattr(child, "AT_artificial"):
                        if notlast:
                            result += "/* " + paramtype + ", */ "
                        else:
                            result += "/* " + paramtype + " */"
                    else:
                        result += paramtype
                        if notlast:
                            result += ", "
                elif child.tag == "TAG_unspecified_parameters":
                    result += "..."
                else:
                    print("Error: unknown parameter type %s" % self.GetAddress())
        result += ")"
        return result

    def GetNameForType(self, forType):
        """Return this DIE's name, synthesising one from its tag when needed.

        forType -- when true, AT_name is ignored and the name is always
                   synthesised from the tag and the referenced type.
        """
        if (not forType) and hasattr(self, "AT_name"):
            return self.AT_name
        tag = self.tag
        if tag in self._ANONYMOUS_NAMES:
            return self._ANONYMOUS_NAMES[tag]
        if tag == "TAG_const_type":
            if self.HasType():
                return "const " + self.GetType().GetName()
            return "const void"
        if tag == "TAG_volatile_type":
            if self.HasType():
                return "volatile " + self.GetType().GetName()
            return "volatile void"
        if tag == "TAG_pointer_type":
            if self.HasType():
                return self._AppendStar(self.GetType().GetName())
            return "void *"
        if tag == "TAG_reference_type":
            if self.HasType():
                return "&" + self.GetType().GetName()
            return "& void"
        if tag == "TAG_ptr_to_member_type":
            if self.HasType():
                result = self.GetType().GetName()
            else:
                print("Error getting type %s" % self.GetAddress())
                result = "void"
            return self._AppendStar(result)
        if tag == "TAG_array_type":
            return self._ArrayTypeName()
        if tag == "TAG_subroutine_type" or tag == "TAG_subprogram":
            return self._FunctionTypeName()
        if tag == "TAG_member":
            if self.GetType().GetClass() == lldb.eTypeClassUnion:
                return ""  # unnamed union member
            # NOTE(review): other unnamed members yield None; callers such
            # as MakeVTables test the name against None.  Confirm this is
            # the intended result rather than "".
            return None
        if tag == "TAG_typedef":
            return self.GetType().GetNameForType(forType)
        print("Error getting name for %s %s" % (self.GetAddress(), self.tag))
        return ""

    def GetName(self):
        """Return the DIE's own name if it has one, else a synthesised name."""
        return self.GetNameForType(False)

    def GetByteSizeForAlign0(self, forAlign, class_depth=0):
        """Worker for GetByteSizeForAlign (which adds the recursion guard).

        forAlign    -- when true, return the size that determines alignment
                       (for records: the largest member alignment; for
                       arrays: the element alignment) instead of the size.
        class_depth -- 0 for the outermost class; virtual base classes are
                       only considered at depth 0.
        """
        if forAlign and self.tag in self._RECORD_TAGS:
            max_align = 1
            for i in range(self.GetNumberOfDirectBaseClasses()):
                m_type = self.GetDirectBaseClassAtIndex(i).GetBaseType()
                max_align = max(max_align, m_type.GetAlign(class_depth + 1))
            for i in range(self.GetNumberOfFields()):
                m_type = self.GetFieldAtIndex(i).GetBaseType()
                max_align = max(max_align, m_type.GetAlign())
            if class_depth == 0 and hasattr(self, "AllVirtualBaseClasses"):
                for virtualbaseclassinfo in self.AllVirtualBaseClasses:
                    m_type = virtualbaseclassinfo.member.GetBaseType()
                    max_align = max(max_align, m_type.GetAlign())
            return max_align
        if hasattr(self, "AT_byte_size"):
            if (self.AT_byte_size == 1
                    and (self.tag == "TAG_class_type" or self.tag == "TAG_structure_type")
                    and self.GetNumberOfFields() == 0):
                # classes have size 1 when they don't contain any fields
                return 0
            return self.AT_byte_size
        if self.tag in ("TAG_const_type", "TAG_volatile_type", "TAG_typedef"):
            return self.GetType().GetByteSizeForAlign(forAlign)
        if self.tag == "TAG_pointer_type":
            return self.compile_unit.addr_size
        if self.tag == "TAG_ptr_to_member_type":
            return self.compile_unit.addr_size * 2
        if self.tag == "TAG_array_type":
            if forAlign:
                return self.GetType().GetAlign()
            size = self.GetType().GetByteSize()
            for count in self.GetCounts():
                # an unknown dimension counts as zero elements
                size *= 0 if count is None else count
            return size
        if self.HasType():
            if forAlign:
                return self.GetType().GetAlign()
            return self.GetType().GetByteSize()
        if hasattr(self, "AT_bit_size") and hasattr(self, "AT_data_bit_offset"):
            # Bytes touched by the bit field.  Parenthesised because "+"
            # binds tighter than "&"; rounded up to whole bytes in the same
            # way GetCompactSize does.
            return ((self.AT_data_bit_offset & 7) + self.AT_bit_size + 7) >> 3
        if self.tag == "TAG_structure_type" and self.GetNumberOfFields() == 0:
            return 0
        print("Error getting byte size for %s %s %s class_depth:%d forAlign:%d" % (self.GetAddress(), self.tag, self.GetName(), class_depth, forAlign))
        return 1

    def GetByteSizeForAlign(self, forAlign, class_depth=0):
        """Return the byte size (or alignment size, see GetByteSizeForAlign0).

        A DIE that is reached again while its own size is being computed
        reports an error and yields 1.
        """
        if hasattr(self, "GetByteSizeForAlignFlag"):
            print("Error recursive GetByteSizeForAlign %s" % self.GetAddress())
            return 1
        self.GetByteSizeForAlignFlag = 1
        try:
            return self.GetByteSizeForAlign0(forAlign, class_depth)
        finally:
            # Always clear the guard, even if the computation raised, so a
            # later call is not misreported as recursion.
            del self.GetByteSizeForAlignFlag

    def GetByteSize(self):
        """Return the size of this DIE in bytes."""
        return self.GetByteSizeForAlign(False)

    def GetCompactSize(self, class_depth=0):
        """Return the offset just past the last field (or last direct base).

        Only classes and structures are treated this way; any other DIE
        returns GetByteSize().  Virtual base classes are not included.
        """
        # doesn't include virtual classes
        if self.tag != "TAG_class_type" and self.tag != "TAG_structure_type":
            return self.GetByteSize()
        if hasattr(self, "GetCompactSizeFlag"):
            print("Error recursive GetCompactSize %s" % self.GetAddress())
            return 1
        self.GetCompactSizeFlag = 1
        try:
            m_offset = None
            numFields = self.GetNumberOfFields()
            if numFields > 0:
                member = self.GetFieldAtIndex(numFields - 1)
                # The bit-field attributes belong to the member, so test the
                # member (the class itself used to be tested).
                if hasattr(member, "AT_bit_size") and hasattr(member, "AT_data_bit_offset"):
                    m_offset = (member.AT_data_bit_offset + member.AT_bit_size + 7) >> 3
                else:
                    m_offset = member.GetOffsetInBytes() + member.GetByteSize()
            if m_offset is None:
                numClasses = self.GetNumberOfDirectBaseClasses()
                if numClasses > 0:
                    member = self.GetDirectBaseClassAtIndex(numClasses - 1)
                    m_type = member.GetBaseType()
                    # Ask the base *class* for its compact size; asking the
                    # inheritance DIE only ever gave the padded byte size.
                    m_offset = member.GetOffsetInBytes() + m_type.GetCompactSize(class_depth + 1)
            if m_offset is None:
                m_offset = 0
            return m_offset
        finally:
            del self.GetCompactSizeFlag

    def GetAlign(self, class_depth=0):
        """Return the alignment (8, 4, 2 or 1) implied by the alignment size."""
        m_size = self.GetByteSizeForAlign(True, class_depth)
        for m_align in (8, 4, 2):
            if m_size >= m_align:
                return m_align
        return 1

    def GetCounts(self):
        """Return one element count per TAG_subrange_type child.

        The count comes from AT_count, else from the bounds
        (upper + 1 - lower, lower defaulting to 0), else it is None.
        Children with any other tag are reported and skipped.
        """
        counts = []
        for child in getattr(self, "children", ()):
            if child.tag != "TAG_subrange_type":
                print ("Error getting count %s" % self.GetAddress())
            elif hasattr(child, "AT_count"):
                counts.append(child.AT_count)
            elif hasattr(child, "AT_upper_bound"):
                if hasattr(child, "AT_lower_bound"):
                    counts.append(child.AT_upper_bound + 1 - child.AT_lower_bound)
                else:
                    counts.append(child.AT_upper_bound + 1)
            else:
                counts.append(None)
        return counts

    def GetBitSize(self):
        """Return AT_bit_size when present, otherwise eight times the byte size."""
        if hasattr(self, "AT_bit_size"):
            return self.AT_bit_size
        return self.GetByteSize() * 8

    def UpdateBaseClassLists(self):
        """Append this inheritance DIE to its parent's base-class list.

        DIEs with AT_virtuality go to parent.VirtualBaseClasses, all others
        to parent.DirectBaseClasses; the list is created on first use.
        """
        if hasattr(self, "AT_virtuality"):
            if debug: dbgprint("UpdateBaseClassLists virtual base class %s" % self.parent.GetAddress())
            listname = "VirtualBaseClasses"
        else:
            if debug: dbgprint("UpdateBaseClassLists direct base class %s" % self.parent.GetAddress())
            listname = "DirectBaseClasses"
        if not hasattr(self.parent, listname):
            setattr(self.parent, listname, [])
        getattr(self.parent, listname).append(self)

    def CheckName(self):
        """Remove a blank AT_name ("" or " ") and set isnameblank instead."""
        if self.AT_name != " " and self.AT_name != "":
            return
        self.isnameblank = 1
        del self.AT_name

    def SetName(self, AT_name):
        """Set AT_name, reporting a change of an existing name; blank names are removed."""
        if hasattr(self, "AT_name") and self.AT_name != AT_name:
            print("Error: DIE (%s) name (%s) changed to (%s)" % (self.GetAddress(), self.AT_name, AT_name))
        self.AT_name = AT_name
        self.CheckName()

    def SetNameWithClass(self, nameLong):
        """Set nameLong, reporting a change of an existing value."""
        if hasattr(self, "nameLong") and self.nameLong != nameLong:
            print("Error: DIE (%s) name (%s) changed to (%s)" % (self.GetAddress(), self.nameLong, nameLong))
        self.nameLong = nameLong

    def SetType(self, AT_type):
        """Set AT_type, reporting a change of an existing value."""
        if hasattr(self, "AT_type") and self.AT_type != AT_type:
            print("Error: DIE (%s) type (%s) changed to (%s)" % (self.GetAddress(), self.AT_type, AT_type))
        self.AT_type = AT_type

    def SetSize(self, AT_byte_size):
        """Set AT_byte_size, replacing any AT_bit_size; conflicts are reported."""
        if hasattr(self, "AT_bit_size"):
            print("Error: DIE (%s) size (%d bits) changed to (%d)" % (self.GetAddress(), self.AT_bit_size, AT_byte_size))
            del self.AT_bit_size
        elif hasattr(self, "AT_byte_size") and self.AT_byte_size != AT_byte_size:
            print("Error: DIE (%s) size (%d) changed to (%d)" % (self.GetAddress(), self.AT_byte_size, AT_byte_size))
        self.AT_byte_size = AT_byte_size

    def SetTag(self, tag):
        """Set the tag.

        Changing an existing tag is reported as an error unless the DIE
        carries a firsttag marker, which is consumed instead.
        """
        if hasattr(self, "tag") and self.tag != tag:
            if hasattr(self, "firsttag"):
                del self.firsttag
            else:
                print("Error: DIE (%s) tag (%s) changed to (%s)" % (self.GetAddress(), self.tag, tag))
        self.tag = tag

    def Settypenumber(self, typenumber):
        """Store typenumber and register this DIE under it in dSYM.DIELookup."""
        self.typenumber = typenumber
        self.dSYM.DIELookup[typenumber] = self
        if debug: dbgprint("Added to DIELookup «%s»" % (self.dSYM.DIELookup[typenumber].typenumber))

    def AppendChild(self, DIE):
        """Append DIE to children, creating the list on first use."""
        if not hasattr(self, "children"):
            self.children = []
        self.children.append(DIE)

    def GetAddress(self):
        """Return the address formatted for messages.

        stabs: "[<address>]" plus " «<typenumber>»" when one is set;
        otherwise ":<address as 8 hex digits>:".
        """
        if self.dSYM.sourceType != "stabs":
            return (":%08x:" % (self.address))
        if hasattr(self, "typenumber"):
            return ("[%d] «%s»" % (self.address, self.typenumber))
        return ("[%d]" % (self.address))
#=========================================================================================
# make VTables
class VTableInfo(dict):
    """Attribute bag for one vtable (vPtrOffset, mergedVTableEntries, vmax, derivationPathText)."""
class VTableItem(dict):
    """Attribute bag for one vtable slot (DIE and, for thunks, ThunkType)."""
class DerivationItem(dict):
    """Attribute bag for one step of a derivation path (DIE, index, isvirtual, VTables, derivationPathText)."""
class VirtualBaseClassInfo(dict):
    """Attribute bag for one placed virtual base class (member, offset)."""
def CheckVTables(msg, parent):
    """Walk the DIE tree below parent and touch every virtual base class.

    For each descendant, every virtual base class member is fetched and its
    base type resolved (which prints an error if it cannot be resolved);
    with debug enabled each one is also traced together with msg.
    """
    # for every class or structure, make a list of vtables (more than one vtable exists for multiple inheritance)
    if not hasattr(parent, "children"):
        return
    for node in parent.children:
        count = node.GetNumberOfVirtualBaseClasses()
        index = 0
        while index < count:
            member = node.GetVirtualBaseClassAtIndex(index)
            if debug: dbgprint("%d %s Got virtual base class %s %s" % (index, node.GetAddress(), member.GetAddress(), msg))
            # Result is unused; the call is kept for its error reporting.
            member.GetBaseType()
            index += 1
        CheckVTables(msg, node)
def dumpderivationpath(derivationPath):
    """Format a derivation path as "index:[virtual ]name" items joined by commas."""
    parts = []
    for step in derivationPath:
        virtual = "virtual " if step.isvirtual else ""
        parts.append("%d:%s%s" % (step.index, virtual, step.DIE.GetName()))
    return ",".join(parts)
def MakeVTables(derivationPath, begin_offset=0):
    """Build the vtables of derivationPath[0].DIE reachable through derivationPath[-1].DIE.

    derivationPath -- list of DerivationItem from the class being described
                      (first) down to the class examined now (last).  It is
                      extended and restored while recursing.
    begin_offset   -- byte offset of the last class inside the first one.

    If the last class declares an artificial field whose name contains
    "vptr", a VTableInfo is created (or reused) for that offset and filled
    from the VTableEntries of the classes on the path.  Otherwise the
    direct base classes are followed.  Virtual base classes are always
    followed and are placed at thefirst.VirtualBaseClassOffset.

    NOTE(review): the statement nesting was reconstructed from a copy of
    this file that had lost its indentation - in particular that the
    virtual-base loop runs whether or not a vptr was found, and which "if"
    each "else" belongs to.  Confirm against the original layout.
    """
    prefix = "%*s" % (4 * len(derivationPath), "")
    thefirst = derivationPath[0].DIE
    thelast = derivationPath[-1].DIE
    #print("%sMakeVTables offset:%d first:%s last:%s" % (prefix, begin_offset, thefirst.GetAddress(), thelast.GetAddress()))

    # Does this class contain a vPtr?
    numFields = thelast.GetNumberOfFields()
    vPtrOffset = None
    for i in range(numFields):
        member = thelast.GetFieldAtIndex(i)
        thename = member.GetName()
        if thename is not None:
            #print("%smember:%s" % (prefix, thename))
            if "vptr" in thename and hasattr(member, "AT_artificial"):
                vPtrOffset = member.GetOffsetInBytes() + begin_offset
                if debug: print("%sfound vptr (%s) classoffset:0x%x vptroffset:0x%x" % (prefix, dumpderivationpath(derivationPath), begin_offset, vPtrOffset))
                break

    if vPtrOffset is None:
        # No vPtr exists, follow base classes
        numClasses = thelast.GetNumberOfDirectBaseClasses()
        for i in range(numClasses):
            member = thelast.GetDirectBaseClassAtIndex(i)
            m_offset = member.GetOffsetInBytes() + begin_offset
            m_type = member.GetBaseType()
            derivationItem = DerivationItem()
            derivationItem.DIE = m_type
            derivationItem.index = i
            derivationItem.isvirtual = False
            derivationItem.VTables = thefirst.VTables
            derivationItem.derivationPathText = derivationPath[-1].derivationPathText + "%02d" % (i + 1)
            derivationPath.append(derivationItem)
            if debug: print("%s[ derived (%s) classoffset:0x%x baseclassoffset:0x%x" % (prefix, dumpderivationpath(derivationPath), begin_offset, m_offset))
            MakeVTables(derivationPath, m_offset)
            if debug: print("%s] derived (%s) classoffset:0x%x baseclassoffset:0x%x" % (prefix, dumpderivationpath(derivationPath), begin_offset, m_offset))
            derivationPath.pop()

    numClasses = thelast.GetNumberOfVirtualBaseClasses()
    for i in range(numClasses):
        member = thelast.GetVirtualBaseClassAtIndex(i)
        if debug: print("%s%d Checking virtual base class %s numv:%d" % (prefix, i, member.GetAddress(), len(thefirst.AllVirtualBaseClasses)))
        m_type = member.GetBaseType()
        # One key form for both lookup and store (see the store below).
        vbasekey = m_type.GetAddress()
        if vbasekey in thefirst.IncludedVirtualBaseClasses:
            # This virtual base was already placed: reuse its offset.
            virtualbaseclassinfo = thefirst.IncludedVirtualBaseClasses[vbasekey]
            m_offset = virtualbaseclassinfo.offset
            derivationItem = DerivationItem()
            derivationItem.DIE = m_type
            derivationItem.index = 0
            derivationItem.isvirtual = True
            derivationItem.VTables = thefirst.VTablesVirtual
            derivationItem.derivationPathText = derivationPath[-1].derivationPathText + "%02d" % (i + 31)
            derivationPath.append(derivationItem)
            if debug: print("%s[ virtual 2nd derived (%s) classoffset:0x%x baseclassoffset:0x%x member%s type%s" % (prefix, dumpderivationpath(derivationPath), begin_offset, m_offset, member.GetAddress(), m_type.GetAddress()))
            MakeVTables(derivationPath, m_offset)
            if debug: print("%s] virtual 2nd derived (%s) classoffset:0x%x baseclassoffset:0x%x member%s type%s" % (prefix, dumpderivationpath(derivationPath), begin_offset, m_offset, member.GetAddress(), m_type.GetAddress()))
            derivationPath.pop()
        else:
            # First sighting: place it at the next aligned offset after the
            # non-virtual part of the outermost class.
            m_offset = thefirst.VirtualBaseClassOffset
            m_align = m_type.GetAlign()
            m_offset = ((m_offset + m_align-1) & -m_align)
            # NOTE(review): the running offset advances by the size only,
            # not by the alignment padding added above - confirm.
            thefirst.VirtualBaseClassOffset += m_type.GetByteSize()
            derivationItem = DerivationItem()
            derivationItem.DIE = m_type
            derivationItem.index = 0
            derivationItem.isvirtual = True
            derivationItem.VTables = thefirst.VTablesVirtual
            derivationItem.derivationPathText = derivationPath[-1].derivationPathText + "%02d" % (i + 61)
            virtualbaseclassinfo = VirtualBaseClassInfo()
            virtualbaseclassinfo.member = member
            virtualbaseclassinfo.offset = m_offset
            thefirst.AllVirtualBaseClasses.append(virtualbaseclassinfo)
            # Store under the same key the lookup above uses.  The raw
            # m_type.address was used here before, so the lookup (which
            # uses the formatted GetAddress() string) could never match.
            thefirst.IncludedVirtualBaseClasses[vbasekey] = virtualbaseclassinfo
            derivationPath.append(derivationItem)
            if debug: print("%s[ virtual 1st derived (%s) classoffset:0x%x baseclassoffset:0x%x member%s type%s" % (prefix, dumpderivationpath(derivationPath), begin_offset, m_offset, member.GetAddress(), m_type.GetAddress()))
            MakeVTables(derivationPath, m_offset)
            if debug: print("%s] virtual 1st derived (%s) classoffset:0x%x baseclassoffset:0x%x member%s type%s" % (prefix, dumpderivationpath(derivationPath), begin_offset, m_offset, member.GetAddress(), m_type.GetAddress()))
            derivationPath.pop()

    if vPtrOffset is not None:
        # A vPtr exists, make a vtable for it.
        if vPtrOffset in thefirst.VTablesByOffset:
            vtableinfo = thefirst.VTablesByOffset[vPtrOffset]
            vmax = vtableinfo.vmax
        else:
            vtableinfo = VTableInfo()
            vtableinfo.vPtrOffset = vPtrOffset
            vtableinfo.mergedVTableEntries = {}
            vtableinfo.derivationPathText = None
            derivationPath[-1].VTables.append(vtableinfo)
            thefirst.VTablesByOffset[vPtrOffset] = vtableinfo
            vmax = -1
        mergedVTableEntries = vtableinfo.mergedVTableEntries
        gotmultiinherit = False
        gotvirtual = False
        # Walk from the class owning the vptr back up to the outermost class.
        for derivationItem in reversed(derivationPath):
            baseClass = derivationItem.DIE
            if hasattr(baseClass, "VTableEntries"):
                if debug: print("%sProcessing %s path:%s previouspath:%s" % (prefix, baseClass.GetName(), derivationItem.derivationPathText, vtableinfo.derivationPathText))
                if vtableinfo.derivationPathText is None or len(derivationItem.derivationPathText) < len(vtableinfo.derivationPathText):
                    if gotvirtual or gotmultiinherit:
                        # Trying to build vtable of multiple inheritance is hard.
                        # This is probably wrong - maybe check virtuality, and parameters, but then I might as well try to code a C++ compiler.
                        # We'll just check the name.
                        for k, w in baseClass.VTableEntries.items():
                            namesuper = w.GetName()
                            if debug: print("%s Looking for vtableitem %s" % (prefix, namesuper))
                            for j, v1 in mergedVTableEntries.items():
                                v = v1.DIE
                                namebase = v.GetName()
                                if (namesuper == namebase or (namesuper[:1] == "~" and namebase[:1] == "~")):
                                    # a "non-virtual thunk" to w is what this probably is:
                                    v1.DIE = w
                                    if gotvirtual:
                                        v1.ThunkType = "virtual thunk to "
                                    else:
                                        v1.ThunkType = "non-virtual thunk to "
                                    if debug: print("%s Changed vtableitem 0x%x %s%s" % (prefix, v1.DIE.compile_unit.addr_size * j, v1.ThunkType, namesuper))
                                    break
                    else:
                        for j, v in baseClass.VTableEntries.items():
                            namesuper = v.GetName()
                            if debug: print("%s Adding vtableitem 0x%x %s" % (prefix, v.compile_unit.addr_size * j, namesuper))
                            if j in mergedVTableEntries:
                                namebase = mergedVTableEntries[j].DIE.GetName()
                                if namesuper != namebase and not (namesuper[:1] == "~" and namebase[:1] == "~"):
                                    if namesuper[:1] == "~" and j == 0 and 1 in mergedVTableEntries and mergedVTableEntries[1].DIE.GetName()[:1] == "~":
                                        vtableitem = VTableItem()
                                        vtableitem.DIE = v
                                        mergedVTableEntries[1] = vtableitem
                                        print("Error: performed workaround for vtable entry function name %s%s differing from super %s%s" % (v.GetAddress(), namebase, mergedVTableEntries[j].DIE.GetAddress(), namesuper))
                                    else:
                                        print("Error: vtable entry function name %s%s differs from super %s%s" % (v.GetAddress(), namebase, mergedVTableEntries[j].DIE.GetAddress(), namesuper))
                                else:
                                    # Same slot, same name: the entry nearer
                                    # the outermost class replaces it.
                                    vtableitem = VTableItem()
                                    vtableitem.DIE = v
                                    mergedVTableEntries[j] = vtableitem
                            else:
                                if j > vmax:
                                    vmax = j
                                vtableitem = VTableItem()
                                vtableitem.DIE = v
                                mergedVTableEntries[j] = vtableitem
                else:
                    if debug: print("%sSkipping" % prefix)
            if derivationItem.index > 0 and not gotmultiinherit:
                # index is > 0 for non primary base class of multiple inheritance class. These requires a different method to build vtable.
                if debug: print("%sgotmultiinherit vmax:0x%x" % (prefix, vmax * baseClass.compile_unit.addr_size))
                gotmultiinherit = True
            if derivationItem.isvirtual and not gotvirtual:
                if debug: print("%sgotvirtual vmax:0x%x" % (prefix, vmax * baseClass.compile_unit.addr_size))
                gotvirtual = True
        vtableinfo.vmax = vmax
        if vtableinfo.derivationPathText is None:
            vtableinfo.derivationPathText = derivationPath[-1].derivationPathText
        if debug: print("%sadded vtable (%s) classoffset:0x%x vptroffset:0x%x vmax:%d numvtables:%d numvirtualvtables:%d" % (prefix, dumpderivationpath(derivationPath), begin_offset, vPtrOffset, vmax, len(thefirst.VTables), len(thefirst.VTablesVirtual)))
def MakeAllVTables(parent):
    """Build vtables for every class and structure DIE below parent.

    Each TAG_class_type / TAG_structure_type descendant gets fresh VTables,
    VTablesByOffset, VTablesVirtual, AllVirtualBaseClasses and
    IncludedVirtualBaseClasses containers, has VirtualBaseClassOffset set to
    its compact size, and is then handed to MakeVTables as a one-item
    derivation path.  All children are recursed into, whatever their tag.
    """
    # for every class or structure, make a list of vtables (more than one vtable exists for multiple inheritance)
    if not hasattr(parent, "children"):
        return
    for child in parent.children:
        if child.tag in ("TAG_class_type", "TAG_structure_type"):
            child.VTables = []
            child.VTablesByOffset = {}
            child.VTablesVirtual = []
            child.AllVirtualBaseClasses = []
            child.IncludedVirtualBaseClasses = {}
            child.VirtualBaseClassOffset = child.GetCompactSize()

            root = DerivationItem()
            root.DIE = child
            root.index = 0
            root.isvirtual = False
            root.VTables = child.VTables
            root.derivationPathText = "%02d" % 1
            derivationPath = [root]

            if debug: print("[ starting (parent%s child%s %s)" % (parent.GetAddress(), child.GetAddress(), dumpderivationpath(derivationPath)))
            MakeVTables(derivationPath)
            if debug: print("]")
        MakeAllVTables(child)
#=========================================================================================
class DSYM_Reader:
    """Parse the text output of dwarfdump into a tree of DIEDict records.

    Process_dSYM is the entry point. ReadDIEList and ReadNextDIE (and their
    helpers) all pull lines from the same line iterator, so the order in which
    they consume lines matters.
    """

    # All patterns are raw strings so that regex escapes such as \w or \( are
    # not treated as (invalid) Python string escapes.

    # Compile unit header line; group 2 is the addr_size value.
    compileUnitRE = re.compile(r'(0x[0-9a-f]+): Compile Unit: .* addr_size = (0x[0-9a-f]+) .*\n')
    # First line of a DIE: address, run of spaces (its length is the indent), tag.
    tagRE = re.compile(r'(0x[0-9a-f]+):( +)(?:Unknown )?(?:DW_)?(TAG(?:_\w+| constant: 0x[0-9a-f]+)).*\n')
    # NULL entry; ReadNextDIE stops at it and returns None.
    nullRE = re.compile(r'(0x[0-9a-f]+): +(NULL)\n')
    # Empty line; ends the attribute lines of a DIE.
    blankRE = re.compile(r'\n')
    # Continuation lines of the multi-line attributes.
    AT_locationRE = re.compile(r' +(.*?) *(\))?\n')
    AT_rangesRE = re.compile(r' +(?:\[(0x[0-9a-f]+)(?: - |, )(0x[0-9a-f]+)\)(\)?))| *(End \))\n')
    AT_byte_sizeRE = re.compile(r' +(?:\[(0x[0-9a-f]+)(?: - |, )(0x[0-9a-f]+)\)(?::?[^)\n]*)(\)?))| *(End \))\n')
    # Attribute line patterns, tried in order; the first match wins. Patterns
    # with exactly two groups are plain (name, value) attributes. Patterns with
    # more groups are handled specially in _StoreAttribute (sometimes the third
    # group is just the linefeed, to force that special handling).
    AT_REList = [
        re.compile(r' +(?:DW_)?(AT_location)\t?\( *(0x[0-9a-f]+)(\n)'), # loop until ')\n' is found
        re.compile(r' +(?:DW_)?(AT_ranges)\t?\( *(0x[0-9a-f]+)(\n)'), # loop until 'End )\n' is found
        re.compile(r' +(?:DW_)?(AT_byte_size)\t?\( *(0x[0-9a-f]+):? *(\n)'), # loop until 'End )\n' is found
        re.compile(r' +(?:DW_)?(AT_type)\t?\( *\{(0x[0-9a-f]+)\} \( .*? *\)\n'),
        re.compile(r' +(?:DW_)?(AT_vtable_elem_location)\t?\( *<(0x[0-9a-f]+)> ([0-9a-f]{2}) ([0-9a-f]{2})(?: ([0-9a-f]{2})?)? *\)\n'),
        re.compile(r' +(?:DW_)?(AT_vtable_elem_location)\t?\( *(?:DW_)?(OP_constu) (0x[0-9a-f]+) *\)\n'),
        re.compile(r' +(?:DW_)?(AT_data_member_location)\t?\( *(?:DW_)?(OP_plus_uconst) (0x[0-9a-f]+) *\)\n'), # found this in 10.10.5_14F2511 kernel
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *\{(0x[0-9a-f]+)\}".*" *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *\[(.*)\] *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *\{(.*)\} *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *"(.*)" *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *(0x\w+) ".*" *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *(.*) *\)\n'),
    ]
    # Classifiers for the value text of a plain attribute.
    neghexRE = re.compile(r'^0x[8-9a-f][0-9a-f]{15} *$') # 16 hex digits with the top bit set
    hexRE = re.compile(r'^(0x[0-9a-f]+):? *$')
    decRE = re.compile(r'^[-+]?\d+ *$')

    def ReadDIEList(self, f, parent):
        """Read DIEs from f and attach them to the tree below parent.

        f is the shared line iterator; parent must have a dSYM attribute and
        an AppendChild method. Reading stops when ReadNextDIE returns None or
        when a DIE is less indented than the list being read (reported as an
        error). A DIE that is more indented becomes the first child of the
        last added record, and the rest of that child list is read by a
        recursive call.
        """
        indent = None
        unexpectedlist = False
        if hasattr(parent, "children"):
            if len(parent.children) == 1:
                # called recursively after the first child was attached:
                # that child defines the indent of this list
                indent = parent.children[0].indent
            elif len(parent.children) != 0:
                print("Error: unexpected list %s %s" % (parent.GetAddress(), parent.tag))
                for child in parent.children:
                    print(" %s %s" % (child.GetAddress(), child.tag))
                unexpectedlist = True
        while True:
            DIE = self.ReadNextDIE(f, parent.dSYM)
            if DIE is None:
                break
            if unexpectedlist:
                print("Error: first item of unexpected list %s %s" % (DIE.tag, DIE.GetAddress()))
                unexpectedlist = False
            if indent is None:
                indent = DIE.indent
            if DIE.indent > indent:
                # indent increased, this record is the first child of the last added record
                DIE.parent = parent.children[-1] # last added record
                DIE.parent.AppendChild(DIE) # make this record the first child of the last added record
            elif DIE.indent < indent:
                print("Error: indentation")
                break
            else:
                DIE.parent = parent
                parent.AppendChild(DIE)
            self._RegisterWithParent(DIE)
            if DIE.indent > indent:
                # read the remaining siblings of the child that was just attached
                self.ReadDIEList(f, DIE.parent)

    @staticmethod
    def _AppendToParentList(DIE, listname):
        """Append DIE to the list attribute listname of its parent, creating the list if needed."""
        if not hasattr(DIE.parent, listname):
            setattr(DIE.parent, listname, [])
        getattr(DIE.parent, listname).append(DIE)

    def _RegisterWithParent(self, DIE):
        """Record DIE in the per-kind collection of its parent (base classes, Friends, Fields, VTableEntries)."""
        if DIE.tag == "TAG_inheritance":
            DIE.UpdateBaseClassLists()
        elif DIE.tag == "TAG_friend":
            self._AppendToParentList(DIE, "Friends")
        elif hasattr(DIE, "AT_data_member_location") or hasattr(DIE, "AT_data_bit_offset"):
            self._AppendToParentList(DIE, "Fields")
        elif DIE.tag == "TAG_member" and not hasattr(DIE, "AT_external"):
            # member without a location and not external: stored at offset 0 and flagged
            DIE.AT_data_member_location = 0
            DIE.noLocation = 1
            self._AppendToParentList(DIE, "Fields")
        elif hasattr(DIE, "AT_vtable_elem_location"):
            if not hasattr(DIE.parent, "VTableEntries"):
                DIE.parent.VTableEntries = {}
            entries = DIE.parent.VTableEntries
            slot = DIE.AT_vtable_elem_location
            if slot in entries:
                # the first entry of a slot is kept; a different name is reported
                nameold = entries[slot].GetName()
                namenew = DIE.GetName()
                if namenew != nameold:
                    print("Error: %s has duplicate VTableEntries 0x%x %s%s %s%s" % (DIE.parent.GetAddress(), slot * DIE.compile_unit.addr_size, entries[slot].GetAddress(), nameold, DIE.GetAddress(), namenew))
                    # workaround problem for Mammal and WingedAnimal examples
                    if slot == 0 and namenew[:1] == "~" and not nameold[:1] == "~" and 1 not in entries:
                        entries[1] = DIE
            else:
                entries[slot] = DIE

    def ReadNextDIE(self, f, dSYM):
        """Read the next DIE and its attribute lines from f.

        Returns the new DIEDict, or None when a NULL entry or the end of f is
        reached first. The DIE is registered in dSYM.DIELookup; a
        TAG_compile_unit also becomes dSYM.currentCompileUnit, every other DIE
        gets compile_unit set to the current one.
        """
        DIE = None
        for line in f:
            if self.nullRE.match(line):
                break
            c = self.compileUnitRE.match(line)
            if c:
                # only bound here: a TAG_compile_unit is expected to follow its
                # "Compile Unit:" header within this same call
                addr_size = int(c.group(2), 16)
                continue
            t = self.tagRE.match(line)
            if t:
                DIE = DIEDict()
                DIE.dSYM = dSYM
                DIE.address = int(t.group(1), 16)
                DIE.indent = len(t.group(2))
                DIE.tag = t.group(3)
                self._ReadAttributes(f, DIE)
                dSYM.DIELookup[DIE.address] = DIE
                if DIE.tag == "TAG_compile_unit":
                    dSYM.CompileUnits.append(DIE)
                    dSYM.currentCompileUnit = DIE
                    DIE.addr_size = addr_size
                else:
                    DIE.compile_unit = dSYM.currentCompileUnit
                break # DIE created
        return DIE

    def _ReadAttributes(self, f, DIE):
        """Consume attribute lines from f up to the next empty line and store them on DIE.

        A line that matches none of the AT_REList patterns is skipped.
        """
        for line in f:
            if self.blankRE.match(line):
                break
            for atRE in self.AT_REList:
                m = atRE.match(line)
                if m:
                    self._StoreAttribute(f, DIE, atRE, m, line)
                    break # AT_ created

    def _StoreAttribute(self, f, DIE, atRE, m, line):
        """Store one matched attribute on DIE; multi-line attributes read their extra lines from f."""
        name = m.group(1)
        if atRE.groups == 2:
            value = m.group(2)
            if name == "AT_bit_offset" and self.neghexRE.match(value):
                # 64 bit value with the top bit set: store it as a negative number
                DIE.AT_bit_offset = int(value, 16) - 2**64
            else:
                m2 = self.hexRE.match(value)
                if m2:
                    setattr(DIE, name, int(m2.group(1), 16))
                elif self.decRE.match(value):
                    setattr(DIE, name, int(value, 10))
                else:
                    setattr(DIE, name, value)
        # the rest of these have more than 2 capture groups
        elif name == "AT_data_member_location":
            if m.group(2) == "OP_plus_uconst":
                DIE.AT_data_member_location = int(m.group(3), 16)
        elif name == "AT_vtable_elem_location":
            DIE.AT_vtable_elem_location = self._DecodeVTableElemLocation(DIE, m)
        elif name == "AT_location":
            DIE.AT_location = int(m.group(2), 16)
            DIE.AT_location_list = self._ReadLocationList(f, DIE)
        elif name == "AT_ranges":
            DIE.AT_ranges = int(m.group(2), 16)
            DIE.AT_ranges_list = self._ReadBoundsList(f, DIE, self.AT_rangesRE, "AT_ranges_list")
        elif name == "AT_byte_size":
            DIE.AT_byte_size = int(m.group(2), 16)
            DIE.AT_byte_size_list = self._ReadBoundsList(f, DIE, self.AT_byte_sizeRE, "AT_byte_size_list")
        else:
            print("Error in tag %s with line %s" % (DIE.GetAddress(), line))

    def _DecodeVTableElemLocation(self, DIE, m):
        """Return the vtable slot number from an AT_vtable_elem_location match.

        The OP_constu form carries the number directly. The byte form is
        "<count> opcode byte [byte]": the bytes after the opcode are combined
        as 7 bit groups, least significant group first (base 128).
        """
        if m.group(2) == "OP_constu":
            return int(m.group(3), 16)
        numbytes = int(m.group(2), 16)
        lastgroup = numbytes + 2 # regex group holding the last byte
        thenum = 0
        for i in range(lastgroup, 3, -1):
            part = int(m.group(i), 16)
            # only the last byte is expected to have its high bit clear
            if (i == lastgroup) == ((part & 128) != 0):
                print("Error: unexpected high bit of elem location byte (%s) %s" % (m.group(3), DIE.GetAddress()))
            thenum = thenum * 128 + (part & 127)
        if m.group(3) != "10":
            print("Error: unexpected elem location type (%s) %s" % (m.group(3), DIE.GetAddress()))
        return thenum

    def _ReadLocationList(self, f, DIE):
        """Collect the continuation lines of an AT_location attribute up to the line ending in ')'."""
        lines = []
        for line in f:
            m = self.AT_locationRE.match(line)
            if m:
                lines.append(m.group(1))
                if m.group(2) == ")":
                    break # AT_location_list finished
            else:
                print("Error in tag %s (AT_location) with line %s" % (DIE.GetAddress(), line))
                break # AT_location_list finished with error
        return lines

    def _ReadBoundsList(self, f, DIE, listRE, listname):
        """Collect [low, high] pairs of a multi-line attribute until ')' or 'End )'.

        listRE is AT_rangesRE or AT_byte_sizeRE; listname is only used in the
        error message.
        """
        pairs = []
        for line in f:
            m = listRE.match(line)
            if m:
                if m.group(4) == 'End )':
                    break # list finished
                pairs.append([m.group(1), m.group(2)])
                if m.group(3) == ')':
                    break # list finished
            else:
                print("Error in tag %s (%s) with line %s" % (DIE.GetAddress(), listname, line))
                break # list finished with error
        return pairs

    def Process_dSYM(self, filename):
        """Read DWARF information for filename and return the root DIEDict.

        A filename matching ".*\\.txt" is read as saved dwarfdump output.
        Anything else is passed to the dwarfdump command, whose output is
        captured in a temporary file. After parsing, MakeAllVTables is run on
        the result.
        """
        istext = re.match(r".*\.txt", filename)
        if istext:
            f = open(filename, "r")
        else:
            # text mode, so lines are str like in the .txt case (the default
            # "w+b" yields bytes on Python 3, which the str patterns reject)
            f = tempfile.NamedTemporaryFile(mode="w+")
        try:
            if not istext:
                status = subprocess.call(["dwarfdump", filename], stdout=f)
                if status != 0:
                    print("Error: dwarfdump returned %d for %s" % (status, filename))
                f.seek(0)
            dSYM = DIEDict()
            dSYM.dSYM = dSYM
            dSYM.sourceType = "dwarf"
            dSYM.address = 0
            dSYM.currentCompileUnit = None
            dSYM.DIELookup = {}
            dSYM.CompileUnits = []
            dSYM.filename = filename
            self.ReadDIEList(f, dSYM)
        finally:
            # close (and for the temporary file, delete) even if parsing fails
            f.close()
        del dSYM.currentCompileUnit
        MakeAllVTables(dSYM)
        return dSYM
#=========================================================================================
# Some (not all) stabs notes
"""
{name}:{symbol-descriptor}{type-information} The overall format of the string field for most stab types.
{name} can be omitted for unnamed types.
{symbol-descriptor} can be omitted for variables.
{type-information} is one of these:
({filenumber},{type-number})|{type-number} - A {type-number}.
{type-number} - A {type reference}, referring directly to a type that has already been defined.
{type-number}= - A {type definition}, where the number represents a new type which is about to be defined. The type definition may refer to other types by number, and those type numbers may be followed by ‘=’ and nested definitions. Also, the Lucid compiler will repeat ‘type-number=’ more than once if it wants to define several type numbers at once.
a{boundary} - {boundary} is an integer specifying the alignment. I assume it applies to all variables of this type.
p{integer} - Pointer class (for checking). Not sure what this means, or how integer is interpreted.
P - Indicate this is a packed type, meaning that structure fields or array elements are placed more closely in memory, to save memory at the expense of speed.
s{size} - Size in bits of a variable of this type. This is fully supported by GDB 4.11 and later.
S - Indicate that this type is a string instead of an array of characters, or a bitstring instead of a set. It doesn’t change the layout of the data being represented, but does enable the debugger to know which type it is.
V - Indicate that this type is a vector instead of an array. The only major difference between vectors and arrays is that vectors are passed by value instead of by reference (vector coprocessor extension).
Stab Symbol Types:
The following symbol types indicate that this is a stab. This is the full list of stab numbers, including stab types that are used in languages other than C.
0x20 N_GSYM - Global symbol; see Global Variables.
0x22 N_FNAME - Function name (for BSD Fortran); see Procedures.
0x24 N_FUN - Function name (see Procedures) or text segment variable (see Statics).
0x26 N_STSYM - Data segment file-scope variable; see Statics.
0x28 N_LCSYM - BSS segment file-scope variable; see Statics.
0x2a N_MAIN - Name of main routine; see Main Program.
0x2c N_ROSYM - Variable in .rodata section; see Statics.
0x30 N_PC - Global symbol (for Pascal); see N_PC.
0x32 N_NSYMS - Number of symbols (according to Ultrix V4.0); see N_NSYMS.
0x34 N_NOMAP - No DST map; see N_NOMAP.
0x36 N_MAC_DEFINE - Name and body of a #defined macro; see Macro define and undefine.
0x38 N_OBJ - Object file (Solaris2).
0x3a N_MAC_UNDEF - Name of an #undefed macro; see Macro define and undefine.
0x3c N_OPT - Debugger options (Solaris2).
0x40 N_RSYM - Register variable; see Register Variables.
0x42 N_M2C - Modula-2 compilation unit; see N_M2C.
0x44 N_SLINE - Line number in text segment; see Line Numbers.
0x46 N_DSLINE - Line number in data segment; see Line Numbers.
0x48 N_BSLINE - Line number in bss segment; see Line Numbers.
0x48 N_BROWS - Sun source code browser, path to .cb file; see N_BROWS.
0x4a N_DEFD - GNU Modula2 definition module dependency; see N_DEFD.
0x4c N_FLINE - Function start/body/end line numbers (Solaris2).
0x50 N_EHDECL - GNU C++ exception variable; see N_EHDECL.
0x50 N_MOD2 - Modula2 info "for imc" (according to Ultrix V4.0); see N_MOD2.
0x54 N_CATCH - GNU C++ catch clause; see N_CATCH.
0x60 N_SSYM - Structure or union element; see N_SSYM.
0x62 N_ENDM - Last stab for module (Solaris2).
0x64 N_SO - Path and name of source file; see Source Files.
0x80 N_LSYM - Stack variable (see Stack Variables) or type (see Typedefs).
0x82 N_BINCL - Beginning of an include file (Sun only); see Include Files.
0x84 N_SOL - Name of include file; see Include Files.
0xa0 N_PSYM - Parameter variable; see Parameters.
0xa2 N_EINCL - End of an include file; see Include Files.
0xa4 N_ENTRY - Alternate entry point; see Alternate Entry Points.
0xc0 N_LBRAC - Beginning of a lexical block; see Block Structure.
0xc2 N_EXCL - Place holder for a deleted include file; see Include Files.
0xc4 N_SCOPE - Modula2 scope information (Sun linker); see N_SCOPE.
0xe0 N_RBRAC - End of a lexical block; see Block Structure.
0xe2 N_BCOMM - Begin named common block; see Common Blocks.
0xe4 N_ECOMM - End named common block; see Common Blocks.
0xe8 N_ECOML - Member of a common block; see Common Blocks.
0xea N_WITH - Pascal with statement: type,,0,0,offset (Solaris2).
0xf0 N_NBTEXT - Gould non-base registers; see Gould.
0xf2 N_NBDATA - Gould non-base registers; see Gould.
0xf4 N_NBBSS - Gould non-base registers; see Gould.
0xf6 N_NBSTS - Gould non-base registers; see Gould.
0xf8 N_NBLCS - Gould non-base registers; see Gould.
Table of Symbol Descriptors:
{symbol-descriptor} The symbol descriptor is the character which follows the colon in many stabs, and which tells what kind of stab it is. See String Field, for more information about their use.
none - Variable on the stack; see Stack Variables.
: - C++ nested symbol; see Nested Symbols.
a - Parameter passed by reference in register; see Reference Parameters.
b - Based variable; see Based Variables.
c - Constant; see Constants.
C - Conformant array bound (Pascal, maybe other languages); see Conformant Arrays. Name of a caught exception (GNU C++). These can be distinguished because the latter uses N_CATCH and the former uses another symbol type.
d - Floating point register variable; see Register Variables.
D - Parameter in floating point register; see Register Parameters.
f - File scope function; see Procedures.
F - Global function; see Procedures.
G - Global variable; see Global Variables.
i - See Register Parameters.
I - Internal (nested) procedure; see Nested Procedures.
J - Internal (nested) function; see Nested Procedures.
L - Label name (documented by AIX, no further information known).
m - Module; see Procedures.
p - Argument list parameter; see Parameters.
pP - See Parameters.
pF - Fortran Function parameter; see Parameters.
P - Unfortunately, three separate meanings have been independently invented for this symbol descriptor. At least the GNU and Sun uses can be distinguished by the symbol type. Global Procedure (AIX) (symbol type used unknown); see Procedures. Register parameter (GNU) (symbol type N_PSYM); see Parameters. Prototype of function referenced by this file (Sun acc) (symbol type N_FUN).
Q - Static Procedure; see Procedures.
R - Register parameter; see Register Parameters.
r - Register variable; see Register Variables.
S - File scope variable; see Statics.
s - Local variable (OS9000).
t - Type name; see Typedefs.
T - Enumeration, structure, or union tag; see Typedefs.
v - Parameter passed by reference; see Reference Parameters.
V - Procedure scope static variable; see Statics.
x - Conformant array; see Conformant Arrays.
X - Function return variable; see Parameters.
Table of Type Descriptors:
The type descriptor is the character which follows the type number and an equals sign. It specifies what kind of type is being defined. See String Field, for more information about their use.
digit|( - Type reference; see String Field.
- - Reference to builtin type; see Negative Type Numbers.
# - Method (C++); see Method Type Descriptor.
* - Pointer; see Miscellaneous Types.
& - Reference (C++).
@ - Type Attributes (AIX); see String Field. Member (class and variable) type (GNU C++); see Member Type Descriptor.
a - Array; see Arrays.
A - Open array; see Arrays.
b - Pascal space type (AIX); see Miscellaneous Types. Builtin integer type (Sun); see Builtin Type Descriptors. Const and volatile qualified type (OS9000).
B - Volatile-qualified type; see Miscellaneous Types.
c - Complex builtin type (AIX); see Builtin Type Descriptors. Const-qualified type (OS9000).
C - COBOL Picture type. See AIX documentation for details.
d - File type; see Miscellaneous Types.
D - N-dimensional dynamic array; see Arrays.
e - Enumeration type; see Enumerations.
E - N-dimensional subarray; see Arrays.
f - Function type; see Function Types.
F - Pascal function parameter; see Function Types
g - Builtin floating point type; see Builtin Type Descriptors.
G - COBOL Group. See AIX documentation for details.
i - Imported type (AIX); see Cross-References. Volatile-qualified type (OS9000).
k - Const-qualified type; see Miscellaneous Types.
K - COBOL File Descriptor. See AIX documentation for details.
M - Multiple instance type; see Miscellaneous Types.
n - String type; see Strings.
N - Stringptr; see Strings.
o - Opaque type; see Typedefs.
p - Procedure; see Function Types.
P - Packed array; see Arrays.
r - Range type; see Subranges.
R - Builtin floating type; see Builtin Type Descriptors (Sun). Pascal subroutine parameter; see Function Types (AIX). Detecting this conflict is possible with careful parsing (hint: a Pascal subroutine parameter type will always contain a comma, and a builtin type descriptor never will).
s - Structure type; see Structures.
S - Set type; see Miscellaneous Types.
u - Union; see Unions.
v - Variant record. This is a Pascal and Modula-2 feature which is like a union within a struct in C. See AIX documentation for details.
w - Wide character; see Builtin Type Descriptors.
x - Cross-reference; see Cross-References.
Y - Used by IBM’s xlC C++ compiler (for structures, I think).
z - gstring; see Strings.
Cross-References to Other Types
x{s|u|e}{name}: - Another way is with the ‘x’ type descriptor, which is followed by ‘s’ for a structure tag, ‘u’ for a union tag, or ‘e’ for an enumerator tag, followed by the name of the tag, followed by ‘:’. If the name contains ‘::’ between a ‘<’ and ‘>’ pair (for C++ templates), such a ‘::’ does not end the name—only a single ‘:’ ends the name; see Nested Symbols.
Subrange Types:
r{type-information};{lower-bound};{upper-bound}; The ‘r’ type descriptor defines a type as a subrange of another type. It is followed by type information for the type of which it is a subrange, a semicolon, an integral lower bound, a semicolon, an integral upper bound, and a semicolon. The AIX documentation does not specify the trailing semicolon, in an effort to specify array indexes more cleanly, but a subrange which is not an array index has always included a trailing semicolon (see Arrays).
Instead of an integer, either bound can be one of the following:
-
A{offset} - The bound is passed by reference on the stack at offset offset from the argument list. See Parameters, for more information on such offsets.
T{offset} - The bound is passed by value on the stack at offset offset from the argument list.
a{register-number} - The bound is passed by reference in register number register-number.
t{register-number} - The bound is passed by value in register number register-number.
J - There is no bound.
Subranges are also used for builtin types; see Traditional Builtin Types.
Array Types:
Negative Type Numbers:
Here is the list of negative type numbers. The phrase integral type is used to mean twos-complement (I strongly suspect that all machines which use stabs use twos-complement; most machines use twos-complement these days).
-1 - int, 32 bit signed integral type.
-2 - char, 8 bit type holding a character. Both GDB and dbx on AIX treat this as signed. GCC uses this type whether char is signed or not, which seems like a bad idea. The AIX compiler (xlc) seems to avoid this type; it uses -5 instead for char.
-3 - short, 16 bit signed integral type.
-4 - long, 32 bit signed integral type.
-5 - unsigned char, 8 bit unsigned integral type.
-6 - signed char, 8 bit signed integral type.
-7 - unsigned short, 16 bit unsigned integral type.
-8 - unsigned int, 32 bit unsigned integral type.
-9 - unsigned, 32 bit unsigned integral type.
-10 - unsigned long, 32 bit unsigned integral type.
-11 - void, type indicating the lack of a value.
-12 - float, IEEE single precision.
-13 - double, IEEE double precision.
-14 - long double, IEEE double precision. The compiler claims the size will increase in a future release, and for binary compatibility you have to avoid using long double. I hope when they increase it they use a new negative type number.
-15 - integer. 32 bit signed integral type.
-16 - boolean. 32 bit type. GDB and GCC assume that zero is false, one is true, and other values have unspecified meaning. I hope this agrees with how the IBM tools use the type.
-17 - short real. IEEE single precision.
-18 - real. IEEE double precision.
-19 - stringptr. See Strings.
-20 - character, 8 bit unsigned character type.
-21 - logical*1, 8 bit type. This Fortran type has a split personality in that it is used for boolean variables, but can also be used for unsigned integers. 0 is false, 1 is true, and other values are non-boolean.
-22 - logical*2, 16 bit type. This Fortran type has a split personality in that it is used for boolean variables, but can also be used for unsigned integers. 0 is false, 1 is true, and other values are non-boolean.
-23 - logical*4, 32 bit type. This Fortran type has a split personality in that it is used for boolean variables, but can also be used for unsigned integers. 0 is false, 1 is true, and other values are non-boolean.
-24 - logical, 32 bit type. This Fortran type has a split personality in that it is used for boolean variables, but can also be used for unsigned integers. 0 is false, 1 is true, and other values are non-boolean.
-25 - complex. A complex type consisting of two IEEE single-precision floating point values.
-26 - complex. A complex type consisting of two IEEE double-precision floating point values.
-27 - integer*1, 8 bit signed integral type.
-28 - integer*2, 16 bit signed integral type.
-29 - integer*4, 32 bit signed integral type.
-30 - wchar. Wide character, 16 bits wide, unsigned (what format? Unicode?).
-31 - long long, 64 bit signed integral type.
-32 - unsigned long long, 64 bit unsigned integral type.
-33 - logical*8, 64 bit unsigned integral type.
-34 - integer*8, 64 bit signed integral type.
Miscellaneous Types:
b{type-information};bytes - Pascal space type. This is documented by IBM; what does it mean? This use of the ‘b’ type descriptor can be distinguished from its use for builtin integral types (see Builtin Type Descriptors) because the character following the type descriptor is always a digit, ‘(’, or ‘-’.
B{type-information} - A volatile-qualified version of type-information. This is a Sun extension. References and stores to a variable with a volatile-qualified type must not be optimized or cached; they must occur as the user specifies them.
d{type-information} - File of type type-information. As far as I know this is only used by Pascal.
k{type-information} - A const-qualified version of type-information. This is a Sun extension. A variable with a const-qualified type cannot be modified.
M{type-information};{length} - Multiple instance type. The type seems to be composed of length repetitions of type-information, for example character*3 is represented by ‘M-2;3’, where ‘-2’ is a reference to a character type (see Negative Type Numbers). I’m not sure how this differs from an array. This appears to be a Fortran feature. length is a bound, like those in range types; see Subranges.
S{type-information} - Pascal set type. type-information must be a small type such as an enumeration or a subrange, and the type is a bitmask whose length is specified by the number of elements in type-information. In CHILL, if it is a bitstring instead of a set, also use the ‘S’ type attribute (see String Field).
*{type-information} - Pointer to type-information.
"""
#=========================================================================================
class stab_Reader:
# Patterns for the stabs dump and for the pieces of a stab string. All are raw
# strings so that regex escapes such as \d or \( are not treated as (invalid)
# Python string escapes. Most patterns capture the unparsed remainder of the
# string in their last group.

# One line of the dump: bracketed decimal index, 8 hex digits, 2 hex digits,
# parenthesised text, 2, 4 and 16 hex digits, then an optional quoted string.
stabRE = re.compile(r"\[ *(\d+)\] [0-9a-f]{8} ([0-9a-f]{2}) \( *(.+?) *\) ([0-9a-f]{2}) ([0-9a-f]{4}) ([0-9a-f]{16})(?: '(.*)?')?")
# Text that ends with a slash; group 1 is the text without it.
dirpathRE = re.compile(r'(.*)/$')
# {name}:{symbol-descriptor}{type-information}; "::" inside the name does not end it.
stringRE = re.compile(r'((?:[^:\n]|::)*):((?:pF|pP|Tt|[a-zA-Z])?)(.*)') # \2\t\1\t\t\t\t\t\3
# Type number, "(file,number)" or "number", with an optional "=" that starts a definition.
typeRE = re.compile(r'(\(\d+,\d+\)|\d+)(=?)(.*)')
# Negative (builtin) type number followed by ";".
negativeTypeRE = re.compile(r'(-\d+);(.*)')
# "*" pointer, "&" reference, "k" const, "B" volatile.
pointerRE = re.compile(r'([*&kB])(.*)')
# "#" method or "f" function type.
methodRE = re.compile(r'([#f])(.*)')
# "," before another parameter or ";" at the end of the parameter list.
parameterRE = re.compile(r'(;|,)(.*)')
methodpropertiesRE = re.compile(r':([^;]+);([0-2])([A-D])(?:(\?)|(\.)|(\*)(\d+))(.*)')
# "@s{size};" type attribute, size in bits.
attributeRE = re.compile(r'@(?:s(\d+);)(.*)')
enumerationRE = re.compile(r'e(.*)')
# Either the closing ";" or one "name:value," enumerator.
enumeratorRE = re.compile(r'(?:(;)|([^:]+):(-?\d+),)(.*)')
subrangeRE = re.compile(r'r(.*)')
# ";{lower-bound};{upper-bound};" with integer bounds.
subrangeLimitsRE = re.compile(r';(-?\d+);(-?\d+);(.*)')
arrayRE = re.compile(r'a(.*)')
# x{s|u|e}{name}: cross-reference to a structure, union or enumerator tag.
crossReferenceRE = re.compile(r'x([sue])((?:::|\w|\$)+):(.*)')
# "s" or "u", a number, and an optional "!{number}," part.
structRE = re.compile(r'([su])(\d+)(?:!(\d+),)?(.*)')
baseclassRE = re.compile(r'(?:(;)|(\d)(\d)(\d+),)(.*)')
fieldnameRE = re.compile(r'(?:(;)|(?:([^:\n]*)(?:(?:(::)|(:))(?:/([0-2]))?)))(.*)')
fieldlocationRE = re.compile(r'(?:(,)(\d+),(\d+)|:(\w+))(.*)')
methodEndRE = re.compile(r';(.*)')
fieldEndRE = re.compile(r';(.*)')
structContainingRE = re.compile(r"~%(.*)")
structContainingEndRE = re.compile(r';(.*)')
# Reader state defaults.
path = None
currentfile = None # makeDIE copies this into AT_decl_file of each new DIE
addr_size = 4 # makeDIE copies this into each new TAG_compile_unit
compilationUnitNumber = 0 # makeDIE increments this for each new TAG_compile_unit
def makeDIE(self, parent, Index, tag):
    """Create a DIEDict with the given tag, append it to parent and return it.

    Index becomes the address of the new record and self.currentfile its
    AT_decl_file. A TAG_compile_unit is added to the CompileUnits of
    parent.dSYM, becomes its currentCompileUnit and gets the next
    unit_number; any other record is linked to the current compile unit.
    """
    record = DIEDict()
    record.dSYM = parent.dSYM
    record.address = Index
    record.parent = parent
    record.AT_decl_file = self.currentfile
    parent.AppendChild(record)
    record.tag = tag
    if record.tag != "TAG_compile_unit":
        # ordinary record: remember which compile unit it belongs to
        record.compile_unit = parent.dSYM.currentCompileUnit
        return record
    # a new compile unit becomes the current one and is numbered from 1
    parent.dSYM.CompileUnits.append(record)
    parent.dSYM.currentCompileUnit = record
    record.addr_size = self.addr_size
    self.compilationUnitNumber += 1
    record.unit_number = self.compilationUnitNumber
    return record
def parseStabType(self, leftDIE, parent, Index, tstr):
p = self.typeRE.match(tstr)
if p:
typenumber = ("%d %s" % (parent.dSYM.currentCompileUnit.unit_number, p.group(1)))
if typenumber in parent.dSYM.DIELookup:
DIE = parent.dSYM.DIELookup[typenumber]
if debug: dbgprint("found existing type «%s»" % (typenumber))
else:
DIE = self.makeDIE(parent, Index, "TAG_typedef")
DIE.firsttag = 1
DIE.Settypenumber(typenumber)
if leftDIE != None:
if not hasattr(DIE, "createdFrom"):
DIE.createdFrom = []
DIE.createdFrom.append(leftDIE)
if not hasattr(leftDIE, "creates"):
leftDIE.creates = []
leftDIE.creates.append(DIE)
if p.group(2) == "=":
if debug: dbgprint("{ = «%s» «%s» «%s»" % (p.group(1), p.group(2), p.group(3)))
typeDIE, remaining = self.parseStabType(DIE, parent, Index, p.group(3))
if leftDIE != None:
if hasattr(typeDIE, "iscrossreference"):
leftDIE.SetName(typeDIE.AT_name)
leftDIE.isnamefromcrossreference = 1
if debug: dbgprint("} =")
else:
remaining = p.group(3)
if leftDIE != None:
if hasattr(leftDIE, "AT_type") and leftDIE.AT_type != typenumber:
print("Error: [%d] Left DIE (%s) type (%s) changed to (%s): %s" % (Index, leftDIE.typenumber, leftDIE.AT_type, typenumber, tstr))
if leftDIE.typenumber == typenumber:
if debug: dbgprint("Warning: [%d] Type «%s» assumed to be void" % (Index, typenumber))
leftDIE.isvoid = 1
leftDIE.SetName("void")
else:
leftDIE.AT_type = typenumber
return DIE, remaining
if leftDIE != None:
if debug: dbgprint("{ leftDIE «%s»" % tstr)
remaining = tstr
while True:
p = self.attributeRE.match(remaining)
if p:
bits = int(p.group(1), 10)
if bits & 7 == 0:
leftDIE.AT_byte_size = bits >> 3
else:
leftDIE.AT_bit_size = bits
remaining = p.group(2)
else:
break
p = self.negativeTypeRE.match(remaining)
if p:
leftDIE.SetTag("TAG_base_type")
leftDIE.stabsbasetype = int(p.group(1), 10)
if 1 == 0:
pass
#elif leftDIE.stabsbasetype == -1 : # int, 32 bit signed integral type.
#elif leftDIE.stabsbasetype == -2 : # char, 8 bit type holding a character. Both GDB and dbx on AIX treat this as signed. GCC uses this type whether char is signed or not, which seems like a bad idea. The AIX compiler (xlc) seems to avoid this type; it uses -5 instead for char.
#elif leftDIE.stabsbasetype == -3 : # short, 16 bit signed integral type.
#elif leftDIE.stabsbasetype == -4 : # long, 32 bit signed integral type.
#elif leftDIE.stabsbasetype == -5 : # unsigned char, 8 bit unsigned integral type.
#elif leftDIE.stabsbasetype == -6 : # signed char, 8 bit signed integral type.
#elif leftDIE.stabsbasetype == -7 : # unsigned short, 16 bit unsigned integral type.
#elif leftDIE.stabsbasetype == -8 : # unsigned int, 32 bit unsigned integral type.
#elif leftDIE.stabsbasetype == -9 : # unsigned, 32 bit unsigned integral type.
#elif leftDIE.stabsbasetype == -10: # unsigned long, 32 bit unsigned integral type.
#elif leftDIE.stabsbasetype == -11: # void, type indicating the lack of a value.
#elif leftDIE.stabsbasetype == -12: # float, IEEE single precision.
#elif leftDIE.stabsbasetype == -13: # double, IEEE double precision.
#elif leftDIE.stabsbasetype == -14: # long double, IEEE double precision. The compiler claims the size will increase in a future release, and for binary compatibility you have to avoid using long double. I hope when they increase it they use a new negative type number.
#elif leftDIE.stabsbasetype == -15: # integer. 32 bit signed integral type.
elif leftDIE.stabsbasetype == -16: # boolean. 32 bit type. GDB and GCC assume that zero is false, one is true, and other values have unspecified meaning. I hope this agrees with how the IBM tools use the type.
leftDIE.AT_encoding = "ATE_boolean"
leftDIE.SetName("bool")
#leftDIE.AT_byte_size = 1
#elif leftDIE.stabsbasetype == -17: # short real. IEEE single precision.
#elif leftDIE.stabsbasetype == -18: # real. IEEE double precision.
#elif leftDIE.stabsbasetype == -19: # stringptr. See Strings.
#elif leftDIE.stabsbasetype == -20: # character, 8 bit unsigned character type.
#elif leftDIE.stabsbasetype == -21: # logical*1, 8 bit type. This Fortran type has a split personality in that it is used for boolean variables, but can also be used for unsigned integers. 0 is false, 1 is true, and other values are non-boolean.
#elif leftDIE.stabsbasetype == -22: # logical*2, 16 bit type. This Fortran type has a split personality in that it is used for boolean variables, but can also be used for unsigned integers. 0 is false, 1 is true, and other values are non-boolean.
#elif leftDIE.stabsbasetype == -23: # logical*4, 32 bit type. This Fortran type has a split personality in that it is used for boolean variables, but can also be used for unsigned integers. 0 is false, 1 is true, and other values are non-boolean.
#elif leftDIE.stabsbasetype == -24: # logical, 32 bit type. This Fortran type has a split personality in that it is used for boolean variables, but can also be used for unsigned integers. 0 is false, 1 is true, and other values are non-boolean.
#elif leftDIE.stabsbasetype == -25: # complex. A complex type consisting of two IEEE single-precision floating point values.
#elif leftDIE.stabsbasetype == -26: # complex. A complex type consisting of two IEEE double-precision floating point values.
#elif leftDIE.stabsbasetype == -27: # integer*1, 8 bit signed integral type.
#elif leftDIE.stabsbasetype == -28: # integer*2, 16 bit signed integral type.
#elif leftDIE.stabsbasetype == -29: # integer*4, 32 bit signed integral type.
#elif leftDIE.stabsbasetype == -30: # wchar. Wide character, 16 bits wide, unsigned (what format? Unicode?).
#elif leftDIE.stabsbasetype == -31: # long long, 64 bit signed integral type.
#elif leftDIE.stabsbasetype == -32: # unsigned long long, 64 bit unsigned integral type.
#elif leftDIE.stabsbasetype == -33: # logical*8, 64 bit unsigned integral type.
#elif leftDIE.stabsbasetype == -34: # integer*8, 64 bit signed integral type.
else:
print("Error: unknown negative type «%s»" % p.group(1))
if debug: dbgprint("} leftDIE negativeType")
return None, p.group(2)
p = self.pointerRE.match(remaining)
if p:
DIE, remaining = self.parseStabType(None, parent, Index, p.group(2))
if DIE == None:
print("Error: No type for %s type: %d %s" % (p.group(1), Index, remaining))
leftDIE.SetType(DIE.typenumber)
if p.group(1) == "*":
leftDIE.SetTag("TAG_pointer_type")
elif p.group(1) == "&":
leftDIE.SetTag("TAG_reference_type")
elif p.group(1) == "k":
leftDIE.SetTag("TAG_const_type")
elif p.group(1) == "B":
leftDIE.SetTag("TAG_volatile_type")
if debug: dbgprint("} leftDIE %s" % leftDIE.tag)
return None, remaining
p = self.methodRE.match(remaining)
if p:
classDIE, remaining = self.parseStabType(None, parent, Index, p.group(2))
if classDIE == None:
print("Error: No type for method: [%d] «%s»" % (Index, remaining))
if debug: dbgprint("} leftDIE")
return None, remaining
if p.group(1) == "#":
if debug: dbgprint("{ ismethod «%s»" % (leftDIE.typenumber))
leftDIE.ismethod = 1
leftDIE.methodClassDIE = classDIE
leftDIE.SetTag("TAG_subprogram")
#if not hasattr(classDIE, "pointerclasstypenumber"):
# pointerclasstypenumber = ("* %s" % classDIE.typenumber)
# pointerclassDIE = self.makeDIE(parent.parent, Index, "TAG_pointer_type")
# pointerclassDIE.Settypenumber(pointerclasstypenumber)
# pointerclassDIE.AT_type = classDIE.typenumber
# classDIE.pointerclasstypenumber = pointerclasstypenumber
#
#artificalDIE = self.makeDIE(leftDIE, Index, "TAG_formal_parameter")
#artificalDIE.AT_type = classDIE.pointerclasstypenumber
#artificalDIE.AT_artificial = 1 # the this parameter is artificial
#leftDIE.artificalDIE = artificalDIE
numParameters = 0
while True:
q = self.parameterRE.match(remaining)
if not q:
print("Error: Expected , or ; : [%d] «%s»" % (Index, remaining))
break
if q.group(1) == ";":
remaining = q.group(2)
break
parametertypeDIE, remaining = self.parseStabType(None, parent, Index, q.group(2))
if parametertypeDIE == None:
print("Error: Expected parameter type: [%d] «%s»" % (Index, q.group(2)))
break
if numParameters == 0:
leftDIE.AT_type = parametertypeDIE.typenumber
else:
parameterDIE = self.makeDIE(leftDIE, Index, "TAG_formal_parameter")
parameterDIE.AT_type = parametertypeDIE.typenumber
parameterDIE.parameterNumber = numParameters
if leftDIE.ismethod and numParameters == 1:
parameterDIE.AT_artificial = 1
leftDIE.artificalDIE = parameterDIE
numParameters += 1
if numParameters < 1:
print("Error: Expected return type: [%d] «%s»" % (Index, remaining))
elif numParameters < 2:
print("Error: Expected artifical parameter: [%d] «%s»" % (Index, remaining))
elif not hasattr(parametertypeDIE, "isvoid"):
#print("Error: Expected terminating void: [%d] «%s»" % (Index, remaining))
pass
else:
leftDIE.children.pop()
if debug: dbgprint("} ismethod")
else: # p.group(1) == "f"
if debug: dbgprint("{ isstaticmethod «%s»" % (leftDIE.typenumber))
leftDIE.isstaticmethod = 1
leftDIE.AT_type = classDIE.typenumber
leftDIE.SetTag("TAG_subroutine_type")
if debug: dbgprint("} isstaticmethod")
if debug: dbgprint("} leftDIE")
return None, remaining
p = self.enumerationRE.match(remaining)
if p:
leftDIE.SetTag("TAG_enumeration_type")
if not hasattr(leftDIE, "AT_byte_size"):
leftDIE.AT_byte_size = 1
leftDIE.enumsizemin = 1
leftDIE.enumsizemax = 16
remaining = p.group(1)
while True:
q = self.enumeratorRE.match(remaining)
if q:
remaining = q.group(4)
if q.group(1) == ";":
break
DIE = self.makeDIE(leftDIE, Index, "TAG_enumerator")
DIE.SetName(q.group(2))
if q.group(3)[0] == "0":
DIE.AT_const_value = int(q.group(3), 8)
else:
DIE.AT_const_value = int(q.group(3), 10)
if hasattr(leftDIE, "enumsizemin"):
if leftDIE.AT_byte_size <= 1 and DIE.AT_const_value >= -0x80 and DIE.AT_const_value <= 0x7f:
leftDIE.AT_byte_size = 1
leftDIE.enumsizemin = 1
elif leftDIE.AT_byte_size <= 2 and DIE.AT_const_value >= -0x8000 and DIE.AT_const_value <= 0x7fff:
leftDIE.AT_byte_size = 2
leftDIE.enumsizemin = 2
elif leftDIE.AT_byte_size <= 4 and DIE.AT_const_value >= -0x80000000 and DIE.AT_const_value <= 0x7fffffff:
leftDIE.AT_byte_size = 4
leftDIE.enumsizemin = 4
elif leftDIE.AT_byte_size <= 8 and DIE.AT_const_value >= -0x8000000000000000 and DIE.AT_const_value <= 0x7fffffffffffffff:
leftDIE.AT_byte_size = 8
leftDIE.enumsizemin = 8
else:
print("Error: Expected enumerator: [%d] «%s»" % (Index, remaining))
break
if debug: dbgprint("} leftDIE")
return None, remaining
p = self.subrangeRE.match(remaining)
if p:
if debug: dbgprint("{ subrange type: [%d] «%s»" % (Index, p.group(1)))
leftDIE.SetTag("TAG_subrange_type")
typeDIE, remaining = self.parseStabType(None, parent, Index, p.group(1))
if typeDIE == None:
print("Error: Expected subrange type: [%d] «%s»" % (Index, p.group(1)))
if debug: dbgprint("} subrange type")
if debug: dbgprint("} leftDIE")
return None, remaining
if leftDIE.typenumber == typeDIE.typenumber:
leftDIE.selfReference = 1
else:
leftDIE.AT_type = typeDIE.typenumber
boundsDIE = leftDIE
# This block matches array sub range block below [
q = self.subrangeLimitsRE.match(remaining)
if not q:
print("Error: Expected subrange limits: [%d] «%s»" % (Index, remaining))
if debug: dbgprint("} subrange type error")
if debug: dbgprint("} leftDIE")
return None, remaining
if q.group(1)[0] == "0" or (q.group(1)[0] == "-" and q.group(1)[1] == "0"):
boundsDIE.AT_lower_bound = int(q.group(1), 8)
else:
boundsDIE.AT_lower_bound = int(q.group(1), 10)
if q.group(2)[0] == "0" or (q.group(2)[0] == "-" and q.group(2)[1] == "0"):
boundsDIE.AT_upper_bound = int(q.group(2), 8)
else:
boundsDIE.AT_upper_bound = int(q.group(2), 10)
if boundsDIE.AT_lower_bound > boundsDIE.AT_upper_bound:
if boundsDIE.AT_lower_bound == 0x8000000000000000:
# 64-bit lower bound
boundsDIE.AT_lower_bound = -0x8000000000000000
elif boundsDIE.AT_lower_bound == 0 and boundsDIE.AT_upper_bound == -1:
# unbounded array []
del boundsDIE.AT_lower_bound
del boundsDIE.AT_upper_bound
boundsDIE.unbounded = 1
# ]
elif boundsDIE.AT_lower_bound > 0 and boundsDIE.AT_upper_bound == 0:
# floating point number
boundsDIE.tag = "TAG_base_type"
boundsDIE.AT_encoding = "ATE_float"
boundsDIE.AT_byte_size = boundsDIE.AT_lower_bound
boundsDIE.floatAtType = boundsDIE.AT_type
del boundsDIE.AT_lower_bound
del boundsDIE.AT_upper_bound
del boundsDIE.AT_type
else:
print("Error: Expected subrange type lower bound %d to be less than upper bound %d: [%d] «%s»" % (boundsDIE.AT_lower_bound, boundsDIE.AT_upper_bound, Index, remaining))
if hasattr(leftDIE, "selfReference"):
leftDIE.tag = "TAG_base_type"
if leftDIE.AT_lower_bound < 0:
leftDIE.AT_encoding = "ATE_signed"
if leftDIE.AT_lower_bound >= -0x80 and leftDIE.AT_upper_bound <= 0x7f:
leftDIE.AT_encoding = "ATE_signed_char"
leftDIE.AT_byte_size = 1
elif leftDIE.AT_lower_bound >= -0x8000 and leftDIE.AT_upper_bound <= 0x7fff:
leftDIE.AT_byte_size = 2
elif leftDIE.AT_lower_bound >= -0x80000000 and leftDIE.AT_upper_bound <= 0x7fffffff:
leftDIE.AT_byte_size = 4
elif leftDIE.AT_lower_bound >= -0x8000000000000000 and leftDIE.AT_upper_bound <= 0x7fffffffffffffff:
leftDIE.AT_byte_size = 8
else:
leftDIE.AT_encoding = "ATE_unsigned"
if leftDIE.AT_upper_bound <= 0xff:
leftDIE.AT_encoding = "ATE_unsigned_char"
leftDIE.AT_byte_size = 1
elif leftDIE.AT_upper_bound <= 0xffff:
leftDIE.AT_byte_size = 2
elif leftDIE.AT_upper_bound <= 0xffffffff:
leftDIE.AT_byte_size = 4
elif leftDIE.AT_upper_bound <= 0xffffffffffffffff:
leftDIE.AT_byte_size = 8
elif leftDIE.tag != "TAG_base_type":
print("Error: Expected subrange type to be a base type: [%d] «%s»" % (Index, remaining))
remaining = q.group(3)
if debug: dbgprint("} subrange type")
if debug: dbgprint("} leftDIE")
return None, remaining
# arrayRE = re.compile('a(.*)')
p = self.arrayRE.match(remaining)
if p:
if debug: dbgprint("{ array type: [%d] «%s»" % (Index, p.group(1)))
leftDIE.SetTag("TAG_array_type")
q = self.subrangeRE.match(p.group(1))
if not q:
print("Error: Expected array subrange: [%d] «%s»" % (Index, p.group(1)))
if debug: dbgprint("} array type error 1")
if debug: dbgprint("} leftDIE")
return None, remaining
boundsDIE = self.makeDIE(leftDIE, Index, "TAG_subrange_type")
arrayNdxTypeDIE, remaining = self.parseStabType(None, boundsDIE, Index, q.group(1)) # leftDIE, parent, Index, tstr
if arrayNdxTypeDIE == None:
print("Error: Expected array subrange type: [%d] «%s»" % (Index, q.group(1)))
if debug: dbgprint("} array type error 2")
if debug: dbgprint("} leftDIE")
return None, remaining
boundsDIE.AT_type = arrayNdxTypeDIE.typenumber
# This block matches subrange block above [
q = self.subrangeLimitsRE.match(remaining)
if not q:
print("Error: Expected subrange limits: [%d] «%s»" % (Index, remaining))
if debug: dbgprint("} array type error 3")
if debug: dbgprint("} leftDIE")
return None, remaining
if q.group(1)[0] == "0" or (q.group(1)[0] == "-" and q.group(1)[1] == "0"):
boundsDIE.AT_lower_bound = int(q.group(1), 8)
else:
boundsDIE.AT_lower_bound = int(q.group(1), 10)
if q.group(2)[0] == "0" or (q.group(2)[0] == "-" and q.group(2)[1] == "0"):
boundsDIE.AT_upper_bound = int(q.group(2), 8)
else:
boundsDIE.AT_upper_bound = int(q.group(2), 10)
if boundsDIE.AT_lower_bound > boundsDIE.AT_upper_bound:
if boundsDIE.AT_lower_bound == 0x8000000000000000:
# 64-bit lower bound
boundsDIE.AT_lower_bound = -0x8000000000000000
elif boundsDIE.AT_lower_bound == 0 and boundsDIE.AT_upper_bound == -1:
# unbounded array []
del boundsDIE.AT_lower_bound
del boundsDIE.AT_upper_bound
boundsDIE.unbounded = 1
# ]
else:
print("Error: Expected subrange type lower bound %d to be less than upper bound %d: [%d] «%s»" % (boundsDIE.AT_lower_bound, boundsDIE.AT_upper_bound, Index, remaining))
elif boundsDIE.AT_lower_bound == 0:
boundsDIE.AT_count = boundsDIE.AT_upper_bound + 1
del boundsDIE.AT_lower_bound
del boundsDIE.AT_upper_bound
remaining = q.group(3)
typeDIE, remaining = self.parseStabType(None, parent, Index, remaining)
if typeDIE == None:
print("Error: Expected array element type: [%d] «%s»" % (Index, remaining))
if debug: dbgprint("} array type error 4")
if debug: dbgprint("} leftDIE")
return None, remaining
if typeDIE.tag == "TAG_array_type":
# handle array of array
arrayNdx2DIE = DIEDict()
arrayNdx2DIE.__dict__.update(typeDIE.children[0].__dict__)
arrayNdx2DIE.parent = leftDIE
leftDIE.children.append(arrayNdx2DIE)
leftDIE.AT_type = typeDIE.AT_type
else:
leftDIE.AT_type = typeDIE.typenumber
if debug: dbgprint("} array type")
if debug: dbgprint("} leftDIE")
return None, remaining
p = self.crossReferenceRE.match(remaining)
if p:
if debug: dbgprint("{ cross reference -- [%d] type:«%s» name:«%s» remaining:«%s»" % (Index, p.group(1), p.group(2), p.group(3)))
if p.group(1) == "s":
leftDIE.SetTag("TAG_structure_type")
elif p.group(1) == "u":
leftDIE.SetTag("TAG_union_type")
elif p.group(1) == "e":
leftDIE.SetTag("TAG_enumeration_type")
leftDIE.SetSize(4)
leftDIE.SetName(p.group(2))
leftDIE.iscrossreference = 1
if debug: dbgprint("} cross reference")
if debug: dbgprint("} leftDIE")
return leftDIE, p.group(3)
p = self.structRE.match(remaining)
if p:
if debug: dbgprint("{ struct -- type:«%s» size:«%s» numBaseClasses:«%s» remaining:«%s»" % (p.group(1), p.group(2), p.group(3), p.group(4)))
if p.group(1) == "s":
leftDIE.SetTag("TAG_structure_type")
else:
leftDIE.SetTag("TAG_union_type")
leftDIE.AT_byte_size = int(p.group(2), 10)
remaining = p.group(4)
if p.group(3) != None:
if debug: dbgprint("{ baseclasses")
numBaseClasses = int(p.group(3), 10)
numFoundBaseClasses = 0
#baseclassRE = re.compile('(?:(;)|(\d)(\d)(\d+),)(.*)')
while True:
if debug: dbgprint("{ baseclassRE")
q = self.baseclassRE.match(remaining)
if not q:
print("Error: Expected base class: [%d] «%s»" % (Index, remaining))
if debug: dbgprint("} baseclassRE break 1")
break
if q.group(1) == ";":
remaining = q.group(5)
if debug: dbgprint("} baseclassRE break 2")
break
if debug: dbgprint("got base class -- last:«%s» virtuality:«%s» access:«%s» location:«%s» remaining:«%s»" % (q.group(1), q.group(2), q.group(3), q.group(4), q.group(5)))
numFoundBaseClasses += 1
DIE = self.makeDIE(leftDIE, Index, "TAG_inheritance")
if q.group(2) == "1":
DIE.AT_virtuality = "VIRTUALITY_virtual"
elif q.group(2) == "0":
pass
else:
print("Error: Unexpected virtuality: [%d] «%s» «%s»" % (Index, q.group(2), remaining))
if q.group(3) == "2":
DIE.AT_accessibility = "ACCESS_public"
elif q.group(3) == "1":
DIE.AT_accessibility = "ACCESS_protected"
elif q.group(3) == "0":
DIE.AT_accessibility = "ACCESS_private"
else:
print("Error: Unexpected field accessibility: [%d] «%s» «%s»" % (Index, q.group(2), remaining))
DIE.AT_data_member_location = int(q.group(4), 10)
typeDIE, remaining = self.parseStabType(None, leftDIE, Index, q.group(5))
DIE.AT_type = typeDIE.typenumber
if debug: dbgprint("{ UpdateBaseClassLists")
DIE.UpdateBaseClassLists()
if debug: dbgprint("} UpdateBaseClassLists")
if debug: dbgprint("} baseclassRE")
if numFoundBaseClasses > numBaseClasses:
print("Error: Unexpected number of listed base classes: [%d] «%s» «%s»" % (Index, q.group2, remaining))
if debug: dbgprint("} baseclasses")
if debug: dbgprint("{ members")
while True:
if debug: dbgprint("{ member «%s»" % remaining)
q = self.fieldnameRE.match(remaining)
if not q:
print("Error: Expected field name: [%d] «%s»" % (Index, remaining))
if debug: dbgprint("} member error 1")
break
# 1 2 3 4 5 6
# (?:(;)|(?:([^:\n]*)(?:(?:(::)|(:))(?:/([0-2]))?)))(.*)
if debug: dbgprint("got field -- last:«%s» name:«%s» method:«%s» field:«%s» access:«%s» remaining:«%s»" % (q.group(1), q.group(2), q.group(3), q.group(4), q.group(5), q.group(6)))
remaining = q.group(6)
if q.group(1) == ";":
if debug: dbgprint("} member simple")
break
# we can use the same name for multiple fields
if debug: dbgprint("{ names")
while len(remaining) > 0 and (remaining[0] != ";"):
if q.group(3) == "::":
if debug: dbgprint("{ method")
DIE, remaining = self.parseStabType(None, leftDIE, Index, remaining)
if DIE == None:
print("Error: Expected type «%s»" % remaining)
if debug: dbgprint("} method break 1")
break
DIE.tag = "TAG_subprogram"
if debug: dbgprint("Duplicating: [%d] «%s»" % (Index, q.group(6)))
if debug: DumpDIE(DIE,0)
newDIE = DIEDict()
newDIE.__dict__.update(DIE.__dict__)
#DIE.copy()) # need a new DIE for a different method with same parameters
if hasattr(DIE, "AT_name"):
del newDIE.AT_name
newDIE.duplicateof = DIE
del newDIE.typenumber
DIE = newDIE
newDIE.parent.AppendChild(newDIE)
DIE.SetName(q.group(2))
DIE.isclassmethod = 1
if debug: dbgprint("newDIE:")
if debug: DumpDIE(DIE,0)
# 1 2 3 4 5 6 7 8
# :([^;]+);([0-2])([A-D])(?:(\?)|(\.)|(\*)(\d+))(.*)
r = self.methodpropertiesRE.match(remaining)
if not r:
print("Error: Expected method properties: [%d] «%s»" % (Index, remaining))
if debug: dbgprint("} method break 2")
break
if debug: dbgprint ("got method properties -- linkage:«%s» access:«%s» modifier«%s» static:«%s» method:«%s» virtual:«%s» location:«%s» remaining:«%s»" % (r.group(1), r.group(2), r.group(3), r.group(4), r.group(5), r.group(6), r.group(7), r.group(8)))
DIE.AT_MIPS_linkage_name = r.group(1)
if r.group(2) == "2":
pass
# DIE.AT_accessibility = "ACCESS_public"
elif r.group(2) == "1":
DIE.AT_accessibility = "ACCESS_protected"
elif r.group(2) == "0":
DIE.AT_accessibility = "ACCESS_private"
# method properties (static/method/virtual):
if r.group(6) == "*" and hasattr(DIE, "ismethod"):
DIE.AT_virtuality = "VIRTUALITY_virtual"
DIE.AT_vtable_elem_location = int(r.group(7),10)
s = self.methodEndRE.match(r.group(8))
if not s:
print("Error: Expected ; before virtual method containing type: [%d] «%s»" % (Index, r.group(8)))
if debug: dbgprint("} method break 3")
break
containingDIE, remaining = self.parseStabType(None, parent, Index, s.group(1))
DIE.AT_containing_type = containingDIE.typenumber
if containingDIE.typenumber != leftDIE.typenumber:
print("Error: containing type «%s» ≠ struct type «%s»" % (containingDIE.typenumber, leftDIE.typenumber))
s = self.methodEndRE.match(remaining)
if not s:
print("Error: Expected ; after virtual method containing type: [%d] «%s»" % (Index, remaining))
if debug: dbgprint("} method break 4")
break
remaining = s.group(1)
if not hasattr(DIE.parent, "VTableEntries"):
DIE.parent.VTableEntries = {}
if DIE.AT_vtable_elem_location in DIE.parent.VTableEntries:
nameold = DIE.parent.VTableEntries[DIE.AT_vtable_elem_location].GetName()
namenew = DIE.GetName()
if namenew != nameold:
print("Error: %s has duplicate VTableEntries 0x%x %s%s %s%s" % (DIE.parent.GetAddress(), DIE.AT_vtable_elem_location * DIE.compile_unit.addr_size, DIE.parent.VTableEntries[DIE.AT_vtable_elem_location].GetAddress(), nameold, DIE.GetAddress(), namenew))
else:
DIE.parent.VTableEntries[DIE.AT_vtable_elem_location] = DIE
else:
if r.group(4) == "?":
if hasattr(DIE, "isstaticmethod"):
pass
elif hasattr(DIE, "ismethod"):
print("Error: Invalid method properties. Expectining a static method but got a regular method: [%d] «%s»" % (Index, remaining))
else:
DIE.isstaticmethod = 1
DIE.staticfrommethodproperties = 1
elif r.group(5) == ".":
if hasattr(DIE, "ismethod"):
pass
elif hasattr(DIE, "isstaticmethod"):
print("Error: Invalid method properties. Expectining a regular method but got a static method: [%d] «%s»" % (Index, remaining))
else:
DIE.ismethod = 1
DIE.methodfrommethodproperties = 1
else:
print("Error: Invalid method properties: [%d] «%s»" % (Index, remaining))
if debug: DumpDIE(DIE, 0)
if debug: dbgprint("} method break 5")
break
remaining = r.group(8)
# method modifier (A,B,C,D):
#if r.group(3) == "A":
# pass
#else:
# classDIE = DIE.dSYM.DIELookup[leftDIE.typenumber] #classtypenumber
#
# if r.group(3) == "B":
# if not hasattr(leftDIE, "pointerconstclasstypenumber"):
# constclasstypenumber = ("c %s" % classDIE.typenumber)
# constclassDIE = self.makeDIE(parent.parent, Index, "TAG_const_type")
# constclassDIE.Settypenumber(constclasstypenumber)
# constclassDIE.AT_type = classDIE.typenumber
#
# pointerconstclasstypenumber = ("* %s" % constclassDIE.typenumber)
# pointerconstclassDIE = self.makeDIE(parent.parent, Index, "TAG_pointer_type")
# pointerconstclassDIE.Settypenumber(pointerconstclasstypenumber)
# pointerconstclassDIE.AT_type = constclassDIE.typenumber
# classDIE.pointerconstclasstypenumber = pointerconstclasstypenumber
#
# DIE.artificalDIE.AT_type = classDIE.pointerconstclasstypenumber
# elif r.group(3) == "C":
# if not hasattr(leftDIE, "pointervolatileclasstypenumber"):
# volatileclasstypenumber = ("v %s" % classDIE.typenumber)
# volatileclassDIE = self.makeDIE(parent.parent, Index, "TAG_volatile_type")
# volatileclassDIE.Settypenumber(volatileclasstypenumber)
# volatileclassDIE.AT_type = classDIE.typenumber
#
# pointervolatileclasstypenumber = ("* %s" % volatileclassDIE.typenumber)
# pointervolatileclassDIE = self.makeDIE(parent.parent, Index, "TAG_pointer_type")
# pointervolatileclassDIE.Settypenumber(pointervolatileclasstypenumber)
# pointervolatileclassDIE.AT_type = volatileclassDIE.typenumber
# classDIE.pointervolatileclasstypenumber = pointervolatileclasstypenumber
#
# DIE.artificalDIE.AT_type = classDIE.pointervolatileclasstypenumber
# elif r.group(3) == "D":
# if not hasattr(leftDIE, "pointerconstvolatileclasstypenumber"):
# volatileclasstypenumber = ("v %s" % classDIE.typenumber)
# volatileclassDIE = self.makeDIE(parent.parent, Index, "TAG_volatile_type")
# volatileclassDIE.Settypenumber(volatileclasstypenumber)
# volatileclassDIE.AT_type = classDIE.typenumber
#
# constvolatileclasstypenumber = ("c %s" % volatileclassDIE.typenumber)
# constvolatileclassDIE = self.makeDIE(parent.parent, Index, "TAG_const_type")
# constvolatileclassDIE.Settypenumber(constvolatileclasstypenumber)
# constvolatileclassDIE.AT_type = volatileclassDIE.typenumber
#
# pointerconstvolatileclasstypenumber = ("* %s" % constvolatileclassDIE.typenumber)
# pointerconstvolatileclassDIE = self.makeDIE(parent.parent, Index, "TAG_pointer_type")
# pointerconstvolatileclassDIE.Settypenumber(typenumber)
# pointerconstvolatileclassDIE.AT_type = classDIE.typenumber
# classDIE.pointerconstvolatileclasstypenumber = pointerconstvolatileclasstypenumber
#
# DIE.artificalDIE.AT_type = classDIE.pointerconstvolatileclasstypenumber
if debug: dbgprint("} method")
elif q.group(4) == ":":
if debug: dbgprint("{ field")
DIE = self.makeDIE(leftDIE, Index, "TAG_member")
DIE.SetName(q.group(2))
if q.group(5) == "2":
pass
# DIE.AT_accessibility = "ACCESS_public"
elif q.group(5) == "1":
DIE.AT_accessibility = "ACCESS_protected"
elif q.group(5) == "0":
DIE.AT_accessibility = "ACCESS_private"
elif q.group(5) != None:
print("Error: Unexpected member accessibility: [%d] «%s» «%s»" % (Index, q.group(5), remaining))
typeDIE, remaining = self.parseStabType(None, leftDIE, Index, q.group(6))
DIE.AT_type = typeDIE.typenumber
q = self.fieldlocationRE.match(remaining)
if q:
if q.group(1) == ",":
bits = int(q.group(3), 10)
if (bits == 0):
if debug: dbgprint("got 0 size field: [%d]" % (Index))
leftDIE.children.pop()
else:
location = int(q.group(2), 10)
if (location & 7) or (bits & 7):
DIE.AT_data_bit_offset = location
DIE.AT_bit_size = bits
else:
DIE.AT_data_member_location = location >> 3
DIE.AT_byte_size = bits >> 3
DIE.checkmembersize = 1
if not hasattr(leftDIE, "Fields"):
leftDIE.Fields = []
leftDIE.Fields.append(DIE)
if hasattr(DIE, "AT_name") and "vptr$" in DIE.AT_name:
DIE.AT_artificial = 1
else: # == ":":
# :{variable_name} for static members
if debug: dbgprint("got static variable -- linkage:«%s» remaining:«%s»" % (q.group(4), remaining))
DIE.tag = "TAG_variable" # don't call SetTag because we are changing the tag from "TAG_member"
DIE.AT_MIPS_linkage_name = q.group(4)
remaining = q.group(5)
else:
print("Error: Expected field location: [%d] «%s»" % (Index, remaining))
if debug: dbgprint("} field break")
break
if debug: dbgprint("} field")
if debug: dbgprint("} names")
s = self.fieldEndRE.match(remaining)
if not s:
print("Error: Expected ; at end of field: [%d] «%s»" % (Index, remaining))
if debug: dbgprint("} member break 4")
break
else:
remaining = s.group(1)
if debug: dbgprint("} member")
if debug: dbgprint("} members")
#structContainingRE = re.compile("~%(.*)")
#structContainingEndRE = re.compile(';(.*)')
s = self.structContainingRE.match(remaining)
if s:
structContainingDIE, remaining = self.parseStabType(None, parent, Index, s.group(1))
leftDIE.AT_containing_type = structContainingDIE.typenumber
t = self.structContainingEndRE.match(remaining)
if not t:
print("Error: Expected ; at end of containing class: [%d] «%s»" % (Index, remaining))
#break
remaining = t.group(1)
if debug: dbgprint("} struct")
if debug: dbgprint("} leftDIE")
return None, remaining
print("Error: Uknown type: [%d] «%s»" % (Index, tstr))
return None, tstr
def Readstabs(self, f, parent):
for line in f:
if debug: dbgprint("%.*s" % (len(line) - 1, line))
s = self.stabRE.match(line)
if s:
Index = int(s.group(1), 10)
n_type = int(s.group(2), 16)
n_type_str = s.group(3)
n_sect = int(s.group(4), 16)
n_desc = int(s.group(5), 16)
n_value = int(s.group(6), 16)
n_str = s.group(7)
remaining = ""
if n_type == 0x64: # N_SO
if n_str == None:
if parent.tag != "TAG_compile_unit":
if parent.tag == "TAG_include" and parent.dSYM.currentCompileUnit.AT_name == parent.AT_name:
#print("Warning: parent tag (%s) is not TAG_compile_unit: [%6d] %02x (%-13s) %02x %04x %16x '%s'" % (parent.tag, Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
pass
else:
print("Error: parent tag (%s) is not TAG_compile_unit: [%6d] %02x (%-13s) %02x %04x %16x '%s'" % (parent.tag, Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
parent.dSYM.popuntiltag = "TAG_compile_unit"
break
else:
p = self.dirpathRE.match(n_str)
if p:
if self.path != None or parent.dSYM.currentCompileUnit != None:
print("Error: path (%s) or currentCompileUnit (%s) is already set: [%6d] %02x (%-13s) %02x %04x %16x '%s'" % (self.path, "yes" if parent.dSYM.currentCompileUnit != None else "None", Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
self.path = p.group(1)
if debug: dbgprint("[%d] Path: «%s»" % (Index, self.path))
else:
if parent.dSYM.currentCompileUnit != None:
print("Error: starting new currentCompileUnit when previous hasn't been finished yet: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
DIE = self.makeDIE(parent, Index, "TAG_compile_unit")
self.currentfile = n_str
DIE.SetName(n_str)
DIE.AT_comp_dir = self.path
if n_desc == 0: pass
elif n_desc == 0x1: DIE.AT_language = "N_SO_AS" # Assembly language
elif n_desc == 0x2: DIE.AT_language = "N_SO_C" # K&R traditional C
elif n_desc == 0x3: DIE.AT_language = "N_SO_ANSI_C" # ANSI C
elif n_desc == 0x4: DIE.AT_language = "N_SO_CC" # C++
elif n_desc == 0x5: DIE.AT_language = "N_SO_FORTRAN" # Fortran
elif n_desc == 0x6: DIE.AT_language = "N_SO_PASCAL" # Pascal
elif n_desc == 0x7: DIE.AT_language = "N_SO_FORTRAN90" # Fortran90
elif n_desc == 0x32: DIE.AT_language = "N_SO_OBJC" # Objective-C
elif n_desc == 0x33: DIE.AT_language = "N_SO_OBJCPLUS" # Objective-C++
else: print("Error: [%d] Unknown souce language %x" % (Index, n_desc))
self.Readstabs(f, DIE)
parent.dSYM.currentCompileUnit = None
self.path = None
self.currentfile = None
elif n_type == 0x66: # N_OSO
if n_str != None:
if parent.dSYM.currentCompileUnit != None:
parent.dSYM.currentCompileUnit.objectfile = n_str
elif n_type == 0x84: # N_SOL
self.currentfile = n_str
elif n_type == 0x82: # N_BINCL
DIE = self.makeDIE(parent, Index, "TAG_include") # fake dwarf tag
savecurrentfile = self.currentfile
self.currentfile = n_str
DIE.AT_name = n_str
self.Readstabs(f, DIE)
self.currentfile = savecurrentfile
elif n_type == 0xa2: # N_EINCL
if parent.tag != "TAG_include":
print("Error: parent tag (%s) is not TAG_include: [%6d] %02x (%-13s) %02x %04x %16x '%s'" % (parent.tag, Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
parent.dSYM.popuntiltag = "TAG_include"
break
elif n_type == 0xc2: # N_EXCL
DIE = self.makeDIE(parent, Index, "TAG_exclude") # fake dwarf tag
DIE.AT_name = n_str
elif n_type == 0x2e: # N_BNSYM
DIE = self.makeDIE(parent, Index, "TAG_symbol") # fake dwarf tag
self.Readstabs(f, DIE)
elif n_type == 0x4e: # N_ENSYM
if parent.tag != "TAG_symbol":
print("Error: parent tag (%s) is not TAG_symbol: [%6d] %02x (%-13s) %02x %04x %16x '%s'" % (parent.tag, Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
break
elif n_type == 0x3c: # N_OPT
if not hasattr(parent.dSYM, "currentCompileUnit"):
print("Error: expected N_OPT to be inside an N_SO: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
else:
parent.dSYM.currentCompileUnit.AT_producer = n_str
elif n_type == 0x24: # N_FUN
if n_str == None:
if parent.tag != "TAG_subprogram":
print("Error: parent tag (%s) is not TAG_subprogram: [%6d] %02x (%-13s) %02x %04x %16x '%s'" % (parent.tag, Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
break
else:
p = self.stringRE.match(n_str)
if p:
if p.group(2) == "F" or p.group(2) == "f":
DIE = self.makeDIE(parent, Index, "TAG_subprogram")
typeDIE, remaining = self.parseStabType(None, parent, Index, p.group(3))
DIE.AT_type = typeDIE.typenumber
DIE.SetName(p.group(1))
if p.group(2) == "F":
DIE.AT_external = 1
if n_desc > 0:
DIE.AT_decl_line = n_desc
DIE.AT_low_pc = n_value
self.Readstabs(f, DIE)
else:
print("Error: Expected F or f for N_FUN: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
else:
print("Error: Unknown N_FUN format: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
elif n_type == 0xc0: # N_LBRAC
DIE = self.makeDIE(parent, Index, "TAG_lexical_block")
DIE.AT_low_pc = n_value
self.Readstabs(f, DIE)
elif n_type == 0xe0: # N_RBRAC
if parent.tag != "TAG_lexical_block":
print("Error: parent tag (%s) is not TAG_lexical_block: [%6d] %02x (%-13s) %02x %04x %16x '%s'" % (parent.tag, Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
else:
parent.AT_high_pc = n_value
break
elif n_type == 0x80: # N_LSYM
p = self.stringRE.match(n_str)
if p:
if debug: dbgprint("{ N_LSYM [%d] «%s» «%s» «%s»" % (Index, p.group(1), p.group(2), p.group(3)))
DIE = None
typeDIE = None
if p.group(2) == "" or p.group(2) == "t" or p.group(2) == "T" or p.group(2) == "Tt":
typeDIE, remaining = self.parseStabType(None, parent, Index, p.group(3))
if debug: DumpDIE(typeDIE, 0)
if p.group(2) != "":
if not hasattr(typeDIE, "AT_name"):
if debug: dbgprint("setting type «%s» name to «%s»" % (typeDIE.typenumber, p.group(1)))
typeDIE.SetName(p.group(1))
elif typeDIE.AT_name != p.group(1):
if hasattr(typeDIE, "iscrossreference"):
# This is the usual form of the message:
# Warning: New name «IORegistryEntry::ExpansionData» ≠ old name «ExpansionData»
if p.group(1) != "" and p.group(1) != " ":
if p.group(1).endswith("::" + typeDIE.AT_name):
typeDIE.SetNameWithClass(p.group(1))
else:
print("Warning: [%d] «%s» New name «%s» ≠ old name «%s»" % (Index, typeDIE.typenumber, p.group(1), typeDIE.AT_name))
elif hasattr(typeDIE, "isnamefromcrossreference"):
print("Warning: [%d] «%s» Name «%s» from cross reference is replaced by typedef name «%s»" % (Index, typeDIE.typenumber, typeDIE.AT_name, p.group(1)))
del typeDIE.isnamefromcrossreference
typeDIE.AT_name = p.group(1)
typeDIE.CheckName()
elif hasattr(typeDIE, "namesetfromtyperef"):
del typeDIE.namesetfromtyperef
typeDIE.AT_name = p.group(1)
typeDIE.CheckName()
elif hasattr(typeDIE, "namesetfromtyperef2"):
del typeDIE.namesetfromtyperef2
typeDIE.AT_name = p.group(1)
typeDIE.CheckName()
else:
if p.group(1) != "" and p.group(1) != " ":
if p.group(1).endswith("::" + typeDIE.AT_name):
typeDIE.SetNameWithClass(p.group(1))
else:
print("Error: [%d] «%s» New name «%s» ≠ old name «%s»" % (Index, typeDIE.typenumber, p.group(1), typeDIE.AT_name))
if typeDIE.tag == "TAG_typedef" and typeDIE.HasType():
attype = typeDIE.GetType()
if attype != None and attype.tag != "TAG_pointer_type" and attype.tag != "TAG_reference_type" and attype.tag != "TAG_const_type" and attype.tag != "TAG_volatile_type":
if not hasattr(attype, "AT_name"):
attype.AT_name = p.group(1)
attype.CheckName()
attype.namesetfromtyperef = 1
if hasattr(typeDIE, "createdFrom"):
for attype in typeDIE.createdFrom:
if attype != None and attype.tag != "TAG_pointer_type" and attype.tag != "TAG_reference_type" and attype.tag != "TAG_const_type" and attype.tag != "TAG_volatile_type":
if not hasattr(attype, "AT_name"):
attype.AT_name = p.group(1)
attype.CheckName()
attype.namesetfromtyperef2 = 1
if p.group(2) == "Tt":
#DIE = self.makeDIE(parent, Index, "TAG_typedef")
#DIE.SetName(p.group(1))
#DIE.AT_type = typeDIE.typenumber
DIE = typeDIE
elif p.group(2) == "t":
if typeDIE.HasType() and typeDIE.tag != "TAG_typedef" and typeDIE.tag != "TAG_base_type" and typeDIE.tag != "TAG_pointer_type" and typeDIE.tag != "TAG_reference_type" and typeDIE.tag != "TAG_const_type" and typeDIE.tag != "TAG_volatile_type" and typeDIE.tag != "TAG_array_type" and typeDIE.tag != "TAG_subroutine_type" and typeDIE.tag != "TAG_subprogram":
if hasattr(typeDIE, "iscrossreference"):
#print("Warning: [%d] «%s» typedef «%s» created from crossreference had tag «%s»" % (Index, typeDIE.typenumber, typeDIE.AT_name, typeDIE.tag))
typeDIE.tag = "TAG_typedef"
elif typeDIE.HasType():
print("Error: [%d] «%s» typedef «%s» has unexpected tag «%s»" % (Index, typeDIE.typenumber, "None" if not hasattr(typeDIE, "AT_name") else typeDIE.AT_name, typeDIE.tag))
DIE = typeDIE
elif p.group(2) == "":
DIE = self.makeDIE(parent, Index, "TAG_variable")
DIE.SetName(p.group(1))
DIE.AT_type = typeDIE.typenumber
else:
DIE = typeDIE
if DIE != None:
if n_desc > 0:
DIE.AT_decl_line = n_desc
if n_value > 0:
DIE.AT_location = ("local %x" % n_value)
else:
print("Error: Expected \"\" or t or T or Tt for N_LSYM: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
if debug: dbgprint("} N_LSYM")
else:
print("Error: Unknown N_LSYM format: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
elif n_type == 0xa0: # N_PSYM
p = self.stringRE.match(n_str)
if p:
if p.group(2) == "p":
typeDIE, remaining = self.parseStabType(None, parent, Index, p.group(3))
#typeDIE.SetName(p.group(1))
DIE = self.makeDIE(parent, Index, "TAG_formal_parameter")
DIE.SetName(p.group(1))
DIE.AT_type = typeDIE.typenumber
if n_desc > 0:
DIE.AT_decl_line = n_desc
if n_value > 0:
DIE.AT_location = ("stack %x" % n_value)
else:
print("Error: Expected p for N_PSYM: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
else:
print("Error: Unknown N_PSYM format: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
elif n_type == 0x40: # N_RSYM
p = self.stringRE.match(n_str)
if p:
DIE = None
typeDIE = None
if p.group(2) == "r" or p.group(2) == "P":
if debug: dbgprint("{ N_RSYM r or P: [%d] «%s» «%s» «%s»" % (Index, p.group(1), p.group(2), p.group(3)))
typeDIE, remaining = self.parseStabType(None, parent, Index, p.group(3))
if p.group(2) == "r":
DIE = self.makeDIE(parent, Index, "TAG_variable")
elif p.group(2) == "P":
DIE = self.makeDIE(parent, Index, "TAG_formal_parameter")
if DIE != None:
DIE.SetName(p.group(1))
DIE.AT_type = typeDIE.typenumber
if n_desc > 0:
DIE.AT_decl_line = n_desc
if n_value > 0:
DIE.AT_location = ("register %x" % n_value)
if debug: dbgprint("} N_RSYM")
else:
print("Error: Expected r or P for N_RSYM: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
else:
print("Error: Unknown N_RSYM format: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
elif n_type == 0x26: # N_STSYM
p = self.stringRE.match(n_str)
if p:
if p.group(2) == "S" or p.group(2) == "V":
typeDIE, remaining = self.parseStabType(None, parent, Index, p.group(3))
#typeDIE.SetName(p.group(1))
DIE = self.makeDIE(parent, Index, "TAG_variable")
DIE.SetName(p.group(1))
DIE.AT_type = typeDIE.typenumber
if n_desc > 0:
DIE.AT_decl_line = n_desc
if n_value > 0:
DIE.AT_location = ("data %x" % n_value)
else:
print("Error: Expected S or V for N_STSYM: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
else:
print("Error: Unknown N_STSYM format: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
elif n_type == 0x28: # N_LCSYM
p = self.stringRE.match(n_str)
if p:
if p.group(2) == "S" or p.group(2) == "V":
typeDIE, remaining = self.parseStabType(None, parent, Index, p.group(3))
#typeDIE.SetName(p.group(1))
DIE = self.makeDIE(parent, Index, "TAG_variable")
DIE.SetName(p.group(1))
DIE.AT_type = typeDIE.typenumber
if n_desc > 0:
DIE.AT_decl_line = n_desc
if n_value > 0:
DIE.AT_location = ("BSS %x" % n_value)
else:
print("Error: Expected S or V for N_LCSYM: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
else:
print("Error: Unknown N_LCSYM format: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
elif n_type == 0x20: # N_GSYM
p = self.stringRE.match(n_str)
if p:
if p.group(2) == "G":
typeDIE, remaining = self.parseStabType(None, parent, Index, p.group(3))
#typeDIE.SetName(p.group(1))
DIE = self.makeDIE(parent, Index, "TAG_variable")
DIE.SetName(p.group(1))
DIE.AT_type = typeDIE.typenumber
DIE.AT_external = 1
if n_desc > 0:
DIE.AT_decl_line = n_desc
else:
print("Error: Expected G for N_GSYM: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
else:
print("Error: Unknown N_GSYM format: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
elif n_type == 0x01: # UNDF EXT
pass
elif n_type == 0x0d: # PBUD EXT
pass
elif n_type == 0x0e: # SECT
pass
elif n_type == 0x0f: # SECT EXT
pass
elif n_type == 0x12: # PEXT ABS
pass
elif n_type == 0x1e: # PEXT SECT
pass
elif n_type == 0x44: # N_SLINE # line numbers
pass
elif n_type == 0x02: # ABS
pass
elif n_type == 0x03: # ABS EXT
pass
else:
print("Error: Unknown stab type (0x%x = %s): [%6d] %02x (%-13s) %02x %04x %16x '%s'" % (n_type, n_type_str, Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
if remaining != "":
print("Error: Unparsed stuff (%s): [%6d] %02x (%-13s) %02x %04x %16x '%s'" % (remaining, Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
else: # if s:
#print("%s" % line)
pass
if hasattr(parent.dSYM, "popuntiltag"):
if parent.tag != parent.dSYM.popuntiltag:
print("Warning: got %s breaking until %s [%6d]" % (parent.tag, parent.dSYM.popuntiltag, Index))
else:
#print("Warning: reached tag %s [%6d]" % (parent.tag, Index))
del parent.dSYM.popuntiltag
break
def CheckDIEs(self, parent):
    """Recursively fix up member DIEs that were flagged with ``checkmembersize``.

    For every flagged child the flag is removed and the member's byte size is
    compared against its base type:

    * if the base type carries ``enumsizemax`` and the member is smaller than
      that value, the enum type is shrunk to the member's size;
    * if the member is (still) smaller than its base type, the member is
      rewritten as a bit field (``AT_data_bit_offset`` / ``AT_bit_size``
      replace ``AT_data_member_location`` / ``AT_byte_size``).

    parent: any DIE-like object; objects without a ``children`` attribute are
        ignored.
    Returns None; the DIE tree is modified in place.
    """
    if not hasattr(parent, "children"):
        return
    for child in parent.children:
        if hasattr(child, "checkmembersize"):
            del child.checkmembersize
            typeDIE = child.GetBaseType()
            # GetBaseType() may yield None (other callers in this file test
            # for that), so only compare sizes when a base type exists.
            if typeDIE is not None:
                if hasattr(typeDIE, "enumsizemax") and child.AT_byte_size < typeDIE.enumsizemax:
                    # set enum type size to minimum of struct member size
                    typeDIE.AT_byte_size = child.AT_byte_size
                    typeDIE.enumsizemax = child.AT_byte_size
                if child.AT_byte_size < typeDIE.GetByteSize():
                    # assume struct members that are smaller than their type are actually bit fields
                    child.AT_data_bit_offset = child.AT_data_member_location * 8
                    child.AT_bit_size = child.AT_byte_size * 8
                    del child.AT_data_member_location
                    del child.AT_byte_size
        self.CheckDIEs(child)
def Process_stab(self, filename):
    """Read stabs debug information and return the resulting DIE dictionary.

    filename: when the name matches ``.*\\.txt`` it is opened as an existing
        text dump; otherwise ``dsymutil -s`` is run on it and its output is
        captured in a temporary file that is then parsed.
    Returns the root ``DIEDict`` after ``Readstabs``, ``CheckDIEs`` and
    ``MakeAllVTables`` have been applied to it.
    """
    if re.match(r".*\.txt", filename):
        f = open(filename, "r")
        needs_dump = False
    else:
        # Text mode so that both branches hand Readstabs the same kind of
        # file object (the default for NamedTemporaryFile is binary).
        f = tempfile.NamedTemporaryFile(mode="w+")
        needs_dump = True
    try:
        if needs_dump:
            #print("Created temp file: %s" % f.name)
            subprocess.call(["dsymutil", "-s", filename], stdout=f)
            f.seek(0)
        #print("Processing file: %s" % f.name)
        dSYM = DIEDict()
        dSYM.dSYM = dSYM
        dSYM.sourceType = "stabs"
        dSYM.address = 0
        dSYM.currentCompileUnit = None
        dSYM.DIELookup = {}
        dSYM.CompileUnits = []
        dSYM.filename = filename
        #print(dSYM)
        self.Readstabs(f, dSYM)
        self.CheckDIEs(dSYM)
        del dSYM.currentCompileUnit
    finally:
        # Always release the file handle (and delete the temp file), even
        # when parsing raises.
        f.close()
    MakeAllVTables(dSYM)
    return dSYM
#=========================================================================================
# DumpAllStructs
def MakeOffsetStr(offset):
    """Return ``offset`` as ``0x``-prefixed lowercase hex, right-aligned in at least six columns."""
    hex_text = "0x%x" % offset
    return hex_text.rjust(6)
# from /Library/Developer/KDKs/KDK_10.11.5_15F34.kdk/System/Library/Kernels/kernel.dSYM/Contents/Resources/Python/lldbmacros/structanalyze.py
def _showStructPacking(symbol, typename, fieldname, prefix, depth, class_depth, begin_offset=0):
# Build a text report of the member layout of `symbol` (a class/struct/union
# type object) and return a 3-tuple:
#   (report text, _compact_offset - begin_offset, max_align)
# where _compact_offset is the running offset this function computes while
# walking base classes, fields and (at class_depth 0) virtual base classes,
# and max_align is the largest member alignment seen.
#
# Parameters:
#   symbol       - type object queried through GetClass(), GetByteSize(),
#                  GetNumberOfDirectBaseClasses(), GetNumberOfFields(), ...
#   typename     - name to print; when None, symbol.GetName() or an
#                  "_anon_<address>" placeholder is used
#   fieldname    - member name printed after the type name when non-empty
#   prefix       - text prepended to every member line of this level
#   depth        - nesting level; only forwarded (depth + 1) to recursive calls
#   class_depth  - incremented only when recursing into a direct base class;
#                  virtual base classes are listed only when it is 0
#   begin_offset - byte offset added to every member offset of this type
#
# NOTE(review): the only `import lldb` visible at the top of the file is
# commented out, so `lldb` is presumably provided elsewhere in this file -
# confirm.
# NOTE(review): indentation was lost in this copy of the file; the statements
# below are reproduced unchanged.
classType = symbol.GetClass()
# Map the type class to the keyword printed in the header line.
if classType == lldb.eTypeClassClass :
ctype = "class"
elif classType == lldb.eTypeClassUnion :
ctype = "union"
elif classType == lldb.eTypeClassStruct :
ctype = "struct"
else:
ctype = "_unknown_%x" % (classType or 0)
if typename == None:
typename = symbol.GetName() or "_anon_%s" % symbol.GetAddress()
# Header line: "[size] (kind) typename [fieldname] {"
if fieldname != None and fieldname != "":
outstr = "[%4d] (%s) %s %s {" % (symbol.GetByteSize(), ctype, typename, fieldname) + "\n"
else:
outstr = "[%4d] (%s) %s {" % (symbol.GetByteSize(), ctype, typename) + "\n"
# List friend declarations; the friend's type comes either from the DIE's own
# type or from an AT_friend reference looked up in DIELookup (see the two
# dwarfdump samples quoted below).
if hasattr(symbol, "Friends"):
for friend in symbol.Friends:
friendType = None
friendClass = None
if friend.HasType():
friendType = friend.GetType()
elif hasattr(friend, "AT_friend"):
friendType = friend.dSYM.DIELookup[friend.AT_friend]
if friendType != None:
friendClass = friendType.GetClass()
if friendClass == lldb.eTypeClassClass :
ctype = "class"
elif friendClass == lldb.eTypeClassUnion :
ctype = "union"
elif friendClass == lldb.eTypeClassStruct :
ctype = "struct"
else:
# NOTE(review): this formats classType (the enclosing type's class), not
# friendClass - looks like a copy/paste slip; confirm.
ctype = "_unknown_%x" % (classType or 0)
# NOTE(review): GetAddress() is formatted with %s everywhere else in this
# function but with %x here - confirm which one matches its return type.
outstr = outstr + prefix + " friend %s %s\n" % (ctype, friendType.GetName() or "_anon_%x" % friendType.GetAddress())
"""
Mac OS X 10.8:
0x009fcff0: TAG_structure_type
AT_name ("IOStatistics")
AT_declaration (0x01)
0x009fd4d4: TAG_friend
AT_type (0x009fcff0 "IOStatistics")
AT_data_member_location (OP_plus_uconst 0x0)
AT_accessibility (ACCESS_public)
Mac OS X 10.9:
0x00b67148: TAG_class_type
AT_name ("IOStatistics")
AT_declaration (0x01)
0x00b675ca: TAG_friend
AT_friend (0x00b67148)
"""
# _compact_offset is the offset where the next member would start if members
# were packed back to back; it is compared with each member's actual offset
# to report overlaps and holes.
_compact_offset = begin_offset
max_union_member_size = 0
max_align = 1
m_align = 1
# ---- direct base classes ----
numClasses = symbol.GetNumberOfDirectBaseClasses()
for i in range(numClasses):
member = symbol.GetDirectBaseClassAtIndex(i)
m_offset = member.GetOffsetInBytes() + begin_offset
m_type = member.GetType()
membertypename = m_type.GetName()
m_type = member.GetBaseType()
m_size = m_type.GetByteSize()
warningstr = ""
debugstr = "" # + str((begin_offset, m_offset, _compact_offset, m_size))
#print(prefix, "V", membertypename, debugstr)
if _compact_offset > m_offset:
warningstr = " *** Possible memory overlap ***"
elif _compact_offset < m_offset:
# Round _compact_offset up to a multiple of m_align (the mask trick assumes
# m_align is a power of two). m_align here is the value left over from the
# previous base class, or the initial 1.
align_offset = ((_compact_offset + m_align-1) & -m_align)
if align_offset != m_offset :
# ignore memory holes that may be caused by field alignment
#_has_memory_hole = True
warningstr = " *** Possible memory hole (msize:%d align:%d calc:0x%x calcaligned:0x%x actual:0x%x) ***" % (m_size, m_align, _compact_offset, align_offset, m_offset)
_compact_offset = m_offset
# Recurse into the base class; class_depth + 1 suppresses the virtual base
# class listing inside it.
s, compact_size, m_align = _showStructPacking(m_type, membertypename, None, prefix+" ", depth + 1, class_depth + 1, m_offset)
if m_align > max_align:
max_align = m_align
#print(prefix, "V", membertypename, "m_align:%d max_align:%d compact_size:%d" % (m_align, max_align, compact_size))
outstr += prefix + ("*%s," % MakeOffsetStr(m_offset)) + s + warningstr + debugstr + "\n"
_compact_offset += compact_size
# ---- fields ----
numFields = symbol.GetNumberOfFields()
#_has_memory_hole = False
# Bit-field run state: inBitField is True while consecutive bit-field members
# are being grouped; used_bits is the mask of bits claimed so far in the run;
# next_used_bits/next_totalBits carry bits over from a member whose bit offset
# was negative.
inBitField = False
totalBits = 0
bitFieldStartByte = -1
bitFieldTotalBytes = -1
used_bits = 0
next_used_bits = 0
next_totalBits = 0
for i in range(numFields):
member = symbol.GetFieldAtIndex(i)
m_offset = member.GetOffsetInBytes() + begin_offset
m_size_bits = member.GetBitSize()
m_offset_bits = member.GetOffsetInBits()
isBitField = hasattr(member, "AT_bit_size")
m_name = member.GetName() or "" # "_anon_member_%s" % member.GetAddress()
# NOTE(review): m_name can never be None here because of the `or ""` above.
if m_name == None:
print("Error: m_name is None")
m_type = member.GetType()
membertypename = m_type.GetName()
m_type = member.GetBaseType()
membertypeclass = None
# Default size of 1 is used when the member has no base type.
m_size = 1
if m_type != None:
membertypeclass = m_type.GetClass()
m_size = m_type.GetByteSize()
if inBitField:
# continuing previously started bit fields?
if (not isBitField) or classType == lldb.eTypeClassUnion or (m_offset >= (bitFieldStartByte + bitFieldTotalBytes)):
# no, finish previously started bit field
# Count how many bytes are needed to hold the highest bit set in used_bits.
compactbitsbytes = 0
tmp_used_bits = used_bits
while tmp_used_bits != 0:
tmp_used_bits >>= 8
compactbitsbytes += 1
#if compactbitsbytes != bitFieldTotalBytes:
# if debug: dbgprint("*** usedbits:%x bitsbytes:%d bitFieldTotalBytes:%d ***" % (used_bits, compactbitsbytes, bitFieldTotalBytes))
if classType != lldb.eTypeClassUnion:
#_compact_offset += bitFieldTotalBytes
_compact_offset += compactbitsbytes
inBitField = False
bitFieldStartByte = m_offset
used_bits = 0
totalBits = 0
if isBitField:
if not inBitField:
# new set of bit fields started
inBitField = True
bitFieldStartByte = m_offset
bitFieldTotalBytes = m_size
used_bits = next_used_bits
totalBits = next_totalBits
next_used_bits = 0
next_totalBits = 0
if next_used_bits:
print("Error: have carry over bits but not in new bit field next_used_bits:0x%08x at %s" % (next_used_bits, member.GetAddress()))
next_used_bits = 0
next_totalBits = 0
totalBits += m_size_bits
# Express this member's bit offset relative to the first byte of the run.
m_offset_bits += (m_offset - bitFieldStartByte) * 8
m_offset = bitFieldStartByte
# the type of a bitfield does not always mean the total size of all bit fields because you can mix sized types in a sequence of bit fields
while bitFieldTotalBytes * 8 < m_offset_bits + m_size_bits:
#print("[ bitFieldTotalBytes %d" % bitFieldTotalBytes)
bitFieldTotalBytes *= 2
#print("] bitFieldTotalBytes %d" % bitFieldTotalBytes)
# Text appended after the member name: " : <bits> // <first>..<last>"; a
# negative bit offset is printed as a range split across two storage units.
if m_offset_bits >= 0:
bitfield = " : %d // %d..%d" % (m_size_bits, m_offset_bits, m_offset_bits + m_size_bits - 1)
elif m_size_bits + m_offset_bits > 0:
bitfield = " : %d // %d..%d,%d..%d" % (m_size_bits, bitFieldTotalBytes * 8 + m_offset_bits, bitFieldTotalBytes * 8 - 1, 0, m_size_bits + m_offset_bits - 1)
else:
bitfield = " : %d // %d..%d,nothing" % (m_size_bits, bitFieldTotalBytes * 8 + m_offset_bits, bitFieldTotalBytes * 8 - 1)
else:
bitFieldStartByte = m_offset
bitFieldTotalBytes = m_size
used_bits = 0
totalBits = 0
bitfield = ""
if next_used_bits:
print("Error: have carry over bits but not in bit field next_used_bits:0x%08x at %s" % (next_used_bits, member.GetAddress()))
next_used_bits = 0
next_totalBits = 0
warningstr = ""
thebits = 0
try:
if m_offset_bits >= 0:
# Mask with m_size_bits one-bits positioned at m_offset_bits.
thebits = (~(-1 << m_size_bits)) << m_offset_bits
else:
# Negative offset: split the mask into a carried-over part (next_used_bits)
# and the part that falls into the current unit (thebits).
next_totalBits = -m_offset_bits
next_used_bits = (~(-1 << next_totalBits)) << (bitFieldTotalBytes * 8 + m_offset_bits)
thebits = (~(-1 << (m_size_bits + m_offset_bits))) << 0
# NOTE(review): bare except - catches every exception type, not only the
# shift errors the comment below describes.
except:
# negative bit offset means something like bit field overlaps next member... complicated
print("Error with bits used_bits:0x%08x thebits(previous):0x%08x size:%d offset:%d type:%s at %s" % (used_bits, thebits, m_size_bits, m_offset_bits, m_offset_bits.__class__.__name__, member.GetAddress()))
thebits = 0
# Flag bits claimed twice, a negative size, or a field running past the
# storage unit.
if ((thebits & used_bits) != 0) or m_size_bits < 0 or m_size_bits + m_offset_bits > bitFieldTotalBytes * 8:
warningstr = " *** Possible bit field error ***"
used_bits = 0
used_bits |= thebits
debugstr = "" # + str((begin_offset, m_offset, _compact_offset, m_offset_bits, m_size, m_size_bits, thebits, used_bits))
# Aggregate members are expanded recursively (class_depth reset to 0); other
# members are printed on a single line.
if membertypeclass == lldb.eTypeClassStruct or membertypeclass == lldb.eTypeClassUnion or membertypeclass == lldb.eTypeClassClass :
s, compact_size, m_align = _showStructPacking(m_type, membertypename, m_name, prefix+" ", depth + 1, 0, m_offset)
outstr += prefix + ("*%s," % MakeOffsetStr(m_offset)) + s
else:
outstr += prefix + ("+%s,[%4d] (%s)%s%s%s" % (MakeOffsetStr(m_offset), m_size, membertypename, "" if m_name == "" else " ", m_name, bitfield))
compact_size = m_size
m_align = 1
if m_type != None:
m_align = m_type.GetAlign()
if m_align > max_align:
max_align = m_align
#print(prefix, membertypename, m_name, "calc:0x%x actual:0x%x calcsize:%d actualsize:%d m_align:%d max_align:%d" % (_compact_offset, m_offset, compact_size, m_size, m_align, max_align))
# Compare the packed offset with the actual one; any warning set here
# replaces a bit field warning set above.
if _compact_offset > m_offset:
warningstr = " *** Possible memory overlap (msize:%d align:%d calc:%d actual:%d) ***" % (m_size, m_align, _compact_offset, m_offset)
elif _compact_offset < m_offset:
align_offset = ((_compact_offset + m_align-1) & -m_align)
if align_offset != m_offset :
# ignore memory holes that may be caused by field alignment
#_has_memory_hole = True
if m_align == 8:
# An 8-aligned member that sits on the 4-byte-aligned offset instead gets a
# more specific message.
align_offset4 = ((_compact_offset + 4-1) & -4)
if align_offset4 == m_offset :
warningstr = " *** Alignment is 4 instead of 8 ***"
else:
warningstr = " *** Possible memory hole (msize:%d align:%d calc:0x%x calcaligned:0x%x actual:0x%x) ***" % (m_size, m_align, _compact_offset, align_offset, m_offset)
else:
warningstr = " *** Possible memory hole (msize:%d align:%d calc:0x%x calcaligned:0x%x actual:0x%x) ***" % (m_size, m_align, _compact_offset, align_offset, m_offset)
_compact_offset = m_offset
# Union members all start at the same offset, so only the largest size is
# tracked; members of a bit-field run are accounted for when the run ends.
if classType == lldb.eTypeClassUnion:
if m_size > max_union_member_size:
max_union_member_size = m_size
elif not inBitField:
_compact_offset += m_size
outstr += warningstr + debugstr + "\n"
if next_used_bits:
print("Error: have carry over bits after fields next_used_bits:0x%08x at %s" % (next_used_bits, member.GetAddress()))
next_used_bits = 0
next_totalBits = 0
# A bit-field run that is still open after the last field is closed here.
# NOTE(review): this adds bitFieldTotalBytes, whereas a run closed inside the
# loop adds compactbitsbytes - confirm the difference is intended.
if classType != lldb.eTypeClassUnion and inBitField:
_compact_offset += bitFieldTotalBytes
inBitField = False
# ---- virtual base classes (only for the outermost class) ----
if class_depth == 0 and hasattr(symbol, "AllVirtualBaseClasses"):
for virtualbaseclassinfo in symbol.AllVirtualBaseClasses:
member = virtualbaseclassinfo.member
m_type = member.GetType()
membertypename = m_type.GetName()
m_type = member.GetBaseType()
m_size = m_type.GetByteSize()
m_align = m_type.GetAlign()
# The offset of a virtual base is computed here (aligned packed offset), not
# read from the debug information.
m_offset = ((_compact_offset + m_align-1) & -m_align)
warningstr = " virtual"
debugstr = "" # + str((begin_offset, m_offset, _compact_offset, m_size))
#print(prefix, "V", membertypename, debugstr)
_compact_offset = m_offset
s, compact_size, a = _showStructPacking(m_type, membertypename, None, prefix+" ", depth + 1, 0, m_offset)
outstr += prefix + ("*%s," % MakeOffsetStr(m_offset)) + s + warningstr + debugstr + "\n"
_compact_offset += compact_size
outstr += prefix + "}"
if classType == lldb.eTypeClassUnion:
_compact_offset += max_union_member_size
#if _has_memory_hole == True :
# outstr += " *** Warning: Struct layout leaves memory hole ***"
return outstr, _compact_offset - begin_offset, max_align
# Splits a member function type name of the form
#   "<return type> ()(/*[ const] <class> *[,] */ <parameters>)"
# into its parts. Written as a raw string so that the regex escapes (\( \) \*)
# are not treated as (invalid) Python string escapes.
vtableFunctionRE = re.compile(r"(.*?) \(\)\(/\*( const)? (.*?) \*,? \*/ ?(.*)\)")
# group(1) = function return type
# group(2) = ' const'
# group(3) = class from artificial parameter
# group(4) = parameters
def doOneVTable(symbol, prefix, vtableinfo):
    """Render one vtable of ``symbol`` as text.

    symbol: the class the vtable belongs to (name and pointer size are read
        from it).
    prefix: text placed in front of every emitted line.
    vtableinfo: object providing ``vmax`` (highest slot index),
        ``vPtrOffset`` and ``mergedVTableEntries`` (slot index -> entry).

    Returns the rendered text, or "" when the table has no slots. Slots
    without an entry (or whose entry has no name) are printed as bare
    offset/size lines.
    """
    slot_count = vtableinfo.vmax + 1
    if not slot_count > 0:
        return ""

    if vtableinfo.vPtrOffset == 0:
        pieces = [prefix + "vtable for %s {\n" % (symbol.GetName())]
    else:
        pieces = [prefix + "vtable for %s 0x%x {\n" % (symbol.GetName(), vtableinfo.vPtrOffset)]

    for slot in range(slot_count):
        method_name = None
        if slot in vtableinfo.mergedVTableEntries:
            entry = vtableinfo.mergedVTableEntries[slot]
            method = entry.DIE
            method_name = method.GetName()
            signature = method.GetNameForType(True)
            owner = method.GetContainingType()
            if owner is None:
                owner_name = ""
                owner_qualifier = ""
            else:
                owner_name = owner.GetName()
                owner_qualifier = owner_name+"::"
            thunk_label = getattr(entry, "ThunkType", "")
        slot_size = symbol.compile_unit.addr_size

        # Empty slot: only offset and size.
        if method_name is None:
            pieces.append(prefix + ("+%s,[%4d]\n" % (MakeOffsetStr(slot * slot_size), slot_size)))
            continue

        parsed = vtableFunctionRE.match(signature)
        if parsed and owner_name == parsed.group(3):
            # Signature parsed and its artificial "this" parameter agrees with
            # the containing class: print it in C++-like form.
            #print("vtablefunc", parsed.group(0), parsed.group(1), parsed.group(2), parsed.group(3), parsed.group(4))
            const_part = "" if parsed.group(2) is None else parsed.group(2)
            return_part = "" if parsed.group(1) == "void" else parsed.group(1)
            pieces.append(prefix + ("+%s,[%4d] %s %s%s::%s(%s)%s\n" % (MakeOffsetStr(slot * slot_size), slot_size, return_part, thunk_label, owner_name, method_name, parsed.group(4), const_part)))
            continue

        # Fallback form for unparsed signatures and for class mismatches.
        pieces.append(prefix + ("+%s,[%4d] (%s) %s%s\n" % (MakeOffsetStr(slot * slot_size), slot_size, signature, owner_qualifier, method_name)))
        if parsed:
            print("Error: containingtype '%s' doesn't match artifical parameter '%s'" % (owner_name, parsed.group(3)))

    pieces.append(prefix + "}")
    return "".join(pieces)
def _showVTablePacking(symbol, prefix):
outstr = ""
if hasattr(symbol, "VTables"):
for vtableinfo in symbol.VTables:
vstr = doOneVTable(symbol, prefix, vtableinfo)
if len(vstr) > 0:
if len(outstr) > 0:
outstr += "\n\n"
outstr += "%s" % vstr
if hasattr(symbol, "VTablesVirtual"):
for vtableinfo in symbol.VTablesVirtual:
vstr = doOneVTable(symbol, prefix, vtableinfo)
if len(vstr) > 0:
if len(outstr) > 0:
outstr += "\n\n"
outstr += "%s" % vstr
return outstr
def DumpAllStructs(parent, names, level):
    """Print the layout and vtables of every named aggregate type below ``parent``.

    parent: DIE-like object; objects without ``children`` are ignored.
    names: container of type names to restrict the dump to, or None to dump
        everything.
    level: recursion depth (only passed along; used by the commented-out
        trace output).

    Typedefs are followed (up to 20 levels) to the type they name; a typedef
    whose target has the same name is skipped because the target is dumped
    when it is reached itself. Returns None.
    """
    #print("[ Structs %d" % level)
    if not hasattr(parent, "children"):
        return
    for child in parent.children:
        if hasattr(child, "AT_name"):
            # only dump named types
            #print("address %s" % member.GetAddress())
            # also dump typedef'd structs too
            member = child
            membertypename = member.GetName()
            if member.tag == "TAG_typedef":
                typedepth = 0
                while member.tag == "TAG_typedef" and member.HasType():
                    member = member.GetType() # follow typedefs
                    typedepth = typedepth + 1
                    if typedepth > 20:
                        print("Error: type depth is too big «%s» «%s»" % (member.typenumber, member.GetName()))
                        break
                if member.GetName() == membertypename:
                    # don't do typedef if struct has same name, we'll do the struct when we get there
                    member = None
            if member != None and member.GetClass() != None and hasattr(member, "children") and len(member.children) > 0:
                if names is None or member.GetName() in names:
                    print("==========================================================================================")
                    if hasattr(child, "AT_decl_file"):
                        print('%s "%s"\n' % (child.GetAddress(), child.AT_decl_file))
                    else:
                        print("%s\n" % child.GetAddress())
                    s, n, a = _showStructPacking(member, membertypename, None, "", 0, 0, 0)
                    print(s)
                    # print("") emits a blank line on both Python 2 and 3; a
                    # bare `print` is a no-op expression on Python 3.
                    print("")
                    s = _showVTablePacking(member, "")
                    if s != "":
                        print(s)
                        print("")
                    print("")
        #if member.GetName() = "_lck_grp_": break
        DumpAllStructs(child, names, level + 1)
    # print("] Structs %d" % level)
#=========================================================================================
# DumpAllTypes
# Tags that DumpAllTypes does not export on their own.
_IGNORED_TYPE_TAGS = frozenset([
    "TAG_compile_unit",
    "TAG_variable",
    "TAG_inheritance",                  # handled by TAG_class_type
    "TAG_member",                       # handled by TAG_class_type, TAG_structure_type, TAG_union_type (AT_data_member_location)
    "TAG_subprogram",                   # handled by TAG_class_type (AT_data_member_location)
    "TAG_formal_parameter",
    "TAG_subroutine_type",              # handled by TAG_pointer_type
    "TAG_subrange_type",                # handled by TAG_array_type
    "TAG_unspecified_parameters",       # handled by TAG_subroutine_type
    "TAG_enumerator",                   # handled by TAG_enumeration_type
    "TAG_lexical_block",
    "TAG_inlined_subroutine",
    "TAG_GNU_template_parameter_pack",  # template
    "TAG_imported_declaration",
    "TAG_imported_module",              # points to TAG_namespace
    "TAG_namespace",
    "TAG_template_type_parameter",
    "TAG_template_value_parameter",
    "TAG_unspecified_type",
])


def DumpAllTypes(Hopper, parent):
    """Walk the DIE tree below ``parent`` and register each type with ``Hopper``.

    Hopper: object providing the ``Add*`` methods called below
        (AddPointer, AddFunctionPointer, AddTypedef, AddClass, AddStruct,
        AddBaseType, AddPointerToMember, AddArray, AddEnumeration).
    parent: DIE-like object; objects without ``children`` are ignored, so
        leaf DIEs can be recursed into safely.

    Unnamed reference/const/volatile types get a name synthesized from the
    referenced type's name. Unknown tags are reported with print().
    Returns None.
    """
    if not hasattr(parent, "children"):
        return
    for child in parent.children:
        name = getattr(child, "AT_name", None)
        attype = child.GetType() if child.HasType() else None
        tag = child.tag

        if tag in _IGNORED_TYPE_TAGS:
            pass
        elif tag == "TAG_pointer_type":
            if attype is not None and attype.tag == "TAG_subroutine_type":
                Hopper.AddFunctionPointer(child, name, attype)
            else:
                Hopper.AddPointer(child, name, attype)
        elif tag == "TAG_reference_type":
            if name is None:
                name = "&" if attype is None else attype.GetName() + " &"
            Hopper.AddPointer(child, name, attype)
        elif tag == "TAG_rvalue_reference_type":
            # I don't know what an rvalue_reference_type looks like so I use &_
            if name is None:
                name = "&_" if attype is None else attype.GetName() + " &_"
            Hopper.AddPointer(child, name, attype)
        elif tag == "TAG_const_type":
            if name is None:
                name = 'const' if attype is None else 'const ' + attype.GetName()
            Hopper.AddTypedef(child, name, attype, None)
        elif tag == "TAG_volatile_type":
            if name is None:
                name = 'volatile' if attype is None else 'volatile ' + attype.GetName()
            Hopper.AddTypedef(child, name, attype, None)
        elif tag == "TAG_class_type":
            Hopper.AddClass(child, name, child)
            # might just be a declaration AT_declaration( true )
            # or it might contain children with AT_data_member_location
            # Direct inheritance TAG_inheritance DirectBaseClasses
        elif tag == "TAG_structure_type":
            Hopper.AddStruct(child, name, child)
            # might be a declaration - replace with define if it exists in same compileunit
        elif tag == "TAG_union_type":
            Hopper.AddStruct(child, name, child)
            # might be a declaration - replace with define if it exists in same compileunit
        elif tag == "TAG_typedef":
            Hopper.AddTypedef(child, name, attype, None)
        elif tag == "TAG_base_type":
            Hopper.AddBaseType(child)
        elif tag == "TAG_ptr_to_member_type":
            if attype is not None and attype.tag == "TAG_subroutine_type":
                Hopper.AddPointerToMember(child, name, attype)
            else:
                print("Error: unexpected tag %s" % child.GetAddress())
        elif tag == "TAG_array_type":
            Hopper.AddArray(child, name, attype)
        elif tag == "TAG_enumeration_type":
            Hopper.AddEnumeration(child, name)
            #bytesize
        else:
            print("Error: unknown tag '%s':" % child.tag)
        DumpAllTypes(Hopper, child)
#=========================================================================================
def DumpDIE(DIE, level):
    """Print one line describing ``DIE`` via dbgprint, then do the same for its children.

    The line starts with the DIE's address and its tag (indented by two
    columns per ``level``), followed by one `` name:value`` item for each
    instance attribute. Bookkeeping attributes are left out, collections are
    shown as an element count, and attributes that reference other DIEs are
    shown by address.
    """
    hidden = ("dSYM", "address", "typenumber", "compile_unit", "children", "tag", "AT_decl_file")
    counted = ("DIELookup", "Fields", "VTables", "VTablesByOffset", "VTablesVirtual",
               "AllVirtualBaseClasses", "IncludedVirtualBaseClasses", "VTableEntries")
    single_refs = ("parent", "duplicateof", "methodClassDIE")
    die_lists = ("creates", "createdFrom")
    inheritance_lists = ("DirectBaseClasses", "VirtualBaseClasses")

    tag_text = DIE.tag if hasattr(DIE, "tag") else ""
    parts = ["%-30s %*s%*s" % (DIE.GetAddress(), level * 2, "", -40 + level * 2, tag_text)]

    for attr, value in vars(DIE).items():
        if attr in hidden:
            continue
        if attr in counted:
            parts.append(" %s:[%d]" % (attr, len(getattr(DIE, attr))))
        elif attr in single_refs:
            parts.append(" %s:%s" % (attr, getattr(DIE, attr).GetAddress()))
        elif attr == "artificalDIE":
            parts.append(" %s:%s:parameter:%d" % (attr, DIE.artificalDIE.parent.GetAddress(), DIE.artificalDIE.parameterNumber))
        elif attr in die_lists:
            joined = ", ".join(["{%s}" % thetype.GetAddress() for thetype in getattr(DIE, attr)])
            parts.append(" %s:[%s]" % (attr, joined))
        elif attr in inheritance_lists:
            joined = ", ".join(["{%s}" % theinheritance.GetType().GetAddress() for theinheritance in getattr(DIE, attr)])
            parts.append(" %s:[%s]" % (attr, joined))
        else:
            parts.append(" %s:«%s»" % (attr, value))

    dbgprint("".join(parts))
    for child in getattr(DIE, "children", ()):
        DumpDIE(child, level + 1)
#=========================================================================================
# Main
if __name__ == '__main__':
    # Usage: script [-s] <file> [[-s] <file> ...]
    # "-s" marks the NEXT file as stabs input (handled by stab_Reader);
    # files without it are treated as DWARF input (handled by DSYM_Reader).
    #
    # sys.argv[0] is the script itself, so fewer than two entries means that
    # no input file was given.
    if len(sys.argv) < 2:
        print('Expected usage: {0} <dsym>'.format(sys.argv[0]))
        sys.exit(1)
    H = Hopper()
    dostab = False
    for i in range(1, len(sys.argv)):
        if sys.argv[i] == "-s":
            dostab = True
        else:
            print("==========================================================================================")
            print("The file: %s\n" % sys.argv[i])
            if dostab == True:
                stabr = stab_Reader()
                dSYM = stabr.Process_stab(sys.argv[i])
            else:
                dSYMr = DSYM_Reader()
                dSYM = dSYMr.Process_dSYM(sys.argv[i])
            if debug:
                print("==========================================================================================")
                DumpDIE(dSYM, 0)
            DumpAllStructs(dSYM, None, 0)
            #••••••• TO DO: Finish DumpAllTypes
            #DumpAllTypes(H, dSYM)
            # Disabled debugging aid (Python 2 syntax), kept for reference:
            #pp = pprint.PrettyPrinter(indent=4, depth=10)
            #pp.pprint(H.Types)
            #pp.pprint(H.UUIDs)
            #for k,v in H.UUIDs.items():
            #    pp.pprint(k)
            #    for attr, value in iter(v.__dict__.items()):
            #        print attr, value
            #    print
            # "-s" applies to a single file only.
            dostab = False
@joevt
Copy link
Author

joevt commented Mar 6, 2022

Description

This script is based on the structanalyze.py script from KDK_10.11.5_15F34.kdk. It outputs offsets of fields in structs and classes and offsets of methods in vtables. The result can help in disassembly and debugging.

How To Use

The information comes from DWARF or stabs.

DWARF

First, use dwarfdump to convert a dSYM (DWARF debug symbol file) to text. You must specify an architecture if there is more than one in the file. You can use the file command to check what architectures a dSYM file contains.

KDKs=/Library/Developer/KDKs
dwarfdump --arch=i386 "${KDKs}/KDK_10.5_9a581.kdk/mach_kernel.dSYM/Contents/Resources/DWARF/mach_kernel" > "${joedwarf}/dumps_10.5_9a581/dwarf_i386_mach_kernel.txt"

Then use the joedwarftohpt.py script to output the struct and class and vtable offsets:

python "${joedwarf}/joedwarftohpt.py" "${joedwarf}/dumps_10.5_9a581/dwarf_i386_mach_kernel.txt" > "${joedwarf}/structs_10.5_9a581/structs_i386_mach_kernel.txt"

stabs

First, use dsymutil -s to extract stabs from a binary. If the binary contains more than one architecture then use lipo to extract an architecture to a separate file. You can use the file command to check what architectures a binary contains.

KDKs=/Library/Developer/KDKs
dsymutil -s "${KDKs}/KDK_10.2.8.kdk/mach_kernel" > "${joedwarf}/dumps_10.2.8/stabs_mach_kernel.txt"

Then run joedwarftohpt.py with the -s option to output the struct, class, and vtable offsets:

python "${joedwarf}/joedwarftohpt.py" -s "${joedwarf}/dumps_10.2.8/stabs_mach_kernel.txt" > "${joedwarf}/structs_10.2.8/structs_mach_kernel.txt"

KDKs

If you have downloaded and installed many KDKs to /Library/Developer/KDKs (such as the KDK for the last revision of each macOS version), then the following script will create commands to dump the info from the kernel files.

> Output commands to dump debug symbols of all kernels from all KDKs
# dump all the kernel symbol files to txt files 
# Prints (does not run) one dump command per kernel binary found in the KDKs
# under ${KDKs}. Expects the variables KDKs and joedwarf to be set already.
# Each printed command is prefixed with "# " when its output file already
# exists and with two spaces otherwise; i numbers the commands.
i=100
# maxpath = length of the longest matching path (+2 below); used as the
# printf field width for the quoted input path.
maxpath=$(find "${KDKs}"/ -type f -path "*/DWARF/*" \( -name "mach_kernel" -o -name "kernel" \) | sed -E 's/././g' | sort -r | sed -nE '1p' | tr -d "\n" | wc -c)
((maxpath+=2))
IFS=$'\n'
for thekdk in $(ls "${KDKs}"/ | sed -E "/joedwarf/d; /KDK_(.+)\.kdk/s//\1/" | sort -V) ; do
	gotdwarf=0
	# First choice: kernel files inside a dSYM's DWARF directory -> dwarfdump commands.
	for thefile in $(find "${KDKs}"/KDK_$thekdk.kdk -type f -path "*/DWARF/*" \( -name "mach_kernel" -o -name "kernel" \) ); do
		gotdwarf=1
		subdir="$joedwarf/dumps_$thekdk$(perl -pE 's|.*?\.kdk/|/|; s|[^/]*\.dSYM/.*||' <<< "$thefile")"
		name="$(perl -pE 's|.*?\.kdk/||; s|\.dSYM/.*||; s|.*/||' <<< "$thefile")"
		mkdir -p "${subdir}"

		# Architectures reported by file(1); empty when file prints no "for architecture" lines.
		thearchs="$(file "${thefile}" | sed -nE '/.*for architecture ([^)]+).*/s//\1/p')"
		if [[ -z $thearchs ]]; then
			dumpout="${subdir}dwarf_${name}.txt"
			[[ -f $dumpout ]] && printf "# " || printf "  "
			# The trailing perl filter replaces the author's absolute joedwarf path with the literal text ${joedwarf}/.
			printf 'echo "%d" ; dwarfdump %-13s %-*s > "%s"\n' $i "" $maxpath '"'"${thefile}"'"' "$dumpout" | perl -pe 's|/Volumes/Work/Programming/XcodeProjects/joedwarf/|\$\{joedwarf\}/|g'
			((i++))
		else
			for thearch in $(echo $thearchs); do
				dumpout="${subdir}dwarf_${thearch}_${name}.txt"
				[[ -f $dumpout ]] && printf "# " || printf "  "
				printf 'echo "%d" ; dwarfdump %-13s %-*s > "%s"\n' $i '--arch='"$thearch" $maxpath '"'"${thefile}"'"' "$dumpout" | perl -pe 's|/Volumes/Work/Programming/XcodeProjects/joedwarf/|\$\{joedwarf\}/|g'
				((i++))
			done
		fi
	done
	
	# Fallback when the KDK has no DWARF kernel file: emit dsymutil -s commands
	# for the kernel binaries themselves (after lipo -extract per architecture
	# when file(1) lists several).
	if ((gotdwarf == 0)); then
		for thefile in $(find "${KDKs}"/KDK_$thekdk.kdk -type f \( -name "mach_kernel" -o -name "kernel" \) ); do
			name="$(basename "$thefile")"
			subdir="$joedwarf/dumps_$thekdk$(perl -pE 's|.*?\.kdk/|/|; s|[^/]*$||' <<< "$thefile")"
			mkdir -p "${subdir}"

			thearchs="$(file "${thefile}" | sed -nE '/.*for architecture ([^)]+).*/s//\1/p')"
			if [[ -z $thearchs ]]; then
				dumpout="${subdir}stabs_${name}.txt"
				[[ -f $dumpout ]] && printf "# " || printf "  "
				printf 'echo "%d" ; dsymutil  %-13s %-*s > "%s"\n' $i "-s" $maxpath '"'"${thefile}"'"' "$dumpout" | perl -pe 's|/Volumes/Work/Programming/XcodeProjects/joedwarf/|\$\{joedwarf\}/|g'
				((i++))
			else
				for thearch in $(echo $thearchs); do
					dumpout="${subdir}stabs_${thearch}_${name}.txt"
					archout="/tmp/${thearch}_${name}"
					[[ -f $dumpout ]] && printf "# " || printf "  "
					printf 'echo "%d" ; lipo "%s" -extract "%s" -output "%s" ; dsymutil -s "%s" > "%s"\n' $i "${thefile}" "$thearch" "$archout" "$archout" "$dumpout" | perl -pe 's|/Volumes/Work/Programming/XcodeProjects/joedwarf/|\$\{joedwarf\}/|g'
					((i++))
				done
			fi
		done
	fi
done
> Output commands to output text files with struct/class/vtable offsets for all dumps of kernel types from all KDKs
# Prints (does not run) one joedwarftohpt.py command per dwarf*/stabs* kernel
# dump found under $joedwarf/dumps_<kdk>. Expects KDKs and joedwarf to be set.
# A printed command is prefixed with "# " when its output file already exists
# and with two spaces otherwise; i numbers the commands.
i=200
# maxpath = length of the longest dump path (+2 below); used as the printf
# field width for the quoted input path.
maxpath=$(find $joedwarf -type f \( -name "stabs*_kernel.txt" -o -name "dwarf*_kernel.txt" \) | sed -E 's/././g' | sort -r | sed -nE '1p' | tr -d "\n" | wc -c)
((maxpath+=2))
IFS=$'\n'
for thekdk in $(ls "${KDKs}"/ | sed -E "/joedwarf/d; /KDK_(.+)\.kdk/s//\1/" | sort -V) ; do
	subdir="$joedwarf/structs_$thekdk/"
	mkdir -p "${subdir}"
	# DWARF dumps: output path is the dump path with dumps_ -> structs_ and dwarf_ -> structs_.
	for thefile in $(find $joedwarf/dumps_$thekdk -type f -name "dwarf*_kernel.txt" ); do
		name="$(perl -pE 's|(/joedwarf/)dumps_|\1structs_|; s|/dwarf_?([^/]*)$|/structs_\1|' <<< "$thefile")"
		mkdir -p "$(dirname "$name")"
		[[ -f $name ]] && printf "# " || printf "  "
		printf 'echo "%d" ; python "%s"    %-*s > "%s"\n' $i "$joedwarf/joedwarftohpt.py" $maxpath '"'"${thefile}"'"' "${name}" | perl -pe 's|/Volumes/Work/Programming/XcodeProjects/joedwarf/|\$\{joedwarf\}/|g'
		((i++))
	done
	# stabs dumps: same as above, but the printed command passes -s to the script.
	for thefile in $(find $joedwarf/dumps_$thekdk -type f -name "stabs*_kernel.txt" ); do
		name="$(perl -pE 's|(/joedwarf/)dumps_|\1structs_|; s|/stabs_?([^/]*)$|/structs_\1|' <<< "$thefile")"
		mkdir -p "$(dirname "$name")"
		[[ -f $name ]] && printf "# " || printf "  "
		printf 'echo "%d" ; python "%s" -s %-*s > "%s"\n' $i "$joedwarf/joedwarftohpt.py" $maxpath '"'"${thefile}"'"' "${name}" | perl -pe 's|/Volumes/Work/Programming/XcodeProjects/joedwarf/|\$\{joedwarf\}/|g'
		((i++))
	done
done

Here's a script to list all offsets of certain fields in a specific struct.

> Search for fields in struct vm_map_entry
# search for fields in vm_map_entry

# For each listed field name, scan every structs_*kernel* file under $joedwarf
# and print the matching member line found inside the vm_map_entry block,
# followed by "# <file>". Expects joedwarf to be set.
IFS=$'\n'
for thevar in max_protection next start end; do
	# The perl filter only looks at lines between the "[...] vm_map_entry {"
	# header and the next line starting with "}"; sort -u removes repeated
	# lines within one file.
	for thefile in $(find $joedwarf -name 'structs_*kernel*' | sort -V); do
		# echo "# $thefile"
		perl -ne '
			if (/^\[.* vm_map_entry \{/ .. /^}/ ) {
				if (/(\+ +(0x\w+),\[ *(\d+)\] \(.*\) ('"$thevar"').*)\n/) {
					print $1 . " # '"$thefile"'\n";
				}
			}
		' \
		"$thefile" | sort -u
	done
done

Notes

  • Mac OS X KDKs up to 10.4.11 contain stabs.
  • Mac OS X 10.5 and later KDKs have dSYM files containing DWARF information.
  • The python script converts stabs to DWARF. More accurately, it creates a DWARF dictionary from stabs strings. It does not output the DWARF dictionary as a DWARF binary. It handles enough of stabs to deal with mach_kernel binaries in the Mac OS X KDKs.
  • Bit offsets for bit fields in KDKs from 10.5 to 10.13 may be counting from MSB instead of LSB so you need to look at the code to see what bits are actually used for a bit field. An incorrect interpretation of the bit ordering by the script may cause it to incorrectly state *** Possible memory overlap *** for nearby fields.
  • There are other bit field issues such as for 128-bit bit fields being used as function pointers.
  • Some classes (such as IOUserClient in the x86_64 kernel of 10.7.5_11g56) have duplicate VTable entry locations for some virtual functions.
  • Change debug = 0 to debug = 1 to see log messages in the output and to see the generated DWARF dictionary that is used to produce the output.

To Do

  • Remove duplicate output. In a kernel dSYM, many types may be repeated more than 100 times. A simple recursive compare should be able to solve this.
  • Finish reverse engineering the Hopper export file. Finish the code that converts the DWARF dictionary into a Hopper export file so it can be imported by Hopper. Hopper.app currently has very poor type import capabilities. The "Import Types from C-like Header File..." option is undocumented and virtually unusable, and the "Read Debug Symbols File..." option does not read types.
  • Maybe use a format that can be used by a C compiler.

@jquirke
Copy link

jquirke commented Oct 30, 2022

Very useful script indeed.

I made a couple changes here to support python3 (s/xrange/range/)
https://gist.github.com/jquirke/132d18b7bba9e3f96368bbc43230b755

@joevt
Copy link
Author

joevt commented Oct 31, 2022

Thanks @jquirke . I also changed d.iteritems() to iter(d.items()) as described at https://peps.python.org/pep-0469/

@joevt
Copy link
Author

joevt commented May 18, 2023

  • Added support for stabs so that KDKs for Mac OS X 10.2.6 to 10.4.11 can be processed.
  • Fixed some issues with arrays and unions.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment