Skip to content

Instantly share code, notes, and snippets.

@abrasive
Last active April 5, 2019 00:12
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save abrasive/b7f28ed63f90a91cf402049e9425d78a to your computer and use it in GitHub Desktop.
Save abrasive/b7f28ed63f90a91cf402049e9425d78a to your computer and use it in GitHub Desktop.
#!/usr/bin/env python2
# A crude parser for Artemis SAM hives
# Currently effective on legacyconfigcardprogrammerapp.hive.xml
#
# Plonk it in a folder with as many .hive.xml as you can scrape from your
# installation and supply the target hive as argument.
# Types and methods are referred to in code by a single byte.
# A 0-indexed table is referred to for each, containing first all the
# typerefs and then all the typedefs (similarly for methrefs/methdefs).
# So if there are 5 typerefs then type 07 is the third entry in the typedefs.
import lxml.etree
from base64 import b64decode
import struct
from binascii import hexlify
from StringIO import StringIO
def ntohs(data):
    """Decode a big-endian (network order) unsigned 16-bit integer.

    data must be exactly 2 bytes long; struct.error is raised otherwise.
    """
    (value,) = struct.unpack('>H', data)
    return value
def ntohl(data):
    """Decode a big-endian (network order) unsigned 32-bit integer.

    data must be exactly 4 bytes long; struct.error is raised otherwise.
    """
    (value,) = struct.unpack('>L', data)
    return value
def le16(data):
    """Decode a little-endian unsigned 16-bit integer.

    data must be exactly 2 bytes long; struct.error is raised otherwise.
    """
    (value,) = struct.unpack('<H', data)
    return value
def le24(data):
    """Decode a little-endian unsigned 24-bit integer.

    data must be exactly 3 bytes long. It is padded with one zero byte in
    the most-significant position and decoded as a 32-bit value;
    struct.error is raised if the padded input is not 4 bytes long.
    """
    # A bytes literal keeps the padding valid for both str (Python 2) and
    # bytes/bytearray input.
    (value,) = struct.unpack('<L', data + b'\0')
    return value
def le32(data):
    """Decode a little-endian unsigned 32-bit integer.

    data must be exactly 4 bytes long; struct.error is raised otherwise.
    """
    (value,) = struct.unpack('<L', data)
    return value
class Namer(object):
    """Translate numeric module/type/method tags into readable names.

    Names are collected from debug metadata blobs via parse_debug(); one
    Namer can be fed several blobs so that names accumulate. Tags with no
    known name are rendered as a hex placeholder.
    """

    def __init__(self):
        # Module tag -> name. Seeded with the one name hard-coded here.
        self.module_names = {
            0x5424ba: 'mscorlib',
        }
        # Type tag -> "module::type".
        self.type_names = {}
        # (typetag << 16 | methtag) -> "module::type.method".
        self.method_names = {}

    # XXX TODO: each type/method tag ties all the way back to a module. type 'em strong

    def module(self, modtag):
        """Return the name of module modtag, or 'm<hex>' if it is unknown.

        Only the low 24 bits of modtag take part in the lookup.
        """
        tag = modtag & 0xffffff
        return self.module_names.get(tag, 'm%06x' % tag)

    def type(self, typetag):
        """Return the name of type typetag, or 't<hex>' if it is unknown."""
        return self.type_names.get(typetag, 't%04x' % typetag)

    def method(self, typetag, methtag):
        """Return the name of a method, or 'f<type hex>.<method hex>'.

        Methods are keyed by the type tag and method tag combined.
        """
        key = typetag << 16 | methtag
        return self.method_names.get(key, 'f%04x.%04x' % (typetag, methtag))

    def parse_debug(self, debug_data):
        """Add the names found in a debug metadata blob to the tables.

        The blob is read as three consecutive tables: modules, types and
        methods. Each table is a big-endian 16-bit entry count followed by
        that many entries of the form <tag> <16-bit length> <name>. Tags
        are 4 bytes for modules, 6 bytes for types (4-byte module tag plus
        2-byte type tag) and 4 bytes for methods (2-byte type tag plus
        2-byte method tag).

        Raises KeyError when a type refers to a module, or a method to a
        type, that has no recorded name. Bytes left over after the three
        tables are reported on stdout.
        """
        data = StringIO(debug_data)

        def take_tlv(data, tagsize):
            # One entry: fixed-size tag, 16-bit length, then the value.
            tag = data.read(tagsize)
            length = ntohs(data.read(2))
            value = data.read(length)
            return tag, value

        nmodules = ntohs(data.read(2))
        for _ in range(nmodules):
            tag, name = take_tlv(data, 4)
            # NOTE(review): stored under the full 32-bit tag, whereas
            # module() looks up the low 24 bits only - confirm the top
            # byte is always zero in real data.
            self.module_names[ntohl(tag)] = name

        ntypes = ntohs(data.read(2))
        for _ in range(ntypes):
            tag, name = take_tlv(data, 6)
            modtag = ntohl(tag[:4])
            modname = self.module_names[modtag]
            typetag = ntohs(tag[4:])
            self.type_names[typetag] = modname + "::" + name

        nmethods = ntohs(data.read(2))
        for _ in range(nmethods):
            tag, name = take_tlv(data, 4)
            typetag = ntohs(tag[:2])
            typename = self.type_names[typetag]
            methtag = ntohs(tag[2:])
            self.method_names[typetag << 16 | methtag] = typename + "." + name

        # Keep the leftover bytes: a second read() would return nothing,
        # and the stream object itself cannot be hexlified.
        trailing = data.read()
        if trailing:
            print("Trailing data in debug stream: %s" % hexlify(trailing))

    def dump(self):
        """Print every known module, type and method name, sorted by tag."""
        print("Modules:")
        for tag, name in sorted(self.module_names.items()):
            print(" %06x %s" % (tag, name))
        print("Types:")
        for tag, name in sorted(self.type_names.items()):
            print(" %04x %s" % (tag, name))
        print("Methods:")
        for tag, name in sorted(self.method_names.items()):
            print(" %04x.%04x %s" % (tag >> 16, tag & 0xffff, name))
class Hive(object):
    """One assembly loaded from a .hive.xml file.

    The XML root is expected to contain an 'Assembly' element with a
    'Name' attribute and base64-encoded 'Hive' and 'Win32DebugMetadata'
    children. The debug metadata is always fed to the namer; the hive body
    is parsed unless skip_body is set.
    """

    def __init__(self, filename, namer=None, skip_body=False):
        """Load filename and register its debug names.

        filename  -- path of the .hive.xml file to read.
        namer     -- Namer to add the debug names to; a fresh one is
                     created when omitted. Share one across several hives
                     to accumulate names.
        skip_body -- when true, only the debug names are loaded and
                     parse_hive() is not run.
        """
        # Binary mode leaves encoding detection to the XML parser; the
        # with-block closes the file once it has been parsed.
        with open(filename, 'rb') as fp:
            self.xml = lxml.etree.parse(fp)
        self.assembly = self.xml.getroot().find('Assembly')
        self.name = self.assembly.get('Name')
        self.hive_data = bytearray(b64decode(self.assembly.find('Hive').text))
        self.debug_data = bytearray(b64decode(self.assembly.find('Win32DebugMetadata').text))
        if namer is None:
            namer = Namer()
        self.namer = namer
        self.namer.parse_debug(self.debug_data)
        if not skip_body:
            self.parse_hive()

    def parse_hive(self):
        """Parse the hive blob into dependency, type and method tables.

        Sets self.dependencies (list of (module tag, 8 flag bytes)),
        self.types (type tags, references first, then definitions) and
        self.methods ((type tag, method tag) pairs in the same order).
        The position of an entry in self.types / self.methods is its index
        in the corresponding table. Each method body found is printed to
        stdout as it is read.
        """
        data = self.hive_data[8:]  # skip length and HIVE
        num_mod_deps = le16(data[0x19:0x1b])
        num_mod_refs = le16(data[0x1b:0x1d])
        unk_len = le16(data[0x2d:0x2f])
        body = StringIO(data[0x2f:])

        # Module dependencies: 32-bit tag followed by 8 bytes of flags.
        self.dependencies = []
        for _ in range(num_mod_deps):
            tag = le32(body.read(4))
            flags = body.read(8)
            self.dependencies.append((tag, flags))

        # Per-module type counts. The list holds the typeref groups first
        # and then the typedef groups.
        m_typerefs = []
        m_typedefs = []
        active = m_typerefs
        first_module = None
        for _ in range(num_mod_refs):
            module = le24(body.read(3))
            count = ord(body.read(1))
            if first_module is None:
                first_module = module
            elif module == first_module:  # can't find a count/pointer for this changeover
                active = m_typedefs
            active.append((module, count))

        self.types = []
        self.methods = []

        # Type references: type tag plus the number of referenced methods.
        typeref_meths = []
        for module, count in m_typerefs:
            for _ in range(count):
                typetag = le16(body.read(2))
                nmethods = le16(body.read(2))
                typeref_meths.append((typetag, nmethods))
                self.types.append(typetag)

        # Method references, grouped in the same order as their types.
        methrefs = []
        for typetag, nmethods in typeref_meths:
            for _ in range(nmethods):
                methtag = le16(body.read(2))
                methrefs.append((typetag, methtag))
                self.methods.append((typetag, methtag))

        # XXX string tables, or what?
        unknown = body.read(unk_len)

        # type defs
        typedefs = []
        for module, count in m_typedefs:
            for _ in range(count):
                typetag = le16(body.read(2))
                basetype = ord(body.read(1))
                subtype = ord(body.read(1))
                nfields = ord(body.read(1))
                nmethods = ord(body.read(1))
                flags = le16(body.read(2))
                # Field records are 6 bytes each; they are read to keep
                # the stream position right but not interpreted.
                fields = [body.read(6) for _ in range(nfields)]
                self.types.append(typetag)
                typedefs.append((typetag, nmethods))

        # method defs
        methdefs = []
        for typetag, nmethods in typedefs:
            for _ in range(nmethods):
                methtag = le16(body.read(2))
                argflags = ord(body.read(1))
                nargs = argflags & 7
                flags = ord(body.read(1))
                ret_type = ord(body.read(1))
                defs = []
                prebytes = 0
                # NOTE(review): ndefs only exists when the 0x80 flag is
                # set, so the 7-byte records are read inside this branch.
                # Whether the 0x40 check below belongs inside it as well
                # should be confirmed against the original file.
                if argflags & 0x80:
                    # Extended header; replaces the 3-bit argument count.
                    nargs, prebytes, unk2, ndefs, unk3 = map(ord, body.read(5))
                    for _ in range(ndefs):
                        defs.append(body.read(7))
                if argflags & 0x40:
                    # XXX this is a hack and probably wrong
                    prebytes += 4
                if flags & 0x80:
                    # just points to a module with same named/tagged method (4 byte full tag)
                    methtype = 'pointer'
                else:
                    methtype = 'actual'
                arg_types = body.read(nargs)
                methdefs.append([typetag, methtag, methtype, prebytes])
                self.methods.append((typetag, methtag))

        # method bodies
        for methdef in methdefs:
            typetag, methtag, methtype, prebytes = methdef
            if methtype == 'pointer':
                methdef.append(le32(body.read(4)))
                continue
            pre = body.read(prebytes)
            length = le16(body.read(2))
            methdef.append(body.read(length))
            print("%d prebytes" % prebytes)
            print("%s\n\t%s" % (self.namer.method(typetag, methtag), hexlify(methdef[-1])))

    def dump_hive(self):
        """Print each module dependency with its flag bytes in hex."""
        print("Dependencies:")
        for tag, flags in self.dependencies:
            print(" %s\t%s" % (self.namer.module(tag), hexlify(flags)))

    def dump_tables(self):
        """Print the type and method tables with their table indices."""
        print("Types:")
        for i, typetag in enumerate(self.types):
            print(" %02X %s" % (i, self.namer.type(typetag)))
        print("Methods:")
        for i, (typetag, methtag) in enumerate(self.methods):
            print(" %02X %s" % (i, self.namer.method(typetag, methtag)))

    def dump(self):
        """Print the assembly name, its dependencies and both tables."""
        print("Assembly: %s" % self.name)
        self.dump_hive()
        self.dump_tables()
if __name__ == "__main__":
    import sys
    import glob

    # The target hive is mandatory; fail with a usage line rather than an
    # IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit("usage: %s TARGET.hive.xml" % sys.argv[0])

    namer = Namer()
    # First pass: collect debug names from every hive in the current
    # directory, so tags that refer to other assemblies can be named.
    for ff in glob.glob('*.hive.xml'):
        Hive(ff, namer=namer, skip_body=True)

    # Second pass: fully parse the requested hive and print its tables.
    hive = Hive(sys.argv[1], namer=namer)
    hive.dump()
    # hive.namer.dump()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment