-
-
Save abrasive/b7f28ed63f90a91cf402049e9425d78a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2
# A crude parser for Artemis SAM hives.
# Currently effective on legacyconfigcardprogrammerapp.hive.xml.
#
# Plonk it in a folder with as many .hive.xml files as you can scrape from
# your installation and supply the target hive as the argument.
#
# Types and methods are referred to in code by a single byte.
# A 0-indexed table is referred to for each, containing first all the
# typerefs and then all the typedefs (similarly for methrefs/methdefs).
# So if there are 5 typerefs then type 07 is the third entry in the typedefs.
import lxml.etree | |
from base64 import b64decode | |
import struct | |
from binascii import hexlify | |
from StringIO import StringIO | |
def ntohs(data):
    """Decode *data* (exactly 2 bytes) as a big-endian unsigned 16-bit int."""
    (value,) = struct.unpack('>H', data)
    return value
def ntohl(data):
    """Decode *data* (exactly 4 bytes) as a big-endian unsigned 32-bit int."""
    (value,) = struct.unpack('>L', data)
    return value
def le16(data):
    """Decode *data* (exactly 2 bytes) as a little-endian unsigned 16-bit int."""
    (value,) = struct.unpack('<H', data)
    return value
def le24(data):
    """Decode *data* (exactly 3 bytes) as a little-endian unsigned 24-bit int.

    struct has no 24-bit format, so the value is padded with a zero high
    byte and decoded as 32-bit. The pad is a bytes literal so that it
    concatenates with bytes and bytearray input alike.

    Raises struct.error if *data* is not 3 bytes long.
    """
    (value,) = struct.unpack('<L', data + b'\0')
    return value
def le32(data):
    """Decode *data* (exactly 4 bytes) as a little-endian unsigned 32-bit int."""
    (value,) = struct.unpack('<L', data)
    return value
class Namer(object):
    """Translate module, type and method tags into readable names.

    Names are collected from hive debug metadata via parse_debug(); one
    Namer can be fed several hives so that names accumulate. Tags without
    a recorded name are rendered as hex placeholders.
    """

    def __init__(self):
        # module tag -> name, seeded with the one tag known in advance
        self.module_names = {
            0x5424ba: 'mscorlib',
        }
        # type tag -> "module::type"
        self.type_names = {}
        # (type tag << 16 | method tag) -> "module::type.method"
        self.method_names = {}

    # XXX TODO: each type/method tag ties all the way back to a module. type 'em strong
    def module(self, modtag):
        """Return the name for *modtag*, looked up by its low 24 bits.

        Unknown tags are rendered as 'm' followed by six hex digits.
        """
        tag = modtag & 0xffffff
        return self.module_names.get(tag, 'm%06x' % tag)

    def type(self, typetag):
        """Return the name for *typetag*, or 't' + four hex digits if unknown."""
        return self.type_names.get(typetag, 't%04x' % typetag)

    def method(self, typetag, methtag):
        """Return the name for method *methtag* of type *typetag*.

        Unknown methods are rendered as 'fTTTT.MMMM' (both tags in hex).
        """
        tag = typetag << 16 | methtag
        return self.method_names.get(tag, 'f%04x.%04x' % (typetag, methtag))

    def parse_debug(self, debug_data):
        """Record the names found in a debug metadata blob.

        The blob is read as three consecutive tables: modules, types and
        methods. Each table is a big-endian 16-bit entry count followed by
        that many entries of the form: tag, big-endian 16-bit name length,
        name. Tags are 4 bytes (module), 6 bytes (4-byte module tag +
        2-byte type tag) and 4 bytes (2-byte type tag + 2-byte method tag).

        Raises KeyError if a type refers to a module, or a method to a
        type, for which no name has been recorded, and struct.error if the
        blob ends in the middle of a count or length field.
        """
        data = StringIO(debug_data)

        def take_tlv(data, tagsize):
            # One entry: fixed-size tag, 16-bit length, then the name.
            tag = data.read(tagsize)
            length = ntohs(data.read(2))
            value = data.read(length)
            return tag, value

        nmodules = ntohs(data.read(2))
        for i in range(nmodules):
            tag, name = take_tlv(data, 4)
            self.module_names[ntohl(tag)] = name

        ntypes = ntohs(data.read(2))
        for i in range(ntypes):
            tag, name = take_tlv(data, 6)
            modtag = ntohl(tag[:4])
            modname = self.module_names[modtag]
            typetag = ntohs(tag[4:])
            self.type_names[typetag] = modname + "::" + name

        nmethods = ntohs(data.read(2))
        for i in range(nmethods):
            tag, name = take_tlv(data, 4)
            typetag = ntohs(tag[:2])
            typename = self.type_names[typetag]
            methtag = ntohs(tag[2:])
            tag = typetag << 16 | methtag
            self.method_names[tag] = typename + "." + name

        # Keep the leftover bytes: read() consumes them, and hexlify needs
        # the bytes themselves rather than the stream object.
        trailing = data.read()
        if trailing:
            print("Trailing data in debug stream: %s" % hexlify(trailing))

    def dump(self):
        """Print every known module, type and method name, sorted by tag."""
        print("Modules:")
        for tag, name in sorted(self.module_names.items()):
            print(" %06x %s" % (tag, name))
        print("Types:")
        for tag, name in sorted(self.type_names.items()):
            print(" %04x %s" % (tag, name))
        print("Methods:")
        for tag, name in sorted(self.method_names.items()):
            # The key packs the type tag above the 16-bit method tag.
            print(" %04x.%04x %s" % (tag >> 16, tag & 0xffff, name))
class Hive(object):
    """One .hive.xml assembly: its decoded blobs and the tables parsed from them.

    Set by __init__: xml, assembly, name, hive_data, debug_data, namer.
    Filled by parse_hive() (empty lists until it runs):
        dependencies -- list of (module tag, 8 raw flag bytes)
        types        -- type tags, typerefs first and then typedefs
        methods      -- (type tag, method tag) pairs, methrefs then methdefs
    """

    def __init__(self, filename, namer=None, skip_body=False):
        """Load *filename* and feed its debug metadata to *namer*.

        filename  -- path of the .hive.xml file to read
        namer     -- Namer to extend with this hive's names; a fresh one
                     is created when omitted
        skip_body -- when true, only the debug names are harvested and the
                     hive body is not parsed
        """
        # Close the file as soon as lxml has consumed it.
        with open(filename) as fp:
            self.xml = lxml.etree.parse(fp)
        self.assembly = self.xml.getroot().find('Assembly')
        self.name = self.assembly.get('Name')
        self.hive_data = bytearray(b64decode(self.assembly.find('Hive').text))
        self.debug_data = bytearray(b64decode(self.assembly.find('Win32DebugMetadata').text))

        # Start with empty tables so the dump methods also work on a hive
        # that was loaded with skip_body=True.
        self.dependencies = []
        self.types = []
        self.methods = []

        if namer is None:
            namer = Namer()
        self.namer = namer
        self.namer.parse_debug(self.debug_data)
        if not skip_body:
            self.parse_hive()

    def parse_hive(self):
        """Parse the hive body into dependencies, types and methods.

        Also prints, for every method that has a body, its prebyte count
        and a hex dump of the body. Many fields are read only to advance
        the stream; their meaning is unknown and they are kept in named
        locals purely as documentation of the layout.
        """
        data = self.hive_data[8:]  # skip length and HIVE
        num_mod_deps = le16(data[0x19:0x1b])
        num_mod_refs = le16(data[0x1b:0x1d])
        unk_len = le16(data[0x2d:0x2f])
        body = StringIO(data[0x2f:])

        self.dependencies = []
        for i in range(num_mod_deps):
            tag = le32(body.read(4))
            flags = body.read(8)
            self.dependencies.append((tag, flags))

        # Module table: (module tag, type count) entries. Entries go to the
        # typeref list until the first module tag shows up a second time;
        # from that entry on they go to the typedef list.
        m_typerefs = []
        m_typedefs = []
        active = m_typerefs
        first_module = None
        for i in range(num_mod_refs):
            module = le24(body.read(3))
            count = ord(body.read(1))
            if first_module is None:
                first_module = module
            elif module == first_module:  # can't find a count/pointer for this changeover
                active = m_typedefs
            active.append((module, count))

        self.types = []
        self.methods = []

        # type refs
        typeref_meths = []
        for module, count in m_typerefs:
            for i in range(count):
                typetag = le16(body.read(2))
                nmethods = le16(body.read(2))
                typeref_meths.append((typetag, nmethods))
                self.types.append(typetag)

        # method refs
        methrefs = []
        for typetag, nmethods in typeref_meths:
            for i in range(nmethods):
                methtag = le16(body.read(2))
                methrefs.append((typetag, methtag))
                self.methods.append((typetag, methtag))

        # XXX string tables, or what?
        unknown = body.read(unk_len)

        # type defs
        typedefs = []
        for module, count in m_typedefs:
            for i in range(count):
                typetag = le16(body.read(2))
                basetype = ord(body.read(1))
                subtype = ord(body.read(1))
                nfields = ord(body.read(1))
                nmethods = ord(body.read(1))
                flags = le16(body.read(2))
                fields = []
                for j in range(nfields):
                    fields.append(body.read(6))
                self.types.append(typetag)
                typedefs.append((typetag, nmethods))

        # method defs
        methdefs = []
        for typetag, nmethods in typedefs:
            for i in range(nmethods):
                methtag = le16(body.read(2))
                argflags = ord(body.read(1))
                nargs = argflags & 7
                flags = ord(body.read(1))
                ret_type = ord(body.read(1))
                defs = []
                prebytes = 0
                if argflags & 0x80:
                    # Extended header: replaces nargs and gives the number
                    # of bytes preceding the method body.
                    nargs, prebytes, unk2, ndefs, unk3 = map(ord, body.read(5))
                    for j in range(ndefs):
                        defs.append(body.read(7))
                if argflags & 0x40:
                    # XXX this is a hack and probably wrong
                    prebytes += 4
                if flags & 0x80:
                    methtype = 'pointer'  # just points to a module with same named/tagged method (4 byte full tag)
                else:
                    methtype = 'actual'
                arg_types = body.read(nargs)
                methdefs.append([typetag, methtag, methtype, prebytes])
                self.methods.append((typetag, methtag))

        # method bodies
        for methdef in methdefs:
            typetag, methtag, methtype, prebytes = methdef
            if methtype == 'pointer':
                # No body here, only the 4-byte tag of the target module.
                methdef.append(le32(body.read(4)))
                continue
            pre = body.read(prebytes)
            length = le16(body.read(2))
            methdef.append(body.read(length))
            print("%d prebytes" % prebytes)
            print("%s\n\t%s" % (self.namer.method(typetag, methtag), hexlify(methdef[-1])))

    def dump_hive(self):
        """Print each module dependency with its raw flag bytes in hex."""
        print("Dependencies:")
        for tag, flags in self.dependencies:
            print(" %s\t%s" % (self.namer.module(tag), hexlify(flags)))

    def dump_tables(self):
        """Print the type and method tables with their hex table indexes."""
        print("Types:")
        for i, typetag in enumerate(self.types):
            print(" %02X %s" % (i, self.namer.type(typetag)))
        print("Methods:")
        for i, (typetag, methtag) in enumerate(self.methods):
            print(" %02X %s" % (i, self.namer.method(typetag, methtag)))

    def dump(self):
        """Print the assembly name, then its dependencies and tables."""
        print("Assembly: %s" % self.name)
        self.dump_hive()
        self.dump_tables()
if __name__ == "__main__":
    import sys
    import glob

    # Check the argument before doing any work, so a missing target gives
    # a usage message instead of an IndexError after every hive was read.
    if len(sys.argv) < 2:
        sys.exit("usage: %s TARGET.hive.xml" % sys.argv[0])

    # First pass: harvest names from every hive in the current directory
    # so that references across hives can be resolved.
    namer = Namer()
    for ff in glob.glob('*.hive.xml'):
        Hive(ff, namer=namer, skip_body=True)

    # Second pass: fully parse and dump the requested hive.
    hive = Hive(sys.argv[1], namer=namer)
    hive.dump()
    # hive.namer.dump()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment