Skip to content

Instantly share code, notes, and snippets.

@abrasive
Created October 5, 2016 04:41
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save abrasive/d996607c036fa779ce114827865a2078 to your computer and use it in GitHub Desktop.
Save abrasive/d996607c036fa779ce114827865a2078 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python2
# A crude parser for Artemis SAM hives
# Currently effective on legacyconfigcardprogrammerapp.hive.xml
#
# Plonk it in a folder with as many .hive.xml as you can scrape from your
# installation and supply the target hive as argument.
# Types and methods are referred to in code by a single byte.
# A 0-indexed table is referred to for each, containing first all the
# typerefs and then all the typedefs (similarly for methrefs/methdefs).
# So if there are 5 typerefs then type 07 is the third entry in the typedefs.
import lxml.etree
from base64 import b64decode
import struct
from binascii import hexlify
from StringIO import StringIO
def ntohs(data):
    """Decode *data* as a single big-endian unsigned 16-bit integer.

    *data* must be exactly 2 bytes long; ``struct.error`` is raised otherwise.
    """
    (value,) = struct.unpack('>H', data)
    return value
def ntohl(data):
    """Decode *data* as a single big-endian unsigned 32-bit integer.

    *data* must be exactly 4 bytes long; ``struct.error`` is raised otherwise.
    """
    (value,) = struct.unpack('>L', data)
    return value
def le16(data):
    """Decode *data* as a single little-endian unsigned 16-bit integer.

    *data* must be exactly 2 bytes long; ``struct.error`` is raised otherwise.
    """
    (value,) = struct.unpack('<H', data)
    return value
def le24(data):
    """Decode *data* as a single little-endian unsigned 24-bit integer.

    The 3 input bytes are padded with a zero high byte and decoded as a
    32-bit little-endian value.  The pad is a bytes literal so that the
    concatenation works for ``str``/``bytes``/``bytearray`` input alike.

    *data* must be exactly 3 bytes long; ``struct.error`` is raised otherwise.
    """
    (value,) = struct.unpack('<L', data + b'\0')
    return value
def le32(data):
    """Decode *data* as a single little-endian unsigned 32-bit integer.

    *data* must be exactly 4 bytes long; ``struct.error`` is raised otherwise.
    """
    (value,) = struct.unpack('<L', data)
    return value
class Namer(object):
    """Registry that turns numeric module/type/method tags into names.

    Names are collected from the debug metadata of one or more hives via
    :meth:`parse_debug`.  A tag with no known name is rendered as a
    placeholder built from its hex value (``mXXXXXX``, ``tXXXX``,
    ``fXXXX.XXXX``).

    Attributes:
        module_names: dict mapping module tag -> module name.  Pre-seeded
            with the tag for ``mscorlib``.
        type_names: dict mapping 16-bit type tag -> ``"module::type"``.
        method_names: dict mapping ``typetag << 16 | methtag`` ->
            ``"module::type.method"``.
    """

    def __init__(self):
        self.module_names = {
            0x5424ba: 'mscorlib',
        }
        self.type_names = {}
        self.method_names = {}

    # XXX TODO: each type/method tag ties all the way back to a module. type 'em strong

    def module(self, modtag):
        """Return the name for *modtag*, or ``'m%06x'`` if it is unknown.

        Only the low 24 bits of *modtag* are used for the lookup.
        """
        tag = modtag & 0xffffff
        return self.module_names.get(tag, 'm%06x' % tag)

    def type(self, typetag):
        """Return the name for *typetag*, or ``'t%04x'`` if it is unknown."""
        return self.type_names.get(typetag, 't%04x' % typetag)

    def method(self, typetag, methtag):
        """Return the name of method *methtag* on type *typetag*.

        Falls back to ``'f%04x.%04x'`` when the pair is unknown.
        """
        tag = typetag << 16 | methtag
        return self.method_names.get(tag, 'f%04x.%04x' % (typetag, methtag))

    def parse_debug(self, debug_data):
        """Load module, type and method names from a debug metadata blob.

        The blob is read as three consecutive tables, each introduced by a
        big-endian 16-bit record count.  Every record is a fixed-size tag,
        a big-endian 16-bit length and then that many bytes of name:

        * modules: 4-byte tag
        * types:   6-byte tag (4-byte module tag + 2-byte type tag)
        * methods: 4-byte tag (2-byte type tag + 2-byte method tag)

        Raises:
            KeyError: if a type refers to a module, or a method refers to a
                type, that has not been named yet.
        """
        stream = StringIO(debug_data)

        def take_tlv(src, tagsize):
            # tag (tagsize bytes), big-endian u16 length, then the value
            tag = src.read(tagsize)
            length = ntohs(src.read(2))
            value = src.read(length)
            return tag, value

        nmodules = ntohs(stream.read(2))
        for _ in range(nmodules):
            tag, name = take_tlv(stream, 4)
            # NOTE(review): stored under the full 32-bit tag, whereas
            # module() looks up by the low 24 bits only -- confirm the high
            # byte is always zero in debug metadata.
            self.module_names[ntohl(tag)] = name

        ntypes = ntohs(stream.read(2))
        for _ in range(ntypes):
            tag, name = take_tlv(stream, 6)
            modtag = ntohl(tag[:4])
            modname = self.module_names[modtag]
            typetag = ntohs(tag[4:])
            self.type_names[typetag] = modname + "::" + name

        nmethods = ntohs(stream.read(2))
        for _ in range(nmethods):
            tag, name = take_tlv(stream, 4)
            typetag = ntohs(tag[:2])
            typename = self.type_names[typetag]
            methtag = ntohs(tag[2:])
            self.method_names[typetag << 16 | methtag] = typename + "." + name

        # Keep the leftover bytes so they can actually be shown; hexlify()
        # needs the bytes themselves, not the stream object.
        trailing = stream.read()
        if trailing:
            print("Trailing data in debug stream: %s" % hexlify(trailing))

    def dump(self):
        """Print every known module, type and method name, sorted by tag."""
        print("Modules:")
        for tag, name in sorted(self.module_names.items()):
            print(" %06x %s" % (tag, name))
        print("Types:")
        for tag, name in sorted(self.type_names.items()):
            print(" %04x %s" % (tag, name))
        print("Methods:")
        for tag, name in sorted(self.method_names.items()):
            print(" %04x.%04x %s" % (tag >> 16, tag & 0xffff, name))
class Hive(object):
    """A single ``.hive.xml`` assembly: decoded blobs plus parsed tables.

    The XML root must contain an ``Assembly`` element with a ``Name``
    attribute and base64-encoded ``Hive`` and ``Win32DebugMetadata`` child
    elements.

    Attributes set by ``__init__``:
        xml, assembly, name: the parsed document, its ``Assembly`` element
            and that element's ``Name`` attribute.
        hive_data, debug_data: the decoded blobs as ``bytearray``.
        namer: the :class:`Namer` used to resolve tags to names.

    Attributes set by ``parse_hive``:
        dependencies: list of ``(module_tag, flags_bytes)``.
        types: list of type tags, typerefs first and then typedefs; the
            one-byte type operand in method bodies indexes this list.
        methods: list of ``(typetag, methtag)``, methrefs first and then
            methdefs; the one-byte call operand indexes this list.
    """

    # Opcode tables are class-level constants so they are built once rather
    # than on every decoded instruction.  Two-byte opcodes (0xfe prefix) are
    # keyed as 0xfeNN.

    # Opcodes with no operand.
    _NO_ARG = {
        0x77: 'inc',  # Artemis custom: add 1 to top of stack
        0x00: 'nop',
        0x01: 'break',
        0x02: 'ldarg.0',
        0x03: 'ldarg.1',
        0x04: 'ldarg.2',
        0x05: 'ldarg.3',
        0x06: 'ldloc.0',
        0x07: 'ldloc.1',
        0x08: 'ldloc.2',
        0x09: 'ldloc.3',
        0x0a: 'stloc.0',
        0x0b: 'stloc.1',
        0x0c: 'stloc.2',
        0x0d: 'stloc.3',
        0x14: 'ldnull',
        0x15: 'ldc.i4.m1',
        0x16: 'ldc.i4.0',
        0x17: 'ldc.i4.1',
        0x18: 'ldc.i4.2',
        0x19: 'ldc.i4.3',
        0x1a: 'ldc.i4.4',
        0x1b: 'ldc.i4.5',
        0x1c: 'ldc.i4.6',
        0x1d: 'ldc.i4.7',
        0x1e: 'ldc.i4.8',
        0x25: 'dup',
        0x26: 'pop',
        0x2a: 'ret',
        0x46: 'ldind.i1',
        0x47: 'ldind.u1',
        0x48: 'ldind.i2',
        0x49: 'ldind.u2',
        0x4a: 'ldind.i4',
        0x4b: 'ldind.u4',
        0x4c: 'ldind.i8',
        0x4d: 'ldind.i',
        0x4e: 'ldind.r4',
        0x4f: 'ldind.r8',
        0x50: 'ldind.ref',
        0x51: 'stind.ref',
        0x52: 'stind.i1',
        0x53: 'stind.i2',
        0x54: 'stind.i4',
        0x55: 'stind.i8',
        0x56: 'stind.r4',
        0x57: 'stind.r8',
        0x58: 'add',
        0x59: 'sub',
        0x5a: 'mul',
        0x5b: 'div',
        0x5c: 'div.un',
        0x5d: 'rem',
        0x5e: 'rem.un',
        0x5f: 'and',
        0x60: 'or',
        0x61: 'xor',
        0x62: 'shl',
        0x63: 'shr',
        0x64: 'shr.un',
        0x65: 'neg',
        0x66: 'not',
        0x67: 'conv.i1',
        0x68: 'conv.i2',
        0x69: 'conv.i4',
        0x6a: 'conv.i8',
        0x6b: 'conv.r4',
        0x6c: 'conv.r8',
        0x6d: 'conv.u4',
        0x6e: 'conv.u8',
        0x76: 'conv.r.un',
        0x7a: 'throw',
        0x82: 'conv.ovf.i1.un',
        0x83: 'conv.ovf.i2.un',
        0x84: 'conv.ovf.i4.un',
        0x85: 'conv.ovf.i8.un',
        0x86: 'conv.ovf.u1.un',
        0x87: 'conv.ovf.u2.un',
        0x88: 'conv.ovf.u4.un',
        0x89: 'conv.ovf.u8.un',
        0x8a: 'conv.ovf.i.un',
        0x8b: 'conv.ovf.u.un',
        0x8e: 'ldlen',
        0x90: 'ldelem.i1',
        0x91: 'ldelem.u1',
        0x92: 'ldelem.i2',
        0x93: 'ldelem.u2',
        0x94: 'ldelem.i4',
        0x95: 'ldelem.u4',
        0x96: 'ldelem.i8',
        0x97: 'ldelem.i',
        0x98: 'ldelem.r4',
        0x99: 'ldelem.r8',
        0x9a: 'ldelem.ref',
        0x9b: 'stelem.i',
        0x9c: 'stelem.i1',
        0x9d: 'stelem.i2',
        0x9e: 'stelem.i4',
        0x9f: 'stelem.i8',
        0xa0: 'stelem.r4',
        0xa1: 'stelem.r8',
        0xa2: 'stelem.ref',
        0xb3: 'conv.ovf.i1',
        0xb4: 'conv.ovf.u1',
        0xb5: 'conv.ovf.i2',
        0xb6: 'conv.ovf.u2',
        0xb7: 'conv.ovf.i4',
        0xb8: 'conv.ovf.u4',
        0xb9: 'conv.ovf.i8',
        0xba: 'conv.ovf.u8',
        0xc3: 'ckfinite',
        0xd1: 'conv.u2',
        0xd2: 'conv.u1',
        0xd3: 'conv.i',
        0xd4: 'conv.ovf.i',
        0xd5: 'conv.ovf.u',
        0xd6: 'add.ovf',
        0xd7: 'add.ovf.un',
        0xd8: 'mul.ovf',
        0xd9: 'mul.ovf.un',
        0xda: 'sub.ovf',
        0xdb: 'sub.ovf.un',
        0xdc: 'endfinally',
        0xdf: 'stind.i',
        0xe0: 'conv.u',
        0xfe01: 'ceq',
        0xfe02: 'cgt',
        0xfe03: 'cgt.un',
        0xfe04: 'clt',
        0xfe05: 'clt.un',
        0xfe1a: 'rethrow',
    }

    # Opcodes followed by one unsigned byte, reported as a plain number.
    _UINT8_ARG = {
        0x0e: 'ldarg.s',
        0x0f: 'ldarga.s',
        0x10: 'starg.s',
        0x11: 'ldloc.s',
        0x12: 'ldloca.s',
        0x13: 'stloc.s',
        0xde: 'leave.s',
    }

    # Opcodes followed by a one-byte index into self.types.
    _TYPE_ARG = {
        0x8d: 'newarr',
        0xfe15: 'initobj',
        0xfe16: 'constrained.',
        0x74: 'castclass',
        0x70: 'cpobj',
        0x71: 'ldobj',
        0x81: 'stobj',
        0x8c: 'box',
        0x79: 'unbox',
    }

    # Opcodes followed by a one-byte field operand (reported as a number).
    _FIELD_ARG = {
        0x7b: 'ldfld',
        0x7c: 'ldflda',
        0x7d: 'stfld',
        0x7e: 'ldsfld',
        0x7f: 'ldsflda',
        0x80: 'stsfld',
    }

    # Branches with a signed 8-bit offset relative to the next instruction.
    _BRANCH_SHORT = {
        0x2b: 'br.s',
        0x2c: 'brfalse.s',
        0x2d: 'brtrue.s',
        0x2e: 'beq.s',
        0x2f: 'bge.s',
        0x30: 'bgt.s',
        0x31: 'ble.s',
        0x32: 'blt.s',
        0x33: 'bne.un.s',
        0x34: 'bge.un.s',
        0x35: 'bgt.un.s',
        0x36: 'ble.un.s',
        0x37: 'blt.un.s',
    }

    # Branches with a signed 32-bit offset relative to the next instruction.
    _BRANCH_LONG = {
        0x38: 'br',
        0x39: 'brfalse',
        0x3a: 'brtrue',
        0x3b: 'beq',
        0x3c: 'bge',
        0x3d: 'bgt',
        0x3e: 'ble',
        0x3f: 'blt',
        0x40: 'bne.un',
        0x41: 'bge.un',
        0x42: 'bgt.un',
        0x43: 'ble.un',
        0x44: 'blt.un',
    }

    # Reference list of standard opcodes without a dedicated decoder.  It is
    # never consulted by disas_one(); several entries (call, callvirt,
    # switch, ldtoken, 0x23) are in fact decoded by explicit branches there.
    _UNHANDLED = {
        0x21: 'ldc.i8',
        0x23: 'ldc.r8',
        0x27: 'jmp',
        0x28: 'call',
        0x29: 'calli',
        0x45: 'switch',
        0x6f: 'callvirt',
        0x72: 'ldstr',
        0x75: 'isinst',
        0x8f: 'ldelema',
        0xc2: 'refanyval',
        0xc6: 'mkrefany',
        0xd0: 'ldtoken',
        0xfe00: 'arglist',
        0xfe07: 'ldvirtftn',
        0xfe09: 'ldarg',
        0xfe0a: 'ldarga',
        0xfe0b: 'starg',
        0xfe0c: 'ldloc',
        0xfe0d: 'ldloca',
        0xfe0e: 'stloc',
        0xfe0f: 'localloc',
        0xfe11: 'endfilter',
        0xfe12: 'unaligned.',
        0xfe13: 'volatile.',
        0xfe14: 'tail.',
        0xfe17: 'cpblk',
        0xfe18: 'initblk',
        0xfe1c: 'sizeof',
        0xfe1d: 'refanytype',
    }

    # Opcodes followed by a one-byte index into self.methods.
    _CALL = {
        0x28: 'call',
        0x6f: 'callvirt',
        0x73: 'newobj',
        0xfe06: 'ldftn',  # really? I'm not so sure
    }

    # Artemis-specific opcodes of unknown meaning, decoded as operand-less.
    _ARTEMIS_UNKNOWN = frozenset([
        0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xbb, 0xbd,
        0xb0, 0xbe, 0xb1, 0xbc, 0xaf, 0xfd, 0xff,
    ])

    def __init__(self, filename, namer=None, skip_body=False):
        """Load *filename* and feed its debug names into *namer*.

        Args:
            filename: path of the ``.hive.xml`` file.
            namer: shared :class:`Namer`; a fresh one is created when None.
            skip_body: when true, only names are collected and
                :meth:`parse_hive` is not run.
        """
        # lxml parses the whole document inside parse(), so the handle can
        # be closed as soon as it returns.
        with open(filename) as fp:
            self.xml = lxml.etree.parse(fp)
        self.assembly = self.xml.getroot().find('Assembly')
        self.name = self.assembly.get('Name')
        self.hive_data = bytearray(b64decode(self.assembly.find('Hive').text))
        self.debug_data = bytearray(b64decode(self.assembly.find('Win32DebugMetadata').text))
        if namer is None:
            namer = Namer()
        self.namer = namer
        self.namer.parse_debug(self.debug_data)
        if not skip_body:
            self.parse_hive()

    def parse_hive(self):
        """Parse the hive blob and print a disassembly of every method body.

        Fills ``self.dependencies``, ``self.types`` and ``self.methods``.
        For each method that has its own body, the pre-bytes, the raw body
        in hex and one line per decoded instruction are printed.

        The stream is consumed strictly sequentially, so the order of the
        reads below is significant.
        """
        data = self.hive_data[8:]  # skip length and HIVE
        unk_dat = le16(data[0x17:0x19])
        num_mod_deps = le16(data[0x19:0x1b])
        num_mod_refs = le16(data[0x1b:0x1d])
        body = StringIO(data[0x2d:])

        # Size of an as-yet-unidentified region that sits between the
        # method references and the type definitions.
        unk_len0 = le16(body.read(2))
        unk_len = unk_len0
        if unk_dat != 0xffff:  # not sure if this is the correct trigger
            unk_len1 = le16(body.read(2))
            unk_len2 = le16(body.read(2))
            unk_len += 2*unk_len1 + unk_len2

        self.dependencies = []
        for i in range(num_mod_deps):
            tag = le32(body.read(4))
            flags = body.read(8)
            self.dependencies.append((tag, flags))

        # Module table: (24-bit module tag, type count) records.  Entries
        # describe typerefs until the first module tag shows up a second
        # time; from there on they describe typedefs.
        m_typerefs = []
        m_typedefs = []
        active = m_typerefs
        first_module = None
        for i in range(num_mod_refs):
            module = le24(body.read(3))
            count = ord(body.read(1))
            if first_module is None:
                first_module = module
            elif module == first_module:  # can't find a count/pointer for this changeover
                active = m_typedefs
            active.append((module, count))

        self.types = []
        self.methods = []

        # type refs: tag plus two per-type counts of method tags to follow
        typeref_meths = []
        for module, count in m_typerefs:
            for i in range(count):
                typetag = le16(body.read(2))
                nmethods = ord(body.read(1))
                nother = ord(body.read(1))
                typeref_meths.append((typetag, nmethods, nother))
                self.types.append(typetag)

        # method refs: first every type's `nmethods` tags, then every
        # type's `nother` tags
        for typetag, nmethods, nother in typeref_meths:
            for i in range(nmethods):
                methtag = le16(body.read(2))
                self.methods.append((typetag, methtag))
        for typetag, nmethods, nother in typeref_meths:
            for i in range(nother):
                methtag = le16(body.read(2))
                self.methods.append((typetag, methtag))

        # XXX string tables, or what?
        unknown = body.read(unk_len)

        # type defs (most fields are decoded only to advance the stream)
        typedefs = []
        for module, count in m_typedefs:
            for i in range(count):
                typetag = le16(body.read(2))
                basetype = ord(body.read(1))
                subtype = ord(body.read(1))
                nfields = ord(body.read(1))
                nmethods = ord(body.read(1))
                flags = le16(body.read(2))
                fields = []
                for j in range(nfields):
                    fields.append(body.read(6))
                self.types.append(typetag)
                typedefs.append((typetag, nmethods))

        # method defs
        methdefs = []
        for typetag, nmethods in typedefs:
            for i in range(nmethods):
                methtag = le16(body.read(2))
                argflags = ord(body.read(1))
                nargs = argflags & 7
                flags = ord(body.read(1))
                ret_type = ord(body.read(1))
                defs = []
                prebytes = 0
                if argflags & 0x80:
                    # extended header: explicit counts replace the packed ones
                    nargs, prebytes, unk2, ndefs, unk3 = map(ord, body.read(5))
                    for j in range(ndefs):
                        defs.append(body.read(7))
                else:
                    prebytes = argflags >> 4
                if flags & 0x80:
                    methtype = 'pointer'  # just points to a module with same named/tagged method (4 byte full tag)
                else:
                    methtype = 'actual'
                arg_types = body.read(nargs)
                methdefs.append([typetag, methtag, methtype, prebytes])
                self.methods.append((typetag, methtag))

        # method bodies: each methdef gets its payload appended -- a 32-bit
        # tag for 'pointer' entries, the raw code bytes otherwise
        for methdef in methdefs:
            typetag, methtag, methtype, prebytes = methdef
            if methtype == 'pointer':
                methdef.append(le32(body.read(4)))
                continue
            pre = body.read(prebytes)
            length = le16(body.read(2))
            methdef.append(body.read(length))
            print("%d prebytes: %s" % (prebytes, hexlify(pre)))
            print("%s\n\t%s" % (self.namer.method(typetag, methtag), hexlify(methdef[-1])))
            for insn in self.disassemble(methdef[-1]):
                strs = []
                for elem in insn:
                    if isinstance(elem, int):
                        strs.append('0x%x' % elem)
                    else:
                        strs.append(str(elem))
                print("\t".join(strs))

    def disassemble(self, methbody):
        """Decode every instruction in *methbody*.

        Returns:
            A list with one list per instruction:
            ``[address_string, hex_of_raw_bytes, mnemonic, operand...]``.

        Raises:
            ValueError: on an opcode that disas_one() cannot decode.
        """
        data = StringIO(methbody)
        insns = []
        while data.tell() < len(methbody):
            i_addr = data.tell()
            decoded = self.disas_one(data)
            i_end = data.tell()
            i_bytes = data.getvalue()[i_addr:i_end]
            insns.append(['0x%02x' % i_addr, hexlify(i_bytes)] + list(decoded))
        return insns

    def disas_one(self, data):
        """Decode the single instruction at the current position of *data*.

        Args:
            data: a file-like object supporting ``read`` and ``tell``.

        Returns:
            A tuple ``(mnemonic, operand...)``.  Branch and switch targets
            are given as positions within *data*.

        Raises:
            ValueError: if the opcode is not recognised.
        """
        opcode = ord(data.read(1))
        if opcode == 0xfe:
            # two-byte opcode: fold the second byte in as 0xfeNN
            opcode <<= 8
            opcode |= ord(data.read(1))

        if opcode in self._NO_ARG:
            return self._NO_ARG[opcode],
        if opcode in self._UINT8_ARG:
            arg = ord(data.read(1))
            return self._UINT8_ARG[opcode], arg
        if opcode == 0xdd:
            arg = le32(data.read(4))
            return 'leave', arg
        if opcode in self._TYPE_ARG:
            arg = ord(data.read(1))
            return self._TYPE_ARG[opcode], self.namer.type(self.types[arg])
        if opcode in self._FIELD_ARG:
            arg = ord(data.read(1))
            return self._FIELD_ARG[opcode], arg
        if opcode in self._BRANCH_SHORT:
            arg = struct.unpack('b', data.read(1))[0]
            loc = data.tell()
            return self._BRANCH_SHORT[opcode], loc + arg
        if opcode in self._BRANCH_LONG:
            arg = struct.unpack('<l', data.read(4))[0]
            loc = data.tell()
            return self._BRANCH_LONG[opcode], loc + arg
        if opcode in self._CALL:
            method_id = ord(data.read(1))
            return self._CALL[opcode], self.namer.method(*self.methods[method_id])
        if opcode == 0x1f:
            arg = struct.unpack('b', data.read(1))[0]
            return 'ldc.i4.s', arg
        if opcode == 0x20:
            arg = struct.unpack('<l', data.read(4))[0]
            return 'ldc.i4', arg
        # this is *definitely* not ldc.r4
        if opcode == 0x22:
            return 'unk22',
        if opcode == 0x23:
            # must be a tuple like every other return value, otherwise
            # disassemble() splits the mnemonic into single characters
            return 'unk23',  # ??
        if opcode == 0xd0:
            arg = ord(data.read(1))  # ??
            return 'ldtoken', arg
        if opcode == 0x24:  # Artemis custom - multiple ldarg
            # the operand is a bitmask: bit i set means argument i is loaded
            arg = ord(data.read(1))
            ldargs = []
            for i in range(8):
                if arg & (1 << i):
                    ldargs.append(i)
            return 'ldarg', ','.join(map(str, ldargs))
        if opcode == 0xf0:  # Artemis unknown
            arg = data.read(3)
            return 'unkF0', hexlify(arg)
        if opcode in self._ARTEMIS_UNKNOWN:
            return 'unk%2X' % opcode,
        if opcode == 0x45:  # switch
            count = le32(data.read(4))
            # offsets are relative to the end of the whole jump table
            loc = data.tell() + 4*count
            targets = []
            for i in range(count):
                offset = struct.unpack('<l', data.read(4))[0]
                targets.append(loc + offset)
            return 'switch', targets
        raise ValueError("Unhandled opcode: 0x%02x" % opcode)

    def dump_hive(self):
        """Print the module dependencies together with their raw flag bytes."""
        print("Dependencies:")
        for tag, flags in self.dependencies:
            print(" %s\t%s" % (self.namer.module(tag), hexlify(flags)))

    def dump_tables(self):
        """Print the type and method tables with their one-byte indices."""
        print("Types:")
        for i, typetag in enumerate(self.types):
            print(" %02X %s" % (i, self.namer.type(typetag)))
        print("Methods:")
        for i, (typetag, methtag) in enumerate(self.methods):
            print(" %02X %s" % (i, self.namer.method(typetag, methtag)))

    def dump(self):
        """Print the assembly name, its dependencies and its tables."""
        print("Assembly: %s" % self.name)
        self.dump_hive()
        self.dump_tables()
if __name__ == "__main__":
    import sys
    import glob

    # The target hive is a required argument; say so instead of failing
    # with a bare IndexError.
    if len(sys.argv) < 2:
        sys.exit("usage: %s TARGET.hive.xml" % sys.argv[0])

    namer = Namer()
    # First pass: harvest debug names from every hive in the current
    # directory so cross-module references in the target can be resolved.
    for ff in glob.glob('*.hive.xml'):
        hive = Hive(ff, namer=namer, skip_body=True)
    # Second pass: fully parse (and disassemble) the requested hive.
    hive = Hive(sys.argv[1], namer=namer)
    hive.dump()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment