-
-
Save abrasive/d996607c036fa779ce114827865a2078 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2 | |
# A crude parser for Artemis SAM hives | |
# Currently effective on legacyconfigcardprogrammerapp.hive.xml | |
# | |
# Plonk it in a folder with as many .hive.xml as you can scrape from your | |
# installation and supply the target hive as argument. | |
# Types and methods are referred to in code by a single byte. | |
# A 0-indexed table is referred to for each, containing first all the | |
# typerefs and then all the typedefs (similarly for methrefs/methdefs). | |
# So if there are 5 typerefs then type 07 is the third entry in the typedefs. | |
import lxml.etree | |
from base64 import b64decode | |
import struct | |
from binascii import hexlify | |
from StringIO import StringIO | |
def ntohs(data):
    """Decode *data* (exactly 2 bytes) as a big-endian unsigned 16-bit int."""
    (value,) = struct.unpack('>H', data)
    return value
def ntohl(data):
    """Decode *data* (exactly 4 bytes) as a big-endian unsigned 32-bit int."""
    (value,) = struct.unpack('>L', data)
    return value
def le16(data):
    """Decode *data* (exactly 2 bytes) as a little-endian unsigned 16-bit int."""
    (value,) = struct.unpack('<H', data)
    return value
def le24(data):
    """Decode *data* (exactly 3 bytes) as a little-endian unsigned 24-bit int.

    struct has no 3-byte format, so the value is zero-extended to 4 bytes
    and decoded as a 32-bit little-endian integer.  The padding is a bytes
    literal so that str, bytes and bytearray inputs all concatenate cleanly
    (on Python 2 ``b'\\0'`` is the same object type as ``'\\0'``).
    """
    (value,) = struct.unpack('<L', data + b'\0')
    return value
def le32(data):
    """Decode *data* (exactly 4 bytes) as a little-endian unsigned 32-bit int."""
    (value,) = struct.unpack('<L', data)
    return value
class Namer(object):
    """Maps numeric module/type/method tags to human-readable names.

    Names are learned from the debug metadata of one or more hives via
    ``parse_debug``; a single Namer can be shared across several hives so
    that names discovered in one are available when printing another.
    Unknown tags fall back to a synthetic name built from the tag value.

    Attributes:
        module_names: dict of module tag -> module name.
        type_names: dict of 16-bit type tag -> "module::type".
        method_names: dict of (typetag << 16 | methtag) -> "module::type.method".
    """

    def __init__(self):
        self.module_names = {
            0x5424ba: 'mscorlib',
        }
        self.type_names = {}
        self.method_names = {}

    # XXX TODO: each type/method tag ties all the way back to a module. type 'em strong

    def module(self, modtag):
        """Return the name for *modtag*, or 'mXXXXXX' if it is unknown.

        Only the low 24 bits of *modtag* are used for the lookup.
        NOTE(review): parse_debug stores names under the full 32-bit tag
        read from the stream -- confirm the top byte is always zero there.
        """
        tag = modtag & 0xffffff
        return self.module_names.get(tag, 'm%06x' % tag)

    def type(self, typetag):
        """Return the name for *typetag*, or 'tXXXX' if it is unknown."""
        return self.type_names.get(typetag, 't%04x' % typetag)

    def method(self, typetag, methtag):
        """Return the name for the method, or 'fTTTT.MMMM' if it is unknown."""
        tag = typetag << 16 | methtag
        return self.method_names.get(tag, 'f%04x.%04x' % (typetag, methtag))

    @staticmethod
    def _take_tlv(stream, tagsize):
        """Read one record: *tagsize* tag bytes, a 2-byte BE length, the value."""
        tag = stream.read(tagsize)
        length = ntohs(stream.read(2))
        value = stream.read(length)
        return tag, value

    def parse_debug(self, debug_data):
        """Load module, type and method names from a debug metadata blob.

        The blob holds three tables in order (modules, types, methods), each
        a 2-byte big-endian count followed by that many tag/length/name
        records.  Type records are tagged with a 4-byte module tag plus a
        2-byte type tag; method records with a 2-byte type tag plus a 2-byte
        method tag.

        Raises:
            KeyError: if a type refers to a module, or a method to a type,
                that has not been named yet.
        """
        data = StringIO(debug_data)

        nmodules = ntohs(data.read(2))
        for _ in range(nmodules):
            tag, name = self._take_tlv(data, 4)
            self.module_names[ntohl(tag)] = name

        ntypes = ntohs(data.read(2))
        for _ in range(ntypes):
            tag, name = self._take_tlv(data, 6)
            modtag = ntohl(tag[:4])
            modname = self.module_names[modtag]
            typetag = ntohs(tag[4:])
            self.type_names[typetag] = modname + "::" + name

        nmethods = ntohs(data.read(2))
        for _ in range(nmethods):
            tag, name = self._take_tlv(data, 4)
            typetag = ntohs(tag[:2])
            typename = self.type_names[typetag]
            methtag = ntohs(tag[2:])
            self.method_names[typetag << 16 | methtag] = typename + "." + name

        # Capture the leftover bytes before reporting them: hexlify needs the
        # bytes themselves, not the stream object, and a second read() after
        # the check would return nothing.
        trailing = data.read()
        if trailing:
            print("Trailing data in debug stream: %s" % hexlify(trailing))

    def dump(self):
        """Print every known module, type and method name, sorted by tag."""
        print("Modules:")
        for tag, name in sorted(self.module_names.items()):
            print(" %06x %s" % (tag, name))
        print("Types:")
        for tag, name in sorted(self.type_names.items()):
            print(" %04x %s" % (tag, name))
        print("Methods:")
        for tag, name in sorted(self.method_names.items()):
            print(" %04x.%04x %s" % (tag >> 16, tag & 0xffff, name))
class Hive(object):
    """One Artemis SAM assembly loaded from a ``.hive.xml`` file.

    Construction reads the XML, base64-decodes the ``Hive`` and
    ``Win32DebugMetadata`` elements of the ``Assembly`` node, feeds the
    debug metadata to the (possibly shared) Namer and, unless *skip_body*
    is set, parses the hive body -- which also prints a disassembly of
    every method body it finds.

    Attributes set by ``__init__``: xml, assembly, name, hive_data,
    debug_data, namer.
    Attributes set by ``parse_hive``: dependencies (list of (tag, flags)),
    types (list of type tags, typerefs first then typedefs) and methods
    (list of (typetag, methtag), methrefs first then methdefs).  Bytecode
    refers to types and methods by their index in these two lists.
    """

    # --- opcode tables ----------------------------------------------------
    # Built once at class-definition time rather than on every decoded
    # instruction.  Two-byte opcodes (0xfe prefix) are keyed as 0xfeXX.

    # Opcodes that take no operand.
    _NO_ARG = {
        0x77: 'inc',  # Artemis custom: add 1 to top of stack
        0x00: 'nop',
        0x01: 'break',
        0x02: 'ldarg.0',
        0x03: 'ldarg.1',
        0x04: 'ldarg.2',
        0x05: 'ldarg.3',
        0x06: 'ldloc.0',
        0x07: 'ldloc.1',
        0x08: 'ldloc.2',
        0x09: 'ldloc.3',
        0x0a: 'stloc.0',
        0x0b: 'stloc.1',
        0x0c: 'stloc.2',
        0x0d: 'stloc.3',
        0x14: 'ldnull',
        0x15: 'ldc.i4.m1',
        0x16: 'ldc.i4.0',
        0x17: 'ldc.i4.1',
        0x18: 'ldc.i4.2',
        0x19: 'ldc.i4.3',
        0x1a: 'ldc.i4.4',
        0x1b: 'ldc.i4.5',
        0x1c: 'ldc.i4.6',
        0x1d: 'ldc.i4.7',
        0x1e: 'ldc.i4.8',
        0x25: 'dup',
        0x26: 'pop',
        0x2a: 'ret',
        0x46: 'ldind.i1',
        0x47: 'ldind.u1',
        0x48: 'ldind.i2',
        0x49: 'ldind.u2',
        0x4a: 'ldind.i4',
        0x4b: 'ldind.u4',
        0x4c: 'ldind.i8',
        0x4d: 'ldind.i',
        0x4e: 'ldind.r4',
        0x4f: 'ldind.r8',
        0x50: 'ldind.ref',
        0x51: 'stind.ref',
        0x52: 'stind.i1',
        0x53: 'stind.i2',
        0x54: 'stind.i4',
        0x55: 'stind.i8',
        0x56: 'stind.r4',
        0x57: 'stind.r8',
        0x58: 'add',
        0x59: 'sub',
        0x5a: 'mul',
        0x5b: 'div',
        0x5c: 'div.un',
        0x5d: 'rem',
        0x5e: 'rem.un',
        0x5f: 'and',
        0x60: 'or',
        0x61: 'xor',
        0x62: 'shl',
        0x63: 'shr',
        0x64: 'shr.un',
        0x65: 'neg',
        0x66: 'not',
        0x67: 'conv.i1',
        0x68: 'conv.i2',
        0x69: 'conv.i4',
        0x6a: 'conv.i8',
        0x6b: 'conv.r4',
        0x6c: 'conv.r8',
        0x6d: 'conv.u4',
        0x6e: 'conv.u8',
        0x76: 'conv.r.un',
        0x7a: 'throw',
        0x82: 'conv.ovf.i1.un',
        0x83: 'conv.ovf.i2.un',
        0x84: 'conv.ovf.i4.un',
        0x85: 'conv.ovf.i8.un',
        0x86: 'conv.ovf.u1.un',
        0x87: 'conv.ovf.u2.un',
        0x88: 'conv.ovf.u4.un',
        0x89: 'conv.ovf.u8.un',
        0x8a: 'conv.ovf.i.un',
        0x8b: 'conv.ovf.u.un',
        0x8e: 'ldlen',
        0x90: 'ldelem.i1',
        0x91: 'ldelem.u1',
        0x92: 'ldelem.i2',
        0x93: 'ldelem.u2',
        0x94: 'ldelem.i4',
        0x95: 'ldelem.u4',
        0x96: 'ldelem.i8',
        0x97: 'ldelem.i',
        0x98: 'ldelem.r4',
        0x99: 'ldelem.r8',
        0x9a: 'ldelem.ref',
        0x9b: 'stelem.i',
        0x9c: 'stelem.i1',
        0x9d: 'stelem.i2',
        0x9e: 'stelem.i4',
        0x9f: 'stelem.i8',
        0xa0: 'stelem.r4',
        0xa1: 'stelem.r8',
        0xa2: 'stelem.ref',
        0xb3: 'conv.ovf.i1',
        0xb4: 'conv.ovf.u1',
        0xb5: 'conv.ovf.i2',
        0xb6: 'conv.ovf.u2',
        0xb7: 'conv.ovf.i4',
        0xb8: 'conv.ovf.u4',
        0xb9: 'conv.ovf.i8',
        0xba: 'conv.ovf.u8',
        0xc3: 'ckfinite',
        0xd1: 'conv.u2',
        0xd2: 'conv.u1',
        0xd3: 'conv.i',
        0xd4: 'conv.ovf.i',
        0xd5: 'conv.ovf.u',
        0xd6: 'add.ovf',
        0xd7: 'add.ovf.un',
        0xd8: 'mul.ovf',
        0xd9: 'mul.ovf.un',
        0xda: 'sub.ovf',
        0xdb: 'sub.ovf.un',
        0xdc: 'endfinally',
        0xdf: 'stind.i',
        0xe0: 'conv.u',
        0xfe01: 'ceq',
        0xfe02: 'cgt',
        0xfe03: 'cgt.un',
        0xfe04: 'clt',
        0xfe05: 'clt.un',
        0xfe1a: 'rethrow',
    }

    # Opcodes followed by one unsigned byte.
    _UINT8_ARG = {
        0x0e: 'ldarg.s',
        0x0f: 'ldarga.s',
        0x10: 'starg.s',
        0x11: 'ldloc.s',
        0x12: 'ldloca.s',
        0x13: 'stloc.s',
        0xde: 'leave.s',
    }

    # Opcodes followed by a one-byte index into self.types.
    _TYPE_ARG = {
        0x8d: 'newarr',
        0xfe15: 'initobj',
        0xfe16: 'constrained.',
        0x74: 'castclass',
        0x70: 'cpobj',
        0x71: 'ldobj',
        0x81: 'stobj',
        0x8c: 'box',
        0x79: 'unbox',
    }

    # Opcodes followed by a one-byte field reference (printed raw).
    _FIELD_ARG = {
        0x7b: 'ldfld',
        0x7c: 'ldflda',
        0x7d: 'stfld',
        0x7e: 'ldsfld',
        0x7f: 'ldsflda',
        0x80: 'stsfld',
    }

    # Branches with a signed 8-bit offset relative to the next instruction.
    _BRANCH_SHORT = {
        0x2b: 'br.s',
        0x2c: 'brfalse.s',
        0x2d: 'brtrue.s',
        0x2e: 'beq.s',
        0x2f: 'bge.s',
        0x30: 'bgt.s',
        0x31: 'ble.s',
        0x32: 'blt.s',
        0x33: 'bne.un.s',
        0x34: 'bge.un.s',
        0x35: 'bgt.un.s',
        0x36: 'ble.un.s',
        0x37: 'blt.un.s',
    }

    # Branches with a signed 32-bit offset relative to the next instruction.
    _BRANCH_LONG = {
        0x38: 'br',
        0x39: 'brfalse',
        0x3a: 'brtrue',
        0x3b: 'beq',
        0x3c: 'bge',
        0x3d: 'bgt',
        0x3e: 'ble',
        0x3f: 'blt',
        0x40: 'bne.un',
        0x41: 'bge.un',
        0x42: 'bgt.un',
        0x43: 'ble.un',
        0x44: 'blt.un',
    }

    # Reference only -- never consulted by the decoder.  Mnemonics for
    # opcodes that have no entry in the tables above; a few of them (0x23,
    # 0x28, 0x45, 0x6f, 0xd0) are nevertheless decoded in disas_one.
    _UNHANDLED = {
        0x21: 'ldc.i8',
        0x23: 'ldc.r8',
        0x27: 'jmp',
        0x28: 'call',
        0x29: 'calli',
        0x45: 'switch',
        0x6f: 'callvirt',
        0x72: 'ldstr',
        0x75: 'isinst',
        0x8f: 'ldelema',
        0xc2: 'refanyval',
        0xc6: 'mkrefany',
        0xd0: 'ldtoken',
        0xfe00: 'arglist',
        0xfe07: 'ldvirtftn',
        0xfe09: 'ldarg',
        0xfe0a: 'ldarga',
        0xfe0b: 'starg',
        0xfe0c: 'ldloc',
        0xfe0d: 'ldloca',
        0xfe0e: 'stloc',
        0xfe0f: 'localloc',
        0xfe11: 'endfilter',
        0xfe12: 'unaligned.',
        0xfe13: 'volatile.',
        0xfe14: 'tail.',
        0xfe17: 'cpblk',
        0xfe18: 'initblk',
        0xfe1c: 'sizeof',
        0xfe1d: 'refanytype',
    }

    # Opcodes followed by a one-byte index into self.methods.
    _CALL = {
        0x28: 'call',
        0x6f: 'callvirt',
        0x73: 'newobj',
        0xfe06: 'ldftn',  # really? I'm not so sure
    }

    # Artemis-specific opcodes of unknown meaning, decoded with no operand.
    _ARTEMIS_UNKNOWN = frozenset([
        0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xbb, 0xbd,
        0xb0, 0xbe, 0xb1, 0xbc, 0xaf, 0xfd, 0xff,
    ])

    def __init__(self, filename, namer=None, skip_body=False):
        """Load *filename* and register its debug names with *namer*.

        Args:
            filename: path of the ``.hive.xml`` file to read.
            namer: Namer to add this hive's names to; a fresh one is
                created when omitted.
            skip_body: when true, only the debug metadata is processed and
                ``parse_hive`` is not called.
        """
        # Close the handle as soon as the document has been parsed.
        with open(filename) as fp:
            self.xml = lxml.etree.parse(fp)
        self.assembly = self.xml.getroot().find('Assembly')
        self.name = self.assembly.get('Name')
        self.hive_data = bytearray(b64decode(self.assembly.find('Hive').text))
        self.debug_data = bytearray(b64decode(self.assembly.find('Win32DebugMetadata').text))
        if namer is None:
            namer = Namer()
        self.namer = namer
        self.namer.parse_debug(self.debug_data)
        if not skip_body:
            self.parse_hive()

    def parse_hive(self):
        """Parse the hive body and print a disassembly of each method.

        Populates ``self.dependencies``, ``self.types`` and ``self.methods``.
        Several fields are read only to advance the stream; they keep their
        names here as a record of the (partly unknown) layout.
        """
        data = self.hive_data[8:]  # skip length and HIVE
        unk_dat = le16(data[0x17:0x19])
        num_mod_deps = le16(data[0x19:0x1b])
        num_mod_refs = le16(data[0x1b:0x1d])
        body = StringIO(data[0x2d:])

        # Size of the unknown region that sits between the method refs and
        # the type defs (skipped further down).
        unk_len = le16(body.read(2))
        if unk_dat != 0xffff:  # not sure if this is the correct trigger
            unk_len1 = le16(body.read(2))
            unk_len2 = le16(body.read(2))
            unk_len += 2*unk_len1 + unk_len2

        self.dependencies = []
        for _ in range(num_mod_deps):
            tag = le32(body.read(4))
            flags = body.read(8)
            self.dependencies.append((tag, flags))

        # Module table: (module tag, type count) pairs.  Entries go to the
        # typeref list until the first module tag shows up a second time,
        # after which they go to the typedef list.
        m_typerefs = []
        m_typedefs = []
        active = m_typerefs
        first_module = None
        for _ in range(num_mod_refs):
            module = le24(body.read(3))
            count = ord(body.read(1))
            if first_module is None:
                first_module = module
            elif module == first_module:  # can't find a count/pointer for this changeover
                active = m_typedefs
            active.append((module, count))

        self.types = []
        self.methods = []

        # type refs
        typeref_meths = []
        for module, count in m_typerefs:
            for _ in range(count):
                typetag = le16(body.read(2))
                nmethods = ord(body.read(1))
                nother = ord(body.read(1))
                typeref_meths.append((typetag, nmethods, nother))
                self.types.append(typetag)

        # method refs: first the 'nmethods' entries of every typeref, then
        # the 'nother' entries of every typeref.
        for typetag, nmethods, nother in typeref_meths:
            for _ in range(nmethods):
                methtag = le16(body.read(2))
                self.methods.append((typetag, methtag))
        for typetag, nmethods, nother in typeref_meths:
            for _ in range(nother):
                methtag = le16(body.read(2))
                self.methods.append((typetag, methtag))

        # XXX string tables, or what?
        unknown = body.read(unk_len)

        # type defs
        typedefs = []
        for module, count in m_typedefs:
            for _ in range(count):
                typetag = le16(body.read(2))
                basetype = ord(body.read(1))
                subtype = ord(body.read(1))
                nfields = ord(body.read(1))
                nmethods = ord(body.read(1))
                flags = le16(body.read(2))
                fields = []
                for _ in range(nfields):
                    fields.append(body.read(6))
                self.types.append(typetag)
                typedefs.append((typetag, nmethods))

        # method defs
        methdefs = []
        for typetag, nmethods in typedefs:
            for _ in range(nmethods):
                methtag = le16(body.read(2))
                argflags = ord(body.read(1))
                nargs = argflags & 7
                flags = ord(body.read(1))
                ret_type = ord(body.read(1))
                defs = []
                prebytes = 0
                if argflags & 0x80:
                    # Extended header overrides the packed nargs/prebytes.
                    nargs, prebytes, unk2, ndefs, unk3 = map(ord, body.read(5))
                    for _ in range(ndefs):
                        defs.append(body.read(7))
                else:
                    prebytes = argflags >> 4
                if flags & 0x80:
                    methtype = 'pointer'  # just points to a module with same named/tagged method (4 byte full tag)
                else:
                    methtype = 'actual'
                # NOTE(review): argument types are consumed for both pointer
                # and actual methods here -- confirm pointer defs carry them.
                arg_types = body.read(nargs)
                methdefs.append([typetag, methtag, methtype, prebytes])
                self.methods.append((typetag, methtag))

        # method bodies
        for methdef in methdefs:
            typetag, methtag, methtype, prebytes = methdef
            if methtype == 'pointer':
                methdef.append(le32(body.read(4)))
                continue
            pre = body.read(prebytes)
            length = le16(body.read(2))
            methdef.append(body.read(length))
            print("%d prebytes: %s" % (prebytes, hexlify(pre)))
            print("%s\n\t%s" % (self.namer.method(typetag, methtag), hexlify(methdef[-1])))
            for insn in self.disassemble(methdef[-1]):
                strs = []
                for elem in insn:
                    if isinstance(elem, int):
                        strs.append('0x%x' % elem)
                    else:
                        strs.append(str(elem))
                print("\t".join(strs))

    def disassemble(self, methbody):
        """Decode *methbody* into a list of instructions.

        Each instruction is a list: the offset formatted as '0xNN', the
        hexlified raw bytes, the mnemonic, then any operands.

        Raises:
            ValueError: on an opcode ``disas_one`` does not recognise.
        """
        data = StringIO(methbody)
        insns = []
        while data.tell() < len(methbody):
            start = data.tell()
            decoded = self.disas_one(data)
            raw = data.getvalue()[start:data.tell()]
            insns.append(['0x%02x' % start, hexlify(raw)] + list(decoded))
        return insns

    def disas_one(self, data):
        """Decode one instruction from the stream *data*.

        Returns a tuple of the mnemonic followed by its operands (if any).
        Branch and switch operands are absolute offsets within the method
        body, computed from the position just after the instruction.

        Raises:
            ValueError: if the opcode is not recognised.
        """
        opcode = ord(data.read(1))
        if opcode == 0xfe:
            # 0xfe is a prefix byte: fold in the next byte to form 0xfeXX.
            opcode = (opcode << 8) | ord(data.read(1))

        if opcode in self._NO_ARG:
            return self._NO_ARG[opcode],
        if opcode in self._UINT8_ARG:
            arg = ord(data.read(1))
            return self._UINT8_ARG[opcode], arg
        if opcode == 0xdd:
            arg = le32(data.read(4))
            return 'leave', arg
        if opcode in self._TYPE_ARG:
            arg = ord(data.read(1))
            return self._TYPE_ARG[opcode], self.namer.type(self.types[arg])
        if opcode in self._FIELD_ARG:
            arg = ord(data.read(1))
            return self._FIELD_ARG[opcode], arg
        if opcode in self._BRANCH_SHORT:
            arg = struct.unpack('b', data.read(1))[0]
            loc = data.tell()
            return self._BRANCH_SHORT[opcode], loc + arg
        if opcode in self._BRANCH_LONG:
            arg = struct.unpack('<l', data.read(4))[0]
            loc = data.tell()
            return self._BRANCH_LONG[opcode], loc + arg
        if opcode in self._CALL:
            method_id = ord(data.read(1))
            return self._CALL[opcode], self.namer.method(*self.methods[method_id])
        if opcode == 0x1f:
            arg = struct.unpack('b', data.read(1))[0]
            return 'ldc.i4.s', arg
        if opcode == 0x20:
            arg = struct.unpack('<l', data.read(4))[0]
            return 'ldc.i4', arg
        # this is *definitely* not ldc.r4
        if opcode == 0x22:
            return 'unk22',
        if opcode == 0x23:
            # Must be a 1-tuple like every other return: disassemble() does
            # list(result), which would split a bare string into characters.
            return 'unk23',  # ??
        if opcode == 0xd0:
            arg = ord(data.read(1))  # ??
            return 'ldtoken', arg
        if opcode == 0x24:  # Artemis custom - multiple ldarg
            # The operand is a bitmask; bit i set means "load argument i".
            arg = ord(data.read(1))
            ldargs = [i for i in range(8) if arg & (1 << i)]
            return 'ldarg', ','.join(map(str, ldargs))
        if opcode == 0xf0:  # Artemis unknown
            arg = data.read(3)
            return 'unkF0', hexlify(arg)
        if opcode in self._ARTEMIS_UNKNOWN:
            return 'unk%2X' % opcode,
        if opcode == 0x45:  # switch
            count = le32(data.read(4))
            # Offsets are relative to the end of the jump table.
            loc = data.tell() + 4*count
            targets = []
            for _ in range(count):
                offset = struct.unpack('<l', data.read(4))[0]
                targets.append(loc + offset)
            return 'switch', targets
        raise ValueError("Unhandled opcode: 0x%02x" % opcode)

    def dump_hive(self):
        """Print the module dependencies with their raw flag bytes in hex."""
        print("Dependencies:")
        for tag, flags in self.dependencies:
            print(" %s\t%s" % (self.namer.module(tag), hexlify(flags)))

    def dump_tables(self):
        """Print the type and method tables with the index bytecode uses."""
        print("Types:")
        for i, typetag in enumerate(self.types):
            print(" %02X %s" % (i, self.namer.type(typetag)))
        print("Methods:")
        for i, (typetag, methtag) in enumerate(self.methods):
            print(" %02X %s" % (i, self.namer.method(typetag, methtag)))

    def dump(self):
        """Print the assembly name, its dependencies and its tables."""
        print("Assembly: %s" % self.name)
        self.dump_hive()
        self.dump_tables()
if __name__ == "__main__":
    import sys
    import glob

    # Fail fast with a usage message instead of an IndexError raised only
    # after every hive in the directory has already been scanned.
    if len(sys.argv) < 2:
        sys.exit("usage: %s TARGET.hive.xml" % sys.argv[0])

    # First pass: harvest debug names from every hive in the current
    # directory so cross-module references in the target can be resolved.
    namer = Namer()
    for ff in glob.glob('*.hive.xml'):
        Hive(ff, namer=namer, skip_body=True)

    # Second pass: fully parse (and disassemble) the requested hive.
    hive = Hive(sys.argv[1], namer=namer)
    hive.dump()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment