abrasive/get_honey.py Secret

## get_honey.py
#!/usr/bin/env python2

# A crude parser for Artemis SAM hives
# Currently effective on legacyconfigcardprogrammerapp.hive.xml
#
# Plonk it in a folder with as many .hive.xml as you can scrape from your
# installation and supply the target hive as argument.

# Types and methods are referred to in code by a single byte.
# A 0-indexed table is referred to for each, containing first all the
# typerefs and then all the typedefs (similarly for methrefs/methdefs).
# So if there are 5 typerefs then type 07 is the third entry in the typedefs.

import lxml.etree
from base64 import b64decode
import struct
from binascii import hexlify
from StringIO import StringIO

def ntohs(data):
    """Decode ``data`` as one big-endian (network order) unsigned 16-bit int.

    ``data`` must be exactly two bytes; ``struct.unpack`` raises
    ``struct.error`` otherwise.
    """
    (value,) = struct.unpack('>H', data)
    return value
def ntohl(data):
    """Decode ``data`` as one big-endian (network order) unsigned 32-bit int.

    ``data`` must be exactly four bytes; ``struct.unpack`` raises
    ``struct.error`` otherwise.
    """
    (value,) = struct.unpack('>L', data)
    return value

def le16(data):
    """Decode ``data`` as one little-endian unsigned 16-bit int.

    ``data`` must be exactly two bytes; ``struct.unpack`` raises
    ``struct.error`` otherwise.
    """
    (value,) = struct.unpack('<H', data)
    return value

def le24(data):
    """Decode ``data`` as one little-endian unsigned 24-bit int.

    ``struct`` has no 3-byte format, so the value is zero-extended with one
    high byte and decoded as a 32-bit little-endian integer.  ``data`` must
    be exactly three bytes; ``struct.unpack`` raises ``struct.error``
    otherwise.
    """
    # Pad with a *bytes* literal: identical to '\0' on Python 2, and it keeps
    # the concatenation valid for bytes/bytearray input where str is text.
    (value,) = struct.unpack('<L', data + b'\0')
    return value

def le32(data):
    """Decode ``data`` as one little-endian unsigned 32-bit int.

    ``data`` must be exactly four bytes; ``struct.unpack`` raises
    ``struct.error`` otherwise.
    """
    (value,) = struct.unpack('<L', data)
    return value

class Namer(object):
    """Symbol table mapping numeric module/type/method tags to names.

    Names are collected from the debug metadata of one or more hives via
    ``parse_debug``.  Looking up a tag that has no recorded name yields a
    placeholder built from the tag value, so lookups never fail.
    """

    def __init__(self):
        # module tag -> module name; seeded with the one tag known up front.
        self.module_names = {
                0x5424ba: 'mscorlib',
            }

        # typetag -> "module::type"
        self.type_names = {}
        # (typetag << 16 | methtag) -> "module::type.method"
        self.method_names = {}

    # XXX TODO: each type/method tag ties all the way back to a module. type 'em strong
    def module(self, modtag):
        """Return the name recorded for ``modtag``, or ``mXXXXXX`` if unknown.

        Only the low 24 bits of ``modtag`` take part in the lookup.
        """
        tag = modtag & 0xffffff
        return self.module_names.get(tag, 'm%06x' % tag)

    def type(self, typetag):
        """Return the name recorded for ``typetag``, or ``tXXXX`` if unknown."""
        return self.type_names.get(typetag, 't%04x' % typetag)

    def method(self, typetag, methtag):
        """Return the name recorded for the method, or ``fXXXX.XXXX`` if unknown.

        Methods are keyed by the type tag in the high 16 bits and the method
        tag in the low 16 bits.
        """
        tag = typetag<<16 | methtag
        return self.method_names.get(tag, 'f%04x.%04x' % (typetag, methtag))

    def parse_debug(self, debug_data):
        """Merge the names found in a debug metadata blob into this table.

        The blob holds three consecutive sections -- modules, types, methods.
        Each section is a big-endian 16-bit entry count followed by that many
        records of the form ``tag, 16-bit big-endian length, name``:

        * module tag: 4 bytes
        * type tag:   4-byte module tag + 2-byte type tag
        * method tag: 2-byte type tag + 2-byte method tag

        Raises ``KeyError`` if a type names a module, or a method names a
        type, that has not been recorded yet.  Any bytes left over after the
        three sections are reported on stdout.
        """
        data = StringIO(debug_data)

        def take_tlv(data, tagsize):
            # One record: fixed-size tag, 2-byte length, then the value.
            tag = data.read(tagsize)
            length = ntohs(data.read(2))
            value = data.read(length)

            return tag, value

        nmodules = ntohs(data.read(2))
        for i in range(nmodules):
            tag, name = take_tlv(data, 4)

            # NOTE(review): stored under the full 32-bit tag, whereas
            # module() masks to 24 bits before looking up -- confirm the top
            # byte is always zero in debug metadata.
            self.module_names[ntohl(tag)] = name

        ntypes = ntohs(data.read(2))
        for i in range(ntypes):
            tag, name = take_tlv(data, 6)

            modtag = ntohl(tag[:4])
            modname = self.module_names[modtag]
            typetag = ntohs(tag[4:])
            self.type_names[typetag] = modname + "::" + name

        nmethods = ntohs(data.read(2))
        for i in range(nmethods):
            tag, name = take_tlv(data, 4)

            typetag = ntohs(tag[:2])
            typename = self.type_names[typetag]
            methtag = ntohs(tag[2:])
            tag = typetag<<16 | methtag
            self.method_names[tag] = typename + "." + name

        # Read the remainder once and dump *those bytes*: hexlify() needs the
        # data itself, not the stream object, and a second read() would come
        # back empty.
        trailing = data.read()
        if trailing:
            print("Trailing data in debug stream: %s" % hexlify(trailing))

    def dump(self):
        """Print every known module, type and method name, sorted by tag."""
        print("Modules:")
        for tag, name in sorted(self.module_names.items()):
            print("  %06x %s" % (tag, name))

        print("Types:")
        for tag, name in sorted(self.type_names.items()):
            print("  %04x %s" % (tag, name))

        print("Methods:")
        for tag, name in sorted(self.method_names.items()):
            print("  %04x.%04x %s" % (tag>>16, tag&0xffff, name))

class Hive(object):
    """Parser and disassembler for one Artemis SAM ``.hive.xml`` assembly.

    Construction loads the XML file, base64-decodes the ``Hive`` and
    ``Win32DebugMetadata`` children of its ``Assembly`` element, feeds the
    debug metadata to a ``Namer`` and, unless ``skip_body`` is set, parses the
    hive body (which prints a disassembly of every method).

    After ``parse_hive`` has run the instance exposes:

    * ``dependencies`` -- list of ``(module tag, 8 raw flag bytes)``
    * ``types``        -- type tags: typerefs first, then typedefs
    * ``methods``      -- ``(typetag, methtag)``: methrefs first, then methdefs

    Bytecode refers to types and methods by a one-byte index into ``types``
    and ``methods``.
    """

    # Opcode tables.  They are constant, so they live on the class instead of
    # being rebuilt for every decoded instruction.  Two-byte opcodes (0xfe
    # prefix) are keyed as 0xfeXX.

    # Opcodes with no operand.
    _NO_ARG = {
        0x77: 'inc',    # Artemis custom: add 1 to top of stack

        0x00: 'nop',
        0x01: 'break',
        0x02: 'ldarg.0',
        0x03: 'ldarg.1',
        0x04: 'ldarg.2',
        0x05: 'ldarg.3',
        0x06: 'ldloc.0',
        0x07: 'ldloc.1',
        0x08: 'ldloc.2',
        0x09: 'ldloc.3',
        0x0a: 'stloc.0',
        0x0b: 'stloc.1',
        0x0c: 'stloc.2',
        0x0d: 'stloc.3',

        0x14: 'ldnull',
        0x15: 'ldc.i4.m1',
        0x16: 'ldc.i4.0',
        0x17: 'ldc.i4.1',
        0x18: 'ldc.i4.2',
        0x19: 'ldc.i4.3',
        0x1a: 'ldc.i4.4',
        0x1b: 'ldc.i4.5',
        0x1c: 'ldc.i4.6',
        0x1d: 'ldc.i4.7',
        0x1e: 'ldc.i4.8',
        0x25: 'dup',
        0x26: 'pop',
        0x2a: 'ret',
        0x46: 'ldind.i1',
        0x47: 'ldind.u1',
        0x48: 'ldind.i2',
        0x49: 'ldind.u2',
        0x4a: 'ldind.i4',
        0x4b: 'ldind.u4',
        0x4c: 'ldind.i8',
        0x4d: 'ldind.i',
        0x4e: 'ldind.r4',
        0x4f: 'ldind.r8',
        0x50: 'ldind.ref',
        0x51: 'stind.ref',
        0x52: 'stind.i1',
        0x53: 'stind.i2',
        0x54: 'stind.i4',
        0x55: 'stind.i8',
        0x56: 'stind.r4',
        0x57: 'stind.r8',
        0x58: 'add',
        0x59: 'sub',
        0x5a: 'mul',
        0x5b: 'div',
        0x5c: 'div.un',
        0x5d: 'rem',
        0x5e: 'rem.un',
        0x5f: 'and',
        0x60: 'or',
        0x61: 'xor',
        0x62: 'shl',
        0x63: 'shr',
        0x64: 'shr.un',
        0x65: 'neg',
        0x66: 'not',
        0x67: 'conv.i1',
        0x68: 'conv.i2',
        0x69: 'conv.i4',
        0x6a: 'conv.i8',
        0x6b: 'conv.r4',
        0x6c: 'conv.r8',
        0x6d: 'conv.u4',
        0x6e: 'conv.u8',
        0x76: 'conv.r.un',
        0x7a: 'throw',
        0x82: 'conv.ovf.i1.un',
        0x83: 'conv.ovf.i2.un',
        0x84: 'conv.ovf.i4.un',
        0x85: 'conv.ovf.i8.un',
        0x86: 'conv.ovf.u1.un',
        0x87: 'conv.ovf.u2.un',
        0x88: 'conv.ovf.u4.un',
        0x89: 'conv.ovf.u8.un',
        0x8a: 'conv.ovf.i.un',
        0x8b: 'conv.ovf.u.un',
        0x8e: 'ldlen',
        0x90: 'ldelem.i1',
        0x91: 'ldelem.u1',
        0x92: 'ldelem.i2',
        0x93: 'ldelem.u2',
        0x94: 'ldelem.i4',
        0x95: 'ldelem.u4',
        0x96: 'ldelem.i8',
        0x97: 'ldelem.i',
        0x98: 'ldelem.r4',
        0x99: 'ldelem.r8',
        0x9a: 'ldelem.ref',
        0x9b: 'stelem.i',
        0x9c: 'stelem.i1',
        0x9d: 'stelem.i2',
        0x9e: 'stelem.i4',
        0x9f: 'stelem.i8',
        0xa0: 'stelem.r4',
        0xa1: 'stelem.r8',
        0xa2: 'stelem.ref',
        0xb3: 'conv.ovf.i1',
        0xb4: 'conv.ovf.u1',
        0xb5: 'conv.ovf.i2',
        0xb6: 'conv.ovf.u2',
        0xb7: 'conv.ovf.i4',
        0xb8: 'conv.ovf.u4',
        0xb9: 'conv.ovf.i8',
        0xba: 'conv.ovf.u8',
        0xc3: 'ckfinite',
        0xd1: 'conv.u2',
        0xd2: 'conv.u1',
        0xd3: 'conv.i',
        0xd4: 'conv.ovf.i',
        0xd5: 'conv.ovf.u',
        0xd6: 'add.ovf',
        0xd7: 'add.ovf.un',
        0xd8: 'mul.ovf',
        0xd9: 'mul.ovf.un',
        0xda: 'sub.ovf',
        0xdb: 'sub.ovf.un',
        0xdc: 'endfinally',
        0xdf: 'stind.i',
        0xe0: 'conv.u',
        0xfe01: 'ceq',
        0xfe02: 'cgt',
        0xfe03: 'cgt.un',
        0xfe04: 'clt',
        0xfe05: 'clt.un',
        0xfe1a: 'rethrow',
    }

    # Opcodes followed by one unsigned byte, reported as-is.
    _UINT8_ARG = {
        0x0e: 'ldarg.s',
        0x0f: 'ldarga.s',
        0x10: 'starg.s',
        0x11: 'ldloc.s',
        0x12: 'ldloca.s',
        0x13: 'stloc.s',
        0xde: 'leave.s',
    }

    # Opcodes followed by a one-byte index into self.types.
    _TYPE_ARG = {
        0x8d: 'newarr',
        0xfe15: 'initobj',
        0xfe16: 'constrained.',
        0x74: 'castclass',
        0x70: 'cpobj',
        0x71: 'ldobj',
        0x81: 'stobj',
        0x8c: 'box',
        0x79: 'unbox',
    }

    # Opcodes followed by a one-byte field operand, reported as-is.
    _FIELD_ARG = {
        0x7b: 'ldfld',
        0x7c: 'ldflda',
        0x7d: 'stfld',
        0x7e: 'ldsfld',
        0x7f: 'ldsflda',
        0x80: 'stsfld',
    }

    # Branches with a signed 8-bit relative offset.
    _BRANCH_SHORT = {
        0x2b: 'br.s',
        0x2c: 'brfalse.s',
        0x2d: 'brtrue.s',
        0x2e: 'beq.s',
        0x2f: 'bge.s',
        0x30: 'bgt.s',
        0x31: 'ble.s',
        0x32: 'blt.s',
        0x33: 'bne.un.s',
        0x34: 'bge.un.s',
        0x35: 'bgt.un.s',
        0x36: 'ble.un.s',
        0x37: 'blt.un.s',
    }

    # Branches with a signed 32-bit little-endian relative offset.
    _BRANCH_LONG = {
        0x38: 'br',
        0x39: 'brfalse',
        0x3a: 'brtrue',
        0x3b: 'beq',
        0x3c: 'bge',
        0x3d: 'bgt',
        0x3e: 'ble',
        0x3f: 'blt',
        0x40: 'bne.un',
        0x41: 'bge.un',
        0x42: 'bgt.un',
        0x43: 'ble.un',
        0x44: 'blt.un',
    }

    # Reference only: this table is never consulted by the decoder.  Some of
    # its entries are in fact decoded by dedicated branches in disas_one.
    _UNHANDLED = {
        0x21: 'ldc.i8',
        0x23: 'ldc.r8',
        0x27: 'jmp',
        0x28: 'call',
        0x29: 'calli',
        0x45: 'switch',
        0x6f: 'callvirt',
        0x72: 'ldstr',
        0x75: 'isinst',
        0x8f: 'ldelema',
        0xc2: 'refanyval',
        0xc6: 'mkrefany',
        0xd0: 'ldtoken',
        0xfe00: 'arglist',
        0xfe07: 'ldvirtftn',
        0xfe09: 'ldarg',
        0xfe0a: 'ldarga',
        0xfe0b: 'starg',
        0xfe0c: 'ldloc',
        0xfe0d: 'ldloca',
        0xfe0e: 'stloc',
        0xfe0f: 'localloc',
        0xfe11: 'endfilter',
        0xfe12: 'unaligned.',
        0xfe13: 'volatile.',
        0xfe14: 'tail.',
        0xfe17: 'cpblk',
        0xfe18: 'initblk',
        0xfe1c: 'sizeof',
        0xfe1d: 'refanytype',
    }

    # Opcodes followed by a one-byte index into self.methods.
    _CALL = {
        0x28: 'call',
        0x6f: 'callvirt',
        0x73: 'newobj',
        0xfe06: 'ldftn',    # really? I'm not so sure
    }

    # Opcodes of unknown meaning that are decoded as taking no operand.
    _ARTEMIS_UNKNOWN = frozenset([
        0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xbb, 0xbd, 0xb0, 0xbe, 0xb1, 0xbc, 0xaf, 0xfd, 0xff
    ])

    def __init__(self, filename, namer=None, skip_body=False):
        """Load ``filename`` and register its debug names with ``namer``.

        filename  -- path of the ``.hive.xml`` file
        namer     -- ``Namer`` to extend; a fresh one is created when omitted
        skip_body -- when true, only the debug metadata is processed and
                     ``parse_hive`` is not called
        """
        # The parse reads the whole document, so the handle can be released
        # straight away instead of being left open for the object's lifetime.
        with open(filename, 'rb') as fp:
            self.xml = lxml.etree.parse(fp)

        self.assembly = self.xml.getroot().find('Assembly')

        self.name = self.assembly.get('Name')

        self.hive_data = bytearray(b64decode(self.assembly.find('Hive').text))
        self.debug_data = bytearray(b64decode(self.assembly.find('Win32DebugMetadata').text))

        if namer is None:
            namer = Namer()
        self.namer = namer

        self.namer.parse_debug(self.debug_data)

        if not skip_body:
            self.parse_hive()

    def parse_hive(self):
        """Parse the binary hive body and print a disassembly of every method.

        Populates ``self.dependencies``, ``self.types`` and ``self.methods``.
        Several fields are read only to keep the stream position correct;
        those whose meaning is not known carry ``unk`` names.
        """
        data = self.hive_data[8:]   # skip length and HIVE

        # Fixed-offset header fields (offsets are relative to ``data``).
        unk_dat = le16(data[0x17:0x19])
        num_mod_deps = le16(data[0x19:0x1b])
        num_mod_refs = le16(data[0x1b:0x1d])
        body = StringIO(data[0x2d:])

        # Length of the opaque region skipped after the method refs.
        unk_len = le16(body.read(2))

        if unk_dat != 0xffff:   # not sure if this is the correct trigger
            unk_len1 = le16(body.read(2))
            unk_len2 = le16(body.read(2))
            unk_len += 2*unk_len1 + unk_len2

        self.dependencies = []
        for i in range(num_mod_deps):
            tag = le32(body.read(4))
            flags = body.read(8)
            self.dependencies.append((tag, flags))

        # Module table: (24-bit module tag, type count) entries.  Entries up
        # to the first repeat of the first module's tag describe typerefs;
        # from that repeat onwards they describe typedefs.
        m_typerefs = []
        m_typedefs = []
        active = m_typerefs
        first_module = None
        for i in range(num_mod_refs):
            module = le24(body.read(3))
            count = ord(body.read(1))

            if first_module is None:
                first_module = module
            elif module == first_module:    # can't find a count/pointer for this changeover
                active = m_typedefs

            active.append((module, count))

        self.types = []
        self.methods = []

        # type refs: tag plus two per-type method counts
        typeref_meths = []
        for module, count in m_typerefs:
            for i in range(count):
                typetag = le16(body.read(2))
                nmethods = ord(body.read(1))
                nother = ord(body.read(1))
                typeref_meths.append((typetag, nmethods, nother))

                self.types.append(typetag)

        # method refs: first the ``nmethods`` group of every type, then the
        # ``nother`` group of every type
        for typetag, nmethods, nother in typeref_meths:
            for i in range(nmethods):
                methtag = le16(body.read(2))
                self.methods.append((typetag, methtag))

        for typetag, nmethods, nother in typeref_meths:
            for i in range(nother):
                methtag = le16(body.read(2))
                self.methods.append((typetag, methtag))

        # XXX string tables, or what?
        unknown = body.read(unk_len)

        # type defs
        typedefs = []
        for module, count in m_typedefs:
            for i in range(count):
                typetag = le16(body.read(2))
                basetype = ord(body.read(1))
                subtype = ord(body.read(1))
                nfields = ord(body.read(1))
                nmethods = ord(body.read(1))
                flags = le16(body.read(2))

                # 6-byte field records; read but not decoded
                fields = []
                for j in range(nfields):
                    fields.append(body.read(6))

                self.types.append(typetag)
                typedefs.append((typetag, nmethods))

        # method defs
        methdefs = []
        for typetag, nmethods in typedefs:
            for i in range(nmethods):
                methtag = le16(body.read(2))
                argflags = ord(body.read(1))
                nargs = argflags & 7
                flags = ord(body.read(1))
                ret_type = ord(body.read(1))

                defs = []
                prebytes = 0
                if argflags & 0x80:
                    # Extended header: five single-byte fields replace the
                    # counts packed into ``argflags``, followed by ``ndefs``
                    # 7-byte records.
                    nargs, prebytes, unk2, ndefs, unk3 = struct.unpack('5B', body.read(5))
                    for j in range(ndefs):
                        defs.append(body.read(7))
                else:
                    prebytes = argflags >> 4

                if flags & 0x80:
                    methtype = 'pointer'    # just points to a module with same named/tagged method (4 byte full tag)
                else:
                    methtype = 'actual'

                arg_types = body.read(nargs)

                methdefs.append([typetag, methtag, methtype, prebytes])
                self.methods.append((typetag, methtag))

        # method bodies: each methdef list gains a fifth element -- the
        # 32-bit tag for a 'pointer' method, the raw bytecode otherwise
        for methdef in methdefs:
            typetag, methtag, methtype, prebytes = methdef

            if methtype == 'pointer':
                methdef.append(le32(body.read(4)))
                continue

            pre = body.read(prebytes)

            length = le16(body.read(2))
            methdef.append(body.read(length))

            print("%d prebytes: %s" % (prebytes, hexlify(pre)))

            print("%s\n\t%s" % (self.namer.method(typetag, methtag), hexlify(methdef[-1])))

            for insn in self.disassemble(methdef[-1]):
                strs = []
                for elem in insn:
                    if isinstance(elem, int):
                        strs.append('0x%x' % elem)
                    else:
                        strs.append(str(elem))

                print("\t".join(strs))

    def disassemble(self, methbody):
        """Disassemble ``methbody`` into a list of instruction rows.

        Each row is a list: the instruction offset as a ``0x..`` string, the
        hex dump of the instruction bytes, the mnemonic, then any operands.
        """
        data = StringIO(methbody)
        insns = []
        while data.tell() < len(methbody):
            i_addr = data.tell()
            insn = self.disas_one(data)

            i_end = data.tell()
            i_bytes = data.getvalue()[i_addr:i_end]

            insns.append(['0x%02x' % i_addr, hexlify(i_bytes)] + list(insn))

        return insns


    def disas_one(self, data):
        """Decode one instruction from the byte stream ``data``.

        ``data`` must be positioned at an opcode and is advanced past the
        instruction.  Returns a tuple: the mnemonic followed by any decoded
        operands.  Branch and switch operands are converted from relative
        offsets to absolute offsets within the stream.

        Raises ``ValueError`` for an opcode no branch below recognises.
        """
        opcode = ord(data.read(1))
        if opcode == 0xfe:
            # 0xfe is a prefix byte: the opcode is the two-byte value 0xfeXX.
            opcode <<= 8
            opcode |= ord(data.read(1))

        if opcode in self._NO_ARG:
            return self._NO_ARG[opcode],

        if opcode in self._UINT8_ARG:
            arg = ord(data.read(1))
            return self._UINT8_ARG[opcode], arg

        if opcode == 0xdd:
            arg = le32(data.read(4))
            return 'leave', arg

        if opcode in self._TYPE_ARG:
            arg = ord(data.read(1))
            return self._TYPE_ARG[opcode], self.namer.type(self.types[arg])

        if opcode in self._FIELD_ARG:
            arg = ord(data.read(1))
            return self._FIELD_ARG[opcode], arg

        if opcode in self._BRANCH_SHORT:
            arg = struct.unpack('b', data.read(1))[0]
            # Offsets are relative to the end of the branch instruction.
            loc = data.tell()
            return self._BRANCH_SHORT[opcode], loc + arg

        if opcode in self._BRANCH_LONG:
            arg = struct.unpack('<l', data.read(4))[0]
            loc = data.tell()
            return self._BRANCH_LONG[opcode], loc + arg

        if opcode in self._CALL:
            method_id = ord(data.read(1))
            return self._CALL[opcode], self.namer.method(*self.methods[method_id])

        if opcode == 0x1f:
            arg = struct.unpack('b', data.read(1))[0]
            return 'ldc.i4.s', arg

        if opcode == 0x20:
            arg = struct.unpack('<l', data.read(4))[0]
            return 'ldc.i4', arg

        # this is *definitely* not ldc.r4
        if opcode == 0x22:
            return 'unk22',

        if opcode == 0x23:
            # Trailing comma matters: every branch must return a tuple, since
            # disassemble() does list(result) and a bare string would be
            # split into single characters.
            return 'unk23',  # ??

        if opcode == 0xd0:
            arg = ord(data.read(1))  # ??
            return 'ldtoken', arg

        if opcode == 0x24:  # Artemis custom - multiple ldarg
            # The operand is a bitmask: bit i set means "load argument i".
            arg = ord(data.read(1))
            ldargs = []
            for i in range(8):
                if arg & (1<<i):
                    ldargs.append(i)

            return 'ldarg', ','.join(map(str, ldargs))

        if opcode == 0xf0:  # Artemis unknown
            arg = data.read(3)
            return 'unkF0', hexlify(arg)

        if opcode in self._ARTEMIS_UNKNOWN:
            return 'unk%2X' % opcode,

        if opcode == 0x45:  # switch
            count = le32(data.read(4))
            # Targets are relative to the end of the whole jump table.
            loc = data.tell() + 4*count
            targets = []
            for i in range(count):
                offset = struct.unpack('<l', data.read(4))[0]
                targets.append(loc + offset)

            return 'switch', targets

        raise ValueError("Unhandled opcode: 0x%02x" % opcode)

    def dump_hive(self):
        """Print each module dependency with its raw flag bytes in hex."""
        print("Dependencies:")
        for tag, flags in self.dependencies:
            print("  %s\t%s" % (self.namer.module(tag), hexlify(flags)))

    def dump_tables(self):
        """Print the type and method tables with their one-byte indices."""
        print("Types:")

        for i, typetag in enumerate(self.types):
            print("  %02X  %s" % (i, self.namer.type(typetag)))

        print("Methods:")

        for i, (typetag, methtag) in enumerate(self.methods):
            print("  %02X  %s" % (i, self.namer.method(typetag, methtag)))

    def dump(self):
        """Print the assembly name, its dependencies and its tables."""
        print("Assembly: %s" % self.name)
        self.dump_hive()
        self.dump_tables()

if __name__ == "__main__":
    import sys
    import glob

    # The target hive is a required argument; without this check a missing
    # argument surfaced as a bare IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit("usage: %s TARGET.hive.xml" % sys.argv[0])

    namer = Namer()

    # First pass: read only the debug metadata of every hive in the current
    # directory so the shared Namer knows as many names as possible.
    for ff in glob.glob('*.hive.xml'):
        Hive(ff, namer=namer, skip_body=True)

    # Second pass: fully parse (and disassemble) the requested hive, then
    # print its dependency, type and method tables.
    hive = Hive(sys.argv[1], namer=namer)
    hive.dump()
	#!/usr/bin/env python2

	# A crude parser for Artemis SAM hives
	# Currently effective on legacyconfigcardprogrammerapp.hive.xml
	#
	# Plonk it in a folder with as many .hive.xml as you can scrape from your
	# installation and supply the target hive as argument.

	# Types and methods are referred to in code by a single byte.
	# A 0-indexed table is referred to for each, containing first all the
	# typerefs and then all the typedefs (similarly for methrefs/methdefs).
	# So if there are 5 typerefs then type 07 is the third entry in the typedefs.

	import lxml.etree
	from base64 import b64decode
	import struct
	from binascii import hexlify
	from StringIO import StringIO

	def ntohs(data):
	    """Decode ``data`` as one big-endian (network order) unsigned 16-bit int.

	    ``data`` must be exactly two bytes; ``struct.unpack`` raises
	    ``struct.error`` otherwise.
	    """
	    return struct.unpack('>H', data)[0]
	def ntohl(data):
	    """Decode ``data`` as one big-endian (network order) unsigned 32-bit int.

	    ``data`` must be exactly four bytes; ``struct.unpack`` raises
	    ``struct.error`` otherwise.
	    """
	    return struct.unpack('>L', data)[0]

	def le16(data):
	    """Decode ``data`` as one little-endian unsigned 16-bit int.

	    ``data`` must be exactly two bytes; ``struct.unpack`` raises
	    ``struct.error`` otherwise.
	    """
	    return struct.unpack('<H', data)[0]

	def le24(data):
	    """Decode ``data`` as one little-endian unsigned 24-bit int.

	    ``struct`` has no 3-byte format, so the value is zero-extended with one
	    high byte and decoded as a 32-bit little-endian integer.  ``data`` must
	    be exactly three bytes; ``struct.unpack`` raises ``struct.error``
	    otherwise.
	    """
	    # Pad with a *bytes* literal: identical to '\0' on Python 2, and it
	    # keeps the concatenation valid for bytes/bytearray input.
	    return struct.unpack('<L', data + b'\0')[0]

	def le32(data):
	    """Decode ``data`` as one little-endian unsigned 32-bit int.

	    ``data`` must be exactly four bytes; ``struct.unpack`` raises
	    ``struct.error`` otherwise.
	    """
	    return struct.unpack('<L', data)[0]

	class Namer(object):
	    """Symbol table mapping numeric module/type/method tags to names.

	    Names are collected from the debug metadata of one or more hives via
	    ``parse_debug``.  Looking up a tag that has no recorded name yields a
	    placeholder built from the tag value, so lookups never fail.
	    """

	    def __init__(self):
	        # module tag -> module name; seeded with the one tag known up front.
	        self.module_names = {
	                0x5424ba: 'mscorlib',
	            }

	        # typetag -> "module::type"
	        self.type_names = {}
	        # (typetag << 16 | methtag) -> "module::type.method"
	        self.method_names = {}

	    # XXX TODO: each type/method tag ties all the way back to a module. type 'em strong
	    def module(self, modtag):
	        """Return the name recorded for ``modtag``, or ``mXXXXXX`` if unknown.

	        Only the low 24 bits of ``modtag`` take part in the lookup.
	        """
	        tag = modtag & 0xffffff
	        return self.module_names.get(tag, 'm%06x' % tag)

	    def type(self, typetag):
	        """Return the name recorded for ``typetag``, or ``tXXXX`` if unknown."""
	        return self.type_names.get(typetag, 't%04x' % typetag)

	    def method(self, typetag, methtag):
	        """Return the name recorded for the method, or ``fXXXX.XXXX`` if unknown.

	        Methods are keyed by the type tag in the high 16 bits and the method
	        tag in the low 16 bits.
	        """
	        tag = typetag<<16 | methtag
	        return self.method_names.get(tag, 'f%04x.%04x' % (typetag, methtag))

	    def parse_debug(self, debug_data):
	        """Merge the names found in a debug metadata blob into this table.

	        The blob holds three consecutive sections -- modules, types, methods.
	        Each section is a big-endian 16-bit entry count followed by that many
	        records of the form ``tag, 16-bit big-endian length, name``:

	        * module tag: 4 bytes
	        * type tag:   4-byte module tag + 2-byte type tag
	        * method tag: 2-byte type tag + 2-byte method tag

	        Raises ``KeyError`` if a type names a module, or a method names a
	        type, that has not been recorded yet.  Any bytes left over after the
	        three sections are reported on stdout.
	        """
	        data = StringIO(debug_data)

	        def take_tlv(data, tagsize):
	            # One record: fixed-size tag, 2-byte length, then the value.
	            tag = data.read(tagsize)
	            length = ntohs(data.read(2))
	            value = data.read(length)

	            return tag, value

	        nmodules = ntohs(data.read(2))
	        for i in range(nmodules):
	            tag, name = take_tlv(data, 4)

	            # NOTE(review): stored under the full 32-bit tag, whereas
	            # module() masks to 24 bits before looking up -- confirm the top
	            # byte is always zero in debug metadata.
	            self.module_names[ntohl(tag)] = name

	        ntypes = ntohs(data.read(2))
	        for i in range(ntypes):
	            tag, name = take_tlv(data, 6)

	            modtag = ntohl(tag[:4])
	            modname = self.module_names[modtag]
	            typetag = ntohs(tag[4:])
	            self.type_names[typetag] = modname + "::" + name

	        nmethods = ntohs(data.read(2))
	        for i in range(nmethods):
	            tag, name = take_tlv(data, 4)

	            typetag = ntohs(tag[:2])
	            typename = self.type_names[typetag]
	            methtag = ntohs(tag[2:])
	            tag = typetag<<16 | methtag
	            self.method_names[tag] = typename + "." + name

	        # Read the remainder once and dump *those bytes*: hexlify() needs the
	        # data itself, not the stream object, and a second read() would come
	        # back empty.
	        trailing = data.read()
	        if trailing:
	            print("Trailing data in debug stream: %s" % hexlify(trailing))

	    def dump(self):
	        """Print every known module, type and method name, sorted by tag."""
	        print("Modules:")
	        for tag, name in sorted(self.module_names.items()):
	            print(" %06x %s" % (tag, name))

	        print("Types:")
	        for tag, name in sorted(self.type_names.items()):
	            print(" %04x %s" % (tag, name))

	        print("Methods:")
	        for tag, name in sorted(self.method_names.items()):
	            print(" %04x.%04x %s" % (tag>>16, tag&0xffff, name))

	class Hive(object):
	    """Parser and disassembler for one Artemis SAM ``.hive.xml`` assembly.

	    Construction loads the XML file, base64-decodes the ``Hive`` and
	    ``Win32DebugMetadata`` children of its ``Assembly`` element, feeds the
	    debug metadata to a ``Namer`` and, unless ``skip_body`` is set, parses the
	    hive body (which prints a disassembly of every method).

	    After ``parse_hive`` has run the instance exposes:

	    * ``dependencies`` -- list of ``(module tag, 8 raw flag bytes)``
	    * ``types``        -- type tags: typerefs first, then typedefs
	    * ``methods``      -- ``(typetag, methtag)``: methrefs first, then methdefs

	    Bytecode refers to types and methods by a one-byte index into ``types``
	    and ``methods``.
	    """

	    # Opcode tables.  They are constant, so they live on the class instead of
	    # being rebuilt for every decoded instruction.  Two-byte opcodes (0xfe
	    # prefix) are keyed as 0xfeXX.

	    # Opcodes with no operand.
	    _NO_ARG = {
	        0x77: 'inc',    # Artemis custom: add 1 to top of stack

	        0x00: 'nop',
	        0x01: 'break',
	        0x02: 'ldarg.0',
	        0x03: 'ldarg.1',
	        0x04: 'ldarg.2',
	        0x05: 'ldarg.3',
	        0x06: 'ldloc.0',
	        0x07: 'ldloc.1',
	        0x08: 'ldloc.2',
	        0x09: 'ldloc.3',
	        0x0a: 'stloc.0',
	        0x0b: 'stloc.1',
	        0x0c: 'stloc.2',
	        0x0d: 'stloc.3',

	        0x14: 'ldnull',
	        0x15: 'ldc.i4.m1',
	        0x16: 'ldc.i4.0',
	        0x17: 'ldc.i4.1',
	        0x18: 'ldc.i4.2',
	        0x19: 'ldc.i4.3',
	        0x1a: 'ldc.i4.4',
	        0x1b: 'ldc.i4.5',
	        0x1c: 'ldc.i4.6',
	        0x1d: 'ldc.i4.7',
	        0x1e: 'ldc.i4.8',
	        0x25: 'dup',
	        0x26: 'pop',
	        0x2a: 'ret',
	        0x46: 'ldind.i1',
	        0x47: 'ldind.u1',
	        0x48: 'ldind.i2',
	        0x49: 'ldind.u2',
	        0x4a: 'ldind.i4',
	        0x4b: 'ldind.u4',
	        0x4c: 'ldind.i8',
	        0x4d: 'ldind.i',
	        0x4e: 'ldind.r4',
	        0x4f: 'ldind.r8',
	        0x50: 'ldind.ref',
	        0x51: 'stind.ref',
	        0x52: 'stind.i1',
	        0x53: 'stind.i2',
	        0x54: 'stind.i4',
	        0x55: 'stind.i8',
	        0x56: 'stind.r4',
	        0x57: 'stind.r8',
	        0x58: 'add',
	        0x59: 'sub',
	        0x5a: 'mul',
	        0x5b: 'div',
	        0x5c: 'div.un',
	        0x5d: 'rem',
	        0x5e: 'rem.un',
	        0x5f: 'and',
	        0x60: 'or',
	        0x61: 'xor',
	        0x62: 'shl',
	        0x63: 'shr',
	        0x64: 'shr.un',
	        0x65: 'neg',
	        0x66: 'not',
	        0x67: 'conv.i1',
	        0x68: 'conv.i2',
	        0x69: 'conv.i4',
	        0x6a: 'conv.i8',
	        0x6b: 'conv.r4',
	        0x6c: 'conv.r8',
	        0x6d: 'conv.u4',
	        0x6e: 'conv.u8',
	        0x76: 'conv.r.un',
	        0x7a: 'throw',
	        0x82: 'conv.ovf.i1.un',
	        0x83: 'conv.ovf.i2.un',
	        0x84: 'conv.ovf.i4.un',
	        0x85: 'conv.ovf.i8.un',
	        0x86: 'conv.ovf.u1.un',
	        0x87: 'conv.ovf.u2.un',
	        0x88: 'conv.ovf.u4.un',
	        0x89: 'conv.ovf.u8.un',
	        0x8a: 'conv.ovf.i.un',
	        0x8b: 'conv.ovf.u.un',
	        0x8e: 'ldlen',
	        0x90: 'ldelem.i1',
	        0x91: 'ldelem.u1',
	        0x92: 'ldelem.i2',
	        0x93: 'ldelem.u2',
	        0x94: 'ldelem.i4',
	        0x95: 'ldelem.u4',
	        0x96: 'ldelem.i8',
	        0x97: 'ldelem.i',
	        0x98: 'ldelem.r4',
	        0x99: 'ldelem.r8',
	        0x9a: 'ldelem.ref',
	        0x9b: 'stelem.i',
	        0x9c: 'stelem.i1',
	        0x9d: 'stelem.i2',
	        0x9e: 'stelem.i4',
	        0x9f: 'stelem.i8',
	        0xa0: 'stelem.r4',
	        0xa1: 'stelem.r8',
	        0xa2: 'stelem.ref',
	        0xb3: 'conv.ovf.i1',
	        0xb4: 'conv.ovf.u1',
	        0xb5: 'conv.ovf.i2',
	        0xb6: 'conv.ovf.u2',
	        0xb7: 'conv.ovf.i4',
	        0xb8: 'conv.ovf.u4',
	        0xb9: 'conv.ovf.i8',
	        0xba: 'conv.ovf.u8',
	        0xc3: 'ckfinite',
	        0xd1: 'conv.u2',
	        0xd2: 'conv.u1',
	        0xd3: 'conv.i',
	        0xd4: 'conv.ovf.i',
	        0xd5: 'conv.ovf.u',
	        0xd6: 'add.ovf',
	        0xd7: 'add.ovf.un',
	        0xd8: 'mul.ovf',
	        0xd9: 'mul.ovf.un',
	        0xda: 'sub.ovf',
	        0xdb: 'sub.ovf.un',
	        0xdc: 'endfinally',
	        0xdf: 'stind.i',
	        0xe0: 'conv.u',
	        0xfe01: 'ceq',
	        0xfe02: 'cgt',
	        0xfe03: 'cgt.un',
	        0xfe04: 'clt',
	        0xfe05: 'clt.un',
	        0xfe1a: 'rethrow',
	    }

	    # Opcodes followed by one unsigned byte, reported as-is.
	    _UINT8_ARG = {
	        0x0e: 'ldarg.s',
	        0x0f: 'ldarga.s',
	        0x10: 'starg.s',
	        0x11: 'ldloc.s',
	        0x12: 'ldloca.s',
	        0x13: 'stloc.s',
	        0xde: 'leave.s',
	    }

	    # Opcodes followed by a one-byte index into self.types.
	    _TYPE_ARG = {
	        0x8d: 'newarr',
	        0xfe15: 'initobj',
	        0xfe16: 'constrained.',
	        0x74: 'castclass',
	        0x70: 'cpobj',
	        0x71: 'ldobj',
	        0x81: 'stobj',
	        0x8c: 'box',
	        0x79: 'unbox',
	    }

	    # Opcodes followed by a one-byte field operand, reported as-is.
	    _FIELD_ARG = {
	        0x7b: 'ldfld',
	        0x7c: 'ldflda',
	        0x7d: 'stfld',
	        0x7e: 'ldsfld',
	        0x7f: 'ldsflda',
	        0x80: 'stsfld',
	    }

	    # Branches with a signed 8-bit relative offset.
	    _BRANCH_SHORT = {
	        0x2b: 'br.s',
	        0x2c: 'brfalse.s',
	        0x2d: 'brtrue.s',
	        0x2e: 'beq.s',
	        0x2f: 'bge.s',
	        0x30: 'bgt.s',
	        0x31: 'ble.s',
	        0x32: 'blt.s',
	        0x33: 'bne.un.s',
	        0x34: 'bge.un.s',
	        0x35: 'bgt.un.s',
	        0x36: 'ble.un.s',
	        0x37: 'blt.un.s',
	    }

	    # Branches with a signed 32-bit little-endian relative offset.
	    _BRANCH_LONG = {
	        0x38: 'br',
	        0x39: 'brfalse',
	        0x3a: 'brtrue',
	        0x3b: 'beq',
	        0x3c: 'bge',
	        0x3d: 'bgt',
	        0x3e: 'ble',
	        0x3f: 'blt',
	        0x40: 'bne.un',
	        0x41: 'bge.un',
	        0x42: 'bgt.un',
	        0x43: 'ble.un',
	        0x44: 'blt.un',
	    }

	    # Reference only: this table is never consulted by the decoder.  Some of
	    # its entries are in fact decoded by dedicated branches in disas_one.
	    _UNHANDLED = {
	        0x21: 'ldc.i8',
	        0x23: 'ldc.r8',
	        0x27: 'jmp',
	        0x28: 'call',
	        0x29: 'calli',
	        0x45: 'switch',
	        0x6f: 'callvirt',
	        0x72: 'ldstr',
	        0x75: 'isinst',
	        0x8f: 'ldelema',
	        0xc2: 'refanyval',
	        0xc6: 'mkrefany',
	        0xd0: 'ldtoken',
	        0xfe00: 'arglist',
	        0xfe07: 'ldvirtftn',
	        0xfe09: 'ldarg',
	        0xfe0a: 'ldarga',
	        0xfe0b: 'starg',
	        0xfe0c: 'ldloc',
	        0xfe0d: 'ldloca',
	        0xfe0e: 'stloc',
	        0xfe0f: 'localloc',
	        0xfe11: 'endfilter',
	        0xfe12: 'unaligned.',
	        0xfe13: 'volatile.',
	        0xfe14: 'tail.',
	        0xfe17: 'cpblk',
	        0xfe18: 'initblk',
	        0xfe1c: 'sizeof',
	        0xfe1d: 'refanytype',
	    }

	    # Opcodes followed by a one-byte index into self.methods.
	    _CALL = {
	        0x28: 'call',
	        0x6f: 'callvirt',
	        0x73: 'newobj',
	        0xfe06: 'ldftn',    # really? I'm not so sure
	    }

	    # Opcodes of unknown meaning that are decoded as taking no operand.
	    _ARTEMIS_UNKNOWN = frozenset([
	        0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xbb, 0xbd, 0xb0, 0xbe, 0xb1, 0xbc, 0xaf, 0xfd, 0xff
	    ])

	    def __init__(self, filename, namer=None, skip_body=False):
	        """Load ``filename`` and register its debug names with ``namer``.

	        filename  -- path of the ``.hive.xml`` file
	        namer     -- ``Namer`` to extend; a fresh one is created when omitted
	        skip_body -- when true, only the debug metadata is processed and
	                     ``parse_hive`` is not called
	        """
	        # The parse reads the whole document, so the handle can be released
	        # straight away instead of being left open for the object's lifetime.
	        with open(filename, 'rb') as fp:
	            self.xml = lxml.etree.parse(fp)

	        self.assembly = self.xml.getroot().find('Assembly')

	        self.name = self.assembly.get('Name')

	        self.hive_data = bytearray(b64decode(self.assembly.find('Hive').text))
	        self.debug_data = bytearray(b64decode(self.assembly.find('Win32DebugMetadata').text))

	        if namer is None:
	            namer = Namer()
	        self.namer = namer

	        self.namer.parse_debug(self.debug_data)

	        if not skip_body:
	            self.parse_hive()

	    def parse_hive(self):
	        """Parse the binary hive body and print a disassembly of every method.

	        Populates ``self.dependencies``, ``self.types`` and ``self.methods``.
	        Several fields are read only to keep the stream position correct;
	        those whose meaning is not known carry ``unk`` names.
	        """
	        data = self.hive_data[8:]   # skip length and HIVE

	        # Fixed-offset header fields (offsets are relative to ``data``).
	        unk_dat = le16(data[0x17:0x19])
	        num_mod_deps = le16(data[0x19:0x1b])
	        num_mod_refs = le16(data[0x1b:0x1d])
	        body = StringIO(data[0x2d:])

	        # Length of the opaque region skipped after the method refs.
	        unk_len = le16(body.read(2))

	        if unk_dat != 0xffff:   # not sure if this is the correct trigger
	            unk_len1 = le16(body.read(2))
	            unk_len2 = le16(body.read(2))
	            unk_len += 2*unk_len1 + unk_len2

	        self.dependencies = []
	        for i in range(num_mod_deps):
	            tag = le32(body.read(4))
	            flags = body.read(8)
	            self.dependencies.append((tag, flags))

	        # Module table: (24-bit module tag, type count) entries.  Entries up
	        # to the first repeat of the first module's tag describe typerefs;
	        # from that repeat onwards they describe typedefs.
	        m_typerefs = []
	        m_typedefs = []
	        active = m_typerefs
	        first_module = None
	        for i in range(num_mod_refs):
	            module = le24(body.read(3))
	            count = ord(body.read(1))

	            if first_module is None:
	                first_module = module
	            elif module == first_module:    # can't find a count/pointer for this changeover
	                active = m_typedefs

	            active.append((module, count))

	        self.types = []
	        self.methods = []

	        # type refs: tag plus two per-type method counts
	        typeref_meths = []
	        for module, count in m_typerefs:
	            for i in range(count):
	                typetag = le16(body.read(2))
	                nmethods = ord(body.read(1))
	                nother = ord(body.read(1))
	                typeref_meths.append((typetag, nmethods, nother))

	                self.types.append(typetag)

	        # method refs: first the ``nmethods`` group of every type, then the
	        # ``nother`` group of every type
	        for typetag, nmethods, nother in typeref_meths:
	            for i in range(nmethods):
	                methtag = le16(body.read(2))
	                self.methods.append((typetag, methtag))

	        for typetag, nmethods, nother in typeref_meths:
	            for i in range(nother):
	                methtag = le16(body.read(2))
	                self.methods.append((typetag, methtag))

	        # XXX string tables, or what?
	        unknown = body.read(unk_len)

	        # type defs
	        typedefs = []
	        for module, count in m_typedefs:
	            for i in range(count):
	                typetag = le16(body.read(2))
	                basetype = ord(body.read(1))
	                subtype = ord(body.read(1))
	                nfields = ord(body.read(1))
	                nmethods = ord(body.read(1))
	                flags = le16(body.read(2))

	                # 6-byte field records; read but not decoded
	                fields = []
	                for j in range(nfields):
	                    fields.append(body.read(6))

	                self.types.append(typetag)
	                typedefs.append((typetag, nmethods))

	        # method defs
	        methdefs = []
	        for typetag, nmethods in typedefs:
	            for i in range(nmethods):
	                methtag = le16(body.read(2))
	                argflags = ord(body.read(1))
	                nargs = argflags & 7
	                flags = ord(body.read(1))
	                ret_type = ord(body.read(1))

	                defs = []
	                prebytes = 0
	                if argflags & 0x80:
	                    # Extended header: five single-byte fields replace the
	                    # counts packed into ``argflags``, followed by ``ndefs``
	                    # 7-byte records.
	                    nargs, prebytes, unk2, ndefs, unk3 = struct.unpack('5B', body.read(5))
	                    for j in range(ndefs):
	                        defs.append(body.read(7))
	                else:
	                    prebytes = argflags >> 4

	                if flags & 0x80:
	                    methtype = 'pointer'    # just points to a module with same named/tagged method (4 byte full tag)
	                else:
	                    methtype = 'actual'

	                arg_types = body.read(nargs)

	                methdefs.append([typetag, methtag, methtype, prebytes])
	                self.methods.append((typetag, methtag))

	        # method bodies: each methdef list gains a fifth element -- the
	        # 32-bit tag for a 'pointer' method, the raw bytecode otherwise
	        for methdef in methdefs:
	            typetag, methtag, methtype, prebytes = methdef

	            if methtype == 'pointer':
	                methdef.append(le32(body.read(4)))
	                continue

	            pre = body.read(prebytes)

	            length = le16(body.read(2))
	            methdef.append(body.read(length))

	            print("%d prebytes: %s" % (prebytes, hexlify(pre)))

	            print("%s\n\t%s" % (self.namer.method(typetag, methtag), hexlify(methdef[-1])))

	            for insn in self.disassemble(methdef[-1]):
	                strs = []
	                for elem in insn:
	                    if isinstance(elem, int):
	                        strs.append('0x%x' % elem)
	                    else:
	                        strs.append(str(elem))

	                print("\t".join(strs))

	    def disassemble(self, methbody):
	        """Disassemble ``methbody`` into a list of instruction rows.

	        Each row is a list: the instruction offset as a ``0x..`` string, the
	        hex dump of the instruction bytes, the mnemonic, then any operands.
	        """
	        data = StringIO(methbody)
	        insns = []
	        while data.tell() < len(methbody):
	            i_addr = data.tell()
	            insn = self.disas_one(data)

	            i_end = data.tell()
	            i_bytes = data.getvalue()[i_addr:i_end]

	            insns.append(['0x%02x' % i_addr, hexlify(i_bytes)] + list(insn))

	        return insns


	    def disas_one(self, data):
	        """Decode one instruction from the byte stream ``data``.

	        ``data`` must be positioned at an opcode and is advanced past the
	        instruction.  Returns a tuple: the mnemonic followed by any decoded
	        operands.  Branch and switch operands are converted from relative
	        offsets to absolute offsets within the stream.

	        Raises ``ValueError`` for an opcode no branch below recognises.
	        """
	        opcode = ord(data.read(1))
	        if opcode == 0xfe:
	            # 0xfe is a prefix byte: the opcode is the two-byte value 0xfeXX.
	            opcode <<= 8
	            opcode |= ord(data.read(1))

	        if opcode in self._NO_ARG:
	            return self._NO_ARG[opcode],

	        if opcode in self._UINT8_ARG:
	            arg = ord(data.read(1))
	            return self._UINT8_ARG[opcode], arg

	        if opcode == 0xdd:
	            arg = le32(data.read(4))
	            return 'leave', arg

	        if opcode in self._TYPE_ARG:
	            arg = ord(data.read(1))
	            return self._TYPE_ARG[opcode], self.namer.type(self.types[arg])

	        if opcode in self._FIELD_ARG:
	            arg = ord(data.read(1))
	            return self._FIELD_ARG[opcode], arg

	        if opcode in self._BRANCH_SHORT:
	            arg = struct.unpack('b', data.read(1))[0]
	            # Offsets are relative to the end of the branch instruction.
	            loc = data.tell()
	            return self._BRANCH_SHORT[opcode], loc + arg

	        if opcode in self._BRANCH_LONG:
	            arg = struct.unpack('<l', data.read(4))[0]
	            loc = data.tell()
	            return self._BRANCH_LONG[opcode], loc + arg

	        if opcode in self._CALL:
	            method_id = ord(data.read(1))
	            return self._CALL[opcode], self.namer.method(*self.methods[method_id])

	        if opcode == 0x1f:
	            arg = struct.unpack('b', data.read(1))[0]
	            return 'ldc.i4.s', arg

	        if opcode == 0x20:
	            arg = struct.unpack('<l', data.read(4))[0]
	            return 'ldc.i4', arg

	        # this is definitely not ldc.r4
	        if opcode == 0x22:
	            return 'unk22',

	        if opcode == 0x23:
	            # Trailing comma matters: every branch must return a tuple, since
	            # disassemble() does list(result) and a bare string would be
	            # split into single characters.
	            return 'unk23',  # ??

	        if opcode == 0xd0:
	            arg = ord(data.read(1))  # ??
	            return 'ldtoken', arg

	        if opcode == 0x24:  # Artemis custom - multiple ldarg
	            # The operand is a bitmask: bit i set means "load argument i".
	            arg = ord(data.read(1))
	            ldargs = []
	            for i in range(8):
	                if arg & (1<<i):
	                    ldargs.append(i)

	            return 'ldarg', ','.join(map(str, ldargs))

	        if opcode == 0xf0:  # Artemis unknown
	            arg = data.read(3)
	            return 'unkF0', hexlify(arg)

	        if opcode in self._ARTEMIS_UNKNOWN:
	            return 'unk%2X' % opcode,

	        if opcode == 0x45:  # switch
	            count = le32(data.read(4))
	            # Targets are relative to the end of the whole jump table.
	            loc = data.tell() + 4*count
	            targets = []
	            for i in range(count):
	                offset = struct.unpack('<l', data.read(4))[0]
	                targets.append(loc + offset)

	            return 'switch', targets

	        raise ValueError("Unhandled opcode: 0x%02x" % opcode)

	    def dump_hive(self):
	        """Print each module dependency with its raw flag bytes in hex."""
	        print("Dependencies:")
	        for tag, flags in self.dependencies:
	            print(" %s\t%s" % (self.namer.module(tag), hexlify(flags)))

	    def dump_tables(self):
	        """Print the type and method tables with their one-byte indices."""
	        print("Types:")

	        for i, typetag in enumerate(self.types):
	            print(" %02X %s" % (i, self.namer.type(typetag)))

	        print("Methods:")

	        for i, (typetag, methtag) in enumerate(self.methods):
	            print(" %02X %s" % (i, self.namer.method(typetag, methtag)))

	    def dump(self):
	        """Print the assembly name, its dependencies and its tables."""
	        print("Assembly: %s" % self.name)
	        self.dump_hive()
	        self.dump_tables()

	if __name__ == "__main__":
	    import sys
	    import glob

	    # The target hive is a required argument; without this check a missing
	    # argument surfaced as a bare IndexError traceback.
	    if len(sys.argv) < 2:
	        sys.exit("usage: %s TARGET.hive.xml" % sys.argv[0])

	    namer = Namer()

	    # First pass: read only the debug metadata of every hive in the current
	    # directory so the shared Namer knows as many names as possible.
	    for ff in glob.glob('*.hive.xml'):
	        Hive(ff, namer=namer, skip_body=True)

	    # Second pass: fully parse (and disassemble) the requested hive, then
	    # print its dependency, type and method tables.
	    hive = Hive(sys.argv[1], namer=namer)
	    hive.dump()