dev-zzo/coffdump.py

## coffdump.py
#
# BUGS:
#
# * Data is not handled at all.
#
# * Imports which are either IMPORT_NAME_NOPREFIX or IMPORT_NAME_UNDECORATE
#   should be properly restored -- make a huge map?..
#

import idaapi
import idautils
import struct
import os

class CoffSection:
    """A single COFF section: header fields, raw bytes and relocations."""

    def __init__(self, name, chars=0x60500020):
        # Identification and flags that go into the section header.
        self.name = name
        self.characteristics = chars

        # Payload; `data` stays None until the caller supplies raw bytes.
        self.data = None
        self.relocs = []

        # Size and file offsets emitted by pack(); they start out as zero.
        self.data_size = 0
        self.data_offset = 0
        self.relocs_offset = 0

    def pack(self):
        """Return the 40-byte section header as a little-endian record.

        The relocation count is taken from len(self.relocs); every other
        value is read from the instance attributes as they currently are.
        """
        header_fields = (
            self.name,             # 8s: NUL-padded (or truncated) by struct
            0,                     # I:  VirtualSize, always zero here
            0,                     # I:  VirtualAddress, always zero here
            self.data_size,        # I:  SizeOfRawData
            self.data_offset,      # I:  PointerToRawData
            self.relocs_offset,    # I:  PointerToRelocations
            0,                     # I:  PointerToLinenumbers, always zero here
            len(self.relocs),      # H:  NumberOfRelocations
            0,                     # H:  NumberOfLinenumbers, always zero here
            self.characteristics,  # I:  Characteristics
        )
        return struct.pack('<8sIIIIIIHHI', *header_fields)

class CoffStringTableBuilder:
    """Helper class to build a COFF string table.

    The table is kept as raw bytes: names are NUL-terminated and appended
    one after another. Offsets handed out by put() are relative to the
    start of the packed table, whose first four bytes hold its total size.
    """
    def __init__(self):
        # The first string starts right after the 4-byte size field.
        self.offset = 4
        # Concatenated NUL-terminated names (bytes).
        self.data = b''
        # Maps an already stored name (bytes) to its offset.
        self.items = {}

    def put(self, str):
        """Store a string in the table and return its offset.

        Text (non-bytes) input is encoded as UTF-8 first, so offsets are
        always counted in bytes. Storing the same name again returns the
        offset assigned the first time.
        """
        raw = str if isinstance(str, bytes) else str.encode('utf-8')
        try:
            return self.items[raw]
        except KeyError:
            pass
        offset = self.offset
        self.items[raw] = offset
        self.data += raw + b"\0"
        self.offset = offset + len(raw) + 1
        return offset

    def pack(self):
        """Get the packed table: 32-bit total size followed by the strings."""
        return struct.pack('<I', self.offset) + self.data

class CoffReloc:
    """One relocation entry of an i386 COFF section."""

    # --- Relocation type codes -------------------------------------------
    # Relocation that is ignored.
    I386_ABSOLUTE = 0x0000
    # 32-bit VA of the target.
    I386_DIR32 = 0x0006
    # 32-bit RVA of the target.
    I386_DIR32NB = 0x0007
    # 16-bit index of the section containing the target
    # (supports debugging information).
    I386_SECTION = 0x000A
    # 32-bit offset of the target from the start of its section
    # (supports debugging information and static thread local storage).
    I386_SECREL = 0x000B
    # CLR token.
    I386_TOKEN = 0x000C
    # 7-bit offset from the base of the section containing the target.
    I386_SECREL7 = 0x000D
    # 32-bit displacement relative to the target
    # (supports the x86 relative branch and call instructions).
    I386_REL32 = 0x0014

    def __init__(self, type, rva, symbol_index):
        # `type` is one of the I386_* codes above; `rva` is where the fixup
        # is applied and `symbol_index` selects the symbol table entry.
        self.type = type
        self.rva = rva
        self.symbol_index = symbol_index

    def __str__(self):
        """Return a short human-readable description of the relocation."""
        described = (self.type, self.rva, self.symbol_index)
        return 'Relocation type %d <%08x -> #%d>' % described

    def __repr__(self):
        """Use the same text as str()."""
        return str(self)

    def pack(self):
        """Return the 10-byte record: address, symbol index, then type."""
        record = (self.rva, self.symbol_index, self.type)
        return struct.pack('<IIH', *record)

class CoffSymbol:
    """One record of the COFF symbol table."""

    # External symbol. With section number 0 (IMAGE_SYM_UNDEFINED) the Value
    # field holds the size; otherwise it is the offset within the section.
    CLASS_EXTERNAL = 2
    # Static symbol: Value is the offset within the section, and a zero
    # Value means the symbol represents a section name.
    CLASS_STATIC = 3

    def __init__(self, name, type, stg_class, value=0, section_num=0):
        self.name = name
        self.type = type
        self.stg_class = stg_class
        self.value = value
        self.section_num = section_num
        # Zero means the name is stored inline; a non-zero value is used by
        # pack() as the string table offset of the name.
        self.name_offset = 0
        # Number of auxiliary records following this symbol.
        self.aux_count = 0

    def pack(self):
        """Return the 18-byte symbol record.

        The first eight bytes hold either the name itself (padded or
        truncated by struct) or, when name_offset is set, four zero bytes
        followed by the string table offset.
        """
        tail = struct.pack(
            '<IHHBB',
            self.value,
            self.section_num,
            self.type,
            self.stg_class,
            self.aux_count)
        if self.name_offset != 0:
            return struct.pack('<II', 0, self.name_offset) + tail
        return struct.pack('<8s', self.name) + tail

class CoffSymbolTable:
    """Ordered list of symbols; a symbol's index is its position in it."""

    def __init__(self):
        self.symbols = []

    def get_index(self, name):
        """Return the index of the first symbol called `name`.

        Slots that belong to a symbol's auxiliary records (aux_count slots
        after it) are skipped and never compared. Raises IndexError when
        nothing matches.
        """
        pending_aux = 0
        for position, entry in enumerate(self.symbols):
            if pending_aux:
                # Still inside the auxiliary records of an earlier symbol.
                pending_aux -= 1
                continue
            if entry.name == name:
                return position
            pending_aux = entry.aux_count
        raise IndexError('No such symbol has been defined')

    def add_symbol(self, sym):
        """Append `sym` and return the index it was stored at."""
        slot = len(self.symbols)
        self.symbols.append(sym)
        return slot

class Coff:
    """The container for COFF data."""

    def __init__(self):
        self.sections = []
        self.symtab = CoffSymbolTable()
        self.strtab = CoffStringTableBuilder()

    def add_section(self, sct):
        """add_section(section) -> index

        Append a section to the section table and return its zero-based
        position in that table.
        """
        position = len(self.sections)
        self.sections.append(sct)
        return position

    def write(self, fp):
        """Serialize the whole object file to the binary file object `fp`.

        Output order: file header, all section headers, then for each
        section its raw data followed by its relocations, then the symbol
        table and finally the string table. Section offsets and symbol
        name offsets are filled in as a side effect.
        """
        sections = self.sections
        symbols = self.symtab.symbols

        # Pass 1: assign file offsets. Payload starts after the 0x14-byte
        # file header and one 0x28-byte header per section; each relocation
        # occupies 0x0A bytes.
        cursor = 0x14 + 0x28 * len(sections)
        for sct in sections:
            if sct.data is not None:
                sct.data_offset = cursor
                cursor += len(sct.data)
            if len(sct.relocs) > 0:
                sct.relocs_offset = cursor
                cursor += 0x0A * len(sct.relocs)
        # Whatever follows the last section's payload is the symbol table.
        symtab_offset = cursor

        # File header: machine 0x014C, section count, zero timestamp,
        # symbol table location and size, no optional header, no flags.
        file_header = struct.pack(
            '<HHIIIHH',
            0x014C, len(sections), 0,
            symtab_offset, len(symbols),
            0, 0)
        fp.write(file_header)

        for sct in sections:
            fp.write(sct.pack())

        for sct in sections:
            if sct.data is not None:
                fp.write(sct.data)
            for reloc in sct.relocs:
                fp.write(reloc.pack())

        # Names longer than 8 bytes do not fit inline and go to the string
        # table; auxiliary slots after a symbol are stepped over.
        position = 0
        total = len(symbols)
        while position < total:
            sym = symbols[position]
            if len(sym.name) > 8:
                sym.name_offset = self.strtab.put(sym.name)
            position += sym.aux_count + 1

        for sym in symbols:
            fp.write(sym.pack())

        fp.write(self.strtab.pack())
#

def _find_relocatable_xref(func, insn_ea):
    """Return the first xref from insn_ea that needs a relocation, or None."""
    for xref in idautils.XrefsFrom(insn_ea, 0):
        # Ignore ordinary control flow
        if xref.type == idaapi.fl_F:
            continue
        # Ignore writes to/from structs on stack?
        if xref.to >= 0xFF000000:
            continue
        # Ignore code xrefs (fl_CF and above) that stay within the function.
        if xref.type >= idaapi.fl_CF and func.startEA <= xref.to < func.endEA:
            continue
        return xref
    return None


def _find_address_operand(insn):
    """Return (operand, value) for the first address-carrying operand, or None."""
    for op in insn.Operands:
        if op.type in (idaapi.o_mem, idaapi.o_near, idaapi.o_far):
            return op, op.addr
        if op.type == idaapi.o_imm:
            return op, op.value
    return None


def _choose_reloc_type(insn):
    """Decide between I386_DIR32 and I386_REL32 for the given instruction."""
    # Currently, only E8 and E9 insns need to be REL32
    # I can see a point in handling 0F8x cond jumps in this way as well
    if insn.itype == idaapi.NN_call:
        return CoffReloc.I386_REL32
    if insn.itype == idaapi.NN_jmp and insn.size == 5:
        return CoffReloc.I386_REL32
    is_cond_jump = (idaapi.NN_ja <= insn.itype <= idaapi.NN_jc
                    or idaapi.NN_je <= insn.itype <= idaapi.NN_jz)
    if insn.size == 6 and is_cond_jump:
        idaapi.warn('Meh! Spotted 0F8x jump.')
        return CoffReloc.I386_REL32
    return CoffReloc.I386_DIR32


def dump_function(func, coff):
    """Dumps a function into a COFF container.

    A new '.text' section holding the function's bytes is added to `coff`,
    together with an external symbol for the function itself. Every
    instruction that references something needing a fixup gets a relocation
    record and an (undefined) external symbol for its target; the operand
    bytes are rewritten to hold only the addend relative to that symbol.

    func -- IDA function object; its startEA/endEA delimit the bytes dumped.
    coff -- Coff container that receives the section and symbols.
    """
    section = CoffSection('.text')
    section_index = coff.add_section(section)

    # FIXME: Add a section symbol?

    idaapi.msg('Dumping function at %08x\n' % func.startEA)

    code = bytearray(idaapi.get_many_bytes(func.startEA, func.endEA - func.startEA))
    section.data = code
    section.data_size = len(code)

    func_name = idaapi.get_func_name(func.startEA)
    idaapi.msg('Function name: %s\n' % func_name)
    # Section numbers in the symbol table are one-based.
    coff.symtab.add_symbol(CoffSymbol(func_name, 0x20, CoffSymbol.CLASS_EXTERNAL, 0, section_index + 1))

    for insn_ea in idautils.FuncItems(func.startEA):
        insn_offset = insn_ea - func.startEA
        idaapi.msg('\nInsn at: %08x +%08x ' % (insn_ea, insn_offset))

        xref = _find_relocatable_xref(func, insn_ea)
        if xref is None:
            # Go for the next insn
            continue
        xref_to = xref.to

        xref_name = idaapi.get_true_name(idaapi.BADADDR, xref_to)
        idaapi.msg('(XREF:%s %08x %s) ' % (idautils.XrefTypeName(xref.type), xref_to, xref_name))

        insn = idautils.DecodeInstruction(insn_ea)
        if insn is None:
            idaapi.msg('(cannot decode, skipped) ')
            continue

        # Walk the operands, see which one causes the reference
        found = _find_address_operand(insn)
        if found is None:
            idaapi.warn('HUH? XREF exists, but no matching operand?')
            # Go for the next insn
            continue
        op, value = found

        # Create/fetch a symbol
        try:
            sym_index = coff.symtab.get_index(xref_name)
        except IndexError:
            if (idaapi.getFlags(xref_to) & idaapi.MS_CLS) == idaapi.FF_CODE:
                sym_type = 0x20
            else:
                sym_type = 0x00
            sym_index = coff.symtab.add_symbol(CoffSymbol(xref_name, sym_type, CoffSymbol.CLASS_EXTERNAL))

        reloc_type = _choose_reloc_type(insn)

        # Store the relocation record
        reloc_rva = insn_offset + op.offb
        section.relocs.append(CoffReloc(reloc_type, reloc_rva, sym_index))

        # Correct the offset in bytes (ugly...): keep only the addend
        # relative to the symbol. Masking keeps a negative addend packable
        # as its 32-bit two's complement.
        code[reloc_rva:reloc_rva + 4] = struct.pack('<I', (value - xref_to) & 0xFFFFFFFF)

    idaapi.msg('\nEnd of function\n')
#

def dumpfunc():
    """Dump the function under the cursor into '<start address>.obj'.

    The file is created in the current working directory. Nothing is
    written when the cursor is not inside a function.
    """
    func = idaapi.get_func(idaapi.get_screen_ea())
    if func is None:
        idaapi.msg('Must be at a function!\n')
        # Without a function there is nothing to dump.
        return
    coff = Coff()
    dump_function(func, coff)
    name = '%08x.obj' % (func.startEA)
    with open(name, 'wb') as fp:
        coff.write(fp)

## implib.py
"""
Process the import library and fill in entries for import symbol restoration.
"""

import struct
import sys

def import_name_from_symbol(symbol_name, name_type):
    """Derive the DLL export name from the public symbol name.

    `name_type` is the 3-bit import name type taken from the import
    header. Only IMPORT_NAME_NOPREFIX (2) and IMPORT_NAME_UNDECORATE (3)
    need restoring; for any other value None is returned.
    """
    if name_type not in (2, 3):
        return None
    name = symbol_name
    # Both types skip exactly one leading '?', '@' or '_' -- stripping all
    # of them would mangle names that really start with an underscore.
    if name[:1] in ('_', '@', '?'):
        name = name[1:]
    if name_type == 3:
        # IMPORT_NAME_UNDECORATE also truncates at the first '@'.
        name = name.partition('@')[0]
    return name


def _as_text(raw):
    """Return `raw` as a native string (bytes are decoded as Latin-1)."""
    if isinstance(raw, str):
        return raw
    return raw.decode('latin-1')


def main(argv=None):
    """Scan an import library and print the import_restore entries.

    argv defaults to sys.argv; argv[1] is the path of the library.
    Returns the process exit status: 0 on success, 1 on any error.
    """
    if argv is None:
        argv = sys.argv
    try:
        path = argv[1]
    except IndexError:
        print("Usage: %s <import.lib>" % argv[0])
        return 1

    with open(path, 'rb') as fp:
        signature = fp.read(8)
        if signature != b"!<arch>\x0a":
            print("Invalid signature!")
            return 1

        while True:
            # NOTE: the spec says archive headers should be aligned on even offsets.
            fp.seek(fp.tell() & 1, 1)

            header = fp.read(60)
            header_len = len(header)
            if header_len == 0:
                break
            if header_len < 60:
                print("Trailing bytes encountered!")
                return 1

            entry_name = header[:16].rstrip()
            try:
                entry_size = int(header[48:58])
            except ValueError:
                print("Invalid member size encountered!")
                return 1
            # I don't care about the rest of the header data.

            data = fp.read(entry_size)
            if len(data) < entry_size:
                print("EOF encountered while reading member data!")
                return 1

            # Skip linker entries; I want import entries only.
            if entry_name in (b'/', b'//'):
                continue

            # Process import headers: they start with the two signature
            # words 0x0000, 0xFFFF and a zero version, 20 bytes in total.
            if data[:6] != b"\x00\x00\xFF\xFF\x00\x00" or len(data) < 20:
                continue

            # Fields after the signature: machine, timestamp, data size,
            # ordinal/hint and the flags word; only the flags are needed.
            flags = struct.unpack('<HIIHH', data[6:20])[4]
            import_name_type = (flags >> 2) & 0x0007

            # The header is followed by the symbol name and the DLL name,
            # each NUL-terminated.
            names = data[20:].split(b"\x00")
            if len(names) < 2:
                continue
            symbol_name = _as_text(names[0])
            dll = _as_text(names[1])

            # FIXME: import by ordinal...
            import_name = import_name_from_symbol(symbol_name, import_name_type)
            if import_name is None:
                continue
            print("import_restore['%s:%s'] = '%s'" % (dll.lower(), import_name, symbol_name))
    return 0


if __name__ == '__main__':
    sys.exit(main())
#
	#
	# BUGS:
	#
	# * Data is not handled at all.
	#
	# * Imports which are either IMPORT_NAME_NOPREFIX or IMPORT_NAME_UNDECORATE
	# should be properly restored -- make a huge map?..
	#

	import idaapi
	import idautils
	import struct
	import os

	class CoffSection:
	    """A single COFF section: header fields, raw bytes and relocations."""

	    def __init__(self, name, chars=0x60500020):
	        # Identification and flags that go into the section header.
	        self.name = name
	        self.characteristics = chars

	        # Payload; `data` stays None until the caller supplies raw bytes.
	        self.data = None
	        self.relocs = []

	        # Size and file offsets emitted by pack(); they start out as zero.
	        self.data_size = 0
	        self.data_offset = 0
	        self.relocs_offset = 0

	    def pack(self):
	        """Return the 40-byte section header as a little-endian record.

	        The relocation count is taken from len(self.relocs); every other
	        value is read from the instance attributes as they currently are.
	        """
	        header_fields = (
	            self.name,             # 8s: NUL-padded (or truncated) by struct
	            0,                     # I:  VirtualSize, always zero here
	            0,                     # I:  VirtualAddress, always zero here
	            self.data_size,        # I:  SizeOfRawData
	            self.data_offset,      # I:  PointerToRawData
	            self.relocs_offset,    # I:  PointerToRelocations
	            0,                     # I:  PointerToLinenumbers, always zero here
	            len(self.relocs),      # H:  NumberOfRelocations
	            0,                     # H:  NumberOfLinenumbers, always zero here
	            self.characteristics,  # I:  Characteristics
	        )
	        return struct.pack('<8sIIIIIIHHI', *header_fields)

	class CoffStringTableBuilder:
	    """Helper class to build a COFF string table.

	    The table is kept as raw bytes: names are NUL-terminated and appended
	    one after another. Offsets handed out by put() are relative to the
	    start of the packed table, whose first four bytes hold its total size.
	    """
	    def __init__(self):
	        # The first string starts right after the 4-byte size field.
	        self.offset = 4
	        # Concatenated NUL-terminated names (bytes).
	        self.data = b''
	        # Maps an already stored name (bytes) to its offset.
	        self.items = {}

	    def put(self, str):
	        """Store a string in the table and return its offset.

	        Text (non-bytes) input is encoded as UTF-8 first, so offsets are
	        always counted in bytes. Storing the same name again returns the
	        offset assigned the first time.
	        """
	        raw = str if isinstance(str, bytes) else str.encode('utf-8')
	        try:
	            return self.items[raw]
	        except KeyError:
	            pass
	        offset = self.offset
	        self.items[raw] = offset
	        self.data += raw + b"\0"
	        self.offset = offset + len(raw) + 1
	        return offset

	    def pack(self):
	        """Get the packed table: 32-bit total size followed by the strings."""
	        return struct.pack('<I', self.offset) + self.data

	class CoffReloc:
	    """One relocation entry of an i386 COFF section."""

	    # --- Relocation type codes -------------------------------------------
	    # Relocation that is ignored.
	    I386_ABSOLUTE = 0x0000
	    # 32-bit VA of the target.
	    I386_DIR32 = 0x0006
	    # 32-bit RVA of the target.
	    I386_DIR32NB = 0x0007
	    # 16-bit index of the section containing the target
	    # (supports debugging information).
	    I386_SECTION = 0x000A
	    # 32-bit offset of the target from the start of its section
	    # (supports debugging information and static thread local storage).
	    I386_SECREL = 0x000B
	    # CLR token.
	    I386_TOKEN = 0x000C
	    # 7-bit offset from the base of the section containing the target.
	    I386_SECREL7 = 0x000D
	    # 32-bit displacement relative to the target
	    # (supports the x86 relative branch and call instructions).
	    I386_REL32 = 0x0014

	    def __init__(self, type, rva, symbol_index):
	        # `type` is one of the I386_* codes above; `rva` is where the fixup
	        # is applied and `symbol_index` selects the symbol table entry.
	        self.type = type
	        self.rva = rva
	        self.symbol_index = symbol_index

	    def __str__(self):
	        """Return a short human-readable description of the relocation."""
	        described = (self.type, self.rva, self.symbol_index)
	        return 'Relocation type %d <%08x -> #%d>' % described

	    def __repr__(self):
	        """Use the same text as str()."""
	        return str(self)

	    def pack(self):
	        """Return the 10-byte record: address, symbol index, then type."""
	        record = (self.rva, self.symbol_index, self.type)
	        return struct.pack('<IIH', *record)

	class CoffSymbol:
	    """One record of the COFF symbol table."""

	    # External symbol. With section number 0 (IMAGE_SYM_UNDEFINED) the Value
	    # field holds the size; otherwise it is the offset within the section.
	    CLASS_EXTERNAL = 2
	    # Static symbol: Value is the offset within the section, and a zero
	    # Value means the symbol represents a section name.
	    CLASS_STATIC = 3

	    def __init__(self, name, type, stg_class, value=0, section_num=0):
	        self.name = name
	        self.type = type
	        self.stg_class = stg_class
	        self.value = value
	        self.section_num = section_num
	        # Zero means the name is stored inline; a non-zero value is used by
	        # pack() as the string table offset of the name.
	        self.name_offset = 0
	        # Number of auxiliary records following this symbol.
	        self.aux_count = 0

	    def pack(self):
	        """Return the 18-byte symbol record.

	        The first eight bytes hold either the name itself (padded or
	        truncated by struct) or, when name_offset is set, four zero bytes
	        followed by the string table offset.
	        """
	        tail = struct.pack(
	            '<IHHBB',
	            self.value,
	            self.section_num,
	            self.type,
	            self.stg_class,
	            self.aux_count)
	        if self.name_offset != 0:
	            return struct.pack('<II', 0, self.name_offset) + tail
	        return struct.pack('<8s', self.name) + tail

	class CoffSymbolTable:
	    """Ordered list of symbols; a symbol's index is its position in it."""

	    def __init__(self):
	        self.symbols = []

	    def get_index(self, name):
	        """Return the index of the first symbol called `name`.

	        Slots that belong to a symbol's auxiliary records (aux_count slots
	        after it) are skipped and never compared. Raises IndexError when
	        nothing matches.
	        """
	        pending_aux = 0
	        for position, entry in enumerate(self.symbols):
	            if pending_aux:
	                # Still inside the auxiliary records of an earlier symbol.
	                pending_aux -= 1
	                continue
	            if entry.name == name:
	                return position
	            pending_aux = entry.aux_count
	        raise IndexError('No such symbol has been defined')

	    def add_symbol(self, sym):
	        """Append `sym` and return the index it was stored at."""
	        slot = len(self.symbols)
	        self.symbols.append(sym)
	        return slot

	class Coff:
	    """The container for COFF data."""

	    def __init__(self):
	        self.sections = []
	        self.symtab = CoffSymbolTable()
	        self.strtab = CoffStringTableBuilder()

	    def add_section(self, sct):
	        """add_section(section) -> index

	        Append a section to the section table and return its zero-based
	        position in that table.
	        """
	        position = len(self.sections)
	        self.sections.append(sct)
	        return position

	    def write(self, fp):
	        """Serialize the whole object file to the binary file object `fp`.

	        Output order: file header, all section headers, then for each
	        section its raw data followed by its relocations, then the symbol
	        table and finally the string table. Section offsets and symbol
	        name offsets are filled in as a side effect.
	        """
	        sections = self.sections
	        symbols = self.symtab.symbols

	        # Pass 1: assign file offsets. Payload starts after the 0x14-byte
	        # file header and one 0x28-byte header per section; each relocation
	        # occupies 0x0A bytes.
	        cursor = 0x14 + 0x28 * len(sections)
	        for sct in sections:
	            if sct.data is not None:
	                sct.data_offset = cursor
	                cursor += len(sct.data)
	            if len(sct.relocs) > 0:
	                sct.relocs_offset = cursor
	                cursor += 0x0A * len(sct.relocs)
	        # Whatever follows the last section's payload is the symbol table.
	        symtab_offset = cursor

	        # File header: machine 0x014C, section count, zero timestamp,
	        # symbol table location and size, no optional header, no flags.
	        file_header = struct.pack(
	            '<HHIIIHH',
	            0x014C, len(sections), 0,
	            symtab_offset, len(symbols),
	            0, 0)
	        fp.write(file_header)

	        for sct in sections:
	            fp.write(sct.pack())

	        for sct in sections:
	            if sct.data is not None:
	                fp.write(sct.data)
	            for reloc in sct.relocs:
	                fp.write(reloc.pack())

	        # Names longer than 8 bytes do not fit inline and go to the string
	        # table; auxiliary slots after a symbol are stepped over.
	        position = 0
	        total = len(symbols)
	        while position < total:
	            sym = symbols[position]
	            if len(sym.name) > 8:
	                sym.name_offset = self.strtab.put(sym.name)
	            position += sym.aux_count + 1

	        for sym in symbols:
	            fp.write(sym.pack())

	        fp.write(self.strtab.pack())
	#

	def _find_relocatable_xref(func, insn_ea):
	    """Return the first xref from insn_ea that needs a relocation, or None."""
	    for xref in idautils.XrefsFrom(insn_ea, 0):
	        # Ignore ordinary control flow
	        if xref.type == idaapi.fl_F:
	            continue
	        # Ignore writes to/from structs on stack?
	        if xref.to >= 0xFF000000:
	            continue
	        # Ignore code xrefs (fl_CF and above) that stay within the function.
	        if xref.type >= idaapi.fl_CF and func.startEA <= xref.to < func.endEA:
	            continue
	        return xref
	    return None


	def _find_address_operand(insn):
	    """Return (operand, value) for the first address-carrying operand, or None."""
	    for op in insn.Operands:
	        if op.type in (idaapi.o_mem, idaapi.o_near, idaapi.o_far):
	            return op, op.addr
	        if op.type == idaapi.o_imm:
	            return op, op.value
	    return None


	def _choose_reloc_type(insn):
	    """Decide between I386_DIR32 and I386_REL32 for the given instruction."""
	    # Currently, only E8 and E9 insns need to be REL32
	    # I can see a point in handling 0F8x cond jumps in this way as well
	    if insn.itype == idaapi.NN_call:
	        return CoffReloc.I386_REL32
	    if insn.itype == idaapi.NN_jmp and insn.size == 5:
	        return CoffReloc.I386_REL32
	    is_cond_jump = (idaapi.NN_ja <= insn.itype <= idaapi.NN_jc
	                    or idaapi.NN_je <= insn.itype <= idaapi.NN_jz)
	    if insn.size == 6 and is_cond_jump:
	        idaapi.warn('Meh! Spotted 0F8x jump.')
	        return CoffReloc.I386_REL32
	    return CoffReloc.I386_DIR32


	def dump_function(func, coff):
	    """Dumps a function into a COFF container.

	    A new '.text' section holding the function's bytes is added to `coff`,
	    together with an external symbol for the function itself. Every
	    instruction that references something needing a fixup gets a relocation
	    record and an (undefined) external symbol for its target; the operand
	    bytes are rewritten to hold only the addend relative to that symbol.

	    func -- IDA function object; its startEA/endEA delimit the bytes dumped.
	    coff -- Coff container that receives the section and symbols.
	    """
	    section = CoffSection('.text')
	    section_index = coff.add_section(section)

	    # FIXME: Add a section symbol?

	    idaapi.msg('Dumping function at %08x\n' % func.startEA)

	    code = bytearray(idaapi.get_many_bytes(func.startEA, func.endEA - func.startEA))
	    section.data = code
	    section.data_size = len(code)

	    func_name = idaapi.get_func_name(func.startEA)
	    idaapi.msg('Function name: %s\n' % func_name)
	    # Section numbers in the symbol table are one-based.
	    coff.symtab.add_symbol(CoffSymbol(func_name, 0x20, CoffSymbol.CLASS_EXTERNAL, 0, section_index + 1))

	    for insn_ea in idautils.FuncItems(func.startEA):
	        insn_offset = insn_ea - func.startEA
	        idaapi.msg('\nInsn at: %08x +%08x ' % (insn_ea, insn_offset))

	        xref = _find_relocatable_xref(func, insn_ea)
	        if xref is None:
	            # Go for the next insn
	            continue
	        xref_to = xref.to

	        xref_name = idaapi.get_true_name(idaapi.BADADDR, xref_to)
	        idaapi.msg('(XREF:%s %08x %s) ' % (idautils.XrefTypeName(xref.type), xref_to, xref_name))

	        insn = idautils.DecodeInstruction(insn_ea)
	        if insn is None:
	            idaapi.msg('(cannot decode, skipped) ')
	            continue

	        # Walk the operands, see which one causes the reference
	        found = _find_address_operand(insn)
	        if found is None:
	            idaapi.warn('HUH? XREF exists, but no matching operand?')
	            # Go for the next insn
	            continue
	        op, value = found

	        # Create/fetch a symbol
	        try:
	            sym_index = coff.symtab.get_index(xref_name)
	        except IndexError:
	            if (idaapi.getFlags(xref_to) & idaapi.MS_CLS) == idaapi.FF_CODE:
	                sym_type = 0x20
	            else:
	                sym_type = 0x00
	            sym_index = coff.symtab.add_symbol(CoffSymbol(xref_name, sym_type, CoffSymbol.CLASS_EXTERNAL))

	        reloc_type = _choose_reloc_type(insn)

	        # Store the relocation record
	        reloc_rva = insn_offset + op.offb
	        section.relocs.append(CoffReloc(reloc_type, reloc_rva, sym_index))

	        # Correct the offset in bytes (ugly...): keep only the addend
	        # relative to the symbol. Masking keeps a negative addend packable
	        # as its 32-bit two's complement.
	        code[reloc_rva:reloc_rva + 4] = struct.pack('<I', (value - xref_to) & 0xFFFFFFFF)

	    idaapi.msg('\nEnd of function\n')
	#

	def dumpfunc():
	    """Dump the function under the cursor into '<start address>.obj'.

	    The file is created in the current working directory. Nothing is
	    written when the cursor is not inside a function.
	    """
	    func = idaapi.get_func(idaapi.get_screen_ea())
	    if func is None:
	        idaapi.msg('Must be at a function!\n')
	        # Without a function there is nothing to dump.
	        return
	    coff = Coff()
	    dump_function(func, coff)
	    name = '%08x.obj' % (func.startEA)
	    with open(name, 'wb') as fp:
	        coff.write(fp)
	"""
	Process the import library and fill in entries for import symbol restoration.
	"""

	import struct
	import sys

	def import_name_from_symbol(symbol_name, name_type):
	    """Derive the DLL export name from the public symbol name.

	    `name_type` is the 3-bit import name type taken from the import
	    header. Only IMPORT_NAME_NOPREFIX (2) and IMPORT_NAME_UNDECORATE (3)
	    need restoring; for any other value None is returned.
	    """
	    if name_type not in (2, 3):
	        return None
	    name = symbol_name
	    # Both types skip exactly one leading '?', '@' or '_' -- stripping all
	    # of them would mangle names that really start with an underscore.
	    if name[:1] in ('_', '@', '?'):
	        name = name[1:]
	    if name_type == 3:
	        # IMPORT_NAME_UNDECORATE also truncates at the first '@'.
	        name = name.partition('@')[0]
	    return name


	def _as_text(raw):
	    """Return `raw` as a native string (bytes are decoded as Latin-1)."""
	    if isinstance(raw, str):
	        return raw
	    return raw.decode('latin-1')


	def main(argv=None):
	    """Scan an import library and print the import_restore entries.

	    argv defaults to sys.argv; argv[1] is the path of the library.
	    Returns the process exit status: 0 on success, 1 on any error.
	    """
	    if argv is None:
	        argv = sys.argv
	    try:
	        path = argv[1]
	    except IndexError:
	        print("Usage: %s <import.lib>" % argv[0])
	        return 1

	    with open(path, 'rb') as fp:
	        signature = fp.read(8)
	        if signature != b"!<arch>\x0a":
	            print("Invalid signature!")
	            return 1

	        while True:
	            # NOTE: the spec says archive headers should be aligned on even offsets.
	            fp.seek(fp.tell() & 1, 1)

	            header = fp.read(60)
	            header_len = len(header)
	            if header_len == 0:
	                break
	            if header_len < 60:
	                print("Trailing bytes encountered!")
	                return 1

	            entry_name = header[:16].rstrip()
	            try:
	                entry_size = int(header[48:58])
	            except ValueError:
	                print("Invalid member size encountered!")
	                return 1
	            # I don't care about the rest of the header data.

	            data = fp.read(entry_size)
	            if len(data) < entry_size:
	                print("EOF encountered while reading member data!")
	                return 1

	            # Skip linker entries; I want import entries only.
	            if entry_name in (b'/', b'//'):
	                continue

	            # Process import headers: they start with the two signature
	            # words 0x0000, 0xFFFF and a zero version, 20 bytes in total.
	            if data[:6] != b"\x00\x00\xFF\xFF\x00\x00" or len(data) < 20:
	                continue

	            # Fields after the signature: machine, timestamp, data size,
	            # ordinal/hint and the flags word; only the flags are needed.
	            flags = struct.unpack('<HIIHH', data[6:20])[4]
	            import_name_type = (flags >> 2) & 0x0007

	            # The header is followed by the symbol name and the DLL name,
	            # each NUL-terminated.
	            names = data[20:].split(b"\x00")
	            if len(names) < 2:
	                continue
	            symbol_name = _as_text(names[0])
	            dll = _as_text(names[1])

	            # FIXME: import by ordinal...
	            import_name = import_name_from_symbol(symbol_name, import_name_type)
	            if import_name is None:
	                continue
	            print("import_restore['%s:%s'] = '%s'" % (dll.lower(), import_name, symbol_name))
	    return 0


	if __name__ == '__main__':
	    sys.exit(main())
	#