# Skip to content
#
# Instantly share code, notes, and snippets.
#
# @marpie
# Created June 1, 2012 07:37
# Show Gist options
#   - Save marpie/2849980 to your computer and use it in GitHub Desktop.
# Save marpie/2849980 to your computer and use it in GitHub Desktop.
# PE file format parsing *incomplete*
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" peEvade
TEMPLATE Description
Author: marpie (marpie@a12d404.net)
Last Update: 20120531
Created: 20120530
"""
# Imports
import struct
import os.path
# Version Information
# Module metadata. __program__ is derived from __version__, so bump only
# __version__ when releasing.
__version__ = "0.0.1"
__program__ = "peEvade v" + __version__
__author__ = "marpie"
__email__ = "marpie+peEvade@a12d404.net"
__license__ = "BSD License"
__copyright__ = "Copyright 2011, a12d404.net"
__status__ = "Prototype" # ("Prototype", "Development", "Testing", "Production")
#SCRIPT_PATH = os.path.dirname( os.path.realpath( __file__ ) )
# Step (in bytes) used by Lib.__parseFunctions to advance from one thunk
# entry to the next.
POINTER_SIZE = 4
# Bit tested against a thunk's Ordinal value to tell "import by ordinal"
# from "import by name". Only the 32-bit flag is referenced in this file.
IMAGE_ORDINAL_FLAG_64 = 0x8000000000000000
IMAGE_ORDINAL_FLAG_32 = 0x80000000
# Expected values of IMAGE_DOS_HEADER.e_magic and IMAGE_NT_HEADERS.Signature;
# the valid() methods of those classes compare against them.
IMAGE_DOS_SIGNATURE = 0x5A4D # MZ
IMAGE_NT_SIGNATURE = 0x00004550 # PE00
# Indices into IMAGE_OPTIONAL_HEADER.DataDirectory, as accepted by
# getDataDirectory(). Only IMAGE_DIRECTORY_ENTRY_IMPORT is used in this file.
IMAGE_DIRECTORY_ENTRY_EXPORT = 0
IMAGE_DIRECTORY_ENTRY_IMPORT = 1
IMAGE_DIRECTORY_ENTRY_RESOURCE = 2
IMAGE_DIRECTORY_ENTRY_EXCEPTION = 3
IMAGE_DIRECTORY_ENTRY_SECURITY = 4
IMAGE_DIRECTORY_ENTRY_BASERELOC = 5
IMAGE_DIRECTORY_ENTRY_DEBUG = 6
IMAGE_DIRECTORY_ENTRY_COPYRIGHT = 7
IMAGE_DIRECTORY_ENTRY_GLOBALPTR = 8
IMAGE_DIRECTORY_ENTRY_TLS = 9
IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG = 10
IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT = 11
IMAGE_DIRECTORY_ENTRY_IAT = 12
#############################################################################
# Helper functions
def getString(data):
    """Return the NUL-terminated string found at the start of *data*.

    Everything up to (not including) the first 0x00 byte is returned. If
    *data* contains no terminator at all -- e.g. an 8-character section
    name that fills its whole field, or an empty buffer -- the complete
    input is returned instead of running past the end of the buffer.

    *data* may be a native ``str`` or a bytes-like object. Bytes-like
    input is decoded as latin-1 (one character per byte, never fails) so
    that the result is always a native string that can be compared with
    ordinary string literals.
    """
    if isinstance(data, str):
        # Native string (this is also the byte string type on Python 2).
        end = data.find("\x00")
        return data if end < 0 else data[:end]
    raw = bytes(data)
    end = raw.find(b"\x00")
    if end >= 0:
        raw = raw[:end]
    return raw.decode("latin-1")
#############################################################################
# Windows DEFs
class IMAGE_DOS_HEADER(object):
    """Decoded MS-DOS stub header found at the very start of a PE image.

    The reserved arrays ``e_res`` (8 bytes) and ``e_res2`` (20 bytes) are
    skipped; every other field is exposed as an attribute of the same
    name. ``e_lfanew`` is the file offset at which the caller expects the
    NT headers.
    """

    # 14 WORDs, 8 pad bytes (e_res), e_oemid + e_oeminfo, 20 pad bytes
    # (e_res2), then the 4-byte e_lfanew: 64 bytes in total.
    HDR_DEF = r'<14H8x2H20xL'

    def __init__(self, raw_data):
        """Unpack the header from the first 64 bytes of *raw_data*.

        Raises ``struct.error`` if fewer bytes than that are available.
        """
        # size() previously reported 68 because 4 was added on top of the
        # end offset of e_lfanew; the structure is exactly calcsize() bytes.
        self.__size = struct.calcsize(self.HDR_DEF)
        (self.e_magic,
         self.e_cblp,
         self.e_cp,
         self.e_crlc,
         self.e_cparhdr,
         self.e_minalloc,
         self.e_maxalloc,
         self.e_ss,
         self.e_sp,
         self.e_csum,
         self.e_ip,
         self.e_cs,
         self.e_lfarlc,
         self.e_ovno,
         self.e_oemid,
         self.e_oeminfo,
         self.e_lfanew) = struct.unpack(self.HDR_DEF, raw_data[:self.__size])

    def size(self):
        """Return the number of bytes the header occupies."""
        return self.__size

    def valid(self):
        """Return True if e_magic carries the DOS signature."""
        return self.e_magic == IMAGE_DOS_SIGNATURE
class IMAGE_FILE_HEADER(object):
    """Decoded COFF file header (the part of the NT headers that follows
    the 4-byte signature)."""

    HDR_DEF = r'<HHLLLHH'

    def __init__(self, raw_data):
        """Unpack the header from the leading bytes of *raw_data*."""
        header_len = struct.calcsize(self.HDR_DEF)
        self.__size = header_len
        values = struct.unpack(self.HDR_DEF, raw_data[:header_len])
        (self.Machine,
         self.NumberOfSections,
         self.TimeDateStamp,
         self.PointerToSymbolTable,
         self.NumberOfSymbols,
         self.SizeOfOptionalHeader,
         self.Characteristics) = values

    def size(self):
        """Return the number of bytes consumed by this header."""
        return self.__size
class IMAGE_DATA_DIRECTORY(object):
    """One data-directory entry: an RVA and the size of the data there."""

    SIZE = 8

    def __init__(self, raw_data):
        """Unpack the two little-endian DWORDs at the start of *raw_data*."""
        rva, length = struct.unpack('<LL', raw_data[:self.SIZE])
        self.VirtualAddress = rva
        self.Size = length

    def size(self):
        """Return the fixed size of an entry in bytes."""
        return self.SIZE
class IMAGE_OPTIONAL_HEADER(object):
    """Decoded optional header including its data-directory table.

    The fixed part is unpacked with ``HDR_DEF``, which has a 4-byte
    ImageBase and a BaseOfData field.
    NOTE(review): images whose optional header uses a different layout
    (e.g. PE32+) are presumably not supported -- confirm.
    """

    HDR_DEF = r'<HBBLLLLLLLLLHHHHHHLLLLHHLLLLLL'

    def __init__(self, raw_data):
        """Unpack the fixed fields and then NumberOfRvaAndSizes directory
        entries from *raw_data*.

        Raises ``struct.error`` if *raw_data* ends before everything
        announced by the header has been read.
        """
        fixed_len = struct.calcsize(self.HDR_DEF)
        (self.Magic,
         self.MajorLinkerVersion,
         self.MinorLinkerVersion,
         self.SizeOfCode,
         self.SizeOfInitializedData,
         self.SizeOfUninitializedData,
         self.AddressOfEntryPoint,
         self.BaseOfCode,
         self.BaseOfData,
         self.ImageBase,
         self.SectionAlignment,
         self.FileAlignment,
         self.MajorOperatingSystemVersion,
         self.MinorOperatingSystemVersion,
         self.MajorImageVersion,
         self.MinorImageVersion,
         self.MajorSubsystemVersion,
         self.MinorSubsystemVersion,
         self.Win32VersionValue,
         self.SizeOfImage,
         self.SizeOfHeaders,
         self.CheckSum,
         self.Subsystem,
         self.DllCharacteristics,
         self.SizeOfStackReserve,
         self.SizeOfStackCommit,
         self.SizeOfHeapReserve,
         self.SizeOfHeapCommit,
         self.LoaderFlags,
         self.NumberOfRvaAndSizes) = struct.unpack(self.HDR_DEF,
                                                   raw_data[:fixed_len])
        self.DataDirectory = []
        offset = fixed_len
        # A while loop (instead of xrange/range) runs on Python 2 and 3 and
        # never materialises a huge list when a corrupt file announces an
        # absurd entry count; parsing then stops with struct.error at the
        # end of the data.
        while len(self.DataDirectory) < self.NumberOfRvaAndSizes:
            entry = IMAGE_DATA_DIRECTORY(
                raw_data[offset:offset + IMAGE_DATA_DIRECTORY.SIZE])
            self.DataDirectory.append(entry)
            offset += IMAGE_DATA_DIRECTORY.SIZE
        # Total size = fixed part + all directory entries read.
        self.__size = offset

    def size(self):
        """Return the number of bytes consumed, directory table included."""
        return self.__size

    def getDataDirectory(self, directory_idx):
        """Return the directory entry at *directory_idx*, or None when the
        image does not provide that entry."""
        # The former check (len < idx) let idx == len through and raised
        # IndexError; negative indices are rejected as well so they cannot
        # silently address entries from the end of the table.
        if not 0 <= directory_idx < len(self.DataDirectory):
            return None
        return self.DataDirectory[directory_idx]
class IMAGE_NT_HEADERS(object):
    """Signature, file header and optional header parsed as one unit."""

    def __init__(self, raw_data):
        """Parse the three parts back to back from the start of *raw_data*."""
        (self.Signature,) = struct.unpack('<L', raw_data[:4])
        file_header = IMAGE_FILE_HEADER(raw_data[4:])
        # The optional header starts right after signature + file header.
        optional_header = IMAGE_OPTIONAL_HEADER(raw_data[file_header.size()+4:])
        self.FileHeader = file_header
        self.OptionalHeader = optional_header
        self.__size = 4 + file_header.size() + optional_header.size()

    def size(self):
        """Return the combined size of all three parts in bytes."""
        return self.__size

    def valid(self):
        """Return True if the signature matches and the parsed optional
        header is exactly as long as the file header announces."""
        if self.Signature != IMAGE_NT_SIGNATURE:
            return False
        return self.OptionalHeader.size() == self.FileHeader.SizeOfOptionalHeader
class IMAGE_IMPORT_DESCRIPTOR(object):
    """One entry of the import directory table (five DWORDs)."""

    HDR_DEF = r'<LLLLL'

    def __init__(self, raw_data):
        """Unpack the descriptor from the leading bytes of *raw_data*."""
        descriptor_len = struct.calcsize(self.HDR_DEF)
        self.__size = descriptor_len
        values = struct.unpack(self.HDR_DEF, raw_data[:descriptor_len])
        (self.Characteristics,
         self.TimeDateStamp,
         self.ForwarderChain,
         self.Name,
         self.FirstThunk) = values
        # The first DWORD is exposed under both of its names.
        self.OriginalFirstThunk = self.Characteristics

    def size(self):
        """Return the size of one descriptor in bytes."""
        return self.__size
class IMAGE_IMPORT_BY_NAME(object):
    """Hint/name entry: a 2-byte hint followed by a NUL-terminated name."""

    def __init__(self, raw_data):
        """Decode the hint and the name from the start of *raw_data*.

        Raises ``struct.error`` if *raw_data* holds fewer than two bytes.
        """
        # struct.unpack always returns a tuple; take the single value so
        # Hint is an integer like every other numeric field in this file.
        self.Hint = struct.unpack('<H', raw_data[:2])[0]
        self.Name = getString(raw_data[2:])
class IMAGE_THUNK_DATA(object):
    """A 4-byte thunk value, readable under each of its four union names."""

    def __init__(self, raw_data):
        """Unpack one little-endian DWORD from the start of *raw_data*."""
        self.__size = 4
        (value,) = struct.unpack('<L', raw_data[:self.__size])
        # All four names alias the same value.
        self.ForwarderString = value
        self.Function = value
        self.Ordinal = value
        self.AddressOfData = value
class IMAGE_SECTION_HEADER(object):
    """One 40-byte section table entry.

    ``rawIndex`` is stored unchanged; callers in this file pass the file
    offset at which the entry was found.
    """

    # Layout of the 32 bytes that follow the 8-byte name field.
    HDR_DEF = r'<LLLLLLHHL'

    def __init__(self, raw_data, rawIndex):
        """Decode the entry from the leading bytes of *raw_data*.

        Raises ``struct.error`` if *raw_data* is shorter than one entry.
        """
        # A name that is exactly 8 characters long fills the whole field
        # and has no terminator; append one so getString always stops
        # inside the field instead of running past it.
        self.Name = getString(bytes(raw_data[:8]) + b"\x00")
        self.rawIndex = rawIndex
        self.__size = struct.calcsize(self.HDR_DEF) + 8
        (self.PhysicalAddress,
         self.VirtualAddress,
         self.SizeOfRawData,
         self.PointerToRawData,
         self.PointerToRelocations,
         self.PointerToLinenumbers,
         self.NumberOfRelocations,
         self.NumberOfLinenumbers,
         self.Characteristics) = struct.unpack(self.HDR_DEF,
                                               raw_data[8:self.__size])
        # The first DWORD is exposed under both of its names.
        self.VirtualSize = self.PhysicalAddress

    def size(self):
        """Return the size of one section table entry in bytes."""
        return self.__size
#############################################################################
# PE file format parsing classes
class Lib(object):
    """ Lib represents one PE-Import-Dictionary-Entry.

    ``name`` is the library name read from the image and ``functions``
    the list of its imports: a string for an import by name, an integer
    for an import by ordinal.
    """

    def __init__(self, pe, importDescriptor):
        """Read the library name and all imports of *importDescriptor*.

        *pe* must provide ``imageByRva(rva)``. When the name RVA cannot be
        mapped into the file, ``name`` becomes "" (so valid() is False)
        instead of raising.
        """
        self.__pe = pe
        self.__importDescriptor = importDescriptor
        name_data = pe.imageByRva(importDescriptor.Name)
        self.name = getString(name_data) if name_data else ""
        self.functions = list(self.__parseFunctions())

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        """Return the library name followed by one line per import."""
        lines = ["Library: " + self.name + "\n"]
        for func in self.functions:
            if isinstance(func, str):
                lines.append("\t" + func + "\n")
            else:
                lines.append("\tOrdinal: " + str(func) + "\n")
        return "".join(lines)

    def valid(self):
        """Return True if the descriptor has a name RVA and a non-empty
        name could be read."""
        return (self.__importDescriptor.Name != 0) and (self.name != "")

    def __parseFunctions(self):
        """Yield every import of this library until the terminating zero
        thunk, an unreadable thunk or an empty import name is reached."""
        # Prefer the OriginalFirstThunk table; fall back to FirstThunk
        # when the descriptor does not provide one.
        ptrThunkRef = self.__importDescriptor.OriginalFirstThunk
        if ptrThunkRef == 0:
            ptrThunkRef = self.__importDescriptor.FirstThunk
        while True:
            thunk_data = self.__pe.imageByRva(ptrThunkRef)
            # imageByRva returns None for RVAs outside every section;
            # stop instead of crashing on malformed import tables.
            if not thunk_data or len(thunk_data) < POINTER_SIZE:
                return
            thunkRef = IMAGE_THUNK_DATA(thunk_data)
            if thunkRef.AddressOfData == 0:
                return
            if thunkRef.Ordinal & IMAGE_ORDINAL_FLAG_32:
                # Import by ordinal. Ordinal 0 is a legal value here and
                # must not be mistaken for "no name"; the end of the table
                # is already detected by the zero-thunk check above.
                importName = thunkRef.Ordinal & 0xffff
            else:
                name_data = self.__pe.imageByRva(thunkRef.AddressOfData)
                if not name_data or len(name_data) < 2:
                    return
                importName = IMAGE_IMPORT_BY_NAME(name_data).Name
                if not importName:
                    return
            ptrThunkRef += POINTER_SIZE
            yield importName
class PE(object):
    """
    PE implements all functions needed to parse the PE file format.
    The implementation is *incomplete* and only written to parse
    the imports to generate the fasm template file.
    """

    def __init__(self, raw_data):
        """Parse the DOS and NT headers of the image in *raw_data*.

        Raises TypeError when the headers cannot be unpacked or fail
        their validity checks.
        """
        self.pe_image = raw_data
        try:
            self.dos_hdr = IMAGE_DOS_HEADER(self.pe_image)
            self.pe_hdr = IMAGE_NT_HEADERS(self.pe_image[self.dos_hdr.e_lfanew:])
        except Exception:
            # Was a bare "except:", which also swallowed KeyboardInterrupt
            # and SystemExit. The exception type seen by callers is kept.
            raise TypeError("data cannot be parsed as a PE image")
        if not (self.dos_hdr.valid() and self.pe_hdr.valid()):
            raise TypeError("invalid DOS or NT header")
        # Both are filled lazily by parseSections() / parseImports().
        self.__sections = None
        self.__imports = None

    def __iterateDescriptors(self, directory):
        """Yield a Lib for each import descriptor of *directory* until a
        descriptor without a name or an invalid Lib is reached."""
        adr = self.rva2ptr(directory.VirtualAddress)
        if adr is None:
            # pe_image[None:] would silently be the whole file and the
            # DOS header would be decoded as an import descriptor.
            return
        while True:
            importDescriptor = IMAGE_IMPORT_DESCRIPTOR(self.pe_image[adr:])
            if importDescriptor.Name == 0:
                break
            lib = Lib(self, importDescriptor)
            if not lib.valid():
                break
            yield lib
            adr += importDescriptor.size()

    def parseSections(self):
        """ parseSections tries to parse all sections of the PE file.

        The result is cached; later calls return the same list.
        """
        if self.__sections is not None:
            return self.__sections
        self.__sections = []
        # The section table directly follows the NT headers.
        ptr = self.dos_hdr.e_lfanew + self.pe_hdr.size()
        for _ in range(self.pe_hdr.FileHeader.NumberOfSections):
            section = IMAGE_SECTION_HEADER(self.pe_image[ptr:], ptr)
            if section.size() > 0:
                self.__sections.append(section)
            ptr += section.size()
        return self.__sections

    def dumpSection(self, sectionName, fileName):
        """Write the raw data of section *sectionName* to *fileName*.

        Returns True on success and None when no section has that name or
        its data cannot be located in the file. If several sections share
        the name, the last one is written.
        """
        section = None
        for iSection in self.parseSections():
            if iSection.Name == sectionName:
                section = iSection
        if section is None:
            return None
        data = self.imageByRva(section.VirtualAddress)
        if data is None:
            return None
        # Binary mode: text mode would translate line endings on Windows
        # and rejects bytes on Python 3.
        with open(fileName, 'wb') as f:
            f.write(data[:section.SizeOfRawData])
        return True

    def parseImports(self):
        """ parseImports builds a list of all static imports.

        Returns the cached list of Lib objects. The list is empty when
        the image has no usable import directory or no sections; earlier
        the first such call returned None while every later call returned
        the empty cache.
        """
        if self.__imports is not None:
            return self.__imports
        self.__imports = []
        directory = self.pe_hdr.OptionalHeader.getDataDirectory(IMAGE_DIRECTORY_ENTRY_IMPORT)
        if not directory or directory.VirtualAddress == 0:
            return self.__imports
        if not self.parseSections():
            return self.__imports
        self.__imports.extend(self.__iterateDescriptors(directory))
        return self.__imports

    def getCurrentSectionHeader(self, rva):
        """
        getCurrentSectionHeader returns the section that rva
        belongs to or None.
        """
        # parseSections() is used instead of the raw attribute so this
        # also works before the section table has been parsed.
        for section in self.parseSections():
            start = section.VirtualAddress
            if start <= rva < start + section.VirtualSize:
                return section
        return None

    def imageByRva(self, rva):
        """
        imageByRva returns the PE-Image beginning at the file
        position that rva belongs to, or None when rva cannot be
        mapped to a file position (a position of 0 counts as unmapped).
        """
        ptr = self.rva2ptr(rva)
        if not ptr:
            return None
        return self.pe_image[ptr:]

    def rva2ptr(self, rva):
        """
        rva2ptr returns the position in the PE-Image (on disk)
        that rva belongs to, or None if no section contains rva.
        """
        sectionHeader = self.getCurrentSectionHeader(rva)
        if not sectionHeader:
            return None
        diff = sectionHeader.VirtualAddress - sectionHeader.PointerToRawData
        return rva - diff
#############################################################################
# fasm template
# Skeleton of the generated fasm source. FasmTemplate.__str__ substitutes
# the two placeholders:
#   %%fake_imports%%  - one "invoke" line per imported function
#   %%imports%%       - the "library" / "import" declarations
# The template embeds "code_section.bin", the same file name main() passes
# to PE.dumpSection.
# NOTE(review): _doneMsg also reads 'Start' -- possibly a copy-paste slip;
# confirm before changing, since the text is part of the emitted program.
FASM_TEMPLATE = r"""format PE GUI 4.0 at 0x00200000
entry start
include 'win32a.inc'
section '.code' code readable executable
code_start:
file "code_section.bin"
proc start
invoke MessageBoxA,0,_beginMsg,_caption,MB_ICONINFORMATION+MB_OK
jmp code_start
invoke MessageBoxA,0,_doneMsg,_caption,MB_ICONINFORMATION+MB_OK
.exit:
invoke ExitProcess, 0
.unreachable:
jmp .exit
%%fake_imports%%
endp
section '.data' data readable
_beginMsg db 'Start',0
_doneMsg db 'Start',0
_caption db 'peEvade TEMPLATE',0
section '.idata' import data readable
%%imports%%
"""
class FasmSection(object):
    """A fasm ``section`` directive: a name plus a list of attributes."""

    def __init__(self, name, attributes):
        """Remember the section *name* and its *attributes* (strings)."""
        self.attributes = attributes
        self.name = name

    def __str__(self):
        """Render the directive as a single line of fasm source."""
        joined_attributes = ' '.join(self.attributes)
        return "".join(["section '", self.name, "' ", joined_attributes])
class FasmTemplate(object):
    """Collects imported libraries and renders FASM_TEMPLATE with them."""

    def __init__(self):
        self.__libs = {}
        self.__sections = []

    def addLib(self, lib):
        """Register *lib* (needs ``name`` and ``functions``).

        The key is the lower-cased file name without directory and
        extension; a later library with the same key replaces the earlier
        one.
        """
        lib_name = os.path.splitext(os.path.basename(lib.name))[0].lower()
        self.__libs[lib_name] = lib

    def addSection(self, name, attributes):
        """Record a section; recorded sections are not rendered yet."""
        self.__sections.append(FasmSection(name, attributes))

    def __repr__(self):
        return str(self)

    def __str__(self):
        """Return FASM_TEMPLATE with both placeholders filled in."""
        fake_imports, imports = self.__prepareImports()
        template = FASM_TEMPLATE.replace("%%fake_imports%%", fake_imports)
        template = template.replace("%%imports%%", imports)
        return template

    def __prepareImports(self):
        """Build the text for both template placeholders.

        Returns a tuple ``(fake_imports, imports)``: the ``invoke`` lines
        and the ``library`` / ``import`` declarations.
        """
        library_entries = ["%s,'%s'" % (lib_name, lib.name)
                           for lib_name, lib in self.__libs.items()]
        import_parts = ["\nlibrary ", ",\\\n ".join(library_entries), "\n\n"]
        fake_parts = []
        for lib_name, lib in self.__libs.items():
            import_parts.append("import " + lib_name)
            for func in lib.functions:
                if isinstance(func, str):
                    label = func
                    target = "'" + func + "'"
                else:
                    # Import by ordinal: func is an integer, so string
                    # concatenation used to raise TypeError. A label is
                    # synthesised and the bare number is emitted.
                    # NOTE(review): relies on fasm's import macro treating
                    # a non-string value as an ordinal -- confirm against
                    # the include files in use.
                    label = "%s_ordinal_%d" % (lib_name, func)
                    target = str(func)
                import_parts.append(",\\\n " + label + "," + target)
                fake_parts.append("\n invoke " + label)
            import_parts.append("\n\n")
        return ("".join(fake_parts), "".join(import_parts),)
#############################################################################
# Main
def main(argv):
    """Generate the fasm template for the PE file named in argv[1].

    Reads the file, collects its imports, writes the ".code" section to
    "code_section.bin" and prints the rendered template.

    Returns True on success and False (after printing a usage line) when
    no input file was given. Errors raised while opening or parsing the
    file propagate to the caller.
    """
    if len(argv) < 2:
        # Used to fail with IndexError on argv[1].
        print("Usage: %s <pe-file>" % (argv[0] if argv else "peEvade"))
        return False
    with open(argv[1], 'rb') as f:
        pe_image = f.read()
    template = FasmTemplate()
    pe = PE(pe_image)
    # parseImports() may return None for images without imports.
    for lib in pe.parseImports() or []:
        template.addLib(lib)
    pe.dumpSection(".code", "code_section.bin")
    print(template)
    return True
#############################################################################
if __name__ == "__main__":
    import sys

    # main() returns True on success; negating it gives exit status 0 for
    # success and 1 for failure.
    succeeded = main(sys.argv)
    sys.exit(not succeeded)
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment