Skip to content

Instantly share code, notes, and snippets.

@williballenthin
Last active March 31, 2021 20:14
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save williballenthin/cbc102d561e2eb647f7aec3c3753ba55 to your computer and use it in GitHub Desktop.
Save williballenthin/cbc102d561e2eb647f7aec3c3753ba55 to your computer and use it in GitHub Desktop.
Dump some PE file features from memory images.
#!/usr/bin/env python2
'''
Dump some PE file features from memory images.
author: Willi Ballenthin
email: william.ballenthin@fireeye.com
website: https://gist.github.com/williballenthin/cbc102d561e2eb647f7aec3c3753ba55
'''
import os
import sys
import hashlib
import logging
import datetime
import contextlib
# from pypi::
#
# pip install pytz argparse
import pytz
import argparse
# from vivisect::
#
# pip install https://github.com/williballenthin/vivisect/zipball/master
import PE
logger = logging.getLogger(__name__)
@contextlib.contextmanager
def restoring_offset(f):
    '''
    context manager that remembers where the file pointer of `f` is on
    entry and moves it back there on exit, whether the enclosed block
    finished normally or raised.

    Args:
      f: any object providing `tell()` and `seek(offset, whence)`.

    Example::

        with open('test.bin', 'rb') as f:
            assert f.tell() == 0x0
            with restoring_offset(f):
                f.seek(0x200)
                assert f.tell() == 0x200
            assert f.tell() == 0x0
    '''
    saved_offset = f.tell()
    try:
        yield
    finally:
        # absolute seek: it does not matter where the block left the pointer.
        f.seek(saved_offset, os.SEEK_SET)
class FileView(object):
    '''
    Given an open file object, provide read access to a subsection of the file
    as if it were its own file object. This is a bit like `losetup(8)`, except a
    file-like object in Python.

    Offsets accepted by `seek` and returned by `tell` are relative to `start`.
    The view does not keep a position of its own: it drives the file pointer
    of the underlying file, so moving one moves the other.

    Args:
      f: file-like object providing `seek`, `tell` and `read`.
      start: offset within `f` at which the view begins.
      length: number of bytes covered by the view. when None, the view
        extends from `start` to the end of `f`.

    Example::

        with open('logical-process-memory.bin', 'rb') as f:
            g = FileView(f, 0x401000)
            assert g.read(0x2) == b'MZ'
    '''

    # looked up by module name, so this is the same logger object that a
    # module-level `logging.getLogger(__name__)` yields.
    _logger = logging.getLogger(__name__)

    def __init__(self, f, start=0, length=None):
        super(FileView, self).__init__()
        self.f = f
        self.start = start

        if length is None:
            # measure the distance from `start` to the end of the file.
            self.f.seek(0, os.SEEK_END)
            length = self.f.tell() - self.start
        self.length = length

        # a fresh view is always positioned at its own offset 0.
        self.f.seek(self.start)

    def tell(self):
        '''
        Returns: the current position, relative to the start of the view.
        '''
        return self.f.tell() - self.start

    def seek(self, offset, whence=os.SEEK_SET):
        '''
        move the file pointer within the view.

        Args:
          offset: distance in bytes from the reference point. as with regular
            file objects, this is normally zero or negative for `os.SEEK_END`.
          whence: `os.SEEK_SET` (start of view), `os.SEEK_CUR` (current
            position) or `os.SEEK_END` (end of view).

        Returns: the new position, relative to the start of the view.

        Raises:
          IOError: if `whence` is unknown, or the target lies outside the view.
        '''
        if whence == os.SEEK_SET:
            final_offset = self.start + offset
        elif whence == os.SEEK_CUR:
            final_offset = self.f.tell() + offset
        elif whence == os.SEEK_END:
            # offsets are *added* to the end position, like file.seek() does;
            # subtracting made `seek(-n, SEEK_END)` overrun the view.
            final_offset = self.start + self.length + offset
        else:
            raise IOError('unknown seek whence')

        self._logger.debug('seek offset: 0x%x whence: 0x%x final offset: 0x%x',
                           offset, whence, final_offset)

        if final_offset < self.start:
            raise IOError('cant read offset %d (underrun)' % (final_offset - self.start))
        if final_offset > self.start + self.length:
            raise IOError('cant read offset %d (overrun)' % (final_offset - self.start))

        self.f.seek(final_offset)
        return final_offset - self.start

    def read(self, length=None):
        '''
        read from the current position, never going past the end of the view.

        Args:
          length: maximum number of bytes to read. None or a negative value
            means "everything up to the end of the view".

        Returns: the data handed back by the underlying file's `read`.
        '''
        # the pointer is shared with the underlying file and may have been
        # moved beyond the view; never turn that into a negative read size,
        # which the underlying file would treat as "read everything".
        max_length = max(0, self.length - self.tell())
        self._logger.debug('read length: 0x%x', length or max_length)

        if length is None or length < 0:
            length = max_length
        return self.f.read(min(length, max_length))
def md5(buf):
    '''
    compute the MD5 digest of a buffer.

    Args:
      buf: the bytes to hash.

    Returns: the digest as a string of hexadecimal digits.
    '''
    return hashlib.md5(buf).hexdigest()
def get_imphash(pe):
    '''
    compute an "import hash" for a PE: the MD5 of its imports, rendered as
    lowercase `library.function` entries joined by commas, in the order
    that `pe.getImports()` yields them.

    a trailing `.ocx`, `.sys` or `.dll` is dropped from library names, so
    `kernel32.dll!CreateFileA` contributes `kernel32.createfilea`.

    Args:
      pe: object whose `getImports()` yields `(offset, libname, funcname)`
        tuples with string names.

    Returns: the digest as a string of hexadecimal digits.
    '''
    stripped_exts = ('ocx', 'sys', 'dll')

    impstrs = []
    for (_, libname, funcname) in pe.getImports():
        # lowercase *before* looking at the extension: import tables often
        # spell names like `KERNEL32.DLL`, and a case-sensitive check would
        # leave the extension in place and change the resulting hash.
        libname = libname.lower()
        stem, dot, ext = libname.rpartition('.')
        if dot and ext in stripped_exts:
            libname = stem
        impstrs.append('%s.%s' % (libname, funcname.lower()))

    return hashlib.md5(','.join(impstrs).encode()).hexdigest()
def guess_is_memory_image(f):
    '''
    guess if the provided file is a PE from memory or on disk.
    it works by exploiting the differing alignment between file
    sectors (0x200, PE file alignment) and memory pages (0x1000,
    PE section alignment). on disk, the first section's content
    typically begins at offset 0x400, while in memory, it usually
    begins at 0x1000.

    concretely: the 0x200 bytes at offset 0x400 are read, and the file is
    taken to be a memory image when all of them are zero. the file offset
    is put back to where it was before returning.

    Args:
      f: file-like object opened in binary mode, positioned anywhere.

    Returns: True if the file looks like a memory image, False otherwise
      (including when fewer than 0x200 bytes are available at 0x400).

    Example::

        with open('kernel32.dll', 'rb') as f:
            assert guess_is_memory_image(f) == False
        with open('0x401000.bin', 'rb') as f:
            assert guess_is_memory_image(f) == True
    '''
    saved_offset = f.tell()
    try:
        f.seek(0x400)
        # compare against *bytes*: the data comes from a binary file, and on
        # Python 3 a bytes object never equals a str, which made this always
        # answer False.
        return f.read(0x200) == b'\x00' * 0x200
    finally:
        f.seek(saved_offset)
def output_normal_mode(pe, args):
    '''
    print a multi-line, human-readable report about a parsed PE to stdout:
    header timestamp and checksum, export name (when there is one), exports,
    imports (unless suppressed), sections, and finally the imphash.

    Args:
      pe: the parsed PE object.
      args: parsed command line arguments; only `no_imports` is consulted.
    '''
    nt_headers = pe.IMAGE_NT_HEADERS

    # the header timestamp is rendered as a timezone-aware UTC datetime.
    ts = datetime.datetime.fromtimestamp(nt_headers.FileHeader.TimeDateStamp, pytz.utc)
    print('timestamp: ' + ts.isoformat())
    print('checksum: ' + hex(nt_headers.OptionalHeader.CheckSum))

    export_name = pe.getExportName()
    if export_name:
        print('export name: ' + export_name)

    print('exports:')
    for (_, ord_, funcname) in pe.getExports():
        print(' %d) %s' % (ord_, funcname))

    if not args.no_imports:
        print('imports:')
        for (_, libname, funcname) in pe.getImports():
            print(' - %s.%s' % (libname, funcname))

    print('sections:')
    for section in pe.getSections():
        virtual_line = [' virtual address: ', hex(section.VirtualAddress),
                        '\tsize: ', hex(section.VirtualSize)]
        raw_line = [' raw address: ', hex(section.PointerToRawData),
                    '\tsize: ', hex(section.SizeOfRawData)]
        print(' - ' + section.Name)
        print(''.join(virtual_line))
        print(''.join(raw_line))

    print('imphash: ' + get_imphash(pe))
def output_bulk_mode(pe, args):
    '''
    print a single pipe-separated record about a parsed PE to stdout.

    the fields are, in order: input filename, offset, export name (empty
    when there is none), header timestamp (ISO 8601, UTC), header checksum
    (hex) and imphash.

    Args:
      pe: the parsed PE object.
      args: parsed command line arguments; `input` and `offset` are echoed
        into the record.
    '''
    nt_headers = pe.IMAGE_NT_HEADERS
    # name the fields explicitly rather than formatting from `locals()`.
    fields = {
        'filename': args.input,
        'offset': args.offset,
        'export_name': pe.getExportName() or '',
        'timestamp': datetime.datetime.fromtimestamp(
            nt_headers.FileHeader.TimeDateStamp, pytz.utc).isoformat(),
        'checksum': hex(nt_headers.OptionalHeader.CheckSum),
        'imphash': get_imphash(pe),
    }
    print('{filename}|{offset}|{export_name}|{timestamp}|{checksum}|{imphash}'.format(**fields))
def number(s):
    '''
    parse an integer given on the command line, either in decimal (`4096`)
    or in hexadecimal with a `0x`/`0X` prefix (`0x1000`). surrounding
    whitespace and a leading sign are accepted in both notations.

    Args:
      s: the text to parse.

    Returns: the parsed integer.

    Raises:
      ValueError: if `s` is not a valid number in the detected notation.
    '''
    text = s.strip()
    # detect the prefix case-insensitively and behind an optional sign;
    # int() itself copes with both once it is told the base.
    if text.lstrip('+-').lower().startswith('0x'):
        return int(text, 0x10)
    return int(text)
def main(argv=None):
    '''
    command line entry point: parse the arguments, open the input file,
    parse a PE starting at the requested offset and print its features in
    either normal or bulk mode.

    Args:
      argv: list of command line arguments, without the program name.
        when None, `sys.argv[1:]` is used.

    Returns: 0, suitable for passing to `sys.exit`.
    '''
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Dump some PE file features from memory images.")
    parser.add_argument("input", type=str,
                        help="Path to input file")
    parser.add_argument("offset", type=number,
                        help="Offset from which to parse the PE image.")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Enable debug logging")
    parser.add_argument("-q", "--quiet", action="store_true",
                        help="Disable all output but errors")
    parser.add_argument("--no-imports", dest='no_imports', action="store_true",
                        help="Don't show imports")
    parser.add_argument("--bulk-mode", dest='bulk_mode', action="store_true",
                        help="Output in bulk mode (|SV)")
    # hand over `argv` explicitly; otherwise arguments passed in by a
    # caller are silently ignored in favour of sys.argv.
    args = parser.parse_args(argv)

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    elif args.quiet:
        logging.basicConfig(level=logging.ERROR)
    else:
        logging.basicConfig(level=logging.INFO)

    logger.debug('offset: 0x%x', args.offset)

    # everything that touches `pe` stays inside the `with`, so the file is
    # still open while the features are printed.
    with open(args.input, 'rb') as f:
        fv = FileView(f, args.offset)
        pe = PE.PE(fv, inmem=guess_is_memory_image(fv))

        with restoring_offset(fv):
            # the file is opened in binary mode, so compare against bytes;
            # a str literal never matches on Python 3 and always warned.
            if fv.read(0x2) != b'MZ':
                logger.warning('missing PE header!')

        if args.bulk_mode:
            output_bulk_mode(pe, args)
        else:
            output_normal_mode(pe, args)

    return 0
if __name__ == "__main__":
    # executed as a script: use main()'s return value as the exit status.
    sys.exit(main())
@williballenthin
Copy link
Author

williballenthin commented Jan 25, 2017

example output:

$ python memdumppe.py SystemMemory__0x0000000000550000-0x00000000005a0fff.VAD 0 --no-imports
WARNING:__main__:missing PE header!
timestamp: 2015-06-23T07:47:32+00:00
checksum: 0x50152
export name: ihctrl32_setup.dll
exports:
  0) ?CheckDLLStatus@@YGKPAPAD@Z
  1) ?GetPluginData@@YGKPAPADPAK@Z
  2) ?InitializePlugin@@YGKXZ
  3) ?IsReleased@@YG_NXZ
  4) ?ReleaseDLL@@YGXXZ
sections:
  - .text
    virtual address: 0x1000     size: 0x9df2
    raw address:     0x400      size: 0x9e00
  - .rdata
    virtual address: 0xb000     size: 0x34f7
    raw address:     0xa200     size: 0x3600
  - .data
    virtual address: 0xf000     size: 0x3e1bc
    raw address:     0xd800     size: 0x3c200
  - .rsrc
    virtual address: 0x4e000    size: 0x1b4
    raw address:     0x49a00    size: 0x200
  - .reloc
    virtual address: 0x4f000    size: 0x16c6
    raw address:     0x49c00    size: 0x1800
imphash: a7ead4ef90d9981e25728e824a1ba3ef

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment