# hiddenillusion/extract_mft_strings.py

## extract_mft_strings.py
#!/usr/bin/env python

# created by Glenn P. Edwards Jr.
#   https://hiddenillusion.github.io
#       @hiddenillusion
# Date: 2016-10-10
# (while at FireEye)

'''
Based on https://github.com/williballenthin/INDXParse/blob/master/get_file_info.py
'''

import argparse
import array
import os
import re

from BinaryParser import Mmap
from MFT import Cache
from MFT import MFTEnumerator

# Body of a regex character class covering the printable ASCII range (space
# through '~'). Regex metacharacters are backslash-escaped, and `,-\.` is read
# as the range ','..'.' once placed inside [...], which still includes '-'.
ASCII_BYTE = " !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~"

def ascii_strings(buf, n=4):
  '''
  Collect every run of at least `n` consecutive printable ASCII characters.

  buf: buffer to scan; handed directly to the compiled pattern's finditer().
  n: minimum run length (default 4).

  Returns a list of the decoded runs, in the order they occur in `buf`.
  '''
  pattern = re.compile("([%s]{%d,})" % (ASCII_BYTE, n))
  found = []
  for hit in pattern.finditer(buf):
    raw = hit.group()
    # array.array matches have no decode(); turn them into a byte string first
    if isinstance(raw, array.array):
      raw = raw.tostring()
    found.append(raw.decode("ascii"))
  return found

def unicode_strings(buf, n=4):
  '''
  Collect every run of at least `n` printable ASCII characters that are each
  followed by a NUL byte (i.e. stored as little-endian UTF-16).

  buf: buffer to scan; handed directly to the compiled pattern's finditer().
  n: minimum number of characters in a run (default 4).

  Returns a list of the decoded runs, in the order they occur in `buf`.
  Runs that fail to decode are skipped.
  '''
  # Honour `n`; the pattern previously hard-coded a minimum of 4.
  pattern = re.compile(b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, n))
  found = []
  for hit in pattern.finditer(buf):
    raw = hit.group()
    # array.array matches have no decode(); turn them into a byte string first
    if isinstance(raw, array.array):
      raw = raw.tostring()
    try:
      # The pattern only matches "<char>\x00" pairs, which is little-endian;
      # plain "utf-16" without a BOM would use the host's native byte order.
      found.append(raw.decode("utf-16-le"))
    except UnicodeDecodeError:
      # best effort: ignore a run that cannot be decoded
      continue
  return found

def get_info(record, path, source_artifact_filepath):
  '''
  Build the report dictionary for a single MFT record.

  record: object providing active_data(), slack_data() and an `inode`
          attribute.
  path: path to store under the 'path' key, exactly as given.
  source_artifact_filepath: path of the MFT file; stored as an absolute path.

  Returns a dict with the source file, inode, path, and the ASCII and
  UTF-16 strings extracted from the record's active and slack data.
  '''
  active = record.active_data()
  slack = record.slack_data()

  info = {}
  info['source_artifact_filepath'] = os.path.abspath(source_artifact_filepath)
  info['inode'] = record.inode
  info['path'] = path
  info['active_ascii_strings'] = ascii_strings(active)
  info['active_unicode_strings'] = unicode_strings(active)
  info['slack_ascii_strings'] = ascii_strings(slack)
  info['slack_unicode_strings'] = unicode_strings(slack)
  return info

def output_json(data):
  '''
  Serialize `data` to JSON text using 4-space indentation and sorted keys.

  Returns the JSON document as a string.
  '''
  from json import dumps
  return dumps(data, sort_keys=True, indent=4)

def output_tsv(data):
  '''
  Render the values of `data` as a single tab-separated line.

  data: dict of column name -> value. List values are flattened by joining
        their (string) items with a single space; every other value is
        formatted with '{0}'.format().

  Returns the joined line, with values in the dict's iteration order.
  The caller's dict is left untouched.
  '''
  cells = []
  # values() exists on both Python 2 and 3, unlike iteritems()
  for value in data.values():
    if isinstance(value, list):
      # flatten into a local instead of writing back into the caller's dict
      value = ' '.join(value)
    cells.append('{0}'.format(value))
  return '\t'.join(cells)

def main():
  '''
  Command-line entry point: print, as JSON, the strings found in MFT records.

  The positional `record_or_path` argument is treated as a record number when
  it parses as an integer. Otherwise it is a case-insensitive substring that
  is compared against every path yielded by the enumerator, and one JSON
  document is printed per matching record.
  '''
  parser = argparse.ArgumentParser(description='Inspect '
                                   'a given MFT file record.')
  parser.add_argument('-a', action="store", metavar="cache_size", type=int,
                      dest="cache_size", default=1024,
                      help="Size of cache.")
  # No nargs=1 here: it would turn a user-supplied prefix into a one-item
  # list, and the string concatenations below would raise TypeError.
  parser.add_argument('-p', action="store", metavar="prefix",
                      dest="prefix", default="\\.",
                      help="Prefix paths with `prefix` rather than \\.\\")
  parser.add_argument('mft', action="store",
                      help="Path to MFT")
  parser.add_argument('record_or_path', action="store",
                      help="MFT record or file path to inspect")

  results = parser.parse_args()

  with Mmap(results.mft) as buf:
    enum = MFTEnumerator(buf,
                         record_cache=Cache(results.cache_size),
                         path_cache=Cache(results.cache_size))

    # An integer argument selects a record by number; anything else is a path.
    try:
      record_num = int(results.record_or_path)
    except ValueError:
      record_num = None

    if record_num is not None:
      record = enum.get_record(record_num)
      path = results.prefix + enum.get_path(record)
      print(output_json(get_info(record, path, results.mft)))
    else:
      # enum.get_record_by_path() would only match exactly; walk every path
      # instead so the argument can match anywhere _in_ the path.
      needle = results.record_or_path.lower()
      for enum_record, enum_path in enum.enumerate_paths():
        if needle in enum_path.lower():
          # Report the matched record's own path (as the inode branch does),
          # not the search term.
          info = get_info(enum_record, results.prefix + enum_path, results.mft)
          print(output_json(info))

# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
  main()
	#!/usr/bin/env python

	# created by Glenn P. Edwards Jr.
	# https://hiddenillusion.github.io
	# @hiddenillusion
	# Date: 2016-10-10
	# (while at FireEye)

	'''
	Based on https://github.com/williballenthin/INDXParse/blob/master/get_file_info.py
	'''

	import argparse
	import array
	import os
	import re

	from BinaryParser import Mmap
	from MFT import Cache
	from MFT import MFTEnumerator

	# Body of a regex character class covering the printable ASCII range (space
	# through '~'). Regex metacharacters are backslash-escaped, and `,-\.` is read
	# as the range ','..'.' once placed inside [...], which still includes '-'.
	ASCII_BYTE = " !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\\|\}\\\~"

	def ascii_strings(buf, n=4):
	  '''
	  Collect every run of at least `n` consecutive printable ASCII characters.

	  buf: buffer to scan; handed directly to the compiled pattern's finditer().
	  n: minimum run length (default 4).

	  Returns a list of the decoded runs, in the order they occur in `buf`.
	  '''
	  pattern = re.compile("([%s]{%d,})" % (ASCII_BYTE, n))
	  found = []
	  for hit in pattern.finditer(buf):
	    raw = hit.group()
	    # array.array matches have no decode(); turn them into a byte string first
	    if isinstance(raw, array.array):
	      raw = raw.tostring()
	    found.append(raw.decode("ascii"))
	  return found

	def unicode_strings(buf, n=4):
	  '''
	  Collect every run of at least `n` printable ASCII characters that are each
	  followed by a NUL byte (i.e. stored as little-endian UTF-16).

	  buf: buffer to scan; handed directly to the compiled pattern's finditer().
	  n: minimum number of characters in a run (default 4).

	  Returns a list of the decoded runs, in the order they occur in `buf`.
	  Runs that fail to decode are skipped.
	  '''
	  # Honour `n`; the pattern previously hard-coded a minimum of 4.
	  pattern = re.compile(b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, n))
	  found = []
	  for hit in pattern.finditer(buf):
	    raw = hit.group()
	    # array.array matches have no decode(); turn them into a byte string first
	    if isinstance(raw, array.array):
	      raw = raw.tostring()
	    try:
	      # The pattern only matches "<char>\x00" pairs, which is little-endian;
	      # plain "utf-16" without a BOM would use the host's native byte order.
	      found.append(raw.decode("utf-16-le"))
	    except UnicodeDecodeError:
	      # best effort: ignore a run that cannot be decoded
	      continue
	  return found

	def get_info(record, path, source_artifact_filepath):
	  '''
	  Build the report dictionary for a single MFT record.

	  record: object providing active_data(), slack_data() and an `inode`
	          attribute.
	  path: path to store under the 'path' key, exactly as given.
	  source_artifact_filepath: path of the MFT file; stored as an absolute path.

	  Returns a dict with the source file, inode, path, and the ASCII and
	  UTF-16 strings extracted from the record's active and slack data.
	  '''
	  active_data = record.active_data()
	  slack_data = record.slack_data()
	  data = {
	      'source_artifact_filepath': os.path.abspath(source_artifact_filepath),
	      'inode': record.inode,
	      'path': path,
	      'active_ascii_strings': ascii_strings(active_data),
	      'active_unicode_strings': unicode_strings(active_data),
	      'slack_ascii_strings': ascii_strings(slack_data),
	      'slack_unicode_strings': unicode_strings(slack_data),
	  }
	  return data

	def output_json(data):
	  '''
	  Serialize `data` to JSON text using 4-space indentation and sorted keys.

	  Returns the JSON document as a string.
	  '''
	  import json
	  return json.dumps(data, indent=4, sort_keys=True)

	def output_tsv(data):
	  '''
	  Render the values of `data` as a single tab-separated line.

	  data: dict of column name -> value. List values are flattened by joining
	        their (string) items with a single space; every other value is
	        formatted with '{0}'.format().

	  Returns the joined line, with values in the dict's iteration order.
	  The caller's dict is left untouched.
	  '''
	  cells = []
	  # values() exists on both Python 2 and 3, unlike iteritems()
	  for value in data.values():
	    if isinstance(value, list):
	      # flatten into a local instead of writing back into the caller's dict
	      value = ' '.join(value)
	    cells.append('{0}'.format(value))
	  return '\t'.join(cells)

	def main():
	  '''
	  Command-line entry point: print, as JSON, the strings found in MFT records.

	  The positional `record_or_path` argument is treated as a record number when
	  it parses as an integer. Otherwise it is a case-insensitive substring that
	  is compared against every path yielded by the enumerator, and one JSON
	  document is printed per matching record.
	  '''
	  parser = argparse.ArgumentParser(description='Inspect '
	                                   'a given MFT file record.')
	  parser.add_argument('-a', action="store", metavar="cache_size", type=int,
	                      dest="cache_size", default=1024,
	                      help="Size of cache.")
	  # No nargs=1 here: it would turn a user-supplied prefix into a one-item
	  # list, and the string concatenations below would raise TypeError.
	  parser.add_argument('-p', action="store", metavar="prefix",
	                      dest="prefix", default="\\.",
	                      help="Prefix paths with `prefix` rather than \\.\\")
	  parser.add_argument('mft', action="store",
	                      help="Path to MFT")
	  parser.add_argument('record_or_path', action="store",
	                      help="MFT record or file path to inspect")
	  results = parser.parse_args()
	  with Mmap(results.mft) as buf:
	    enum = MFTEnumerator(buf,
	                         record_cache=Cache(results.cache_size),
	                         path_cache=Cache(results.cache_size))
	    # An integer argument selects a record by number; anything else is a path.
	    try:
	      record_num = int(results.record_or_path)
	    except ValueError:
	      record_num = None
	    if record_num is not None:
	      record = enum.get_record(record_num)
	      path = results.prefix + enum.get_path(record)
	      print(output_json(get_info(record, path, results.mft)))
	    else:
	      # enum.get_record_by_path() would only match exactly; walk every path
	      # instead so the argument can match anywhere _in_ the path.
	      needle = results.record_or_path.lower()
	      for enum_record, enum_path in enum.enumerate_paths():
	        if needle in enum_path.lower():
	          # Report the matched record's own path (as the inode branch does),
	          # not the search term.
	          info = get_info(enum_record, results.prefix + enum_path, results.mft)
	          print(output_json(info))

	# Run the command-line interface only when this file is executed as a script.
	if __name__ == "__main__":
	  main()