Skip to content

Instantly share code, notes, and snippets.

@ctkirkman
Created June 5, 2019 01:07
Show Gist options
  • Save ctkirkman/1c20689350460a4b31cc6ea384366da2 to your computer and use it in GitHub Desktop.
Save ctkirkman/1c20689350460a4b31cc6ea384366da2 to your computer and use it in GitHub Desktop.
import hashlib
import lxml.etree as ET
import olefile
import magic
from io import BytesIO
from oletools import oleobj
from oletools.olevba3 import VBA_Parser
from operator import itemgetter
from zipfile import ZipFile
def read_magic(contentBytes):
    """Identify the type of an in-memory buffer with libmagic.

    Args:
        contentBytes: The raw bytes to identify.

    Returns:
        The value reported by ``magic.Magic().from_buffer`` for the buffer,
        or ``None`` when libmagic is unavailable or raises while inspecting
        the data (the failure is printed, not propagated).
    """
    magic_type = None
    try:
        with magic.Magic() as m:
            magic_type = m.from_buffer(contentBytes)
    except Exception as e:
        # Best effort: a detection failure must not abort the caller's scan.
        print("libmagic")
        print(str(e))
    # Returned after the try statement rather than from a ``finally`` clause,
    # so KeyboardInterrupt / SystemExit are no longer silently discarded.
    return magic_type
def _read_ole_native(zip_file_bytes, zip_info):
    """Replace *zip_info* details with those of an embedded Ole10Native payload.

    Opens *zip_file_bytes* as an OLE container; when it holds a top-level
    ``"\\x01Ole10Native"`` stream, the ``name``, ``size``, ``md5``, ``sha1``
    and (if detectable) ``magicType`` entries of *zip_info* are overwritten
    with the values of the wrapped file. Exceptions propagate to the caller.
    """
    native_name = "\x01Ole10Native"
    ole = olefile.OleFileIO(zip_file_bytes)
    try:
        # listdir() yields each stream as a list of path components.
        if [native_name] not in ole.listdir():
            return
        native_dta = ole.openstream(native_name).read()
        native_stream = oleobj.OleNativeStream(native_dta)
        zip_info["name"] = native_stream.filename[0:100]
        zip_info["size"] = native_stream.actual_size
        zip_info["md5"] = hashlib.md5(native_stream.data).hexdigest()
        zip_info["sha1"] = hashlib.sha1(native_stream.data).hexdigest()
        native_type = read_magic(native_stream.data)
        if native_type is not None:
            zip_info["magicType"] = native_type
    finally:
        # Release the container even when parsing the native stream fails.
        ole.close()


def read_office_file(zip_file, zip_object):
    """Summarise a single member of a zip (OpenXML) container.

    Args:
        zip_file: The ``ZipInfo``-like entry describing the member.
        zip_object: The open archive; only its ``read(filename)`` is used.

    Returns:
        A dict with ``name`` (first 100 characters of the member name) and,
        for non-directory members, ``size``. Encrypted members additionally
        get ``protected: True`` and are not read. Readable members get
        ``md5``, ``sha1`` and, when detection succeeds, ``magicType``; if the
        member wraps an Ole10Native payload, those fields describe the
        payload instead. Any failure is reported under ``error`` (truncated
        to 100 characters) rather than raised.
    """
    zip_info = dict()
    try:
        zip_info["name"] = zip_file.filename[0:100]
        if zip_file.is_dir():
            return zip_info

        zip_info["size"] = zip_file.file_size
        if zip_file.flag_bits & 0x1:
            # Bit 0 of the general-purpose flags marks an encrypted member.
            zip_info["protected"] = True
            return zip_info

        try:
            zip_file_bytes = zip_object.read(zip_file.filename)
            zip_info["md5"] = hashlib.md5(zip_file_bytes).hexdigest()
            zip_info["sha1"] = hashlib.sha1(zip_file_bytes).hexdigest()
            magic_type = read_magic(zip_file_bytes)
            if magic_type is not None:
                zip_info["magicType"] = magic_type
            # NOTE(review): this comparison only matches if read_magic()
            # reports MIME types -- confirm how magic.Magic() is configured.
            if magic_type == "application/CDFV2":
                try:
                    _read_ole_native(zip_file_bytes, zip_info)
                except Exception as e:
                    # Keep whatever was collected; only report the failure.
                    print("Native File Error:")
                    print(zip_info.get("name"))
                    print(zip_info.get("md5"))
                    print(str(e))
        except Exception as e:
            zip_info["error"] = str(e).strip()[0:100]
    except Exception as e:
        # Previously swallowed without a trace by ``return`` in ``finally``;
        # now the partial record is flagged so the caller can tell.
        zip_info.setdefault("error", str(e).strip()[0:100])
    return zip_info
def _merge_properties(metadata, extra):
    """Fold *extra* into ``metadata["properties"]``, creating it when absent."""
    if extra:
        metadata["properties"] = {**metadata.get("properties", {}), **extra}


def _read_macro_properties(contentBytes):
    """Run olevba's macro analysis on *contentBytes* and flatten the result.

    Each result row ``v`` becomes ``"{v[0]}:{v[1]}" -> v[2]`` with ``v[2]``
    cut at the first ``" ("``. Returns an empty dict when the parser reports
    no macros. Exceptions propagate to the caller.
    """
    vbaparser = VBA_Parser("vbaFile", contentBytes)
    try:
        macro_result = vbaparser.analyze_macros()
        if vbaparser.contains_macros is True and macro_result is not None:
            return {f"{v[0]}:{v[1]}": v[2].split(" (")[0] for v in macro_result}
        return {}
    finally:
        # Release the files the parser opened, even if analysis failed.
        vbaparser.close()


def read_openxml(contentBytes):
    """Extract metadata from the bytes of an OpenXML (zip-based) document.

    Args:
        contentBytes: The complete document as bytes.

    Returns:
        A dict with any of:
          * ``properties`` -- non-empty entries of ``docProps/core.xml``
            merged with ``docProps/custom.xml`` values and macro findings;
          * ``contents`` -- per-member summaries from ``read_office_file``
            whose ``magicType`` is not ``text/xml`` / ``text/plain``, limited
            to the 10 largest plus a ``"(List Truncated)"`` marker.
        If nothing was collected, ``{"error": message}`` when a failure with a
        non-empty message occurred, otherwise ``None``.
    """
    max_listed = 10
    openxml_metadata = dict()
    errors = None
    try:
        with ZipFile(BytesIO(contentBytes)) as zip_object:
            names = zip_object.namelist()

            if "docProps/core.xml" in names:
                xml_props = ET.fromstring(zip_object.read("docProps/core.xml"))
                openxml_metadata["properties"] = {
                    ET.QName(t.tag).localname: t.text
                    for t in xml_props
                    if t.text is not None
                }

            if "docProps/custom.xml" in names:
                try:
                    custom_xml_props = ET.fromstring(zip_object.read("docProps/custom.xml"))
                    custom_metadata = {}
                    for t in custom_xml_props.iterchildren():
                        text = next(t.itertext(), "")
                        if text != "":
                            custom_metadata[t.attrib["name"]] = text
                    # Works even when core.xml was missing (used to raise
                    # TypeError on ``**None`` and lose the custom values).
                    _merge_properties(openxml_metadata, custom_metadata)
                except Exception as e:
                    print("docProps/custom.xml")
                    print(str(e))

            try:
                _merge_properties(openxml_metadata, _read_macro_properties(contentBytes))
            except Exception as e:
                print("VBA Macro")
                print(str(e))

            summaries = [read_office_file(z, zip_object) for z in zip_object.filelist]
            file_list = [
                f for f in summaries
                if f.get("magicType") not in ["text/xml", "text/plain"]
            ]
            if len(file_list) > max_listed:
                # Directory entries carry no "size"; treat them as 0 instead
                # of raising KeyError and discarding the whole listing.
                file_list = sorted(
                    file_list, key=lambda f: f.get("size", 0), reverse=True
                )[0:max_listed]
                file_list.append("(List Truncated)")
            if file_list:
                openxml_metadata["contents"] = file_list
    except Exception as e:
        message = str(e).strip()
        if message != "":
            errors = message[0:100]

    # Partial metadata wins over an error message, as before.
    if openxml_metadata:
        return openxml_metadata
    if errors is not None:
        return {"error": errors}
    return None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment