Created
June 5, 2019 01:07
-
-
Save ctkirkman/1c20689350460a4b31cc6ea384366da2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hashlib | |
import lxml.etree as ET | |
import olefile | |
import magic | |
from io import BytesIO | |
from oletools import oleobj | |
from oletools.olevba3 import VBA_Parser | |
from operator import itemgetter | |
from zipfile import ZipFile | |
def read_magic(contentBytes):
    """Identify a byte buffer with libmagic, best-effort.

    Args:
        contentBytes: Raw bytes to identify.

    Returns:
        Whatever ``Magic.from_buffer`` reports for the buffer, or ``None``
        when identification fails for any reason.
    """
    magicType = None
    try:
        # NOTE(review): this needs a libmagic binding whose ``Magic`` object is
        # both a context manager and exposes ``from_buffer`` -- confirm which
        # package is installed; otherwise this always falls into the handler.
        with magic.Magic() as m:
            magicType = m.from_buffer(contentBytes)
    except Exception as e:
        # Identification is optional metadata: report the problem and carry on.
        print("libmagic")
        print(str(e))
    # Returned after the try block rather than from a ``finally`` clause, so
    # KeyboardInterrupt / SystemExit are no longer silently discarded.
    return magicType
def _read_ole_native(ole_bytes, zip_info):
    """Replace *zip_info* details with those of an embedded Ole10Native payload.

    Opens *ole_bytes* as an OLE container; when it has a top-level
    ``\\x01Ole10Native`` stream, the name, size, hashes and magic type in
    *zip_info* are overwritten with the values of the wrapped native file.
    Any failure is printed and otherwise ignored, leaving whatever was already
    written to *zip_info* in place.
    """
    try:
        ole = olefile.OleFileIO(ole_bytes)
        try:
            if not any(entry == ['\x01Ole10Native'] for entry in ole.listdir()):
                return
            native_dta = ole.openstream('\x01Ole10Native').read()
        finally:
            # The container is only needed to pull the stream out; release it.
            ole.close()

        native_stream = oleobj.OleNativeStream(native_dta)
        zip_info["name"] = native_stream.filename[0:100]
        zip_info["size"] = native_stream.actual_size
        zip_info["md5"] = hashlib.md5(native_stream.data).hexdigest()
        zip_info["sha1"] = hashlib.sha1(native_stream.data).hexdigest()
        native_magic = read_magic(native_stream.data)
        if native_magic is not None:
            zip_info["magicType"] = native_magic
    except Exception as e:
        print("Native File Error:")
        print(zip_info.get("name"))
        print(zip_info.get("md5"))
        print(str(e))


def read_office_file(zip_file, zip_object):
    """Summarise one member of a zip archive as a metadata dict.

    Args:
        zip_file: The member's ``ZipInfo``-like entry (``filename``,
            ``file_size``, ``flag_bits`` and ``is_dir()`` are used).
        zip_object: The open archive the member is read from.

    Returns:
        A dict that may contain:

        * ``"name"`` -- member name, truncated to 100 characters.
        * ``"size"`` -- uncompressed size (not set for directory entries).
        * ``"protected"`` -- ``True`` when the encryption flag bit is set; the
          member is then not read.
        * ``"md5"`` / ``"sha1"`` -- hex digests of the member's bytes.
        * ``"magicType"`` -- result of ``read_magic`` when available.
        * ``"error"`` -- first 100 characters of the failure message when
          processing the member raised.

        For members identified as ``application/CDFV2`` that wrap an
        Ole10Native stream, name/size/hashes/magicType describe the embedded
        native file instead of the container.
    """
    zip_info = {}
    try:
        zip_info["name"] = zip_file.filename[0:100]
        if zip_file.is_dir():
            return zip_info

        zip_info["size"] = zip_file.file_size
        if zip_file.flag_bits & 0x1:
            # Bit 0 of the general-purpose flags marks an encrypted member;
            # it cannot be read without a password, so only flag it.
            zip_info["protected"] = True
            return zip_info

        zip_file_bytes = zip_object.read(zip_file.filename)
        zip_info["md5"] = hashlib.md5(zip_file_bytes).hexdigest()
        zip_info["sha1"] = hashlib.sha1(zip_file_bytes).hexdigest()

        # Named ``magic_type`` so the imported ``magic`` module is not shadowed.
        magic_type = read_magic(zip_file_bytes)
        if magic_type is not None:
            zip_info["magicType"] = magic_type
        if magic_type == "application/CDFV2":
            _read_ole_native(zip_file_bytes, zip_info)
    except Exception as e:
        # Keep whatever was collected so far and record why processing stopped.
        zip_info["error"] = str(e).strip()[0:100]
    return zip_info
def read_openxml(contentBytes):
    """Collect properties, macro indicators and a member listing from an OpenXML file.

    Args:
        contentBytes: The complete file as bytes; it is opened as a zip archive.

    Returns:
        * A dict with optional keys ``"properties"`` (values from
          ``docProps/core.xml`` and ``docProps/custom.xml`` plus
          ``"<type>:<keyword>"`` entries from the VBA macro analysis) and
          ``"contents"`` (per-member dicts from ``read_office_file`` for
          members whose magic type is not ``text/xml`` / ``text/plain``;
          when more than 10 qualify, only the 10 largest are kept, followed
          by the marker string ``"(List Truncated)"``).
        * ``{"error": <message>}`` when nothing was collected and processing
          failed with a non-empty message (truncated to 100 characters).
        * ``None`` when nothing was collected and no message is available.
    """
    openxml_metadata = {}
    errors = None
    try:
        with ZipFile(BytesIO(contentBytes)) as zip_object:
            member_names = zip_object.namelist()

            if "docProps/core.xml" in member_names:
                xml_props = ET.fromstring(zip_object.read("docProps/core.xml"))
                openxml_metadata["properties"] = {
                    ET.QName(t.tag).localname: t.text
                    for t in xml_props
                    if t.text is not None
                }

            if "docProps/custom.xml" in member_names:
                try:
                    custom_xml_props = ET.fromstring(zip_object.read("docProps/custom.xml"))
                    custom_metadata = {
                        t.attrib["name"]: next(t.itertext(), "")
                        for t in custom_xml_props.iterchildren()
                        if next(t.itertext(), "") != ""
                    }
                    if custom_metadata:
                        # setdefault: core.xml may be absent, in which case
                        # there is no "properties" dict to merge into yet.
                        openxml_metadata.setdefault("properties", {}).update(custom_metadata)
                except Exception as e:
                    print("docProps/custom.xml")
                    print(str(e))

            try:
                vbaparser = VBA_Parser("vbaFile", contentBytes)
                try:
                    macro_result = vbaparser.analyze_macros()
                    if vbaparser.contains_macros is True and macro_result is not None:
                        # Each result is indexed as (type, keyword, description);
                        # keep the description up to its first " (".
                        macro_metadata = {
                            f"{v[0]}:{v[1]}": v[2].split(" (")[0] for v in macro_result
                        }
                        if macro_metadata:
                            openxml_metadata.setdefault("properties", {}).update(macro_metadata)
                finally:
                    vbaparser.close()
            except Exception as e:
                print("VBA Macro")
                print(str(e))

            file_list = [
                f
                for f in (read_office_file(z, zip_object) for z in zip_object.filelist)
                if f.get("magicType") not in ["text/xml", "text/plain"]
            ]
            if len(file_list) > 10:
                # Entries without a "size" (e.g. directories or members that
                # failed early) sort last instead of raising KeyError.
                file_list = sorted(file_list, key=lambda f: f.get("size", 0), reverse=True)[0:10]
                file_list.append("(List Truncated)")
            if file_list:
                openxml_metadata["contents"] = file_list
    except Exception as e:
        if str(e).strip() != "":
            errors = str(e).strip()[0:100]

    # Partial metadata wins over an error message, as before.
    if openxml_metadata:
        return openxml_metadata
    if errors is not None:
        return {"error": errors}
    return None
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment