Last active
December 21, 2023 14:41
-
-
Save tsumarios/50be13493349d69d67f021e04cb4e8e4 to your computer and use it in GitHub Desktop.
eml_4n6 is a simple Python script designed for email forensics and attachment extraction. With a focus on preserving chain of custody, it extracts attachments from .eml files, logs metadata, and uncovers forensic artefacts such as URLs within email payloads.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
Script: eml_4n6.py
Description: Extract attachments and log metadata from EML files.
Author: tsumarios
Date: 21/12/2023
Note: This script is based on https://github.com/diogo-alves/eml-extractor and extends it with logging and other forensics features.
Usage:
    ./eml_4n6.py [OPTIONS]
Options:
    -a, --analyst ANALYST_NAME
        Analyst name for chain of custody information (default: system name).
    -s, --source PATH
        The directory containing the .eml files to extract attachments (default: current working directory).
    -r, --recursive
        Allow recursive search for .eml files under the SOURCE directory.
    -f, --files FILE [FILE ...]
        Specify a .eml file or a list of .eml files to extract attachments.
    -d, --destination PATH
        The directory to extract attachments to (default: current working directory).
"""
import os | |
import re | |
import logging | |
from argparse import ArgumentParser, ArgumentTypeError | |
from datetime import datetime | |
import hashlib | |
from email import message_from_file, policy | |
from pathlib import Path | |
# File extension that identifies an e-mail message file.
EML_FILE_EXTENSION = ".eml"
# Character class of symbols that get replaced when a folder name is sanitised.
ILLEGAL_CHARS_REGEX = r'[/\\|\[\]\{\}:<>+=;,?!*"~#$%&@\']'

# Configure the root logger once at import time: INFO and above, each record
# prefixed with a timestamp and its level.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
# Module-level logger shared by every function in this script.
logger = logging.getLogger(__name__)
def _safe_attachment_name(raw_name, index: int) -> str:
    """
    Reduce an attachment name taken from an email to a bare, usable file name.

    Args:
        raw_name: File name declared by the attachment, or None when absent.
        index (int): 1-based position of the attachment, used for the fallback name.

    Returns:
        str: Final path component of raw_name, or "attachment_<index>" when
        nothing usable remains.
    """
    # Treat both separator styles as directories so names such as "../../x"
    # or "..\\x" cannot point outside the output folder.
    candidate = Path(str(raw_name or "").replace("\\", "/")).name.strip()
    if candidate in ("", ".", ".."):
        return f"attachment_{index}"
    return candidate


def extract_attachments(file: Path, destination: Path, analyst_name: str) -> None:
    """
    Extract attachments from an EML file.

    Logs chain-of-custody information and the Subject/From/To/Date headers,
    then writes every non-inline attachment into a sub-folder of
    ``destination`` named after the sanitised subject and logs the SHA-256
    hash of each file written. Any exception is logged and swallowed so that
    one bad message does not stop a batch run.

    Args:
        file (Path): Path to the EML file.
        destination (Path): Path to the destination directory for extracted attachments.
        analyst_name (str): Name of the analyst for chain of custody information.
    """
    # Imported here so this function does not depend on the module import list.
    from email import message_from_binary_file

    try:
        logger.info(f'Processing file: "{file}"')
        # Log Chain of Custody information
        logger.info(f"Analyst: {analyst_name}, Date: {datetime.now()}")

        # Parse the raw bytes: text mode with errors="ignore" silently drops
        # undecodable bytes, which can alter 8-bit attachment content.
        with file.open("rb") as f:
            email_message = message_from_binary_file(f, policy=policy.default)

        email_subject = email_message.get("Subject")
        # A message without a Subject header yields None; use a fixed folder.
        basepath = destination / sanitise_foldername(
            str(email_subject or "no_subject")
        )

        # Extract and log additional metadata
        email_sender = email_message.get("From")
        email_receiver = email_message.get("To")
        email_date_sent = email_message.get("Date")
        # NOTE(review): this is the time of processing, not a value read from
        # the message's Received headers; the log label is kept unchanged.
        email_date_received = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Log metadata
        logger.info(f"Subject: {email_subject}")
        logger.info(f"Sender: {email_sender}")
        logger.info(f"Receiver: {email_receiver}")
        logger.info(f"Date Sent: {email_date_sent}")
        logger.info(f"Date Received: {email_date_received}")

        # Ignore inline attachments
        attachments = [
            item
            for item in email_message.iter_attachments()
            if item.is_attachment()
        ]
        if not attachments:
            logger.info("No attachments found.")
            return

        for index, attachment in enumerate(attachments, start=1):
            declared_name = attachment.get_filename()
            logger.info(f"Attachment found: {declared_name}")

            # The declared name comes from the (untrusted) email itself.
            filename = _safe_attachment_name(declared_name, index)
            if filename != declared_name:
                logger.warning(
                    f'Attachment name "{declared_name}" is missing or unsafe; '
                    f'saving as "{filename}"'
                )
            filepath = basepath / filename

            payload = attachment.get_payload(decode=True)
            if not isinstance(payload, bytes):
                # decode=True returns None for multipart parts (for example an
                # attached message), so there are no bytes to write or scan.
                logger.warning(
                    f'Attachment "{filename}" has no decodable payload; skipping.'
                )
                continue

            # errors="replace" never raises, so no decode error handling is needed.
            log_forensic_artefacts(payload.decode("utf-8", errors="replace"))

            if filepath.exists():
                overwrite = input(
                    f'The file "{filename}" already exists! Overwrite it (Y/N)? '
                )
                if overwrite.strip().upper() != "Y":
                    # Nothing was written, so do not report a hash for a file
                    # that is not this attachment.
                    logger.info("Skipping...")
                    continue
            else:
                basepath.mkdir(parents=True, exist_ok=True)

            save_attachment(filepath, payload)
            # Log hash value
            file_hash = hash_file(filepath)
            logger.info(f"Attachment hash ({filename}): {file_hash}")
    except Exception as e:
        logger.error(f"Error processing file: {file}. Exception: {e}", exc_info=True)
def log_forensic_artefacts(payload: str) -> None:
    """
    Scan a decoded payload for URLs and log each one at INFO level.

    Args:
        payload (str): Text to search for http/https URLs.
    """
    # NOTE(review): "$-_" inside the character class is a range, which is what
    # allows "/", ":", "?" and "=" to be part of a match; it also admits
    # characters such as "<", ">" and "'".
    url_pattern = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
    # The pattern has no capturing groups, so group(0) is the whole URL.
    for match in re.finditer(url_pattern, payload):
        logger.info(f"Found URL: {match.group(0)}")
def sanitise_foldername(name: str) -> str:
    """
    Sanitise a folder name by replacing illegal characters.

    Every character matched by ILLEGAL_CHARS_REGEX becomes "_". Surrounding
    whitespace and trailing dots are then removed, so names such as "." or
    ".." can never be returned and used to step out of the parent directory.

    Args:
        name (str): Folder name to sanitise. None is treated as an empty name.

    Returns:
        str: Sanitised folder name, or "unnamed" when nothing usable remains.
    """
    cleaned = re.sub(ILLEGAL_CHARS_REGEX, "_", str(name or ""))
    # Trailing dots and spaces are also rejected by Windows for directory names.
    cleaned = cleaned.strip().rstrip(" .")
    return cleaned or "unnamed"
def save_attachment(file: Path, payload: bytes) -> None:
    """
    Write an attachment's bytes to disk, replacing any existing file.

    Args:
        file (Path): Target path for the attachment.
        payload (bytes): Raw attachment content.
    """
    with file.open(mode="wb") as sink:
        # Logged only once the target has been opened successfully.
        logger.info(f'Saving attachment to "{file}"')
        sink.write(payload)
def hash_file(file_path: Path) -> str:
    """
    Compute the SHA-256 digest of a file's content.

    Args:
        file_path (Path): File to hash.

    Returns:
        str: Hexadecimal SHA-256 digest.
    """
    digest = hashlib.sha256()
    with file_path.open("rb") as stream:
        # Read in 8 KiB blocks until read() returns the empty bytes sentinel.
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()
def get_eml_files_from(path: Path, recursively: bool = False) -> list:
    """
    Get a list of EML files from a directory.

    The extension is matched case-insensitively (".eml", ".EML", ...),
    directories are skipped even if their name ends in ".eml", and the result
    is sorted so that files are processed in the same order on every run.

    Args:
        path (Path): Directory path.
        recursively (bool, optional): Whether to search recursively. Defaults to False.

    Returns:
        list: Sorted list of EML file paths.
    """
    candidates = path.rglob("*") if recursively else path.glob("*")
    return sorted(
        entry
        for entry in candidates
        if entry.name.lower().endswith(".eml") and entry.is_file()
    )
def check_file(arg_value: str) -> Path:
    """
    Check if the argument is a valid EML file.

    Used as an argparse ``type=`` callable. The extension is compared
    case-insensitively so that files named "MAIL.EML" are accepted too.

    Args:
        arg_value (str): Argument value.

    Returns:
        Path: Valid EML file path.

    Raises:
        ArgumentTypeError: If the argument is not an existing file with an
            ".eml" extension.
    """
    file = Path(arg_value)
    if file.is_file() and file.suffix.lower() == ".eml":
        return file
    raise ArgumentTypeError(f'"{file}" is not a valid EML file.')
def check_path(arg_value: str) -> Path:
    """
    Validate a command-line value as an existing directory.

    Intended as an argparse ``type=`` callable.

    Args:
        arg_value (str): Raw argument value.

    Returns:
        Path: The value as a Path when it names a directory.

    Raises:
        ArgumentTypeError: If the value does not name a directory.
    """
    candidate = Path(arg_value)
    if not candidate.is_dir():
        raise ArgumentTypeError(f'"{candidate}" is not a valid directory.')
    return candidate
def parse_arguments():
    """
    Parse command-line arguments.

    ``--source`` and ``--files`` are mutually exclusive. When neither is
    given, ``source`` defaults to the current working directory and ``files``
    is None.

    Returns:
        Namespace: Parsed arguments with the attributes ``analyst``,
        ``source``, ``recursive``, ``files`` and ``destination``.
    """
    # Imported here so this function does not depend on the module import list.
    import platform

    parser = ArgumentParser(
        usage="%(prog)s [OPTIONS]", description="Extracts attachments from .eml files"
    )

    # HOSTNAME is a shell variable that is often not exported to child
    # processes, so fall back to the name reported by the platform module
    # before using the fixed placeholder.
    default_analyst = (
        os.getenv("COMPUTERNAME")
        or os.getenv("HOSTNAME")
        or platform.node()
        or "DefaultAnalyst"
    )

    # Include analyst argument with default value
    parser.add_argument(
        "-a",
        "--analyst",
        type=str,
        default=default_analyst,
        metavar="ANALYST_NAME",
        help="Analyst name for chain of custody information (default: system name)",
    )

    # force the use of --source or --files, not both
    source_group = parser.add_mutually_exclusive_group()
    source_group.add_argument(
        "-s",
        "--source",
        type=check_path,
        default=Path.cwd(),
        metavar="PATH",
        help="the directory containing the .eml files to extract attachments (default: current working directory)",
    )
    parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help="allow recursive search for .eml files under SOURCE directory",
    )
    source_group.add_argument(
        "-f",
        "--files",
        nargs="+",
        type=check_file,
        metavar="FILE",
        help="specify a .eml file or a list of .eml files to extract attachments",
    )
    parser.add_argument(
        "-d",
        "--destination",
        type=check_path,
        default=Path.cwd(),
        metavar="PATH",
        help="the directory to extract attachments to (default: current working directory)",
    )
    return parser.parse_args()
def main():
    """
    Run the tool: parse the command line, pick the EML files and process each one.

    Files given with --files take precedence; otherwise the source directory
    is searched. A failure on one file is logged and the next file is processed.
    """
    options = parse_arguments()

    targets = options.files
    if not targets:
        targets = get_eml_files_from(options.source, options.recursive)
    if not targets:
        logger.info("No EML files found!")

    for file in targets:
        try:
            extract_attachments(
                file,
                destination=options.destination,
                analyst_name=options.analyst,
            )
        except Exception as e:
            # Keep going with the remaining files after recording the failure.
            logger.error(
                f"Error processing file: {file}. Exception: {e}", exc_info=True
            )

    logger.info("Done.")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment