Skip to content

Instantly share code, notes, and snippets.

@tsumarios
Last active December 21, 2023 14:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tsumarios/50be13493349d69d67f021e04cb4e8e4 to your computer and use it in GitHub Desktop.
Save tsumarios/50be13493349d69d67f021e04cb4e8e4 to your computer and use it in GitHub Desktop.
eml_4n6 is a simple Python script designed for email forensics and attachment extraction. With a focus on preserving chain of custody, it extracts attachments from .eml files, logs metadata, and uncovers forensic artefacts such as URLs within email payloads.
#!/usr/bin/env python3
"""
Script: eml_4n6.py
Description: Extract attachments and log metadata from EML files.
Author: tsumarios
Date: 21/12/2023
Note: This script is based on https://github.com/diogo-alves/eml-extractor and extends it with logging and other forensics features.
Usage:
./eml_4n6.py [OPTIONS]
Options:
-a, --analyst ANALYST_NAME
Analyst name for chain of custody information (default: system name).
-s, --source PATH
The directory containing the .eml files to extract attachments (default: current working directory).
-r, --recursive
Allow recursive search for .eml files under the SOURCE directory.
-f, --files FILE [FILE ...]
Specify a .eml file or a list of .eml files to extract attachments.
-d, --destination PATH
The directory to extract attachments to (default: current working directory).
"""
import os
import re
import logging
from argparse import ArgumentParser, ArgumentTypeError
from datetime import datetime
import hashlib
from email import message_from_file, policy
from pathlib import Path
# Constants
# Suffix that identifies EML files.
# NOTE(review): this constant is not referenced by any code visible in this
# file; the visible code compares against a literal ".eml" instead.
EML_FILE_EXTENSION = ".eml"
# Character class listing the characters that sanitise_foldername replaces
# with "_" when a subject line is turned into a folder name.
ILLEGAL_CHARS_REGEX = r'[/\\|\[\]\{\}:<>+=;,?!*"~#$%&@\']'
# Logging configuration
# Configure the root logger once at import time: INFO and above, each record
# prefixed with a timestamp and the level name.
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s"
)
# Module-level logger shared by the functions below.
logger = logging.getLogger(__name__)
def extract_attachments(file: Path, destination: Path, analyst_name: str) -> None:
    """
    Extract attachments from an EML file.

    Logs chain-of-custody information and header metadata, saves every
    non-inline attachment under ``destination/<sanitised subject>/`` and logs
    the SHA-256 hash of each attachment file on disk. Any exception raised
    while processing is logged (with traceback) and swallowed so that a batch
    run can carry on with the next file.

    Attachment names and the subject line come from the (untrusted) email, so
    both are reduced to a single safe path component before being used.

    Args:
        file (Path): Path to the EML file.
        destination (Path): Path to the destination directory for extracted attachments.
        analyst_name (str): Name of the analyst for chain of custody information.
    """
    try:
        logger.info(f'Processing file: "{file}"')
        # Log Chain of Custody information
        logger.info(f"Analyst: {analyst_name}, Date: {datetime.now()}")
        with file.open(errors="ignore") as f:
            email_message = message_from_file(f, policy=policy.default)

        email_subject = email_message.get("Subject")
        # A message may have no Subject header at all (None), and a subject of
        # "." or ".." would otherwise resolve outside/onto the destination.
        folder_name = sanitise_foldername(str(email_subject or ""))
        if folder_name.strip() in ("", ".", ".."):
            folder_name = "no_subject"
        basepath = destination / folder_name

        # Extract and log additional metadata
        email_sender = email_message.get("From")
        email_receiver = email_message.get("To")
        email_date_sent = email_message.get("Date")
        # NOTE: this is the time of processing, not a value taken from the
        # message headers; the log label is kept for backward compatibility.
        email_date_received = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Log metadata
        logger.info(f"Subject: {email_subject}")
        logger.info(f"Sender: {email_sender}")
        logger.info(f"Receiver: {email_receiver}")
        logger.info(f"Date Sent: {email_date_sent}")
        logger.info(f"Date Received: {email_date_received}")

        # Ignore inline attachments
        attachments = [
            item
            for item in email_message.iter_attachments()
            if item.is_attachment()
        ]
        if not attachments:
            logger.info("No attachments found.")
            return

        for index, attachment in enumerate(attachments, start=1):
            raw_name = attachment.get_filename()
            logger.info(f"Attachment found: {raw_name}")

            # Keep only the final path component so a crafted name such as
            # "../../evil" cannot be written outside of basepath; fall back to
            # a generated name when the header is missing or unusable.
            filename = Path(str(raw_name or "").replace("\\", "/")).name
            if filename in ("", ".", ".."):
                filename = f"attachment_{index}"
            if filename != raw_name:
                logger.warning(
                    f'Attachment name {raw_name!r} is missing or unsafe; using "{filename}"'
                )
            filepath = basepath / filename

            payload = attachment.get_payload(decode=True)
            if payload is None:
                # get_payload(decode=True) yields None for multipart parts
                # (e.g. an attached message); there are no bytes to write.
                logger.warning(
                    f'Attachment "{filename}" has no decodable payload; skipping.'
                )
                continue
            if isinstance(payload, str):
                # Defensive: normalise to bytes so the payload can be saved.
                logger.info("Payload is a string: %s", payload)
                payload_text = payload
                payload = payload.encode("utf-8", errors="surrogateescape")
            else:
                # errors="replace" never raises, so no exception handling is needed.
                payload_text = payload.decode("utf-8", errors="replace")
            log_forensic_artefacts(payload_text)

            if filepath.exists():
                overwrite = input(
                    f'The file "{filename}" already exists! Overwrite it (Y/N)? '
                )
                if overwrite.upper() == "Y":
                    save_attachment(filepath, payload)
                else:
                    logger.info("Skipping...")
            else:
                basepath.mkdir(parents=True, exist_ok=True)
                save_attachment(filepath, payload)

            # Log hash value of the file as it now exists on disk
            file_hash = hash_file(filepath)
            logger.info(f"Attachment hash ({filename}): {file_hash}")
    except Exception as e:
        logger.error(f"Error processing file: {file}. Exception: {e}", exc_info=True)
def log_forensic_artefacts(payload: str) -> None:
    """
    Scan a decoded payload for forensic artefacts and log each one.

    Currently only http/https URLs are searched for; every match is written
    to the log at INFO level in the order it appears in the payload.

    Args:
        payload (str): Text to analyse for forensic artefacts.
    """
    url_pattern = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
    # The pattern has no capturing groups, so each match's group(0) is the URL.
    for match in re.finditer(url_pattern, payload):
        url = match.group(0)
        logger.info(f"Found URL: {url}")
def sanitise_foldername(name: str) -> str:
    """
    Make a string usable as a folder name.

    Every character matched by ILLEGAL_CHARS_REGEX is replaced with an
    underscore; all other characters are left untouched.

    Args:
        name (str): Folder name to sanitise.

    Returns:
        str: Sanitised folder name.
    """
    illegal_chars = re.compile(ILLEGAL_CHARS_REGEX)
    return illegal_chars.sub("_", name)
def save_attachment(file: Path, payload: bytes) -> None:
    """
    Write an attachment payload to disk.

    The target is opened in binary write mode (created or truncated), a log
    entry naming the target is emitted, and then the payload is written.

    Args:
        file (Path): Path to save the attachment.
        payload (bytes): Attachment payload to save.
    """
    with file.open(mode="wb") as target:
        logger.info(f'Saving attachment to "{file}"')
        target.write(payload)
def hash_file(file_path: Path) -> str:
    """
    Compute the SHA-256 digest of a file's contents.

    The file is read in 8192-byte chunks so that large files do not have to
    be loaded into memory at once.

    Args:
        file_path (Path): Path to the file.

    Returns:
        str: Hexadecimal SHA-256 digest of the file.
    """
    digest = hashlib.sha256()
    with file_path.open("rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()
def get_eml_files_from(path: Path, recursively: bool = False) -> list:
    """
    Get a list of EML files from a directory.

    The extension is matched case-insensitively (so ``MAIL.EML`` is found on
    case-sensitive filesystems too), directories whose name happens to end in
    ".eml" are excluded, and the result is sorted so that files are processed
    in a reproducible order.

    Args:
        path (Path): Directory path.
        recursively (bool, optional): Whether to search recursively. Defaults to False.

    Returns:
        list: Sorted list of EML file paths.
    """
    candidates = path.rglob("*") if recursively else path.glob("*")
    return sorted(
        entry
        for entry in candidates
        if entry.name.lower().endswith(".eml") and entry.is_file()
    )
def check_file(arg_value: str) -> Path:
    """
    Check if the argument is a valid EML file.

    The path must point to an existing regular file whose extension is
    ".eml"; the extension is compared case-insensitively so that files such
    as ``MAIL.EML`` are accepted as well.

    Args:
        arg_value (str): Argument value.

    Returns:
        Path: Valid EML file path.

    Raises:
        ArgumentTypeError: If the argument is not a valid EML file.
    """
    file = Path(arg_value)
    if file.is_file() and file.suffix.lower() == ".eml":
        return file
    raise ArgumentTypeError(f'"{file}" is not a valid EML file.')
def check_path(arg_value: str) -> Path:
    """
    Validate that a command-line argument names an existing directory.

    Args:
        arg_value (str): Argument value.

    Returns:
        Path: The argument converted to a Path, if it is a directory.

    Raises:
        ArgumentTypeError: If the argument is not a valid directory.
    """
    path = Path(arg_value)
    # Guard clause: reject anything that is not an existing directory.
    if not path.is_dir():
        raise ArgumentTypeError(f'"{path}" is not a valid directory.')
    return path
def parse_arguments(argv=None):
    """
    Parse command-line arguments.

    Args:
        argv (list, optional): Argument strings to parse instead of
            ``sys.argv[1:]``. Defaults to None, which parses the real
            command line.

    Returns:
        Namespace: Parsed arguments (analyst, source, recursive, files, destination).
    """
    import platform

    # The help text promises the system name as default. COMPUTERNAME is set
    # on Windows, but HOSTNAME is frequently not exported to child processes
    # on Unix shells, so fall back to platform.node() before giving up.
    default_analyst = (
        os.getenv("COMPUTERNAME")
        or os.getenv("HOSTNAME")
        or platform.node()
        or "DefaultAnalyst"
    )

    parser = ArgumentParser(
        usage="%(prog)s [OPTIONS]", description="Extracts attachments from .eml files"
    )
    # Include analyst argument with default value
    parser.add_argument(
        "-a",
        "--analyst",
        type=str,
        default=default_analyst,
        metavar="ANALYST_NAME",
        help="Analyst name for chain of custody information (default: system name)",
    )
    # force the use of --source or --files, not both
    source_group = parser.add_mutually_exclusive_group()
    source_group.add_argument(
        "-s",
        "--source",
        type=check_path,
        default=Path.cwd(),
        metavar="PATH",
        help="the directory containing the .eml files to extract attachments (default: current working directory)",
    )
    parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help="allow recursive search for .eml files under SOURCE directory",
    )
    source_group.add_argument(
        "-f",
        "--files",
        nargs="+",
        type=check_file,
        metavar="FILE",
        help="specify a .eml file or a list of .eml files to extract attachments",
    )
    parser.add_argument(
        "-d",
        "--destination",
        type=check_path,
        default=Path.cwd(),
        metavar="PATH",
        help="the directory to extract attachments to (default: current working directory)",
    )
    return parser.parse_args(argv)
def main():
    """
    Entry point: parse the command line and process every selected EML file.

    Files given with --files take precedence; otherwise the source directory
    is searched. A failure on one file is logged and does not stop the run.
    """
    args = parse_arguments()

    if args.files:
        eml_files = args.files
    else:
        eml_files = get_eml_files_from(args.source, args.recursive)

    if not eml_files:
        logger.info("No EML files found!")

    for file in eml_files:
        try:
            extract_attachments(
                file, destination=args.destination, analyst_name=args.analyst
            )
        except Exception as e:
            # Log with traceback and move on to the next file.
            logger.error(
                f"Error processing file: {file}. Exception: {e}", exc_info=True
            )

    logger.info("Done.")
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment