Skip to content

Instantly share code, notes, and snippets.

@cdunklau
Last active August 27, 2021 22:14
Show Gist options
  • Save cdunklau/9001357 to your computer and use it in GitHub Desktop.
Save cdunklau/9001357 to your computer and use it in GitHub Desktop.
A simple (and probably naive) script to extract attachments from .eml files for those of us who don't use software email clients.
#!/usr/bin/env python
"""
Extract all attachments from MS Outlook '.eml' file EML_FILE into
directory OUTPUT_DIR. If OUTPUT_DIR does not exist, it will be
created.
Usage: extract_attachments.py EML_FILE OUTPUT_DIR
"""
from __future__ import print_function
import sys
import os
import os.path
from collections import defaultdict
from email.parser import Parser
def parse_message(filename):
    """
    Parse the email stored in the file at path ``filename`` and return
    the resulting message object.

    The file is read as raw bytes rather than text: an '.eml' file may
    carry 8-bit data in any charset, and decoding it with the platform's
    default text encoding can fail or corrupt the content.
    """
    with open(filename, 'rb') as f:
        try:
            # Python 3: parse directly from the byte stream.
            from email import message_from_binary_file
        except ImportError:
            # Python 2: ``str`` is already bytes, so the classic parser
            # handles a binary-mode file object as-is.
            return Parser().parse(f)
        return message_from_binary_file(f)
def find_attachments(message):
    """
    Return a list of (parsed content-disposition dict, message object)
    tuples, one for each attachment found in ``message``.

    Every part yielded by ``message.walk()`` is inspected; only parts
    whose Content-Disposition type is 'attachment' (compared
    case-insensitively) are kept. The dict maps each lower-cased
    disposition parameter name (e.g. 'filename') to its unquoted value.
    RFC 2231 encoded parameters (``filename*=utf-8''...``) are decoded
    to text.
    """
    from email.utils import collapse_rfc2231_value

    found = []
    for part in message.walk():
        # get_params() splits the header with awareness of quoted
        # strings, so a ';' inside a quoted filename is not mistaken
        # for a parameter separator. It returns None when the header
        # is absent.
        params = part.get_params(header='content-disposition')
        if not params:
            continue
        if params[0][0].strip().lower() != 'attachment':
            continue
        parsed = {}
        for key, val in params[1:]:
            key = key.strip().lower()
            if not key:
                # An empty segment, e.g. produced by a trailing ';'.
                continue
            if isinstance(val, tuple):
                # RFC 2231 value: a (charset, language, text) triple.
                val = collapse_rfc2231_value(val)
            elif val.startswith("'"):
                # Non-standard single-quoted value; strip the quotes.
                val = val.strip("'")
            parsed[key] = val
        found.append((parsed, part))
    return found
def _safe_attachment_name(raw_name, index):
    """
    Reduce an untrusted attachment name to a bare file name.

    Directory components (with either '/' or '\\' separators) are
    discarded so the result can never point outside the output
    directory. If nothing usable remains, a generated name based on
    ``index`` is returned instead.
    """
    name = (raw_name or '').replace('\x00', '').replace('\\', '/')
    name = os.path.basename(os.path.normpath(name)) if name else ''
    if name in ('', '.', '..'):
        name = 'attachment-{0}'.format(index)
    return name


def run(eml_filename, output_dir):
    """
    Extract every attachment of the message in ``eml_filename`` into
    ``output_dir``, creating the directory (and any missing parents)
    first if it does not exist.

    Attachment names come from the message and are untrusted, so only
    their final path component is used. Attachments without a usable
    name get a generated one, and parts that have no single decodable
    payload (multipart containers) are skipped with a notice.
    """
    msg = parse_message(eml_filename)
    attachments = find_attachments(msg)
    print("Found {0} attachments...".format(len(attachments)))
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    for index, (cdisp, part) in enumerate(attachments, 1):
        # Decode before opening the target so a part without a payload
        # does not leave an empty file behind.
        data = part.get_payload(decode=True)
        if data is None:
            print("Skipping attachment {0}: no decodable payload".format(index))
            continue
        raw_name = cdisp.get('filename') or part.get_filename()
        towrite = os.path.join(output_dir, _safe_attachment_name(raw_name, index))
        print("Writing", towrite)
        with open(towrite, 'wb') as fp:
            fp.write(data)
def main():
    """
    Command-line entry point.

    Expects exactly two arguments, EML_FILE and OUTPUT_DIR. With any
    other number of arguments the usage line is printed and the process
    exits with status 1.
    """
    try:
        # Unpacking fails with ValueError unless exactly two arguments
        # were supplied.
        eml_path, target_dir = sys.argv[1:]
    except ValueError:
        print('Usage: extract_attachments.py EML_FILE OUTPUT_DIR')
        sys.exit(1)
    run(eml_path, target_dir)


if __name__ == '__main__':
    main()
@propriocept
Copy link

Really nice, thanks! It is also great to feed with find -exec to get all attachments from all .eml files in a directory (or directory tree).

@thshim
Copy link

thshim commented Nov 26, 2020

excellent!!! thanks!!!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment