-
-
Save cdunklau/9001357 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python | |
""" | |
Extract all attachments from MS Outlook '.eml' file EML_FILE into | |
directory OUTPUT_DIR. If OUTPUT_DIR does not exist, it will be | |
created. | |
Usage: extract_attachments.py EML_FILE OUTPUT_DIR | |
""" | |
from __future__ import print_function | |
import sys | |
import os | |
import os.path | |
from collections import defaultdict | |
from email.parser import Parser | |
def parse_message(filename):
    """
    Parse the '.eml' file at ``filename`` and return the resulting
    ``email.message.Message`` object.

    The file is read in binary mode.  E-mail files routinely contain raw
    8-bit bytes, and decoding them as text with the locale's default
    encoding (what a text-mode ``open`` does on Python 3) can raise
    ``UnicodeDecodeError`` before the parser ever sees the data.
    """
    # Imported locally so this function does not depend on additional
    # module-level imports.
    import email

    # Python 3 provides a bytes-aware loader.  On Python 2 the str-based
    # loader already operates on bytes, so it is the correct fallback.
    load = getattr(email, 'message_from_binary_file', email.message_from_file)
    with open(filename, 'rb') as f:
        return load(f)
def find_attachments(message):
    """
    Find every MIME part of ``message`` whose Content-Disposition is
    'attachment'.

    Returns a list of ``(params, part)`` tuples, one per attachment:

    * ``params`` is a dict of the Content-Disposition parameters, keyed
      by lower-cased parameter name (e.g. ``'filename'``).
    * ``part`` is the message object for that MIME part.

    The header is parsed by the email package rather than by splitting
    on ';' and '=', so quoted values containing semicolons, mixed-case
    parameter names and RFC 2231 encoded/continued parameters
    (``filename*=utf-8''...``) are all handled.  When the disposition
    carries no ``filename`` parameter, the name reported by
    ``part.get_filename()`` is used if there is one.
    """
    # Imported locally so this function does not depend on additional
    # module-level imports.
    from email.utils import collapse_rfc2231_value

    found = []
    for part in message.walk():
        # get_params() returns None when the header is absent; otherwise
        # the first pair holds the disposition type itself.
        params = part.get_params(header='content-disposition')
        if not params:
            continue
        if params[0][0].strip().lower() != 'attachment':
            continue
        parsed = {}
        for key, val in params[1:]:
            if not key:
                # Produced by a stray trailing ';' -- nothing to record.
                continue
            # RFC 2231 values arrive as (charset, language, value)
            # tuples; collapse them into a plain string.
            parsed[key.lower()] = collapse_rfc2231_value(val)
        if 'filename' not in parsed:
            fallback = part.get_filename()
            if fallback:
                parsed['filename'] = fallback
        found.append((parsed, part))
    return found
def run(eml_filename, output_dir):
    """
    Extract every attachment of the '.eml' file ``eml_filename`` into
    the directory ``output_dir``.

    ``output_dir`` (including any missing parent directories) is created
    when it does not exist.  Progress is reported on stdout.

    Attachment names come from the message and are therefore untrusted:
    only the final path component is used, so neither absolute paths nor
    '..' segments can place a file outside ``output_dir``.  Attachments
    without a usable name are written as ``attachment-N``, where N is
    the 1-based position of the attachment in the message.
    """
    msg = parse_message(eml_filename)
    attachments = find_attachments(msg)
    print("Found {0} attachments...".format(len(attachments)))
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    for index, (cdisp, part) in enumerate(attachments, 1):
        # Decode before opening the target so that a part without a
        # decodable payload (get_payload returns None for multipart
        # parts) does not leave an empty file behind.
        data = part.get_payload(decode=True)
        if data is None:
            print("Skipping attachment {0}: no decodable payload".format(index))
            continue
        raw_name = cdisp.get('filename') or ''
        # Treat backslashes as separators too, so Windows-style paths
        # are reduced to their last component on every platform.
        normalized = os.path.normpath(raw_name.replace('\\', '/'))
        safe_name = os.path.basename(normalized)
        if safe_name in ('', '.', '..'):
            safe_name = 'attachment-{0}'.format(index)
        # NOTE: attachments that share a name overwrite one another.
        towrite = os.path.join(output_dir, safe_name)
        print("Writing", towrite)
        with open(towrite, 'wb') as fp:
            fp.write(data)
def main():
    """
    Command-line entry point.

    Expects exactly two arguments, EML_FILE and OUTPUT_DIR, and hands
    them to ``run``.  With any other argument count the usage line is
    printed and the process exits with status 1.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) == 2:
        eml_path, target_dir = cli_args
        run(eml_path, target_dir)
        return
    print('Usage: extract_attachments.py EML_FILE OUTPUT_DIR')
    sys.exit(1)


# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
Great stuff
Thanks!
ValueError: too many values to unpack
Any idea how to fix it?
Traceback (most recent call last):
File "extract_attachments.py", line 71, in <module>
main()
File "extract_attachments.py", line 67, in main
run(filename, outdir)
File "extract_attachments.py", line 45, in run
attachments = find_attachments(msg)
File "extract_attachments.py", line 34, in find_attachments
key, val = kv.split('=')
ValueError: too many values to unpack
File "D:\Users\prashant.kochar\PycharmProjects\TABLE_COPY_AND_OTHER_STUFF\try_1.py", line 70, in <module>
main()
main()
File "D:\Users\prashant.kochar\PycharmProjects\TABLE_COPY_AND_OTHER_STUFF\try_1.py", line 66, in main
run(filename, outdir)
File "D:\Users\prashant.kochar\PycharmProjects\TABLE_COPY_AND_OTHER_STUFF\try_1.py", line 49, in run
cdisp_filename = os.path.normpath(cdisp['filename'])
KeyError: 'filename'
@pkochar263 Did you have a question?
Really nice, thanks! It is also great to feed with find -exec to get all attachments from all .eml files in a directory (or directory tree).
excellent!!! thanks!!!
Thanks, works great. Quick diff for Python3: