-
-
Save divayprakash/ad4a267c714bdd98baf3df90f9d7b4ab to your computer and use it in GitHub Desktop.
Extract all attachments from mbox file (in order of appearance in the file).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Modified.
# Original script source:
# http://blog.marcbelmont.com/2012/10/script-to-extract-email-attachments.html
# Usage:
# Run the script from a folder containing the file "all.mbox".
# Attachments will be extracted into the subfolder "attachments",
# each prefixed with "n ", where n is the attachment's order of appearance in the mbox file.
import mailbox, pickle, traceback, os | |
from email.header import decode_header | |
# Mailbox to scan; the path is resolved against the current working directory.
mb = mailbox.mbox('all.mbox')

# NOTE(review): prefs_path is not read anywhere in the visible code; presumably
# it was meant for persisting `prefs` between runs. Confirm before removing.
prefs_path = '.save-attachments'

# Directory that receives the extracted files.
save_to = 'attachments/'
# exist_ok=True replaces the former "check, then create" sequence, which could
# fail if the directory appeared between the check and the makedirs() call.
os.makedirs(save_to, exist_ok=True)

# Key of the first message to process.
prefs = dict(start=0)

# Running counters: attachments encountered / attachments that failed to save.
total = 0
failed = 0
def _join_header_fragments(fragments):
    """Join the (text, charset) pairs returned by decode_header() into one str."""
    pieces = []
    for text, charset in fragments:
        if isinstance(text, bytes):
            # decode_header() reports charset=None for undecoded raw bytes;
            # treat those as ASCII instead of crashing on decode(None).
            text = text.decode(charset or 'ascii', 'replace')
        pieces.append(text)
    return ''.join(pieces)


def save_attachments(mid):
    """Save the ``application/octet-stream`` parts of message *mid* to disk.

    Only the direct sub-parts of a multipart message are examined; a
    non-multipart message is skipped. Each matching part increments the
    module-level ``total`` counter and is written into ``save_to`` as
    ``"<total> <filename>"``. Any error while naming or writing a part is
    printed via ``traceback`` and counted in the module-level ``failed``
    counter; processing then continues with the next part.

    Args:
        mid: Key of the message inside the module-level mailbox ``mb``.

    Raises:
        KeyError: If ``mb`` has no message with key *mid*.
    """
    global total, failed

    msg = mb.get_message(mid)
    if not msg.is_multipart():
        return

    for part in msg.get_payload():
        if part.get_content_type() != 'application/octet-stream':
            continue
        total += 1
        print()
        try:
            raw_name = part.get_filename()
            decoded_name = decode_header(raw_name) if raw_name else []
            print(decoded_name)
            name = _join_header_fragments(decoded_name)
            # The file name comes from the e-mail itself, so strip any
            # directory components before using it as a path; fall back to a
            # placeholder when nothing usable is left.
            name = os.path.basename(name.replace('\\', '/')) or 'unnamed'
            name = '%s %s' % (total, name)
            print('Saving %s' % (name))
            with open(os.path.join(save_to, name), 'wb') as f:
                f.write(part.get_payload(decode=True))
        except Exception:
            # Best effort: report the problem and keep going. Unlike a bare
            # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
            traceback.print_exc()
            failed += 1
# Visit every message key the mailbox reports, starting at prefs['start'].
# Iterating the real keys removes the former hard-coded 1,000,000 upper bound
# and the reliance on KeyError to detect the end of the mailbox, which would
# also have hidden a genuine KeyError raised while processing a message.
for i in mb.iterkeys():
    if i < prefs['start']:
        continue
    save_attachments(i)
    # Remember the most recently processed message key.
    prefs['start'] = i

# Summary of the run.
print()
print('Total: %s' % (total))
print('Failed: %s' % (failed))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment