Skip to content

Instantly share code, notes, and snippets.

@maroonmed
Last active November 25, 2018 23:09
Show Gist options
  • Save maroonmed/eef22ad9548451e0ab372394eb8989ca to your computer and use it in GitHub Desktop.
Save maroonmed/eef22ad9548451e0ab372394eb8989ca to your computer and use it in GitHub Desktop.
Download all emails from Exchange/Office365 to an mbox file
#!/usr/bin/env python3
import mailbox
import os
import sys
import traceback
from exchangelib import Account, Configuration, Credentials, DELEGATE
# Account login; also passed as the primary SMTP address of the account.
# Must be filled in before running the script.
USERNAME = ''
# Password used together with USERNAME to build the credentials.
PASSWORD = ''
# Host name of the Exchange server to connect to (autodiscover is not used).
SERVER = 'outlook.office365.com'
# Local file recording the ids of already-downloaded items, one per line.
ID_FILE = '.read_ids'
def create_mailbox_message(e_msg):
    """Convert a fetched item into an mbox message.

    Args:
        e_msg: Object exposing ``mime_content`` (the raw RFC 822 message)
            and ``is_read`` (truthy when the message has been read).
            Presumably an exchangelib message item -- confirm against caller.

    Returns:
        A ``mailbox.mboxMessage`` built from ``e_msg.mime_content``, carrying
        the mbox "read" flag when ``e_msg.is_read`` is truthy.
    """
    m = mailbox.mboxMessage(e_msg.mime_content)
    if e_msg.is_read:
        # mbox messages use 'R' for "read"; 'S' ("seen") is the Maildir flag
        # and would be written to X-Status as an unknown flag instead.
        m.set_flags('R')
    return m
def get_read_ids():
    """Load the ids recorded in ID_FILE.

    Returns:
        A set with every non-empty line of ID_FILE, or an empty set when
        ID_FILE does not exist.
    """
    if not os.path.exists(ID_FILE):
        return set()
    with open(ID_FILE, 'r') as id_file:
        lines = id_file.read().splitlines()
    # Blank lines carry no id and are dropped.
    return {line for line in lines if line}
def set_read_ids(ids):
    """Overwrite ID_FILE with the given ids, one per line.

    Args:
        ids: Iterable of id strings. Falsy entries (None, empty string)
            are skipped.
    """
    with open(ID_FILE, 'w') as f:
        # The file is opened in text mode, which already translates '\n' to
        # the platform line ending. Writing os.linesep here would produce
        # '\r\r\n' on Windows.
        f.writelines(i + '\n' for i in ids if i)
if __name__ == '__main__':
    # Entry point: append every not-yet-downloaded item of one account folder
    # to a local mbox file, and remember the downloaded item ids in ID_FILE.
    if len(sys.argv) != 3:
        print("Usage: {} folder_name mbox_file".format(sys.argv[0]))
        # Exit non-zero so a calling shell can detect the usage error.
        sys.exit(1)
    folder_name, mbox_path = sys.argv[1], sys.argv[2]

    credentials = Credentials(USERNAME, PASSWORD)
    config = Configuration(server=SERVER, credentials=credentials)
    account = Account(primary_smtp_address=USERNAME, config=config, autodiscover=False, access_type=DELEGATE)

    # The folder is looked up as an attribute of the account object; fail
    # with a clear message instead of an AttributeError on None.
    folder = getattr(account, folder_name, None)
    if folder is None:
        print("[ERROR] Unknown folder: {}".format(folder_name))
        sys.exit(1)

    read_ids_local = get_read_ids()
    # (item_id, changekey) pairs for the whole folder, newest first.
    item_ids_remote = list(folder.all().order_by('-datetime_received').values_list('item_id', 'changekey'))
    new_ids = [x for x in item_ids_remote if x[0] not in read_ids_local]
    read_ids = set()
    print("Total items in folder {}: {}".format(folder_name, len(item_ids_remote)))

    mbox = mailbox.mbox(mbox_path)
    mbox.lock()
    try:
        for i, item in enumerate(account.fetch(new_ids), 1):
            # Read the display fields defensively so the error path below
            # cannot itself raise if the fetched object lacks them
            # (NOTE(review): the library may yield error objects instead of
            # messages for failed items -- confirm).
            received = getattr(item, 'datetime_received', None)
            subject = getattr(item, 'subject', None)
            try:
                msg = create_mailbox_message(item)
                mbox.add(msg)
                # Flush per message so an interruption loses at most one item.
                mbox.flush()
            except Exception:
                # Best effort: report the failing item and keep going.
                traceback.print_exc()
                print("[ERROR] {} {}".format(received, subject))
            else:
                if item.item_id:
                    read_ids.add(item.item_id)
                print("[{}/{}] {} {}".format(i, len(new_ids), str(received), subject))
    finally:
        try:
            # Persist progress even when the run is interrupted; otherwise
            # the messages already written would be downloaded again.
            set_read_ids(read_ids_local | read_ids)
        finally:
            mbox.unlock()
#!/usr/bin/env python3
# adapted from https://stackoverflow.com/a/368067/1027246
from datetime import datetime
from email.utils import parsedate
import mailbox
import sys
import time
def extract_date(email):
    """Return a sortable timestamp for a message based on its Date header.

    Args:
        email: Message-like object whose ``get('Date')`` returns the Date
            header string, or None when the header is absent.

    Returns:
        Seconds since the epoch as a float. The UTC offset in the header is
        honoured, so messages sent from different timezones compare by the
        actual instant. When the header is missing, unparsable or out of
        range, the current time is returned so the message sorts last.
    """
    # Imported here so the function is self-contained; the statement does not
    # touch the ``email`` parameter, which shadows the package name.
    from email.utils import mktime_tz, parsedate_tz

    raw_date = email.get('Date')
    parsed = parsedate_tz(raw_date) if raw_date else None
    if parsed:
        try:
            # mktime_tz applies the header's UTC offset; a header without an
            # offset is still interpreted as local time.
            return float(mktime_tz(parsed))
        except (OverflowError, ValueError):
            # Date outside the range the platform can represent.
            pass
    return time.time()
if __name__ == '__main__':
    # Entry point: rewrite the given mbox file with its messages ordered by
    # the timestamp extract_date() derives from each Date header.
    if len(sys.argv) != 2:
        print("Usage: {} mbox_file".format(sys.argv[0]))
        sys.exit(1)

    mbox = mailbox.mbox(sys.argv[1])
    # Hold the lock while the file is rewritten so no other program modifies
    # it in the meantime.
    mbox.lock()
    try:
        sorted_mails = sorted(mbox, key=extract_date)
        # Reassign the messages to the existing keys in sorted order.
        mbox.update(enumerate(sorted_mails))
        mbox.flush()
    finally:
        mbox.unlock()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment