Skip to content

Instantly share code, notes, and snippets.

Created June 25, 2021 12:35
Show Gist options
  • Save hervenivon/e2dcd9d73af34c0f4e136f715b31dd5a to your computer and use it in GitHub Desktop.
Save hervenivon/e2dcd9d73af34c0f4e136f715b31dd5a to your computer and use it in GitHub Desktop.
Outlook Mac `olm` archive contact extractor
#!/usr/bin/env python
import csv
import re
import sys
import xml.etree.ElementTree as ET
from collections import Counter
from zipfile import ZipFile
# Pattern for archive members that hold a single message: any prefix, then
# "message_", exactly five digits and ".xml".  It is a raw string so the
# "\." escape reaches the regex engine untouched instead of triggering
# Python's invalid-escape-sequence warning.
messageRe = re.compile(r'.*message_[0-9]{5}\.xml')
# Every extracted address, duplicates included; the duplicates are what the
# per-address counts are computed from later in the script.
full_email_list = []
# Running count of processed entries, reported in the debug message.
parsedEmails = 0
# When True, entries that carry a display name are formatted as
# "Name <address>" instead of the bare address.
name = False
# When True, enables the progress check that fires at 100 parsed entries.
debug = False
# NOTE(review): indentation has been stripped from this listing and several
# statements are missing; the notes below mark each visible gap.
#
# Substrings tested (against the lowercased address) further down.
# NOTE(review): this list literal is never closed here; the remaining entries
# and the closing bracket are not visible.
blacklist = ['reply',
# The archive path is hard-coded; the .olm file is opened as a zip archive.
with ZipFile('OutlookArchive.olm', 'r') as zipObj:
# Get the list of member names in the zip
fileList = zipObj.namelist()
# Iterate over the member names of the archive
for file in fileList:
# keep only email related files
if (messageRe.match(file)):
# NOTE(review): the right-hand side of this assignment is missing; presumably
# the member's contents are read from the archive — confirm.
message =
root = None
root = ET.fromstring(message)
# NOTE(review): these two prints read like an error handler for the parse
# above, but no try/except is visible around it.
print('"{0}" couldn\'t parsed'.format(file))
print('Unexpected error:', sys.exc_info()[0])
# Visit every <emailAddress> element of the parsed message.
for item in root.iter('emailAddress'):
if 'OPFContactEmailAddressAddress' in item.attrib:
# NOTE(review): the body of this blacklist test is not visible; presumably
# matching addresses are skipped — confirm.
if any(to_check in item.attrib['OPFContactEmailAddressAddress'].lower() for to_check in blacklist):
# With `name` enabled and a display name present, build "Name <address>".
# NOTE(review): no `else` separates the two assignments below, and no append
# of to_append to full_email_list is visible.
if 'OPFContactEmailAddressName' in item.attrib and name:
to_append = item.attrib['OPFContactEmailAddressName']+' <'+item.attrib['OPFContactEmailAddressAddress'].lower()+'>'
to_append = item.attrib['OPFContactEmailAddressAddress'].lower()
parsedEmails += 1
# NOTE(review): the statement guarded by this debug check is not visible.
if parsedEmails == 100 and debug:
print('Parsed emails: {0}'.format(parsedEmails))
# Tally how often each entry was collected.  most_common() returns the
# (entry, count) pairs ordered by descending count, so the resulting dict
# iterates from the most to the least frequent entry.
counts = dict(Counter(full_email_list).most_common())
# Write one "entry,count" row per tally.  csv.writer quotes any field that
# contains a comma or a quote (e.g. a display name such as "Doe, John" when
# `name` is enabled), so every row stays a well-formed two-column record.
# lineterminator='\n' gives bare-LF line endings and newline='' stops the
# text layer from translating them.
with open('extract.csv', 'w', encoding='utf8', newline='') as f:
    csv.writer(f, lineterminator='\n').writerows(counts.items())
# make every email unique (set() drops duplicates; the resulting order is
# not the collection order)
email_list = list(set(full_email_list))
# NOTE(review): the body of this `with` block is not visible in this listing;
# presumably the unique addresses are written to extract.txt — confirm.
with open('extract.txt', 'wb') as f:
print('extraction completed')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment