Skip to content

Instantly share code, notes, and snippets.

Last active Feb 24, 2022
What would you like to do?
demo script to parse runs in Outlook Inbox
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
demo script to parse runs in Outlook client Inbox

if Outlook is not already running, launch Outlook
locate new messages in INBOX / SOURCE_FOLDER
for each new message
  send message to parser
  save parsed results JSON blob in temp directory
  move parsed message to COMPLETED_FOLDER

Things this script does not do
- it does not save results to a local database, only to local JSON files
- other than timestamps and SOURCE_FOLDER / COMPLETED_FOLDER message placements,
  it has no real sense of what needs to be parsed and what has already been parsed

Requires the Python for Win32 (pywin32) extensions
"""
import datetime
import base64
import json
import logging
import logging.handlers
import os
from pathlib import Path
import sys
import tempfile
import time
from urllib.parse import quote
import requests
import win32com.client # pylint: disable=import-error
import win32ui # pylint: disable=import-error
# Configuration Constants
API_KEY = "MY_API_KEY"  # provided by Empirasign
API_SECRET = "MY_API_SECRET"  # provided by Empirasign
# Corporate runs parser endpoint.
# NOTE(review): the URL is blank in this copy of the script; fill it in from
# the Empirasign API docs before running (the docs also list the other
# parsing endpoints).
API_URL = ''

# If you need to locate your proxy server, the easiest way to do this is by
# using our tool on the "proxy_finder" tab of the demo api spreadsheet.
# Click the "Find Proxy" button, results will show up in cell B4.
# To get the demo api spreadsheet, click on the Excel icon to download it.
# Leave empty when no proxy is needed; the scheme prefix is added by
# _proxies_dict, so give the server without "http://".
PROXY_SERVER = ""

# this script maintains state via folder usage
# once an email is moved into COMPLETED_FOLDER, no further parsing attempts
# will be made
# NOTE(review): this name is referenced by main() but its definition was
# missing from the source; the value below is a placeholder -- confirm.
COMPLETED_FOLDER = "PARSED"
# Parsed JSON results and scratch files (saved .msg copies, the log file)
# both live in the OS temp directory.
RESULTS_DIR = Path(tempfile.gettempdir())
TEMP_DIR = Path(tempfile.gettempdir())

# Log to a size-capped rotating file and to stdout.
# NOTE(review): the rotation limits were missing from the source; the values
# below are assumed defaults -- adjust as needed.
rfh = logging.handlers.RotatingFileHandler(filename=TEMP_DIR / "parse_outlook_inbox.log",
                                           maxBytes=5 * 1024 * 1024,
                                           backupCount=3)
# INFO level is required for the logger.info() progress messages to show up.
logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s - %(levelname)-8s - %(message)s",
                    datefmt="%Y-%m-%d %H:%M:%S",
                    handlers=[rfh, logging.StreamHandler(sys.stdout)])
logger = logging.getLogger()
def _proxies_dict(proxy_server):
return proxy dictionary as needed by requests library
if this helper function is not comprehensive enough for your use case, consult
if proxy_server:
return {'http': 'http://' + PROXY_SERVER, 'https': 'https://' + PROXY_SERVER}
return {}
def get_target_mail_items(inbox, dt0, dt1=None):
    """
    return the Outlook items received within a date or date range

    inbox: object exposing Restrict(filter_str) (main() passes the Inbox
           folder's Items collection)
    dt0:   start of the range (date or datetime)
    dt1:   optional end of the range; when omitted it defaults to one day
           after dt0. If dt1 is earlier than dt0 the two are swapped.

    Returns whatever inbox.Restrict() returns for the ReceivedTime filter.
    """
    one_day = datetime.timedelta(1)
    if dt1 is None:
        # NOTE(review): the left operand was lost in the source
        # ("dt1 = + datetime.timedelta(1)"); dt0 is assumed -- confirm
        dt1 = dt0 + one_day
    if dt1 < dt0:
        dt0, dt1 = dt1, dt0
    # the upper bound of the filter is exclusive, so push it out one more day
    # to keep dt1 itself inside the search window
    dt1 += one_day
    # %I (12-hour clock) goes with %p; the earlier %H (24-hour clock) + %p
    # pairing produced strings such as "14:30 PM"
    stamp_fmt = r'%Y-%m-%d %I:%M %p'
    dt0_str = dt0.strftime(stamp_fmt)
    dt1_str = dt1.strftime(stamp_fmt)
    filter_str = "[ReceivedTime] >= '{}' And [ReceivedTime] < '{}'".format(dt0_str, dt1_str)
    logger.info("applying filter on inbox: %s", filter_str)
    target_items = inbox.Restrict(filter_str)
    logger.info('Detected %s target emails to parse', len(target_items))
    return target_items
def _safe_create_folder(root_folder, subfolder):
    """
    create subfolder under root_folder (aka INBOX) if it does not already exist

    root_folder: Outlook folder object whose Folders collection is searched
    subfolder:   name of the child folder to look up or create

    Returns the child folder object.
    """
    existing_names = [folder.Name for folder in root_folder.Folders]
    if subfolder not in existing_names:
        logger.info("creating folder %s under INBOX", subfolder)
        root_folder.Folders.Add(subfolder)
    else:
        logger.info("%s already exists as subfolder inder INBOX", subfolder)
    # return the destination folder as an object
    return root_folder.Folders[subfolder]
def msg_to_disk(msg):
    """
    Save an Outlook MailItem object as a .msg file to the temp directory

    msg: Outlook MailItem (needs EntryID and SaveAs)

    Returns the path of the saved file as a string. The file name is the
    percent-encoded EntryID plus ".msg".
    """
    fname = quote(msg.EntryID, safe="") + ".msg"
    msg_path = str(TEMP_DIR / fname)
    # actually write the file: the caller reads msg_path back right away
    # (SaveAs without a type argument is expected to write .msg format --
    # see the Outlook MailItem.SaveAs documentation)
    msg.SaveAs(msg_path)
    return msg_path
def main():
    """
    the main event

    Make sure Outlook is running, locate today's messages in the Inbox, post
    each one to the parser API, write the JSON response into RESULTS_DIR and
    move successfully parsed messages into COMPLETED_FOLDER. Messages whose
    request fails get a "-error.json" file instead and stay in the Inbox.

    Raises RuntimeError when Outlook has no account logged in.
    """
    t0 = time.time()
    logger.info("parse_outlook_inbox.py starting")

    # Check if Outlook is opened
    try:
        win32ui.FindWindow(None, "Microsoft Outlook")
    except win32ui.error:
        logger.warning("Outlook is not running, trying to start")
        try:
            os.startfile("outlook")  # pylint: disable=no-member
        except Exception:  # pylint: disable=broad-except
            # best effort only: the COM Dispatch below raises if Outlook is
            # genuinely unavailable
            logger.exception("Cannot find Outlook")

    outlook = win32com.client.Dispatch('outlook.application')
    mapi = outlook.GetNamespace('MAPI')

    # retrieve user's email address or exit if there are no accounts logged in
    if mapi.Accounts.Count == 0:
        logger.warning("Outlook is running but default user is not authenticated vs email server")
        raise RuntimeError

    inbox_folder = mapi.GetDefaultFolder(6)  # 6 == olFolderInbox
    dest_folder = _safe_create_folder(inbox_folder, COMPLETED_FOLDER)
    # NOTE(review): the date argument was missing from the source; today's
    # date is assumed -- confirm
    target_lst = get_target_mail_items(inbox_folder.Items, datetime.date.today())

    # Snapshot the collection before looping: messages are moved out of the
    # Inbox inside the loop, and changing an Outlook collection while
    # enumerating it can skip items.
    messages = list(target_lst)
    num_targets = len(messages)
    # explicit length test: win32com dispatch objects report themselves as
    # truthy even when the collection is empty
    if num_targets == 0:
        logger.warning('No emails to parse')

    done_lst = []
    for msg in messages:
        entry_id = msg.EntryID  # read before Move(), which may change it
        msg_path = msg_to_disk(msg)
        post_data = {
            "api_key": API_KEY,
            "api_secret": API_SECRET,
            # read_bytes() closes the file handle, unlike a bare open().read()
            "msg": base64.b64encode(Path(msg_path).read_bytes()).decode("ascii")
        }
        # NOTE(review): the final keyword argument was cut off in the source;
        # a request timeout (seconds) is assumed
        resp = requests.post(API_URL, json=post_data, proxies=_proxies_dict(PROXY_SERVER),
                             timeout=60)
        if resp.status_code == 200:
            obj = resp.json()
            # compute a filename that's safe on Windows and Linux
            fname = quote(entry_id, safe="") + ".json"
            # reversible via unquote(fname)[:-5]
            # explicit utf-8: ensure_ascii=False emits raw non-ASCII text that
            # the platform default encoding may be unable to write
            with open(RESULTS_DIR / fname, "wt", encoding="utf-8") as fp:
                fp.write(json.dumps(obj, sort_keys=True, indent=2, ensure_ascii=False))
            logger.info("saved parsed results MailItem.EntryID: %s filename: %s", entry_id,
                        fname)
            # moving the message is what marks it as done for later runs
            msg.Move(dest_folder)
            done_lst.append(entry_id)
            # check remaining quota
            if obj["meta"]["api_req_left"] < 1:
                logger.warning("early exit of loop, daily quota exhausted, processed %s / %s",
                               len(done_lst), num_targets)
                break
        else:
            fname = quote(entry_id, safe="") + "-error.json"
            with open(RESULTS_DIR / fname, "wt", encoding="utf-8") as fp:
                # NOTE(review): the body of this block was missing from the
                # source; saving the raw response text is assumed
                fp.write(resp.text)
            logger.error("error occurred, http status code: %s, error file: %s", resp.status_code,
                         fname)

    tot_runtime = round(time.time() - t0)
    # log counts, not the collection object itself
    logger.info("finished total run time secs: %s, target_lst: %s processed_lst: %s ", tot_runtime,
                num_targets, len(done_lst))
# run the parser only when executed as a script, not when imported
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment