-
-
Save empirasign/8237e8e68243e9f2532e88cf17cb84ca to your computer and use it in GitHub Desktop.
demo script to parse runs in Outlook Inbox
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
parse_outlook_inbox.py
https://gist.github.com/empirasign/8237e8e68243e9f2532e88cf17cb84ca

demo script to parse runs in Outlook client Inbox

if Outlook not already running, launch Outlook
locate new messages in INBOX / SOURCE_FOLDER
for each new message
    send message to parser
    save parsed results JSON blob in temp directory
    move parsed message to COMPLETED_FOLDER
fin

Things this script does not do
  - it does not save results to a local database, only to local JSON files
  - other than timestamps and SOURCE_FOLDER / COMPLETED_FOLDER message placements,
    it has no real sense of what needs to be parsed and what has already been parsed

THIS SCRIPT WORKS ONLY ON WINDOWS

Requirements
    Python3
        https://www.python.org/downloads/
    Python for Win32 (pywin32) extensions
        https://github.com/mhammond/pywin32
    requests
        https://docs.python-requests.org/en/latest/
"""
import datetime | |
import base64 | |
import json | |
import logging | |
import logging.handlers | |
import os | |
from pathlib import Path | |
import sys | |
import tempfile | |
import time | |
from urllib.parse import quote | |
import requests | |
import win32com.client # pylint: disable=import-error | |
import win32ui # pylint: disable=import-error | |
# Configuration Constants
API_KEY = "MY_API_KEY"  # provided by Empirasign
API_SECRET = "MY_API_SECRET"  # provided by Empirasign
API_URL = 'https://api.empirasign.com/v1/parse-corp/'  # Corporate runs parser
# for other parsing endpoints, see docs: https://www.empirasign.com/api-docs/

PROXY_SERVER = ""
# If you need to locate your proxy server, the easiest way to do this is by
# using our tool on the "proxy_finder" tab of the demo api spreadsheet.
# Click the "Find Proxy" button, results will show up in cell B4
# demo api spreadsheet can be found at:
# https://www.empirasign.com/api-mbs/ click on Excel icon to download
# the proxy host usually looks like proxy.mycompany.net:8080
# if you get a 407 Proxy Authentication Required error, you need to set
# PROXY_SERVER to something like username:password@proxy.mycompany.net:8080

SOURCE_FOLDER = "INBOX"
COMPLETED_FOLDER = "PARSED"
# this script maintains state via folder usage
# once an email is moved into COMPLETED_FOLDER, no further parsing attempts
# will be made

RESULTS_DIR = Path(tempfile.gettempdir())  # parsed JSON results are written here
TEMP_DIR = Path(tempfile.gettempdir())  # .msg exports and the log file live here

# Log to a size-capped file in TEMP_DIR and to stdout.
# backupCount must be non-zero: with the default of 0 the handler never rolls
# over, so maxBytes would be ignored and the log file would grow without bound.
rfh = logging.handlers.RotatingFileHandler(filename=TEMP_DIR / "parse_outlook_inbox.log",
                                           maxBytes=5000000,
                                           backupCount=3)
logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s - %(levelname)-8s - %(message)s",
                    datefmt="%Y-%m-%d %H:%M:%S",
                    handlers=[rfh, logging.StreamHandler(sys.stdout)])
logger = logging.getLogger()
def _proxies_dict(proxy_server):
    """
    return proxy dictionary as needed by requests library

    proxy_server: proxy address such as "proxy.mycompany.net:8080" (optionally
        prefixed with "username:password@"); any falsy value means no proxy
    returns: {'https': 'http://<proxy_server>'}, or {} when no proxy is given

    if this helper function is not comprehensive enough for your use case, consult
    http://docs.python-requests.org/en/latest/user/advanced/#proxies
    """
    if proxy_server:
        # build the URL from the argument, not from a module-level constant,
        # so the function honors whatever the caller passes in
        return {'https': 'http://' + proxy_server}
    return {}
def get_target_mail_items(inbox, dt0, dt1=None):
    """
    return the items of an Outlook Items collection received within a date range

    inbox: object exposing Restrict(filter_str), e.g. the Items collection of a folder
    dt0: first day of the range
    dt1: last day of the range; when omitted, tomorrow is used
    If dt1 falls before dt0 the two bounds are swapped.

    returns: whatever inbox.Restrict returns for the ReceivedTime filter
    """
    if not dt1:
        dt1 = datetime.date.today() + datetime.timedelta(1)
    if dt1 < dt0:
        dt0, dt1 = dt1, dt0
    # the filter's upper bound is exclusive ("<"), so push it one day forward
    # to keep messages received on dt1 itself inside the range
    dt1 += datetime.timedelta(1)
    # %p (AM/PM) must be paired with the 12-hour %I; combining it with the
    # 24-hour %H produces strings such as "00:00 AM" or "13:30 PM"
    dt0_str = dt0.strftime(r'%Y-%m-%d %I:%M %p')
    dt1_str = dt1.strftime(r'%Y-%m-%d %I:%M %p')
    # https://documentation.help/Microsoft-Outlook-Visual-Basic-Reference/olmthRestrict.htm
    filter_str = "[ReceivedTime] >= '{}' And [ReceivedTime] < '{}'".format(dt0_str, dt1_str)
    logger.info("applying filter on inbox: %s", filter_str)
    target_items = inbox.Restrict(filter_str)
    logger.info('Detected %s target emails to parse', len(target_items))
    return target_items
def _safe_create_folder(root_folder, subfolder):
    """
    create subfolder under root_folder (aka INBOX) if it does not already exist

    root_folder: Outlook folder object whose Folders collection is searched
    subfolder: name of the child folder to find or create
    returns: the child folder object, looked up by name in root_folder.Folders
    """
    subfolders = [folder.Name for folder in root_folder.Folders]
    if subfolder not in subfolders:
        logger.info("creating folder %s under INBOX", subfolder)
        root_folder.Folders.Add(subfolder)
    else:
        # only report "already exists" when nothing was created
        logger.info("%s already exists as subfolder under INBOX", subfolder)
    # return the destination folder as an object
    return root_folder.Folders[subfolder]
def msg_to_disk(msg):
    """
    Write an Outlook MailItem to TEMP_DIR as a .msg file and return its path.

    The file name is the item's EntryID, percent-encoded with no characters
    treated as safe, followed by the ".msg" extension. The path is returned
    as a plain string.
    """
    target = TEMP_DIR / "{}.msg".format(quote(msg.EntryID, safe=""))
    saved_path = str(target)
    msg.SaveAs(saved_path)
    return saved_path
def main():
    """
    the main event

    Starts Outlook if no "Microsoft Outlook" window is found, selects the
    Inbox items received from today onward, and for each one:
      - saves it as a .msg file and posts it, base64 encoded, to API_URL
      - on HTTP 200, writes the JSON response to RESULTS_DIR and moves the
        message to COMPLETED_FOLDER
      - on any other status, writes the response body to an "-error.json"
        file and leaves the message where it is
      - on a network-level failure, logs it and leaves the message where it is
    The loop stops early once the response reports no API requests left.

    raises RuntimeError: when Outlook has no accounts configured
    """
    t0 = time.time()
    logger.info("parse_outlook_inbox.py starting")

    # Check if Outlook is opened
    try:
        win32ui.FindWindow(None, "Microsoft Outlook")
    except win32ui.error:
        logger.warning("Outlook is not running, trying to start")
        try:
            os.startfile("outlook")  # pylint: disable=no-member
        except Exception:  # pylint: disable=broad-except
            logger.exception("Cannot find Outlook")
            raise
    outlook = win32com.client.Dispatch('outlook.application')
    mapi = outlook.GetNamespace('MAPI')

    # exit if there are no accounts logged in
    if mapi.Accounts.Count == 0:
        logger.warning("Outlook is running but default user is not authenticated vs email server")
        raise RuntimeError("Outlook has no accounts logged in")

    # https://docs.microsoft.com/en-us/office/vba/api/outlook.oldefaultfolders
    inbox_folder = mapi.GetDefaultFolder(6)  # 6 is the Inbox in OlDefaultFolders
    dest_folder = _safe_create_folder(inbox_folder, COMPLETED_FOLDER)

    # Snapshot the filtered collection into a plain list before the loop:
    # messages are moved out of the folder while we iterate, and walking the
    # live collection at the same time can skip items. A list also gives
    # dependable emptiness and length checks.
    target_lst = list(get_target_mail_items(inbox_folder.Items, datetime.date.today()))
    if not target_lst:
        logger.warning('No emails to parse')

    proxies = _proxies_dict(PROXY_SERVER)
    done_lst = []
    for msg in target_lst:
        entry_id = msg.EntryID
        msg_path = msg_to_disk(msg)
        # close the exported file as soon as it has been read
        with open(msg_path, "rb") as msg_fp:
            msg_b64 = base64.b64encode(msg_fp.read()).decode("ascii")
        post_data = {
            "api_key": API_KEY,
            "api_secret": API_SECRET,
            "msg": msg_b64
        }
        try:
            resp = requests.post(API_URL, json=post_data, proxies=proxies, timeout=3.5)
        except requests.RequestException:
            # one timeout or connection error should not abort the whole run;
            # the message stays in place and is picked up again next time
            logger.exception("request failed for MailItem.EntryID: %s", entry_id)
            continue

        if resp.status_code == 200:
            done_lst.append(entry_id)
            obj = resp.json()
            # compute a filename that's safe on Windows and Linux
            # https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file
            fname = quote(entry_id, safe="") + ".json"
            # reversible via unquote(fname)[:-5]
            # ensure_ascii=False can emit any Unicode character, so the file
            # must be opened as UTF-8 rather than the locale default encoding
            with open(RESULTS_DIR / fname, "wt", encoding="utf-8") as fp:
                fp.write(json.dumps(obj, sort_keys=True, indent=2, ensure_ascii=False))
            logger.info("saved parsed results MailItem.EntryID: %s filename: %s", entry_id,
                        fname)
            msg.Move(dest_folder)
            # check remaining quota
            if obj["meta"]["api_req_left"] < 1:
                logger.warning("early exit of loop, daily quota exhausted, processed %s / %s",
                               len(done_lst), len(target_lst))
                break
        else:
            fname = quote(entry_id, safe="") + "-error.json"
            with open(RESULTS_DIR / fname, "wt", encoding="utf-8") as fp:
                fp.write(resp.text)
            logger.error("error occurred, http status code: %s, error file: %s", resp.status_code,
                         fname)

    tot_runtime = round(time.time() - t0)
    # report counts for both lists, not the collection object itself
    logger.info("finished total run time secs: %s, target_lst: %s processed_lst: %s ", tot_runtime,
                len(target_lst), len(done_lst))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment