Skip to content

Instantly share code, notes, and snippets.

@mikehwang
Last active March 5, 2020 15:55
Show Gist options
  • Save mikehwang/f0c581b73e1f5e5b8bbecec6217841fc to your computer and use it in GitHub Desktop.
Save mikehwang/f0c581b73e1f5e5b8bbecec6217841fc to your computer and use it in GitHub Desktop.
Script to list emails using IMAP for the purpose of trying to **understand** how to parse emails for an ETL project
from imaplib import IMAP4_SSL
import email
# Usage: python list_emails.py <hostname> <user> <password>
# https://stackoverflow.com/questions/2230037/how-to-fetch-an-email-body-using-imaplib-in-python
# That answer was very helpful; parts of this script were probably adapted from it.
def list_emails(hostname, user, password):
    """Print a summary of every message in the account's default mailbox.

    Connects to ``hostname`` with IMAP over SSL on port 993, logs in, selects
    the default mailbox read-only and walks its messages from newest to
    oldest.  For each message the selected headers, the MIME parts and the
    MIME structure tree are printed to stdout.

    Args:
        hostname: Host name of the IMAP server.
        user: Login name.
        password: Login password.

    Returns:
        None.  This is a best-effort exploration tool: any exception raised
        while talking to the server is caught and printed, not propagated.
    """
    try:
        with IMAP4_SSL(hostname, port=993) as im:
            im.login(user, password)
            # select() defaults to the inbox; read-only so that listing the
            # messages does not modify the mailbox.
            im.select(readonly=True)
            (result, indices) = im.search(None, "ALL")
            if result != "OK":
                print("What happened?")
                return
            # The search data is a one-element list holding the
            # space-separated message numbers; imaplib hands back [None]
            # when the server sent no untagged SEARCH reply at all.
            if not indices or indices[0] is None:
                return
            # Fetch emails from latest to oldest.
            for i in reversed(indices[0].split()):
                # "(ALL)" is not a substitute for "(RFC822)": it is a macro
                # for FLAGS INTERNALDATE RFC822.SIZE ENVELOPE and carries no
                # message body.  See
                # https://tools.ietf.org/html/rfc2060.html#section-6.4.5
                (result, data) = im.fetch(i, "(RFC822)")
                if result != "OK" or not data or not isinstance(data[0], tuple):
                    # One unreadable message should not abort the listing.
                    print("Could not fetch message {0}".format(
                        i.decode("ascii", "replace")))
                    continue
                # Parse the raw bytes directly: a message is not guaranteed
                # to be valid UTF-8, so decoding it first can fail.
                _print_message(email.message_from_bytes(data[0][1]))
    except Exception as e:
        # Deliberate best effort: report the problem and stop.
        print(e)


def _print_message(msg):
    """Print the headers, MIME parts and structure tree of one parsed message."""
    # _structure is a private debugging helper of the email package; it is
    # imported here because only this function needs it.
    from email.iterators import _structure

    # Any of these headers may be absent (None), so stringify every value
    # before joining instead of assuming they are all strings.
    print("\n".join(map(str, [
        "MESSAGE:",
        msg["Subject"],
        msg["From"],
        msg["thread-index"],
        msg["thread-topic"],
        msg["message-id"],
    ])))
    print("WALK:")
    for part in msg.walk():
        print("\n".join(map(str, [
            part.get_content_type(),
            part.get("content-id"),
            # Reveals a lot of details about this part, like whether it is
            # an attachment, its filename, etc.
            part.get("content-disposition"),
        ])))
        print("---")
    print("STRUCTURE:")
    _structure(msg)
    print("")
if __name__ == "__main__":
    import sys

    # Exactly three positional arguments are required.  Checking up front
    # gives a usage hint instead of an unpacking ValueError traceback.
    if len(sys.argv) != 4:
        # sys.exit with a string writes it to stderr and exits with status 1.
        sys.exit("Usage: python list_emails.py <hostname> <user> <password>")
    hostname, user, password = sys.argv[1:]
    # The password is deliberately left out of the echoed arguments.
    print("List emails: {0} {1}".format(hostname, user))
    list_emails(hostname, user, password)
@mikehwang
Copy link
Author

mikehwang commented Mar 5, 2020

Understanding multipart/* is crucial. The MIME Wikipedia page has a subsection on it that explains, at a conceptual level, the different kinds of multipart messages.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment