Created
December 23, 2015 01:08
-
-
Save jeffskinnerbox/088020e38933615336af to your computer and use it in GitHub Desktop.
This script tests the overall process of logging into an email account, going to a specific folder, looking for emails from a specific sender, and extracting desired information from those email messages.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""
This script tests the overall process of logging into an email account,
going to a specific folder, looking for emails from a specific sender,
and extracting desired information from those email messages.

Sources and Inspirations:
    Extract emails from Gmail with Python via IMAP - http://www.voidynullness.net/blog/2013/07/25/gmail-email-with-python-via-imap/  # NOQA
"""

import re        # regular expression operations
import sys       # interpreter services (used here for sys.exit)
import email     # email handling and generator package
import imaplib   # connect with IMAP4 protocol client
import getpass   # get user password
import datetime  # manipulation of dates and times

# identify your target email server, emailbox, email message source, and folder
# these are the only messages to be processed and all others skipped
IMAP_SERVER = "imap.gmail.com"
EMAIL_FOLDER = "INBOX"
EMAIL_SOURCE = "WaterReporting@leesburgva.gov"
EMAIL_ACCOUNT = "waterusagejirland@gmail.com"

# this is where a copy of your target emails will be dumped for debugging
OUTPUT_DIRECTORY = "/home/jeff/tmp/email_dump"
# dump the whole email message into a file for debugging
def dump_message(num, data):
    """Write the raw message to OUTPUT_DIRECTORY/<num>.eml for debugging.

    Args:
        num: message sequence number, used to build the file name.
        data: fetch response; the raw message is read from data[0][1].

    Exits the program with status 1 if the dump file cannot be opened.
    """
    # imaplib hands back sequence numbers as bytes on Python 3; decode them so
    # the file name is "1.eml" rather than "b'1'.eml" (no-op on Python 2,
    # where bytes is str)
    if isinstance(num, bytes) and not isinstance(num, str):
        label = num.decode('ascii')
    else:
        label = num

    print("\n\nWriting message %s to debug directory" % label)

    # open the file where the message is to be dumped
    try:
        f = open('%s/%s.eml' % (OUTPUT_DIRECTORY, label), 'wb')
    except (OSError, IOError) as e:
        print("ERROR: Problem with debug dumping directory %s" % e)
        sys.exit(1)

    # write the message into the file; the context manager closes the file
    # even when the write itself fails
    with f:
        f.write(data[0][1])
def parse_message(num, data):
    """Parse one fetched message and print its headers and body.

    Messages whose sender address is not EMAIL_SOURCE are skipped.

    Args:
        num: message sequence number (used only for display).
        data: fetch response; the raw message is read from data[0][1].

    Returns:
        0 when the message was printed or skipped as a non-target message,
        1 when the message is multipart and its body cannot be processed.
    """
    # get the whole message; on Python 3 the raw payload is bytes and must go
    # through message_from_bytes (on Python 2 bytes is str, so the string
    # parser is used)
    raw = data[0][1]
    if isinstance(raw, bytes) and not isinstance(raw, str):
        msg = email.message_from_bytes(raw)
    else:
        msg = email.message_from_string(raw)

    if isinstance(num, bytes) and not isinstance(num, str):
        label = num.decode('ascii')
    else:
        label = num

    # parse the message's sender and receiver; parseaddr copes with both
    # "<addr>" and "Display Name <addr>", and the "or ''" keeps a missing
    # header from raising
    sender = email.utils.parseaddr(msg['From'] or '')[1]
    receiver = email.utils.parseaddr(msg['Delivered-To'] or '')[1]

    if sender != EMAIL_SOURCE:
        print("This is not a target message")
        return 0

    print("Sender: %s" % sender)
    print("Receiver: %s" % receiver)
    print("Content Type: %s" % msg['Content-Type'])

    # parse out the message's subject and date
    print('Message %s Subject: %s' % (label, msg['Subject']))
    print('Raw Date: %s' % msg['Date'])

    # reformat date to make it more user friendly (skipped when the Date
    # header is absent or unparseable)
    date_tuple = email.utils.parsedate_tz(msg['Date']) if msg['Date'] else None
    if date_tuple:
        local_date = datetime.datetime.fromtimestamp(
            email.utils.mktime_tz(date_tuple))
        print("Day, Date, Time: %s"
              % local_date.strftime("%a, %d %b %Y, %H:%M:%S"))

    # parse out the message's body (assumes the message is not multi-part)
    multipart = msg.is_multipart()
    print("multipart =  %s" % multipart)
    if multipart:
        print("ERROR: Payload is multipart and can't be processed.")
    print("Message Body: %s" % (msg.get_payload(),))

    return 1 if multipart else 0
def process_mailbox(M):
    """Dump and parse every message in the currently selected folder.

    Args:
        M: a logged-in IMAP4 connection with a folder already selected.

    Returns:
        0 when the search fails, the folder is empty, or every message parsed
        cleanly; 1 when a fetch fails (processing stops there) or when
        parse_message reported a problem for at least one message.
    """
    # get the message sequence numbers so you can loop over all the messages
    rv, found = M.search(None, "ALL")
    if rv != 'OK':
        print("No messages found in mailbox!\n")
        return 0

    # start from success so an empty folder has a defined result
    status = 0

    # fetch the raw message contents for each of the messages
    for num in found[0].split():
        rv, data = M.fetch(num, '(RFC822)')
        if rv != 'OK':
            if isinstance(num, bytes) and not isinstance(num, str):
                label = num.decode('ascii')
            else:
                label = num
            print("ERROR: Error getting message %s. error code = %s"
                  % (label, rv))
            return 1

        # dump the mail message to a directory
        dump_message(num, data)

        # parse the email message; remember any failure instead of letting a
        # later successful message overwrite it
        if parse_message(num, data):
            status = 1

    return status
def list_folders(M):
    """Print the folders available on the connection.

    Args:
        M: a logged-in IMAP4 connection.

    Returns:
        0 when the folder list was retrieved and printed, 1 otherwise.
    """
    rv, folders = M.list()
    if rv != 'OK':
        print("ERROR: Unable to get list of mailbox folders  %s" % rv)
        return 1

    print("%s Folders are:" % EMAIL_ACCOUNT)
    print("%s \n" % (folders,))
    return 0
def main():
    """Log in, list the folders, and process EMAIL_FOLDER.

    Always terminates via sys.exit: status 1 on login failure, folder-list
    failure, or when the folder cannot be opened; otherwise the status
    returned by process_mailbox.
    """
    # get the password from the user
    password = getpass.getpass()

    # create an IMAP4 instance (of the SSL variant for security)
    # and connect to the server named by IMAP_SERVER
    mailbox = imaplib.IMAP4_SSL(IMAP_SERVER, 993)

    # attempt to login, and if the login fails, report it and exit
    try:
        rv, data = mailbox.login(EMAIL_ACCOUNT, password)
    except imaplib.IMAP4.error:
        print("ERROR: LOGIN FAILED!!!")
        sys.exit(1)

    print("%s %s \n" % (rv, data))

    # from here on the session is authenticated; the finally clause ends it
    # with logout() on every exit path (sys.exit raises SystemExit, so the
    # clause still runs)
    try:
        # get a list of the folders associated with the mailbox
        if list_folders(mailbox):
            sys.exit(1)

        # open the mailbox folder you are interested in and process it
        rv, data = mailbox.select(EMAIL_FOLDER)
        if rv != 'OK':
            print("ERROR: Unable to open mailbox  %s" % rv)
            sys.exit(1)

        print("Processing mailbox folder %s ..." % EMAIL_FOLDER)
        rtn = process_mailbox(mailbox)
        mailbox.close()
        sys.exit(rtn)
    finally:
        mailbox.logout()
# run the script only when executed directly, not when imported
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment