Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Script to extract all email addresses from all emails in all folders of an IMAP account.
"""Create a connection to an IMAP server and find ALL email addresses
Original script by abought: https://gist.github.com/abought/15a1e08705b121c1b7bd
References:
http://www.voidynullness.net/blog/2013/07/25/gmail-email-with-python-via-imap/
and
https://yuji.wordpress.com/2011/06/22/python-imaplib-imap-example-with-gmail/
"""
__author__ = 'jmb'
import email
import imaplib
import getpass
import sys
import re
import os
# EDIT these as required:
FILENAME = 'out.txt'
DEFAULT_MAIL_SERVER = 'imap.server'
# No user parameters below this line
# ADDR_PATTERN = re.compile('<(.*?)>') # Finds email as <nospam@nospam.com>
# Find ALL email addresses in all fields.
# The pattern is deliberately NOT anchored with ^...$: header values look like
# 'Some Name <a@b.com>, c@d.org', so an anchored pattern would never match
# anything when used with findall(). The domain is written as one label
# followed by one or more '.label' groups so that a trailing sentence dot
# ("... mail a@b.com.") is not captured as part of the address.
ADDR_PATTERN = re.compile(
    r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)+"
)
def connect(user, pwd, server=DEFAULT_MAIL_SERVER):
    """Open an SSL IMAP connection to ``server`` and authenticate.

    :param user: login name passed to the server
    :param pwd: password passed to the server
    :param server: host name of the IMAP server
    :return: the logged-in ``imaplib.IMAP4_SSL`` connection

    If the server rejects the login, a message is printed and the process
    exits with status 1.
    """
    connection = imaplib.IMAP4_SSL(server)
    try:
        connection.login(user, pwd)
    except imaplib.IMAP4.error:
        print("Failed to login")
        sys.exit(1)
    else:
        return connection
def print_folders(conn):
    """Print one indented line per mailbox entry reported by the server.

    ``conn.list()`` returns a ``(status, entries)`` pair. Only the entries
    are printed; iterating the pair itself would print the status word and
    then the whole entry list on a single line.

    :param conn: an authenticated imaplib connection
    """
    _status, entries = conn.list()
    for entry in entries or ():
        if entry is None:
            # imaplib reports "no data" as a list containing None
            continue
        print("  %s" % (entry,))
def get_folder(conn, folder_name):
    """Select ``folder_name`` (a folder or label) on the connection.

    Any folder that is currently selected is closed first. When the server
    does not answer the SELECT with 'OK', a notice is printed followed by
    the output of ``print_folders``. The connection is returned in either
    case.

    :param conn: an authenticated imaplib connection
    :param folder_name: mailbox name handed to ``conn.select``
    :return: the same connection object
    """
    has_open_folder = conn.state == "SELECTED"
    if has_open_folder:
        # Leave the previous mailbox before opening the next one
        conn.close()

    status, _response = conn.select(folder_name)
    if status == 'OK':
        return conn

    print("Could not open specified folder. Known labels:")
    print_folders(conn)
    return conn
def get_email_ids(conn, query='ALL'):
    """Return the UIDs of all messages in the selected folder matching ``query``.

    :param conn: imaplib connection with a folder already selected
    :param query: IMAP search criterion passed to ``UID SEARCH``
    :return: list of UID tokens; empty when the search fails or the
        folder contains no matching messages
    :raises imaplib.IMAP4.error: if no folder is selected on ``conn``
    """
    if conn.state != "SELECTED":
        raise imaplib.IMAP4.error("Cannot search without selecting a folder")
    rv, data = conn.uid('search', None, query)
    if rv != 'OK':
        print("Could not fetch email ids")  # for some reason...
        return []
    # imaplib hands back [None] when the server sent no untagged SEARCH
    # line at all, and an empty string when it sent one without ids.
    if not data or not data[0]:
        return []
    return data[0].split()
def fetch_message(conn, msg_uid):
    """Fetch one message by UID (not sequence number) and parse it.

    The message is requested with ``BODY.PEEK[]``, which returns the same
    content as ``RFC822`` but does not mark the message as read on the
    server.

    :param conn: imaplib connection with the relevant folder selected
    :param msg_uid: UID of the message within that folder
    :return: the parsed message (a dict-like ``email.message.Message``),
        or an empty dict when the server reports an error or returns no
        data for that UID
    """
    # TODO: Could we fetch just the envelope of the response to save bandwidth?
    rv, data = conn.uid('fetch', msg_uid, "(BODY.PEEK[])")
    # A successful fetch yields [(envelope, raw_message), ')'];
    # a UID that no longer exists yields [None].
    if rv != 'OK' or not data or not isinstance(data[0], tuple):
        print("ERROR fetching message # %s" % (msg_uid,))
        return {}
    raw = data[0][1]
    if isinstance(raw, str):
        return email.message_from_string(raw)  # dict-like object
    # Python 3 imaplib delivers the payload as bytes
    return email.message_from_bytes(raw)
def get_recipients(msg_parsed):
    """Collect the address matches found in a message's address headers.

    The 'From', 'To', 'Cc' and 'Bcc' headers are read in that order (a
    missing header counts as an empty string) and every ``ADDR_PATTERN``
    match in each value is appended to the result. Duplicates are kept.

    :param msg_parsed: dict-like parsed message offering ``get(name, default)``
    :return: list of matched strings
    """
    found = []
    for header_name in ('From', 'To', 'Cc', 'Bcc'):
        header_value = msg_parsed.get(header_name, "")
        found += ADDR_PATTERN.findall(header_value)
    return found
# One line of an IMAP LIST response: (flags) "delimiter" mailbox
_LIST_ENTRY = re.compile(
    r'^\((?P<flags>[^)]*)\)\s+(?P<delim>"(?:[^"\\]|\\.)*"|NIL)\s+(?P<name>.+?)\s*$'
)


def _parse_list_entry(entry):
    """Split one LIST response entry into ``(flags, mailbox)``.

    :param entry: one element of the data list returned by ``conn.list()``
    :return: ``(flags, mailbox)`` where ``flags`` is a list of lower-cased
        flag names and ``mailbox`` is the mailbox token exactly as the
        server sent it (surrounding quotes included), or ``None`` when
        the entry is not a parseable string
    """
    if not isinstance(entry, str):
        if not isinstance(entry, bytes):
            return None
        # Python 3 imaplib returns bytes
        entry = entry.decode("utf-8", "replace")
    match = _LIST_ENTRY.match(entry)
    if match is None:
        return None
    flags = [flag.lower() for flag in match.group("flags").split()]
    return flags, match.group("name")


def main():
    """Scan every selectable folder and append the addresses found to FILENAME.

    Prompts for the account name and password, connects, then for each
    folder fetches all messages, extracts the addresses from their headers
    and appends the ones not already written during this run to the output
    file. The file is flushed and synced after every folder so partial
    results survive an interruption. The connection is always logged out.
    """
    try:
        prompt = raw_input  # Python 2
    except NameError:
        prompt = input  # Python 3
    username = prompt("Full email address: ")
    password = getpass.getpass()
    # Connect
    mail_conn = connect(username, password)
    try:
        _status, entries = mail_conn.list()
        written = set()  # addresses already written during this run
        # Open output file; the context manager closes it even on errors
        with open(FILENAME, "a") as out_file:
            # Go through each folder
            for entry in entries or ():
                parsed = _parse_list_entry(entry)
                if parsed is None:
                    continue
                flags, mailbox = parsed
                folder = mailbox.strip('"')
                # \Noselect entries are hierarchy placeholders, not mailboxes
                if folder == "." or "\\noselect" in flags:
                    continue
                # Pass the mailbox token as the server sent it (quotes
                # included) so names containing spaces stay intact.
                mail_conn = get_folder(mail_conn, mailbox)
                if mail_conn.state != "SELECTED":
                    # get_folder already reported the failure; keep going
                    continue
                msg_uid_list = get_email_ids(mail_conn)
                print("Scanning folder:  %s  with  %s  messages"
                      % (folder, len(msg_uid_list)))
                # Fetch a list of recipients
                found = set()
                for msg_uid in msg_uid_list:
                    msg = fetch_message(mail_conn, msg_uid)
                    found.update(get_recipients(msg))
                new_addresses = found - written
                written |= new_addresses
                print("Writing %s email addresses to file %s"
                      % (len(new_addresses), out_file.name))
                out_file.writelines(
                    address + "\n" for address in sorted(new_addresses)
                )
                out_file.flush()
                os.fsync(out_file.fileno())
        print("\nWritten to file: " + FILENAME)
    finally:
        try:
            # close() is only valid while a folder is selected
            if mail_conn.state == "SELECTED":
                mail_conn.close()
        finally:
            mail_conn.logout()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.