Created
January 15, 2012 10:12
-
-
Save woodrow/1615309 to your computer and use it in GitHub Desktop.
Determine who you sent mail to from an expiring/expired email address
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import csv
import email.parser
import imaplib
import pickle
import pprint
# Account settings. These are placeholders: edit them before running the
# script, and avoid committing a real password to version control.
GMAIL_USERNAME = 'yourname@gmail.com'
GMAIL_PASSWORD = 'yourgmailpassword'
# Sender address whose outgoing mail is searched for recipients.
OLD_SENDER_ADDRESS = 'defuntaccount@mit.edu'
def main():
    """Tally the recipients of all mail sent from OLD_SENDER_ADDRESS.

    Logs in to Gmail over IMAP, searches '[Gmail]/All Mail' (read-only) for
    messages from OLD_SENDER_ADDRESS, and counts every address found in the
    To, Cc and Bcc headers of those messages.

    Side effects:
        * pretty-prints the ranked (address, total) list to stdout;
        * writes 'recipients.csv' (header 'recipient,num_messages');
        * writes 'recipient_data.pickle' with the full per-address record
          (names seen and per-header counts).

    Raises:
        RuntimeError: if the IMAP search does not return status 'OK'.
    """
    parser = email.parser.HeaderParser()
    rfields = ['to', 'cc', 'bcc']
    rcpt_data = {}

    gmail = imaplib.IMAP4_SSL('imap.gmail.com')
    try:
        gmail.login(GMAIL_USERNAME, GMAIL_PASSWORD)
        gmail.select('[Gmail]/All Mail', readonly=True)
        typ, data = gmail.uid(
            'search', None, 'X-GM-RAW', '"from:%s"' % OLD_SENDER_ADDRESS)
        if typ != 'OK':
            # On failure data holds an error text, not message ids.
            raise RuntimeError('IMAP search failed: %r' % (data,))
        msg_ids = data[0].split() if data and data[0] else []

        for msg_id in msg_ids:
            typ, msg_data = gmail.uid('fetch', msg_id, '(BODY.PEEK[HEADER])')
            if typ != 'OK':
                continue  # best effort: skip messages that cannot be fetched
            for response_part in msg_data:
                # Only the tuple parts carry the header payload.
                if not isinstance(response_part, tuple):
                    continue
                raw_headers = response_part[1]
                if isinstance(raw_headers, bytes):
                    # parsestr() needs text; imaplib hands back bytes.
                    raw_headers = raw_headers.decode('utf-8', 'replace')
                e = parser.parsestr(raw_headers, headersonly=True)
                for field in rfields:
                    if not e[field]:
                        continue
                    for rcpt in split_recipients(e[field]):
                        name, addr = parse_recipient(rcpt)
                        if addr not in rcpt_data:
                            rcpt_data[addr] = new_recipient()
                        r = rcpt_data[addr]
                        r['names'].add(name)
                        r['count'][field] += 1
    finally:
        # Always end the IMAP session, even if something above failed.
        gmail.logout()

    ranked_recipients = list_top_recipients(rcpt_data)
    pprint.pprint(ranked_recipients)

    # csv.writer quotes any address containing a comma; '\n' line endings
    # keep the file layout identical to the previous hand-written output.
    with open('recipients.csv', 'w', newline='') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(['recipient', 'num_messages'])
        writer.writerows(ranked_recipients)

    # Pickle data is binary, so the file must be opened in 'wb' mode.
    with open('recipient_data.pickle', 'wb') as f:
        pickle.dump(rcpt_data, f)
def list_top_recipients(rcpt_data):
    """Rank recipient addresses by how often they were addressed.

    Args:
        rcpt_data: Mapping of address -> record, where each record has a
            'count' mapping whose values are summed into a single total.

    Returns:
        A list of (address, total) tuples sorted by total, highest first.
        Addresses with equal totals keep the iteration order of rcpt_data.
    """
    totals = [
        (addr, sum(record['count'].values()))
        for addr, record in rcpt_data.items()
    ]
    return sorted(totals, key=lambda pair: pair[1], reverse=True)
def split_recipients(s):
    """Split an address-list header value into one string per recipient.

    The scan is keyed on '@': each recipient runs from the end of the
    previous one up to the first comma or whitespace character that follows
    its '@'. Separators between recipients (commas, spaces, tabs and the
    CR/LF of folded headers) are discarded.

    Args:
        s: Raw header value, e.g. 'Alice <a@x.com>, b@y.com'.

    Returns:
        A list of recipient strings such as 'Alice <a@x.com>' or 'b@y.com'.
        Text that contains no '@' yields no recipient, so an empty string
        returns an empty list.

    Note:
        A display name that itself contains '@' is not handled and will be
        split at that character.
    """
    separators = ', \t\r\n'
    recipients = []
    rest = s
    while True:
        local, at, tail = rest.partition('@')
        if not at:
            break
        # The domain ends at the first separator after the '@'.
        end = len(tail)
        for i, ch in enumerate(tail):
            if ch in separators:
                end = i
                break
        recipients.append(local + at + tail[:end])
        # Continue scanning in what is left, not in the whole string again.
        rest = tail[end:].lstrip(separators)
    return recipients
def parse_recipient(s):
    """Split one recipient string into a display name and an address.

    The text after the last run of whitespace is taken as the address and
    everything before it as the name. Splitting on any whitespace (not just
    a single space) keeps tabs, double spaces and folded-header line breaks
    out of the returned name.

    Args:
        s: A single recipient, e.g. 'Bob Smith <Bob@Example.com>'.

    Returns:
        A (name, addr) tuple. name is None when s has no whitespace-separated
        name part; it is returned as written (surrounding quotes are kept).
        addr is lower-cased with any surrounding '<', '>', '"' and spaces
        removed; it is '' for empty input.
    """
    parts = s.strip().rsplit(None, 1)
    if len(parts) > 1:
        name, addr = parts
    elif parts:
        name, addr = None, parts[0]
    else:
        # Empty or whitespace-only input.
        name, addr = None, ''
    addr = addr.lower().strip('<>" ')
    return name, addr
def new_recipient():
    """Create an empty record for a recipient address.

    Returns:
        A new dict with 'names' (an empty set of display names) and 'count'
        (a dict holding a zero counter for each of 'to', 'cc' and 'bcc').
        Every call returns independent objects.
    """
    return {
        'names': set(),
        'count': {'to': 0, 'cc': 0, 'bcc': 0},
    }
# Run the tally only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment