Skip to content

Instantly share code, notes, and snippets.

@woodrow
Created January 15, 2012 10:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save woodrow/1615309 to your computer and use it in GitHub Desktop.
Save woodrow/1615309 to your computer and use it in GitHub Desktop.
Determine who you sent mail to from an expiring/expired email address
#!/usr/bin/env python
import imaplib
import email.parser
import pickle
import pprint
# Gmail account credentials passed to the IMAP login in main().
# These are placeholders: fill them in locally before running, and avoid
# committing a real password alongside the script.
GMAIL_USERNAME = 'yourname@gmail.com'
GMAIL_PASSWORD = 'yourgmailpassword'
# Sender address used in the "from:" search in main(); every message sent
# from this address has its To/Cc/Bcc recipients tallied.
OLD_SENDER_ADDRESS = 'defuntaccount@mit.edu'
def main():
    """Tally everyone who was mailed from OLD_SENDER_ADDRESS and save it.

    Logs in to Gmail over IMAP, searches "[Gmail]/All Mail" for messages
    sent from OLD_SENDER_ADDRESS and reads the To/Cc/Bcc headers of each
    match.  Three outputs are produced:

    * the ranked (address, message count) list is pretty-printed;
    * the same list is written to ``recipients.csv``;
    * the full per-recipient data (names seen plus per-field counts) is
      pickled to ``recipient_data.pickle``.

    Raises:
        RuntimeError: if the IMAP search does not return an OK status.
    """
    parser = email.parser.HeaderParser()
    rfields = ['to', 'cc', 'bcc']
    rcpt_data = {}

    gmail = imaplib.IMAP4_SSL('imap.gmail.com')
    try:
        gmail.login(GMAIL_USERNAME, GMAIL_PASSWORD)
        # Read-only select so that nothing in the mailbox is modified.
        gmail.select('[Gmail]/All Mail', readonly=True)
        typ, data = gmail.uid(
            'search', None, 'X-GM-RAW', '"from:%s"' % OLD_SENDER_ADDRESS)
        if typ != 'OK':
            # On failure the payload is an error message, not a UID list.
            raise RuntimeError('IMAP search failed: %r' % (data,))
        msg_ids = data[0].split() if data and data[0] else []

        for msg_id in msg_ids:
            typ, msg_data = gmail.uid('fetch', msg_id, '(BODY.PEEK[HEADER])')
            for response_part in msg_data:
                # Only tuple parts carry the header payload.
                if not isinstance(response_part, tuple):
                    continue
                raw_headers = response_part[1]
                # imaplib hands back bytes on Python 3; parsestr() needs text.
                if not isinstance(raw_headers, str):
                    raw_headers = raw_headers.decode('utf-8', 'replace')
                e = parser.parsestr(raw_headers, headersonly=True)
                for field in rfields:
                    if not e[field]:
                        continue
                    for rcpt in split_recipients(e[field]):
                        name, addr = parse_recipient(rcpt)
                        if addr not in rcpt_data:
                            rcpt_data[addr] = new_recipient()
                        r = rcpt_data[addr]
                        r['names'].add(name)
                        r['count'][field] += 1
    finally:
        # Always end the IMAP session, even if a step above failed.
        gmail.logout()

    ranked_recipients = list_top_recipients(rcpt_data)
    pprint.pprint(ranked_recipients)

    with open('recipients.csv', 'w') as f:
        f.write('recipient,num_messages\n')
        for rcpt in ranked_recipients:
            f.write('%s,%d\n' % rcpt)

    # Pickle output is binary data, so the file must be opened in 'wb'.
    with open('recipient_data.pickle', 'wb') as f:
        pickle.dump(rcpt_data, f)
def list_top_recipients(rcpt_data):
    """Rank recipient addresses by the total number of messages sent.

    Args:
        rcpt_data: dict mapping an address to a record whose ``'count'``
            entry is a dict of per-field tallies.

    Returns:
        A list of ``(address, total)`` tuples, where ``total`` is the sum
        of the record's ``'count'`` values, sorted with the largest total
        first.  The sort is stable, so addresses with equal totals keep
        the iteration order of ``rcpt_data``.
    """
    totals = [
        (addr, sum(record['count'].values()))
        for addr, record in rcpt_data.items()
    ]
    totals.sort(key=lambda pair: pair[1], reverse=True)
    return totals
def split_recipients(s):
    """Split a To/Cc/Bcc header value into individual recipient strings.

    The header is scanned for '@' signs.  Each recipient runs from the end
    of the previous one up to the first whitespace after its '@'; a comma
    left on the end of the domain is dropped.  Splitting on '@' rather than
    on ',' keeps display names that contain commas (for example
    ``"Doe, John" <jd@example.com>``) in one piece.

    Args:
        s: raw header value, possibly folded across several lines.

    Returns:
        A list of recipient strings (display name, if any, plus address) in
        header order.  Empty when the value contains no '@'.

    Note:
        Recipients must be separated by whitespace; ``a@x.com,b@y.com``
        with no space after the comma comes back as a single entry.
    """
    recipients = []
    local, at, rest = s.partition('@')
    while at:
        # The domain ends at the next whitespace; splitting on any
        # whitespace (not just ' ') also copes with folded headers.
        pieces = rest.split(None, 1)
        domain = pieces[0] if pieces else ''
        recipients.append(''.join((local, at, domain.strip(','))))
        if len(pieces) < 2:
            break
        # Carry on with the text after this recipient.  Partitioning the
        # original string on that text never yields '@' as the separator,
        # which would end the scan after the first recipient.
        local, at, rest = pieces[1].partition('@')
    return recipients
def parse_recipient(s):
    """Split one recipient string into ``(name, addr)``.

    After trimming surrounding whitespace, the text following the last
    space is taken as the address and everything before it as the display
    name.  The address is lower-cased and has any surrounding ``<``, ``>``,
    ``"`` and space characters removed.

    Args:
        s: a single recipient, e.g. ``'John Doe <jd@example.com>'`` or a
            bare ``'jd@example.com'``.

    Returns:
        A ``(name, addr)`` tuple.  ``name`` is ``None`` when the string
        contains no space; otherwise it is returned as written, including
        any quotes.
    """
    parts = s.strip().rsplit(' ', 1)
    if len(parts) > 1:
        name, addr = parts
    else:
        name, addr = None, parts[0]
    addr = addr.lower().strip('<>" ')
    return name, addr
def new_recipient():
    """Return a fresh, empty record for one recipient address.

    Returns:
        A dict with two entries: ``'names'``, an empty set, and
        ``'count'``, a dict with the keys ``'to'``, ``'cc'`` and ``'bcc'``
        all set to 0.  Every call builds new objects, so records never
        share state.
    """
    return {
        'names': set(),
        'count': {'to': 0, 'cc': 0, 'bcc': 0},
    }
# Run the export only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment