Last active
August 29, 2015 14:10
-
-
Save stuartlynn/c9926e7faebed6d2b4c3 to your computer and use it in GitHub Desktop.
Purge unsubscribed emails
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from mailbox import mbox | |
# from bs4 import BeautifulSoup | |
from pyquery import PyQuery as pq | |
import code | |
# Subjects of the daily monitoring reports whose HTML parts are scanned.
REPORT_SUBJECTS = (
    "ZOONIVERSE2: Daily error monitoring report",
    "ZOONIVERSE: Daily error monitoring report",
)

# Text that splits a report body in two: addresses first seen before it are
# collected as rejects, addresses first seen at or after it as warnings.
SECTION_MARKER = "currently being monitored"


def classify_emails(body, emails):
    """Split *emails* into rejects and warnings by their position in *body*.

    An address whose first occurrence in ``body`` comes before
    ``SECTION_MARKER`` is a reject; one at or after the marker is a warning.
    If the marker is absent, every address that is found counts as a reject.
    An address that cannot be located in ``body`` at all is recorded as a
    reject exactly once and is never compared against a position left over
    from a previous address.

    Args:
        body: Raw text of the message part that the addresses came from.
        emails: Iterable of address strings extracted from ``mailto:`` links.

    Returns:
        A ``(rejects, warnings)`` tuple of lists, in input order and with
        duplicates preserved.
    """
    try:
        split_index = body.index(SECTION_MARKER)
    except ValueError:
        # No marker: treat the whole body as the "before" section.
        split_index = len(body)

    rejects = []
    warns = []
    for email in emails:
        try:
            email_index = body.index(str(email))
        except ValueError:
            # The address from the parsed link is not present verbatim in the
            # raw text, so there is no position to classify it by.
            rejects.append(email)
            continue
        if email_index < split_index:
            rejects.append(email)
        else:
            warns.append(email)
    return rejects, warns


m = mbox("noreply.mbox")
reject_emails = []
warnings = []
done = 0

for message in m:
    # Progress report every 1000 messages.
    if done % 1000 == 0:
        print("done " + str(done) + " have " + str(len(reject_emails)) + " rejects and " + str(len(warnings)))
    done += 1

    if message['subject'] not in REPORT_SUBJECTS:
        continue

    for part in message.walk():
        if part.get_content_type() != "text/html":
            continue
        body = str(part)
        parsed_html = pq(body)
        # Keep whatever follows the last ':' of each mailto: link target.
        addresses = [
            email_link.get("href").split(':')[-1]
            for email_link in parsed_html('a[href^=mailto]')
        ]
        part_rejects, part_warnings = classify_emails(body, addresses)
        reject_emails.extend(part_rejects)
        warnings.extend(part_warnings)

# Write each distinct address once, one per line; `with` guarantees the data
# is flushed and the handle closed before the next file is opened.
with open("reject.csv", "w") as f:
    for email in set(reject_emails):
        f.write(email + "\n")

with open("warnings.csv", "w") as f:
    for email in set(warnings):
        f.write(email + "\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment