Skip to content

Instantly share code, notes, and snippets.

@peacing
Created August 23, 2020 20:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save peacing/bdf973c23eaf0e954e02b6e9f534b3ab to your computer and use it in GitHub Desktop.
Save peacing/bdf973c23eaf0e954e02b6e9f534b3ab to your computer and use it in GitHub Desktop.
import imaplib
from bs4 import BeautifulSoup
from collections import defaultdict
def run(user='youremail@gmail.com', password='exampleapppassword',
        query='(FROM "Tristan Handy" SUBJECT "DSR #")'):
    """Tally the link texts found in Gmail messages matching an IMAP search.

    Logs in to ``imap.gmail.com``, searches INBOX with ``query``, fetches the
    raw text body of every hit, and counts the first child of each ``<a>``
    tag that passes the attribute filter below. Each counted entry is also
    printed as it is found.

    Args:
        user: Account name passed to the IMAP login.
        password: Password (or app password) passed to the IMAP login.
        query: IMAP SEARCH criteria string.

    Returns:
        A list of ``(link_text, count)`` tuples sorted by count, highest
        first, e.g. ``[('towardsdatascience.com', 104), ('medium.com', 84), ...]``.
        The list is empty when the search matches nothing.

    Raises:
        imaplib.IMAP4.error: If login fails or the search is rejected.
    """
    imap_url = 'imap.gmail.com'
    sponsor_marker = 'www.stitchdata.com'
    sources = defaultdict(int)

    # The context manager logs out on exit, so the connection is released
    # even when a fetch or the parsing below raises.
    with imaplib.IMAP4_SSL(imap_url) as con:
        con.login(user, password)
        con.select('"INBOX"')
        result, data = con.search(None, query)
        if result != 'OK':
            raise imaplib.IMAP4.error(f'search failed: {result} {data!r}')

        # split() without an argument returns [] for an empty result, whereas
        # split(' ') would produce [''] and trigger a fetch of an empty id.
        msg_ids = (data[0] or b'').decode().split()

        for msg_id in msg_ids:
            # BODY.PEEK fetches the body without setting the \Seen flag.
            resp, msg = con.fetch(msg_id, '(BODY.PEEK[TEXT])')
            if resp != 'OK' or not msg or not isinstance(msg[0], tuple):
                continue  # nothing usable came back for this id
            html = msg[0][1]
            soup = BeautifulSoup(html, 'html.parser')

            for link in soup.find_all('a'):
                # NOTE(review): the body is parsed without transfer decoding,
                # so the attribute values presumably still carry
                # quoted-printable "=3D" escapes -- hence the literal '3D"'
                # prefixes. Confirm against a real message before changing.
                if (link.get('target') != '3D"_blank"'
                        or link.get('style') != '3D"text-decoration:'):
                    continue
                if not link.contents:
                    continue  # empty <a></a>; contents[0] would raise
                url = link.contents[0]
                if not url:
                    continue
                url = str(url).strip().replace('\r\n', '')
                if url == sponsor_marker:
                    break  # stop once hit sponsors
                # Skip entries containing 'Shar' once '=' is removed
                # (presumably share links broken up by soft line breaks).
                if 'Shar' not in url.replace('=', '') and url != '':
                    print(f'url: {url}')
                    sources[url] += 1

    sorted_sources = sorted(sources.items(), key=lambda kv: kv[1], reverse=True)
    return sorted_sources
# Script entry point: run the tally only when executed directly, not on import.
if '__main__' == __name__:
    run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment