Skip to content

Instantly share code, notes, and snippets.

@jmduke
Created January 2, 2018 19:53
Show Gist options
  • Save jmduke/b8929d9466c5d4b9d2b8e21968665ad3 to your computer and use it in GitHub Desktop.
Save jmduke/b8929d9466c5d4b9d2b8e21968665ad3 to your computer and use it in GitHub Desktop.
Tinyletter Archiver
from datetime import datetime
import requests
from bs4 import BeautifulSoup
# FILL THIS IN!
tinyletter_username = 'your_username_here'
url = "http://tinyletter.com/{}/archive?recs=100&sort=desc&q=".format(tinyletter_username)
archive_response = requests.get(url)
if archive_response.status_code == 404:
raise Exception(
'No archive exists for username: ' + tinyletter_username
)
archive_html = BeautifulSoup(archive_response.text, 'html.parser')
links = archive_html.find_all('a', class_='message-link')
urls = [link.get('href') for link in links]
for email_url in reversed(urls):
email_response = requests.get(email_url)
email_html = BeautifulSoup(email_response.content.decode('utf8'), 'html.parser')
subject = email_html.find('h1', class_='subject').text.strip()
body = email_html.find('div', class_='message-body').__str__().strip()
publish_date_string = email_html.find('div', class_='date').text.strip()
publish_date = datetime.strptime(publish_date_string, '%B %d, %Y')
assert subject is not None
assert body is not None
assert publish_date is not None
filename = '{}.txt'.format(subject)
with open(filename, 'w') as outfile:
outfile.write(body)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment