Created
January 2, 2018 19:53
-
-
Save jmduke/b8929d9466c5d4b9d2b8e21968665ad3 to your computer and use it in GitHub Desktop.
Tinyletter Archiver
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime
import re

import requests
from bs4 import BeautifulSoup
# Tinyletter archiver: downloads every email listed on a Tinyletter archive
# page and writes each message body (as HTML) to '<subject>.txt' in the
# current working directory.

# FILL THIS IN!
tinyletter_username = 'your_username_here'

# Seconds to wait on any single HTTP request; without a timeout a stalled
# connection would hang the script forever.
REQUEST_TIMEOUT = 30


def _safe_filename(subject):
    """Return *subject* made safe to use as a single filename component.

    Path separators, characters reserved on common filesystems, and control
    characters are replaced with '_' so a scraped subject can neither break
    ``open()`` nor point outside the current directory. Falls back to
    'untitled' when nothing usable is left.
    """
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', subject).strip(' .')
    return cleaned or 'untitled'


def _require_tag(html, tag_name, css_class, source_url):
    """Return the first ``<tag_name class=css_class>`` element in *html*.

    Raises:
        ValueError: if the element is absent, naming the page it was
            expected on so the failure is easy to diagnose.
    """
    tag = html.find(tag_name, class_=css_class)
    if tag is None:
        raise ValueError(
            'No <{} class="{}"> element found in {}'.format(
                tag_name, css_class, source_url
            )
        )
    return tag


url = "http://tinyletter.com/{}/archive?recs=100&sort=desc&q=".format(tinyletter_username)
archive_response = requests.get(url, timeout=REQUEST_TIMEOUT)
if archive_response.status_code == 404:
    raise Exception(
        'No archive exists for username: ' + tinyletter_username
    )
# Any other HTTP error would otherwise be parsed as if it were the archive.
archive_response.raise_for_status()

archive_html = BeautifulSoup(archive_response.text, 'html.parser')
links = archive_html.find_all('a', class_='message-link')
# Skip anchors that carry no href; requests cannot fetch None.
urls = [link.get('href') for link in links if link.get('href')]

# The archive is requested with sort=desc, so reversing walks the emails in
# the opposite order of the listing.
for email_url in reversed(urls):
    email_response = requests.get(email_url, timeout=REQUEST_TIMEOUT)
    email_response.raise_for_status()
    email_html = BeautifulSoup(email_response.content.decode('utf8'), 'html.parser')

    subject = _require_tag(email_html, 'h1', 'subject', email_url).text.strip()
    body = str(_require_tag(email_html, 'div', 'message-body', email_url)).strip()
    publish_date_string = _require_tag(email_html, 'div', 'date', email_url).text.strip()
    # Parsed only to validate the date format (raises ValueError on a
    # mismatch); the value itself is not used below.
    publish_date = datetime.strptime(publish_date_string, '%B %d, %Y')

    # NOTE: two emails with the same subject map to the same file, and the
    # later one overwrites the earlier.
    filename = '{}.txt'.format(_safe_filename(subject))
    # Explicit encoding so non-ASCII message text does not depend on the
    # platform's default codec.
    with open(filename, 'w', encoding='utf-8') as outfile:
        outfile.write(body)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment