Fetch recently played episodes from Overcast.fm. Then, publish history to my website.
'''
You'll need to pip install some dependencies:
* python-dateutil
* requests
Also, populate EMAIL and PASSWORD (along with SESSION_PATH, HISTORY_PATH and
ENDPOINT_URL) in the `conf` module that this script imports.
'''
from xml.etree import ElementTree | |
from datetime import datetime | |
from dateutil.tz import UTC | |
from dateutil.parser import parse as parse_dt | |
import conf | |
import re | |
import sys | |
import requests | |
import pickle | |
import os.path | |
import json | |
# Load the stored session, or re-authenticate and store a fresh one.
if os.path.exists(conf.SESSION_PATH):
    print('Found saved session. Restoring!')
    # NOTE(security): unpickling runs arbitrary code embedded in the file, so
    # conf.SESSION_PATH must only ever be writable by a trusted user.
    with open(conf.SESSION_PATH, 'rb') as saved_session:
        session = pickle.load(saved_session)
else:
    print('No saved session. Authenticating!')
    session = requests.Session()
    response = session.post('https://overcast.fm/login', data={
        'email': conf.EMAIL,
        'password': conf.PASSWORD
    })

    if response.status_code != 200:
        print('Authentication failed')
        # Exit non-zero so that callers (cron, shell scripts) see the failure.
        sys.exit(1)

    print('Authenticated successfully. Saving session.')
    with open(conf.SESSION_PATH, 'wb') as saved_session:
        pickle.dump(session, saved_session)
# Fetch the latest detailed ("extended") OPML export from Overcast using the
# authenticated session.
print('Fetching latest OPML export from Overcast')
response = session.get('https://overcast.fm/account/export_opml/extended')

if response.status_code != 200:
    print('Failed to fetch OPML. Exiting.')
    # Dump the body and headers to help diagnose why the export was refused.
    print(response.text)
    print(response.headers)
    # Exit non-zero so that callers (cron, shell scripts) see the failure.
    sys.exit(1)
# Cache the last OPML file. This is best-effort: a failure is reported but
# does not stop the run. Only I/O and encoding errors are caught, so that
# Ctrl-C and sys.exit() are no longer swallowed.
try:
    # Write as UTF-8 explicitly so the result does not depend on the locale.
    with open('/tmp/overcast.opml', 'w', encoding='utf-8') as f:
        f.write(response.text)
except (OSError, UnicodeError) as error:
    print('Unable to cache OPML file.')
    print(error)
# Parse the OPML.
tree = ElementTree.fromstring(response.text)

# Find every element with type="rss"; each one is treated as a podcast, and
# its child elements as that podcast's episodes.
podcasts = tree.findall(".//*[@type='rss']")

# Reference time for the "recently played" check, as an aware UTC datetime.
# datetime.utcnow() returns a *naive* value which .astimezone() would
# interpret as local time, shifting the result by the machine's UTC offset;
# datetime.now(UTC) yields the correct instant directly.
now = datetime.now(UTC)
# Patterns used to scrape an episode's Overcast page. Each capture stops at
# the attribute's closing quote, so anything following it on the same line is
# not swallowed into the match (the previous greedy `(.*)"` ran to the last
# quote on the line).
artwork_pattern = re.compile(r'img class="art fullart" src="([^"]*)"')
summary_pattern = re.compile(r'meta name="og:description" content="([^"]*)"')

# Walk every podcast and publish each recently played episode that has not
# been published before. A "footprint" file named after the episode's
# Overcast id in conf.HISTORY_PATH marks an episode as already published.
for podcast in podcasts:
    pod_title = podcast.attrib['title']

    for episode in podcast:
        # skip unplayed episodes
        if episode.attrib.get('played', '0') != '1':
            continue

        # skip episodes without an activity date, or played over 5 days ago
        user_activity_date_raw = episode.attrib.get('userUpdatedDate')
        if not user_activity_date_raw:
            continue
        user_activity_date = parse_dt(user_activity_date_raw)
        if (now - user_activity_date).days > 5:
            continue

        # parse out the remaining details we care about
        title = episode.attrib['title']
        url = episode.attrib['url']
        overcast_url = episode.attrib['overcastUrl']
        overcast_id = episode.attrib['overcastId']

        # skip already published episodes *before* making any network
        # request for them
        footprint = conf.HISTORY_PATH + '/' + overcast_id
        if os.path.exists(footprint):
            print('Skipping already published ep ->', title)
            continue

        # fetch the episode page, which carries the artwork and the summary
        episode_page = session.get(overcast_url).text

        # find the episode artwork; without it there is nothing to upload as
        # the photo, so really skip the episode instead of crashing later on
        # an empty URL
        results = artwork_pattern.findall(episode_page)
        if len(results) != 1:
            print('Cannot find podcast artwork for this show... skipping...')
            continue
        artwork_url = results[0]

        # find the episode summary, falling back to the title
        results = summary_pattern.findall(episode_page)
        summary = title
        if len(results) == 1 and results[0]:
            summary = results[0]

        print('Played episode of ', pod_title)
        print(' ->', title)
        print(' ->', summary)
        print(' ->', artwork_url)
        print(' ->', url)
        print(' ->', overcast_url)
        print(' ->', user_activity_date_raw)

        # build payload
        data = {
            'title': title,
            'summary': summary,
            'type': 'podcast',
            'author': pod_title,
            'link': overcast_url,
            'listenDateTime': user_activity_date_raw
        }

        # download the artwork and upload it together with the payload
        poster_response = requests.get(artwork_url)
        response = requests.post(conf.ENDPOINT_URL, data={
            'payload': json.dumps(data)
        }, files={
            'photo': (
                artwork_url.rsplit('/', 1)[1],
                poster_response.content,
                # tolerate an artwork response without a Content-Type header
                poster_response.headers.get(
                    'Content-Type', 'application/octet-stream'
                ),
                {'Expires': '0'}
            )
        })

        if response.status_code in (200, 201, 202):
            # record the footprint so this episode is not published again
            with open(footprint, 'w') as history_file:
                json.dump(data, history_file)
            print('Successfully published!')
            print(response.headers)
        else:
            print('Failed to publish!')
            print(response.status_code)
            print(response.text)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment