Skip to content

Instantly share code, notes, and snippets.

@cleverdevil
Last active March 4, 2024 09:28
Show Gist options
  • Star 30 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save cleverdevil/a8215850420493c1ee06364161e281c0 to your computer and use it in GitHub Desktop.
Save cleverdevil/a8215850420493c1ee06364161e281c0 to your computer and use it in GitHub Desktop.
Fetch recently played episodes from Overcast.fm. Then, publish history to my website.
'''
You'll need to pip install some dependencies:
* python-dateutil
* requests
Also, create a conf.py module next to this script that defines EMAIL, PASSWORD, SESSION_PATH, HISTORY_PATH, and ENDPOINT_URL.
'''
from xml.etree import ElementTree
from datetime import datetime
from dateutil.tz import UTC
from dateutil.parser import parse as parse_dt
import conf
import re
import sys
import requests
import pickle
import os.path
import json
# load stored session, or re-authenticate
# NOTE: the saved session is unpickled as-is, and unpickling can execute
# arbitrary code; conf.SESSION_PATH must point at a file that only this
# script writes.
if os.path.exists(conf.SESSION_PATH):
    print('Found saved session. Restoring!')
    # use a context manager so the file handle is closed after loading
    with open(conf.SESSION_PATH, 'rb') as saved_session:
        session = pickle.load(saved_session)
else:
    print('No saved session. Authenticating!')
    session = requests.Session()
    response = session.post('https://overcast.fm/login', data={
        'email': conf.EMAIL,
        'password': conf.PASSWORD
    })

    if response.status_code != 200:
        print('Authentication failed')
        # exit non-zero so that cron or a calling shell can detect the failure
        sys.exit(1)

    print('Authenticated successfully. Saving session.')

    # persist the authenticated session so later runs can skip the login
    with open(conf.SESSION_PATH, 'wb') as saved_session:
        pickle.dump(session, saved_session)
# fetch the latest detailed OPML export from Overcast
print('Fetching latest OPML export from Overcast')
response = session.get('https://overcast.fm/account/export_opml/extended')

if response.status_code != 200:
    # dump the body and headers to help diagnose the failed request
    print('Failed to fetch OPML. Exiting.')
    print(response.text)
    print(response.headers)
    # exit non-zero so that cron or a calling shell can detect the failure
    sys.exit(1)
# cache the last OPML file
# Caching is best-effort: a failed write is reported and the run continues.
# Only I/O and encoding errors are caught, so Ctrl-C and sys.exit() are no
# longer swallowed the way a bare `except:` would.
try:
    # write as UTF-8 explicitly rather than relying on the locale default
    with open('/tmp/overcast.opml', 'w', encoding='utf-8') as f:
        f.write(response.text)
except (OSError, UnicodeError):
    print('Unable to cache OPML file.')
# parse the OPML
tree = ElementTree.fromstring(response.text)

# find all podcasts and their episodes
# (every element carrying type="rss"; its children are iterated as episodes)
podcasts = tree.findall(".//*[@type='rss']")

# look for recently played episodes
# datetime.utcnow() returns a naive value, and astimezone() treats a naive
# value as local time, which shifted "now" by the local UTC offset. Ask for
# a timezone-aware UTC timestamp directly instead.
now = datetime.now(UTC)
# Publish every episode played within the last 5 days that has not been
# published before. One "footprint" file per published episode is written to
# conf.HISTORY_PATH and is used to skip that episode on later runs.

# Patterns are compiled once, outside the loops. [^"]* stops at the
# attribute's closing quote instead of greedily running to the last quote on
# the line.
ARTWORK_RE = re.compile(r'img class="art fullart" src="([^"]*)"')
SUMMARY_RE = re.compile(r'meta name="og:description" content="([^"]*)"')

for podcast in podcasts:
    pod_title = podcast.attrib['title']

    for episode in podcast:
        # skip unplayed episodes
        if episode.attrib.get('played', '0') != '1':
            continue

        # skip episodes without an activity timestamp; parse_dt(None) raises
        user_activity_date_raw = episode.attrib.get('userUpdatedDate')
        if not user_activity_date_raw:
            continue

        # skip episodes played over 5 days ago
        user_activity_date = parse_dt(user_activity_date_raw)
        if (now - user_activity_date).days > 5:
            continue

        # parse out the remaining details we care about
        title = episode.attrib['title']
        url = episode.attrib['url']
        overcast_url = episode.attrib['overcastUrl']
        overcast_id = episode.attrib['overcastId']

        # publish if needed; checked before any fetch so that episodes which
        # were already published cost no network requests
        footprint = os.path.join(conf.HISTORY_PATH, overcast_id)
        if os.path.exists(footprint):
            print('Skipping already published ep ->', title)
            continue

        # fetch the episode page; artwork and summary are scraped from it
        episode_page = session.get(overcast_url)

        # fetch the episode artwork
        artwork_matches = ARTWORK_RE.findall(episode_page.text)
        artwork_url = ''
        if len(artwork_matches) == 1:
            artwork_url = artwork_matches[0]
        else:
            print('Cannot find podcast artwork for this show... skipping...')

        # fetch the episode summary, falling back to the episode title
        summary_matches = SUMMARY_RE.findall(episode_page.text)
        summary = title
        if len(summary_matches) == 1 and summary_matches[0]:
            summary = summary_matches[0]

        print('Played episode of ', pod_title)
        print(' ->', title)
        print(' ->', summary)
        print(' ->', artwork_url)
        print(' ->', url)
        print(' ->', overcast_url)
        print(' ->', user_activity_date_raw)

        # build payload
        data = {
            'title': title,
            'summary': summary,
            'type': 'podcast',
            'author': pod_title,
            'link': overcast_url,
            'listenDateTime': user_activity_date_raw
        }

        # Attach the artwork only when it was found and downloaded. Without
        # this guard an empty artwork_url made requests.get('') raise and
        # aborted the whole run. With no artwork the episode is published
        # without a photo.
        files = None
        if artwork_url:
            poster_response = requests.get(artwork_url)
            if poster_response.ok:
                files = {
                    'photo': (
                        artwork_url.rsplit('/', 1)[-1],
                        poster_response.content,
                        poster_response.headers.get(
                            'Content-Type', 'application/octet-stream'),
                        {'Expires': '0'}
                    )
                }
            else:
                print('Failed to download artwork:',
                      poster_response.status_code)

        publish_response = requests.post(
            conf.ENDPOINT_URL,
            data={'payload': json.dumps(data)},
            files=files
        )

        if publish_response.status_code in (200, 201, 202):
            # record the footprint so this episode is skipped on later runs
            with open(footprint, 'w') as record:
                json.dump(data, record)
            print('Successfully published!')
            print(publish_response.headers)
        else:
            print('Failed to publish!')
            print(publish_response.status_code)
            print(publish_response.text)
@cleverdevil
Copy link
Author

@StrikeZW everything seems to be working fine for me. Have you created a conf.py file with SESSION_PATH, EMAIL, PASSWORD, HISTORY_PATH, and ENDPOINT_URL defined?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment