Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Fetch recently played episodes from Overcast.fm. Then, publish history to my website.
'''
You'll need to pip install some dependencies:
* python-dateutil
* requests
Also, create a conf.py module next to this script that defines EMAIL, PASSWORD, SESSION_PATH, HISTORY_PATH, and ENDPOINT_URL.
'''
from xml.etree import ElementTree
from datetime import datetime
from dateutil.tz import UTC
from dateutil.parser import parse as parse_dt
import conf
import re
import sys
import requests
import pickle
import os.path
import json
# Restore a previously saved Overcast session if one exists on disk;
# otherwise log in with the configured credentials and save the session
# so later runs can reuse it.
if os.path.exists(conf.SESSION_PATH):
    print('Found saved session. Restoring!')
    # NOTE(security): unpickling can execute arbitrary code, so
    # conf.SESSION_PATH must only ever be writable by the user running
    # this script.
    with open(conf.SESSION_PATH, 'rb') as saved_session:
        session = pickle.load(saved_session)
else:
    print('No saved session. Authenticating!')
    session = requests.Session()
    response = session.post('https://overcast.fm/login', data={
        'email': conf.EMAIL,
        'password': conf.PASSWORD
    })

    if response.status_code != 200:
        print('Authentication failed')
        # exit non-zero so a calling shell / scheduler can see the failure
        sys.exit(1)

    print('Authenticated successfully. Saving session.')
    with open(conf.SESSION_PATH, 'wb') as saved_session:
        pickle.dump(session, saved_session)
# Fetch the latest detailed ("extended") OPML export from Overcast using
# the authenticated session. On any non-200 reply, dump the response body
# and headers for diagnosis and stop.
print('Fetching latest OPML export from Overcast')
response = session.get('https://overcast.fm/account/export_opml/extended')

if response.status_code != 200:
    print('Failed to fetch OPML. Exiting.')
    print(response.text)
    print(response.headers)
    # exit non-zero so a calling shell / scheduler can see the failure
    sys.exit(1)
# Keep a copy of the last OPML export on disk. This is best-effort: a
# failure to write the file is reported but does not stop the run.
try:
    # write as UTF-8 explicitly so the result does not depend on the locale
    with open('/tmp/overcast.opml', 'w', encoding='utf-8') as f:
        f.write(response.text)
except (OSError, UnicodeError):
    # only I/O and encoding problems are tolerated here; anything else
    # (e.g. KeyboardInterrupt) is allowed to propagate
    print('Unable to cache OPML file.')
# parse the OPML
tree = ElementTree.fromstring(response.text)

# find all podcasts (elements with type="rss"); their children are episodes
podcasts = tree.findall(".//*[@type='rss']")

# Reference time for the recently-played filter, as a timezone-aware UTC
# datetime. datetime.now(UTC) is used instead of
# datetime.utcnow().astimezone(UTC): the latter interprets the naive UTC
# value as local time and ends up skewed by the machine's UTC offset.
now = datetime.now(UTC)
# Walk every podcast and publish each episode that was played within the
# last 5 days and has not been published before. A "footprint" file named
# after the episode's Overcast id under conf.HISTORY_PATH records that an
# episode was published successfully.
for podcast in podcasts:
    pod_title = podcast.attrib['title']

    for episode in podcast:
        # skip unplayed episodes
        if episode.attrib.get('played', '0') != '1':
            continue

        # skip episodes with no activity timestamp: there is nothing to
        # compare against and nothing to report as the listen time
        user_activity_date_raw = episode.attrib.get('userUpdatedDate')
        if not user_activity_date_raw:
            continue

        # skip episodes played over 5 days ago
        user_activity_date = parse_dt(user_activity_date_raw)
        recency = now - user_activity_date
        if recency.days > 5:
            continue

        # parse out the remaining details we care about
        title = episode.attrib['title']
        url = episode.attrib['url']
        overcast_url = episode.attrib['overcastUrl']
        overcast_id = episode.attrib['overcastId']

        # skip episodes that already have a footprint; checked before any
        # network request so published episodes cost nothing
        footprint = os.path.join(conf.HISTORY_PATH, overcast_id)
        if os.path.exists(footprint):
            print('Skipping already published ep ->', title)
            continue

        # fetch the episode page once; artwork and summary both come from it
        page = session.get(overcast_url)

        # fetch the episode artwork
        results = re.findall('img class="art fullart" src="(.*)"', page.text)
        if len(results) != 1:
            # without an artwork URL there is no photo to upload, so skip
            # this episode instead of failing on an empty URL further down
            print('Cannot find podcast artwork for this show... skipping...')
            continue
        artwork_url = results[0]

        # fetch the episode summary, falling back to the title
        results = re.findall('meta name="og:description" content="(.*)"', page.text)
        summary = title
        if len(results) == 1 and results[0]:
            summary = results[0]

        print('Played episode of ', pod_title)
        print(' ->', title)
        print(' ->', summary)
        print(' ->', artwork_url)
        print(' ->', url)
        print(' ->', overcast_url)
        print(' ->', user_activity_date_raw)

        # build payload
        data = {
            'title': title,
            'summary': summary,
            'type': 'podcast',
            'author': pod_title,
            'link': overcast_url,
            'listenDateTime': user_activity_date_raw
        }

        # download the artwork and upload it alongside the JSON payload
        poster_response = requests.get(artwork_url)
        response = requests.post(conf.ENDPOINT_URL, data={
            'payload': json.dumps(data)
        }, files={
            'photo': (
                # file name: last path segment of the artwork URL
                artwork_url.rsplit('/', 1)[-1],
                poster_response.content,
                poster_response.headers.get(
                    'Content-Type', 'application/octet-stream'
                ),
                {'Expires': '0'}
            )
        })

        if response.status_code in (200, 201, 202):
            # record the footprint so this episode is not published again
            with open(footprint, 'w') as published_marker:
                published_marker.write(json.dumps(data))
            print('Successfully published!')
            print(response.headers)
        else:
            print('Failed to publish!')
            print(response.status_code)
            print(response.text)
@StrikeZW
Copy link

StrikeZW commented Feb 14, 2022

Hello,

Is the extended OPML download broken somehow? I can do it via the browser, but with this Python script I only get a file that is not the XML I was looking for. It seems that the authentication is no longer valid after the session is saved... any hints?

@cleverdevil
Copy link
Author

cleverdevil commented Feb 22, 2022

@StrikeZW everything seems to be working fine for me. Have you created a conf.py file with SESSION_PATH, EMAIL, PASSWORD, HISTORY_PATH, and ENDPOINT_URL defined?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment