Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Fetch recently played episodes from Overcast. Then, publish history to my website.
"""Publish recently played Overcast episodes to a website.

The script logs in to Overcast (re-using a pickled ``requests`` session when
one exists), downloads the extended OPML export, and, for every episode that
was played within the last ``MAX_AGE_DAYS`` days and has not been published
before, posts a JSON payload plus the episode artwork to
``conf.ENDPOINT_URL``.

You'll need to pip install some dependencies:

* python-dateutil
* requests

Also, create a ``conf`` module (``conf.py``) next to this script defining
``EMAIL``, ``PASSWORD``, ``SESSION_PATH``, ``HISTORY_PATH`` and
``ENDPOINT_URL``.
"""

from xml.etree import ElementTree
from datetime import datetime
# NOTE(review): the module name was missing from this import in the copy
# under review; ``dateutil.tz`` is assumed because python-dateutil is a listed
# dependency and it exports ``UTC``.
from dateutil.tz import UTC
from dateutil.parser import parse as parse_dt

import conf
import re
import sys
import requests
import pickle
import os.path
import json


# NOTE(review): both Overcast URLs were blank in the copy under review. These
# are the login and extended-OPML-export endpoints this script appears to
# target -- confirm before relying on them.
LOGIN_URL = 'https://overcast.fm/login'
OPML_EXPORT_URL = 'https://overcast.fm/account/export_opml/extended'

# Where the most recent OPML export is cached for debugging.
OPML_CACHE_PATH = '/tmp/overcast.opml'

# Episodes whose last user activity is older than this are ignored.
MAX_AGE_DAYS = 5

# Patterns used to scrape the episode page. ``[^"]*`` stops at the attribute's
# closing quote; a greedy ``.*`` would swallow any later attribute that
# happens to sit on the same line.
ARTWORK_RE = re.compile(r'img class="art fullart" src="([^"]*)"')
SUMMARY_RE = re.compile(r'meta name="og:description" content="([^"]*)"')


# load stored session, or re-authenticate
if os.path.exists(conf.SESSION_PATH):
    print('Found saved session. Restoring!')
    # SECURITY: pickle executes arbitrary code on load. This is acceptable
    # only because SESSION_PATH is a file this script wrote itself; never
    # point it at a file from an untrusted source.
    with open(conf.SESSION_PATH, 'rb') as saved_session:
        session = pickle.loads(saved_session.read())
else:
    print('No saved session. Authenticating!')
    session = requests.Session()
    response = session.post(LOGIN_URL, data={
        'email': conf.EMAIL,
        'password': conf.PASSWORD
    })

    if response.status_code != 200:
        print('Authentication failed')
        sys.exit(1)

    print('Authenticated successfully. Saving session.')
    with open(conf.SESSION_PATH, 'wb') as saved_session:
        saved_session.write(pickle.dumps(session))

# fetch the latest detailed OPML export from Overcast
print('Fetching latest OPML export from Overcast')
response = session.get(OPML_EXPORT_URL)
if response.status_code != 200:
    print('Failed to fetch OPML. Exiting.')
    sys.exit(1)

# cache the last OPML file; caching is best-effort, so a failure to write is
# reported but does not stop the run
try:
    with open(OPML_CACHE_PATH, 'w') as f:
        f.write(response.text)
except OSError:
    print('Unable to cache OPML file.')

# parse the OPML
tree = ElementTree.fromstring(response.text)

# find all podcasts and their episodes
podcasts = tree.findall(".//*[@type='rss']")

# look for recently played episodes
# ``datetime.now(UTC)`` yields an aware UTC timestamp. The previous
# ``utcnow().astimezone(UTC)`` treated the naive UTC value as local time and
# was therefore off by the machine's UTC offset.
now = datetime.now(UTC)
for podcast in podcasts:
    pod_title = podcast.attrib['title']
    for episode in podcast:
        # skip unplayed episodes
        played = episode.attrib.get('played', '0') == '1'
        if not played:
            continue

        # skip episodes without an activity date; parse_dt(None) would raise
        user_activity_date_raw = episode.attrib.get('userUpdatedDate')
        if not user_activity_date_raw:
            continue

        # skip episodes played over 5 days ago
        # NOTE(review): assumes the export's timestamps carry a UTC offset;
        # subtracting a naive datetime from ``now`` would raise TypeError.
        user_activity_date = parse_dt(user_activity_date_raw)
        recency = now - user_activity_date
        if recency.days > MAX_AGE_DAYS:
            continue

        # parse out the remaining details we care about
        title = episode.attrib['title']
        published = parse_dt(episode.attrib['pubDate'])
        url = episode.attrib['url']
        overcast_url = episode.attrib['overcastUrl']
        overcast_id = episode.attrib['overcastId']
        progress = episode.attrib.get('progress')

        # publish if needed; checked before any network request so that
        # already-published episodes cost nothing
        footprint = os.path.join(conf.HISTORY_PATH, overcast_id)
        if os.path.exists(footprint):
            print('Skipping already published ep ->', title)
            continue

        # fetch the episode artwork
        response = session.get(overcast_url)
        results = ARTWORK_RE.findall(response.text)
        if len(results) != 1:
            print('Cannot find podcast artwork for this show... skipping...')
            continue
        artwork_url = results[0]

        # fetch the episode summary, falling back to the episode title
        results = SUMMARY_RE.findall(response.text)
        summary = title
        if len(results) == 1 and results[0]:
            summary = results[0]

        print('Played episode of ', pod_title)
        print('   ->', title)
        print('   ->', summary)
        print('   ->', artwork_url)
        print('   ->', url)
        print('   ->', overcast_url)
        print('   ->', user_activity_date_raw)

        # build payload
        data = {
            'title': title,
            'summary': summary,
            'type': 'podcast',
            'author': pod_title,
            'link': overcast_url,
            'listenDateTime': user_activity_date_raw
        }

        # download the artwork so it can be attached to the post
        poster_response = requests.get(artwork_url)
        if poster_response.status_code != 200:
            print('Failed to download artwork... skipping...')
            continue

        # requests' ``files`` tuple is
        # (filename, content, content_type, extra_headers).
        # NOTE(review): the content and content-type entries were missing in
        # the copy under review; the downloaded bytes and the server-reported
        # type (defaulting to JPEG) are the assumed intent.
        response = requests.post(conf.ENDPOINT_URL, data={
            'payload': json.dumps(data)
        }, files={
            'photo': (
                artwork_url.rsplit('/', 1)[1],
                poster_response.content,
                poster_response.headers.get('Content-Type', 'image/jpeg'),
                {'Expires': '0'}
            )
        })

        if response.status_code in (200, 201, 202):
            # the footprint file marks this episode as published so later
            # runs skip it
            with open(footprint, 'w') as published_marker:
                published_marker.write(json.dumps(data))
            print('Successfully published!')
        else:
            print('Failed to publish!')
Copy link

StrikeZW commented Feb 14, 2022


Is the extended OPML download broken somehow? I can do it via the browser, but with this Python script I only get a file which is not the XML I was looking for. It seems that the authentication is no longer valid after the session is saved... any hints?

Copy link

cleverdevil commented Feb 22, 2022

@StrikeZW everything seems to be working fine for me. Have you created a `conf.py` file with SESSION_PATH, EMAIL, PASSWORD, HISTORY_PATH, and ENDPOINT_URL defined?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment