Skip to content

Instantly share code, notes, and snippets.

@yayadrian
Forked from cleverdevil/overcast-recently-played.py
Last active May 25, 2023 07:59
Show Gist options
  • Save yayadrian/67ea73d5e6635d8a41561f4608053539 to your computer and use it in GitHub Desktop.
Save yayadrian/67ea73d5e6635d8a41561f4608053539 to your computer and use it in GitHub Desktop.
Fetch recently played episodes from Overcast.fm, append them to a CSV file, and create a note for each one in Obsidian from a template.
# conf.py
# Settings for the Overcast export script, which loads them with `import conf`.
SESSION_PATH = "./sessions" # file where the pickled login session is saved and later restored from
EMAIL = "CHANGE-ME" # account email posted to the Overcast login form
PASSWORD = "CHANGE-ME" # account password posted to the Overcast login form
HISTORY_PATH = "./history" # folder holding one marker file per episode already exported (named by Overcast id)
ENDPOINT_URL = "https://api.example.com/endpoint" # NOTE(review): not referenced by the export script shown alongside this file; confirm whether it is still needed
EXPORT_FILENAME = "./playedEpisodes.csv" # CSV file that each newly exported episode is appended to
MD_TEMPLATE = 'CHANGE-ME' # path to the Obsidian template MD
OUTPUT_FOLDER = 'CHANGE-ME' # path for the Obsidian output folder; include the trailing path separator
NUMBER_DAYS_BACK = 5 # how many days to go back in history to record
'''
https://gist.github.com/cleverdevil/a8215850420493c1ee06364161e281c0
You'll need to pip install some dependencies:
* python-dateutil
* requests
Also, populate your EMAIL and PASSWORD in conf.py.
'''
from xml.etree import ElementTree
from datetime import datetime
from dateutil.tz import UTC
from dateutil.parser import parse as parse_dt
import conf
import re
import sys
import requests
import pickle
import os.path
import json
import csv
# Load the stored Overcast session, or log in and store a new one.
# Leaves `session` bound to a requests.Session for the rest of the script.
if os.path.exists(conf.SESSION_PATH):
    print('Found saved session. Restoring!')
    # NOTE: unpickling can execute arbitrary code. This is acceptable only
    # because the file is one this script wrote itself; never point
    # SESSION_PATH at a file obtained from somewhere else.
    with open(conf.SESSION_PATH, 'rb') as saved_session:
        session = pickle.load(saved_session)
else:
    print('No saved session. Authenticating!')
    session = requests.Session()
    response = session.post('https://overcast.fm/login', data={
        'email': conf.EMAIL,
        'password': conf.PASSWORD,
    })

    if response.status_code != 200:
        print('Authentication failed')
        # exit non-zero so shells and schedulers can see the failure
        sys.exit(1)

    print('Authenticated successfully. Saving session.')
    with open(conf.SESSION_PATH, 'wb') as saved_session:
        pickle.dump(session, saved_session)
# Fetch the latest detailed OPML export from Overcast.
# Leaves `response` bound to the HTTP response holding the OPML document.
print('Fetching latest OPML export from Overcast')
response = session.get('https://overcast.fm/account/export_opml/extended')

if response.status_code != 200:
    print('Failed to fetch OPML. Exiting.')
    print(response.text)
    print(response.headers)
    # exit non-zero so shells and schedulers can see the failure
    sys.exit(1)

# Cache the last OPML file. This is best-effort: a failure to write the
# cache is reported but does not stop the export.
try:
    with open('/tmp/overcast.opml', 'w', encoding='utf-8') as f:
        f.write(response.text)
except OSError as err:
    print('Unable to cache OPML file.', err)
# parse the OPML
tree = ElementTree.fromstring(response.text)

# find all podcasts and their episodes
podcasts = tree.findall(".//*[@type='rss']")

# Reference time for the "recently played" check, as an aware UTC datetime.
# datetime.now(UTC) is used rather than utcnow().astimezone(UTC): the latter
# treats the naive UTC value as local time and shifts it by the machine's
# UTC offset.
now = datetime.now(UTC)
# Export every recently played episode that has not been exported before:
# append a row to the CSV, drop a marker file in the history folder, and
# render an Obsidian note from the markdown template.

# Patterns for scraping the Overcast episode page, compiled once outside the
# loop. [^"]* stops at the first closing quote, so further attributes on the
# same line are not swallowed into the captured value.
artwork_pattern = re.compile(r'img class="art fullart" src="([^"]*)"')
summary_pattern = re.compile(r'meta name="og:description" content="([^"]*)"')

# column order of the CSV export (no header row is written)
csv_fieldnames = ['overcast_id', 'title', 'summary', 'published', 'type',
                  'author', 'link', 'duration', 'listenDateTime', 'artwork_url']

# make sure the folder for the "already exported" marker files exists
os.makedirs(conf.HISTORY_PATH, exist_ok=True)

for podcast in podcasts:
    pod_title = podcast.attrib['title']

    for episode in podcast:
        # skip unplayed episodes
        if episode.attrib.get('played', '0') != '1':
            continue

        # skip episodes without an activity date; there is nothing to compare
        user_activity_date_raw = episode.attrib.get('userUpdatedDate')
        if not user_activity_date_raw:
            continue

        user_activity_date = parse_dt(user_activity_date_raw)
        if user_activity_date.tzinfo is None:
            # `now` is timezone-aware, so a naive value cannot be subtracted
            # from it. NOTE(review): assumes offset-less dates are UTC - confirm.
            user_activity_date = user_activity_date.replace(tzinfo=UTC)

        # skip episodes played more than conf.NUMBER_DAYS_BACK days ago
        if (now - user_activity_date).days > conf.NUMBER_DAYS_BACK:
            continue

        title = episode.attrib['title']
        overcast_id = episode.attrib['overcastId']

        # skip already exported episodes before doing any network request
        footprint = os.path.join(conf.HISTORY_PATH, overcast_id)
        if os.path.exists(footprint):
            print('Skipping already published ep ->', title)
            continue

        # parse out the remaining details we care about
        published = parse_dt(episode.attrib['pubDate'])
        url = episode.attrib['url']
        overcast_url = episode.attrib['overcastUrl']

        # dates in "YYYY-MM-DD" form for the Obsidian note
        published_date_str = published.strftime('%Y-%m-%d')
        listen_date_str = user_activity_date.strftime('%Y-%m-%d')

        # fetch the episode page; artwork and summary are scraped from it
        response = session.get(overcast_url)

        # episode artwork: used only when exactly one candidate is found
        results = artwork_pattern.findall(response.text)
        artwork_url = ''
        if len(results) == 1:
            artwork_url = results[0]
        else:
            print('Cannot find podcast artwork for this show... skipping...')

        # episode summary: falls back to the title when none is found
        results = summary_pattern.findall(response.text)
        summary = title
        if len(results) == 1 and results[0]:
            summary = results[0]

        # TODO: find where to get the episode length from; scraping the
        # "timeremaining" span of the episode page was tried and did not work.
        duration = '00:00'

        print('Played episode of ', pod_title)
        print('  ->', title)
        print('  ->', summary)
        print('  ->', published)
        print('  ->', artwork_url)
        print('  ->', url)
        print('  ->', overcast_url)
        print('  ->', duration)
        print('  ->', user_activity_date_raw)

        # build payload
        episodeData = {
            'overcast_id': overcast_id,
            'title': title,
            'summary': summary,
            'published': published,
            'type': 'podcast',
            'author': pod_title,
            'link': overcast_url,
            'duration': duration,
            'listenDateTime': user_activity_date_raw,
            'artwork_url': artwork_url,
        }

        # append the episode to the CSV export
        with open(conf.EXPORT_FILENAME, 'a', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=csv_fieldnames)
            writer.writerow(episodeData)

        # Mark the episode as exported so later runs skip it. This happens
        # right after the CSV row is written so a rerun never duplicates it.
        with open(footprint, 'w') as marker:
            marker.write("All good")

        # create the Obsidian page from the template
        with open(conf.MD_TEMPLATE, 'r', encoding='utf-8') as template_file:
            template_str = template_file.read()

        # placeholders are substituted in this order
        replacements = {
            '{{Title}}': title,
            '{{Description}}': summary,
            '{{PodcastURL}}': overcast_url,
            '{{listenedDate}}': listen_date_str,
            '{{EpisodeDate}}': published_date_str,
            '{{author}}': pod_title,
            '{{ImageURL}}': artwork_url,
        }
        modified_str = template_str
        for placeholder, value in replacements.items():
            modified_str = modified_str.replace(placeholder, value)

        # collapse every run of non-word characters to a single space so the
        # title and author are safe to use in a file name
        clean_title = re.sub(r'\W+', ' ', title)
        clean_author = re.sub(r'\W+', ' ', pod_title)

        # os.path.join supplies the separator when OUTPUT_FOLDER lacks one
        output_file_name = os.path.join(
            conf.OUTPUT_FOLDER, '{} - {}.md'.format(clean_title, clean_author))

        with open(output_file_name, 'w', encoding='utf-8') as output_file:
            output_file.write(modified_str)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment