Skip to content

Instantly share code, notes, and snippets.

@tyndyll
Last active November 4, 2021 05:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save tyndyll/5fcdfa75bd1663e7f976 to your computer and use it in GitHub Desktop.
Save tyndyll/5fcdfa75bd1663e7f976 to your computer and use it in GitHub Desktop.
Download Podcast from Overcast Permalinks

Download From Overcast

Overcast on iOS is my primary podcast listening method, but I have an occasional need to download podcasts for archival or offline listening purposes. This script takes advantage of Overcast's permalink and show page to get the podcast author and title before downloading the podcast itself from the original page.

Usage: python download_from_overcast.py <overcast_url>
e.g. python download_from_overcast.py https://overcast.fm/+MWUwqlFc

Also available as an Alfred Workflow

#!/usr/bin/env python
#
# [Overcast](https://overcast.fm/) on iOS is my primary podcasting
# listening method, but I have an occasional need to download podcasts for
# archival or offline listening purposes. This script takes advantage of
# Overcast's permalink and showpage to get the podcast author and title
# before downloading the podcast itself from the original page.
#
# Usage: python download_from_overcast.py <overcast_url>
# e.g. python download_from_overcast.py https://overcast.fm/+MWUwqlFc
import os
import re
import sys

try:
    # Python 3 locations of the urllib helpers.
    from urllib.request import Request
    from urllib.request import urlopen
    from urllib.request import urlretrieve
except ImportError:
    # Python 2 fallback (the original imports).
    from urllib2 import Request
    from urllib2 import urlopen
    from urllib import urlretrieve
def get_title(html_str):
    """Return the episode title from the page's ``og:title`` meta tag.

    Args:
        html_str: HTML source of the Overcast page, as text.

    Returns:
        The tag's ``content`` value with ``&mdash;`` replaced by ``-``, or
        ``None`` unless exactly one ``og:title`` tag is found.
    """
    # Non-greedy capture: stop at the first closing quote so that further
    # attributes or tags on the same line are not swallowed into the title.
    matches = re.findall(r"<meta name=\"og:title\" content=\"(.+?)\"", html_str)
    if len(matches) != 1:
        return None
    return matches[0].replace("&mdash;", "-")
def get_description(html_str):
    """Return the episode description from the ``og:description`` meta tag.

    Args:
        html_str: HTML source of the Overcast page, as text.

    Returns:
        The tag's ``content`` value, or ``None`` unless exactly one
        ``og:description`` tag is found.
    """
    # Non-greedy capture: stop at the first closing quote so that further
    # attributes or tags on the same line are not included.
    desc_re = r"<meta name=\"og:description\" content=\"(.+?)\""
    matches = re.findall(desc_re, html_str)
    if len(matches) != 1:
        return None
    return matches[0]
def get_url(html_string):
    """Return the audio URL from the page's ``<audio><source src=...>`` tag.

    Args:
        html_string: HTML source of the Overcast page, as text.

    Returns:
        The ``src`` value without its ``#...`` fragment, or ``None`` unless
        exactly one ``<source src="...">`` tag is found.
    """
    matches = re.findall(r"<source src=\"(.+?)\"", html_string)
    if len(matches) != 1:
        return None
    # Drop the fragment (e.g. "#t=0"), which urlretrieve flags as invalid.
    # Splitting on "#" instead of slicing off four characters keeps the URL
    # intact when the fragment has another length or is absent.
    return matches[0].split("#", 1)[0]
def download(source_url):
    """Fetch the audio file referenced by an Overcast permalink page.

    The page at ``source_url`` is requested with browser-like headers, the
    episode title and audio URL are extracted from its HTML, and the audio
    is saved in the current working directory as ``<title><extension>``.

    Args:
        source_url: Overcast permalink, e.g. ``https://overcast.fm/+MWUwqlFc``.

    Raises:
        SystemExit: if the title or the audio URL cannot be found in the page.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.11 (KHTML, like Gecko) "
        "Chrome/23.0.1271.64 Safari/537.11",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
        "Accept-Encoding": "none",
        "Accept-Language": "en-US,en;q=0.8",
        "Connection": "keep-alive",
    }
    req = Request(source_url, None, headers)
    response = urlopen(req)
    try:
        source_data = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()
    # On Python 3 the body is bytes, which the str regexes cannot search;
    # on Python 2 it is already a str and is left untouched.
    if not isinstance(source_data, str):
        source_data = source_data.decode("utf-8", "replace")

    title = get_title(source_data)
    url = get_url(source_data)
    if url is None or title is None:
        sys.exit("Could not parse title or audio URL")

    # Look for the extension before any query string, and use slices so a
    # very short URL cannot raise IndexError. Default to ".mp3".
    url_path = url.split("?", 1)[0]
    output_format = url_path[-4:] if url_path[-4:-3] == "." else ".mp3"
    # A path separator in the title would point into a (probably missing)
    # sub-directory, so replace it.
    safe_title = title.replace("/", "-").replace(os.sep, "-")
    output_file = "{}{}".format(safe_title, output_format)
    urlretrieve(url, output_file)
if __name__ == "__main__":
    # Exactly one command-line argument is expected: the Overcast permalink.
    arguments = sys.argv[1:]
    if len(arguments) != 1:
        sys.exit("{} <overcast_url>".format(__file__))
    download(arguments[0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment