tyndyll/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Download From Overcast

Overcast on iOS is my primary podcast
listening method, but I have an occasional need to download podcasts for
archival or offline listening purposes. This script takes advantage of
Overcast's permalink and show page to get the podcast author and title
before downloading the podcast itself from the original page.
Usage: python download_from_overcast.py <overcast_url>
e.g. python download_from_overcast.py https://overcast.fm/+MWUwqlFc

Also available as an Alfred Workflow

  
## download_from_overcast.py
#!/usr/bin/env python
#
# [Overcast](https://overcast.fm/) on iOS is my primary podcast
# listening method, but I have an occasional need to download podcasts for
# archival or offline listening purposes. This script takes advantage of
# Overcast's permalink and show page to get the podcast author and title
# before downloading the podcast itself from the original page.
#
# Usage: python download_from_overcast.py <overcast_url>
# e.g. python download_from_overcast.py https://overcast.fm/+MWUwqlFc

import os
import re
import sys

from urllib2 import Request
from urllib2 import urlopen
from urllib import urlretrieve


def get_title(html_str):
    """Get the title from the page's ``og:title`` meta tag.

    Args:
        html_str: HTML source of the page to search.

    Returns:
        The tag's ``content`` value with every ``&mdash;`` entity replaced
        by ``-``, or ``None`` unless exactly one matching tag is found.
    """
    # Non-greedy capture: stop at the quote that closes ``content`` instead
    # of the last quote on the line, so trailing attributes or further tags
    # on the same line do not leak into the title.
    title = re.findall(r"<meta name=\"og:title\" content=\"(.+?)\"", html_str)
    if len(title) == 1:
        return title[0].replace("&mdash;", "-")
    return None


def get_description(html_str):
    """Get the description from the page's ``og:description`` meta tag.

    Args:
        html_str: HTML source of the page to search.

    Returns:
        The tag's ``content`` value, or ``None`` unless exactly one
        matching tag is found.
    """
    # Non-greedy capture: stop at the quote that closes ``content`` instead
    # of the last quote on the line, so trailing attributes or further tags
    # on the same line do not leak into the description.
    desc_re = r"<meta name=\"og:description\" content=\"(.+?)\""
    description = re.findall(desc_re, html_str)
    if len(description) == 1:
        return description[0]
    return None


def get_url(html_string):
    """Find the audio URL from the ``<audio><source ...>`` tag.

    Args:
        html_string: HTML source of the page to search.

    Returns:
        The ``src`` value of the source tag with any ``#...`` fragment
        removed, or ``None`` unless exactly one source tag is found.
    """
    url = re.findall(r"<source src=\"(.+?)\"", html_string)
    if len(url) == 1:
        # Drop the fragment (e.g. "#t=0"), which urlretrieve flags as
        # invalid. Splitting on "#" instead of slicing off a fixed four
        # characters also handles longer offsets such as "#t=120" and URLs
        # that carry no fragment at all.
        return url[0].split("#", 1)[0]
    return None


def download(source_url):
    """Given an Overcast source URL, fetch the audio file it points to.

    The page is requested with browser-like headers, the title and audio
    URL are parsed out of it, and the audio is saved under a relative file
    name of the form ``<title><extension>``.

    Args:
        source_url: URL of the Overcast page to read.

    Raises:
        SystemExit: if the title or the audio URL cannot be found in the
            page.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.11 (KHTML, like Gecko) "
        "Chrome/23.0.1271.64 Safari/537.11",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
        "Accept-Encoding": "none",
        "Accept-Language": "en-US,en;q=0.8",
        "Connection": "keep-alive",
    }
    req = Request(source_url, None, headers)
    response = urlopen(req)
    try:
        source_data = response.read()
    finally:
        # Release the connection even if reading the page fails.
        response.close()
    title = get_title(source_data)
    url = get_url(source_data)

    if url is None or title is None:
        sys.exit("Could not parse the title or audio URL from the page")

    # Take the extension from the URL path only (ignoring any query string)
    # so extensions of any length are kept; fall back to .mp3 when the path
    # has no extension.
    output_format = os.path.splitext(url.split("?", 1)[0])[1] or ".mp3"

    # The title comes from the remote page: replace path separators so it
    # is always a single file name and cannot point into another directory.
    safe_title = title
    for separator in (os.sep, os.altsep):
        if separator:
            safe_title = safe_title.replace(separator, "-")

    output_file = "{}{}".format(safe_title, output_format)
    urlretrieve(url, output_file)


if __name__ == "__main__":
    # Exactly one positional argument, the Overcast URL, is expected;
    # anything else prints the usage line and exits.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        sys.exit("{} <overcast_url>".format(__file__))
    download(cli_args[0])
	#!/usr/bin/env python
	#
	# [Overcast](https://overcast.fm/) on iOS is my primary podcast
	# listening method, but I have an occasional need to download podcasts for
	# archival or offline listening purposes. This script takes advantage of
	# Overcast's permalink and show page to get the podcast author and title
	# before downloading the podcast itself from the original page.
	#
	# Usage: python download_from_overcast.py <overcast_url>
	# e.g. python download_from_overcast.py https://overcast.fm/+MWUwqlFc

	import os
	import re
	import sys

	from urllib2 import Request
	from urllib2 import urlopen
	from urllib import urlretrieve


	def get_title(html_str):
	    """Get the title from the page's ``og:title`` meta tag.

	    Args:
	        html_str: HTML source of the page to search.

	    Returns:
	        The tag's ``content`` value with every ``&mdash;`` entity replaced
	        by ``-``, or ``None`` unless exactly one matching tag is found.
	    """
	    # Non-greedy capture: stop at the quote that closes ``content`` instead
	    # of the last quote on the line, so trailing attributes or further tags
	    # on the same line do not leak into the title.
	    title = re.findall(r"<meta name=\"og:title\" content=\"(.+?)\"", html_str)
	    if len(title) == 1:
	        # Match the HTML entity as it appears in the raw page source; this
	        # also keeps the source file pure ASCII.
	        return title[0].replace("&mdash;", "-")
	    return None


	def get_description(html_str):
	    """Get the description from the page's ``og:description`` meta tag.

	    Args:
	        html_str: HTML source of the page to search.

	    Returns:
	        The tag's ``content`` value, or ``None`` unless exactly one
	        matching tag is found.
	    """
	    # Non-greedy capture: stop at the quote that closes ``content`` instead
	    # of the last quote on the line, so trailing attributes or further tags
	    # on the same line do not leak into the description.
	    desc_re = r"<meta name=\"og:description\" content=\"(.+?)\""
	    description = re.findall(desc_re, html_str)
	    if len(description) == 1:
	        return description[0]
	    return None


	def get_url(html_string):
	    """Find the audio URL from the ``<audio><source ...>`` tag.

	    Args:
	        html_string: HTML source of the page to search.

	    Returns:
	        The ``src`` value of the source tag with any ``#...`` fragment
	        removed, or ``None`` unless exactly one source tag is found.
	    """
	    url = re.findall(r"<source src=\"(.+?)\"", html_string)
	    if len(url) == 1:
	        # Drop the fragment (e.g. "#t=0"), which urlretrieve flags as
	        # invalid. Splitting on "#" instead of slicing off a fixed four
	        # characters also handles longer offsets such as "#t=120" and URLs
	        # that carry no fragment at all.
	        return url[0].split("#", 1)[0]
	    return None


	def download(source_url):
	    """Given an Overcast source URL, fetch the audio file it points to.

	    The page is requested with browser-like headers, the title and audio
	    URL are parsed out of it, and the audio is saved under a relative file
	    name of the form ``<title><extension>``.

	    Args:
	        source_url: URL of the Overcast page to read.

	    Raises:
	        SystemExit: if the title or the audio URL cannot be found in the
	            page.
	    """
	    headers = {
	        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) "
	        "AppleWebKit/537.11 (KHTML, like Gecko) "
	        "Chrome/23.0.1271.64 Safari/537.11",
	        # "*/*" is the wildcard media range; a bare "/" is not a valid one.
	        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
	        "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
	        "Accept-Encoding": "none",
	        "Accept-Language": "en-US,en;q=0.8",
	        "Connection": "keep-alive",
	    }
	    req = Request(source_url, None, headers)
	    response = urlopen(req)
	    try:
	        source_data = response.read()
	    finally:
	        # Release the connection even if reading the page fails.
	        response.close()
	    title = get_title(source_data)
	    url = get_url(source_data)

	    if url is None or title is None:
	        sys.exit("Could not parse the title or audio URL from the page")

	    # Take the extension from the URL path only (ignoring any query string)
	    # so extensions of any length are kept; fall back to .mp3 when the path
	    # has no extension.
	    output_format = os.path.splitext(url.split("?", 1)[0])[1] or ".mp3"

	    # The title comes from the remote page: replace path separators so it
	    # is always a single file name and cannot point into another directory.
	    safe_title = title
	    for separator in (os.sep, os.altsep):
	        if separator:
	            safe_title = safe_title.replace(separator, "-")

	    output_file = "{}{}".format(safe_title, output_format)
	    urlretrieve(url, output_file)


	if __name__ == "__main__":
	    # Exactly one positional argument, the Overcast URL, is expected;
	    # anything else prints the usage line and exits.
	    if len(sys.argv) != 2:
	        sys.exit("{} <overcast_url>".format(__file__))
	    download(sys.argv[1])