Skip to content

Instantly share code, notes, and snippets.

@xfxf
Forked from janetriley/download_my_podcasts.py
Last active August 23, 2017 04:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xfxf/8c5a7d8b745b20943378e48cc527f6b3 to your computer and use it in GitHub Desktop.
Save xfxf/8c5a7d8b745b20943378e48cc527f6b3 to your computer and use it in GitHub Desktop.
A Python script to download podcasts from an xml feed and tag the MP3s.
#!/usr/bin/python
"""
A script for downloading some podcasts.
Originally from https://gist.githubusercontent.com/janetriley/7762552/raw/9dcc46acb727757c4dd57d884484e04f5b664871/download_my_podcasts.py
Modified to fetch the podcast feed from an HTTP/HTTPS URI, to get past CloudFlare's user-agent
check by sending a browser User-Agent header, to remove the custom tagging, and with other fixes.
"""
import pycurl
import os.path
import sys
import requests
from BeautifulSoup import BeautifulStoneSoup
import eyed3
from eyed3.mp3 import isMp3File
OUTPUT_DIRECTORY = "./"
def generateFilename(title, url):
    '''
    Build a local file name from an episode title and its download URL.

    Spaces in the title become underscores, and path separators in the
    title are replaced too, so the result is always a single path component.
    The extension is taken from the last path segment of the URL, ignoring
    any query string or fragment. If that segment has no extension the
    title is returned without one.
    '''
    #Anything after '#' or '?' is not part of the remote file's name
    path = url.split('#', 1)[0].split('?', 1)[0]
    last_segment = path.rsplit('/', 1)[-1]

    safe_title = title.replace(' ', '_')
    #A separator inside the title would point into a directory that doesn't exist
    for separator in ('/', '\\'):
        safe_title = safe_title.replace(separator, '_')

    if '.' not in last_segment:
        return safe_title
    return "%s.%s" % (safe_title, last_segment.rsplit('.', 1)[-1])
def generateFilepath(filename):
    '''
    Return the location a download is saved to: OUTPUT_DIRECTORY with the
    file name appended directly (no separator is inserted between them).
    '''
    return "".join((OUTPUT_DIRECTORY, filename))
def fetchContent(output_filepath, url):
    '''
    Download url and save it to output_filepath.

    Returns True when the file is already on disk or was downloaded, and
    False when the download failed. On failure the error is printed and any
    partially written file is removed, so a later run will try again.
    '''
    #Don't re-download
    if os.path.exists(output_filepath):
        return True

    #Downloads are slow. Give a status.
    print("Fetching %s" % url)
    user_agent_header = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36'
    destination = None
    remote_file_location = None
    succeeded = False
    try:
        #Binary mode: the payload is audio data, not text
        destination = open(output_filepath, 'wb')
        remote_file_location = pycurl.Curl()
        remote_file_location.setopt(pycurl.USERAGENT, user_agent_header)
        remote_file_location.setopt(pycurl.URL, url)
        #Follow redirects instead of saving the redirect response itself
        remote_file_location.setopt(pycurl.FOLLOWLOCATION, 1)
        #Make HTTP errors (status >= 400) raise rather than saving the error page
        remote_file_location.setopt(pycurl.FAILONERROR, 1)
        remote_file_location.setopt(pycurl.WRITEFUNCTION, destination.write)
        remote_file_location.perform()
        succeeded = True
    except IOError as ex:
        print("IO error saving  %s : %s" % (url, ex))
    except Exception as ex:
        print("Error:  %s  did not fetch  %s" % (ex, output_filepath))
    finally:
        #Either handle may be missing if the failure happened before it was created
        if remote_file_location is not None:
            remote_file_location.close()
        if destination is not None:
            destination.close()
        #Never leave a partial file behind: the exists-check above would trust it
        if not succeeded and os.path.exists(output_filepath):
            os.remove(output_filepath)

    if succeeded:
        print("... done.")
    return succeeded
def addTags(filepath, title, author, album):
    '''
    Write artist, title and album tags onto a downloaded MP3 so iTunes
    will search and sort it. Files that are not MP3s are left untouched.
    Problems are reported on stdout; nothing is returned.
    '''
    audio = None
    #Opening the file has raised NotImplementedError before,
    #so it is handled separately from the tagging step.
    try:
        looks_like_mp3 = eyed3.mp3.isMp3File(filepath)
        if looks_like_mp3:
            audio = eyed3.load(filepath)
    except NotImplementedError as ex:
        print("Couldn't open file %s to add tag, error was %s " % (filepath, ex))
        return
    if not looks_like_mp3:
        return

    try:
        #Create a tag first if the file doesn't have one
        if not audio.tag:
            audio.initTag()
        id3 = audio.tag
        for field, value in (("artist", author), ("title", title), ("album", album)):
            setattr(id3, field, unicode(value))
        id3.save()
    except NotImplementedError as ex:
        print("Couldn't add tag to %s, error was %s" % (filepath, ex))
    except Exception as ex:
        print(" ".join(["Save tag failed, error was ", str(ex)]))
def _tagText(tag, default=""):
    '''
    Return str(tag.string) for a parsed feed element, or default when the
    element or its text is missing.
    '''
    if tag is None or tag.string is None:
        return default
    return str(tag.string)


if __name__ == '__main__':
    #Get the rss feed with the list of items
    if len(sys.argv) < 2:
        print("Error: you didn't give the xml file with the RSS info. Exiting.")
        #Non-zero status so a calling shell can tell the run failed
        sys.exit(1)

    feed_location = sys.argv[1]
    try:
        rss_xml = requests.get(feed_location)
        #Refuse to parse an HTTP error page as if it were the feed
        rss_xml.raise_for_status()
    except requests.RequestException as ex:
        print("Error: could not fetch feed %s : %s" % (feed_location, ex))
        sys.exit(1)
    parser = BeautifulStoneSoup(rss_xml.text)
    rss_xml.close()

    episodes = parser.findAll("item")
    for podcast in episodes:
        enclosure = podcast.enclosure
        #mp3s only - skip the videos, and any item with no enclosure at all
        if enclosure is None or enclosure.get('type') != u'audio/mpeg':
            continue

        #Get the podcast attributes
        title = _tagText(podcast.title)
        #Download from <guid> as before when it holds an http(s) address;
        #otherwise fall back to the enclosure's own url attribute.
        url = _tagText(podcast.guid).strip()
        if not url.startswith(('http://', 'https://')):
            url = str(enclosure.get('url') or "").strip()
        if not title or not url:
            print("Skipping an item with no title or no download URL.")
            continue
        #A missing author is tagged as an empty string rather than aborting the run
        author = _tagText(podcast.find('itunes:author'))
        album = "Downloaded Podcast"
        output_filepath = generateFilepath(generateFilename(title, url))

        #download and tag the file
        if fetchContent(output_filepath, url):
            addTags(output_filepath, title, author, album)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment