Skip to content

Instantly share code, notes, and snippets.

Last active February 4, 2020 14:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mbafford/5333101 to your computer and use it in GitHub Desktop.
Save mbafford/5333101 to your computer and use it in GitHub Desktop.
Quick and simple program to convert the Diecast podcast RSS feed into something my podcast reader can handle. It simply pulls the mp3 URL from the description and adds an enclosure element pointing at that URL.
#!/usr/bin/env python3
# Expects the AWS_ environment variables to be set for boto to know how to connect to AWS/S3 - they are: AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.
# Change the bucket name from mbafford-static for your own uses.
# Obvious enhancements would be to add the necessary tags for displaying artwork for the show in podcast software.
# This is just a hacky way to take the existing blog RSS feed and make it readable by podcast software
# validated and found mostly not broken with:
import re
from urllib.request import urlopen, Request
from xml.dom.minidom import parseString
import boto
def findSourceURL(item, type):
    """Return the URL of the first ``.mp3`` ``<source>`` tag found in *item*.

    Args:
        item: A DOM node. It is serialized with ``toxml()`` and the resulting
            markup is searched as plain text.
        type: Unused by the search; kept so that callers passing ``'mp3'``
            continue to work unchanged.

    Returns:
        The value of the ``src`` attribute of the first ``<source>`` tag whose
        ``src`` ends in ``.mp3``, or ``None`` when there is no such tag.
    """
    itemxml = item.toxml()
    # Capture group 1 is the quoted src value; only values ending in ".mp3"
    # are accepted.
    m = re.search("<source[^>]+src=[\"']([^\"']*\\.mp3)[\"']", itemxml)
    if m:
        return m.group(1)
    return None
def fetch_rss_feed_xml():
    """Download the source RSS feed and return it as a parsed DOM document.

    Returns:
        The document produced by ``xml.dom.minidom.parseString`` for the
        downloaded feed, after undecodable bytes have been stripped.
    """
    # Send an explicit User-Agent to circumvent a potential bot blocker.
    # NOTE(review): the feed URL is blank and the User-Agent string looks
    # truncated in this copy of the script - restore both before running.
    url = ''
    req = Request(url, headers={'User-Agent' : "Diecast feed generator ("})

    # Read the whole body inside a context manager so the HTTP response is
    # closed even if reading fails.
    with urlopen(req) as conn:
        rss = conn.read()

    # a mix of UTF-8 and windows-1252 makes for an XML parsing error
    # in the case of this script, fixing the errors isn't that important
    # so just get rid of them: invalid bytes decode to U+FFFD, which is
    # then removed.
    rss = rss.decode('utf-8', errors='replace').replace(u"\uFFFD", "")

    rssxml = parseString(rss)
    return rssxml
def podcastify_xml(rssxml):
    """Rewrite a parsed RSS document in place so podcast clients accept it.

    The document is modified as follows:

    * the ``xmlns:itunes`` namespace is declared on the ``<rss>`` element;
    * every existing ``<atom:link>`` element is removed;
    * each ``<item>`` for which ``findSourceURL`` finds an mp3 URL has its
      existing ``<enclosure>`` elements replaced by a single new one;
    * ``itunes:image``, ``itunes:author``, ``itunes:category``,
      ``itunes:explicit`` and a fresh ``atom:link`` (``rel="self"``) are
      inserted at the start of the first ``<channel>``.

    Args:
        rssxml: A ``xml.dom.minidom`` document holding the RSS feed.

    Returns:
        None. The document is mutated in place.

    Raises:
        IndexError: If the document contains no ``<channel>`` element.
    """
    # For iTunes: the itunes: prefix used below must be bound to the iTunes
    # podcast namespace. A prefix may not be bound to an empty namespace name.
    feedNodes = rssxml.getElementsByTagName("rss")
    if feedNodes:
        feedNodes[0].setAttribute("xmlns:itunes", "http://www.itunes.com/dtds/podcast-1.0.dtd")

    # Drop the blog's own atom:link elements; a replacement self link is
    # added to the channel at the end.
    for link in rssxml.getElementsByTagName("atom:link"):
        link.parentNode.removeChild(link)

    for item in rssxml.getElementsByTagName("item"):
        mp3url = findSourceURL(item, 'mp3')
        if not mp3url:
            # No audio found for this entry: leave the item untouched.
            continue

        if not mp3url.startswith("http"):
            # NOTE(review): both site prefixes are blank in this copy of the
            # script, so relative URLs currently pass through unchanged
            # whichever branch runs. Restore the site root / base path here.
            if mp3url.startswith("/"):
                mp3url = "" + mp3url
            else:
                mp3url = "" + mp3url

        # remove existing enclosures, if any
        for stale in item.getElementsByTagName("enclosure"):
            stale.parentNode.removeChild(stale)

        enclosure = rssxml.createElement("enclosure")
        enclosure.setAttribute("url", mp3url)
        enclosure.setAttribute("type", "audio/mpeg")
        enclosure.setAttribute("length", "75000000") # for iTunes
        # Attach the new element; without this the enclosure is built but
        # never becomes part of the feed.
        item.appendChild(enclosure)

    channel = rssxml.getElementsByTagName("channel")[0]

    # Each element is inserted before the current first child, so the final
    # order in the channel is the reverse of the order written here.
    # NOTE(review): the image and self-link href values are blank in this
    # copy of the script - fill in the real URLs.
    image = rssxml.createElement("itunes:image")
    image.setAttribute("href", "")
    channel.insertBefore(image, channel.firstChild)

    author = rssxml.createElement("itunes:author")
    author.appendChild(rssxml.createTextNode("Twenty Sided"))
    channel.insertBefore(author, channel.firstChild)

    category = rssxml.createElement("itunes:category")
    category.setAttribute("text", "Games & Hobbies")
    channel.insertBefore(category, channel.firstChild)

    explicit = rssxml.createElement("itunes:explicit")
    explicit.appendChild(rssxml.createTextNode("no"))
    channel.insertBefore(explicit, channel.firstChild)

    link = rssxml.createElement("atom:link")
    link.setAttribute("href", "")
    link.setAttribute("rel", "self")
    link.setAttribute("type", "application/rss+xml")
    channel.insertBefore(link, channel.firstChild)
def upload_xml(rssxml):
    """Publish the feed document to S3 as ``diecast.xml``.

    Connects through boto with ``is_secure=False``, looks up the
    ``mbafford-static`` bucket and stores the pretty-printed, UTF-8 encoded
    document under the key ``diecast.xml`` with a ``public-read`` policy and
    an ``application/rss+xml`` content type.

    Args:
        rssxml: The DOM document to serialize and upload.
    """
    connection = boto.connect_s3(is_secure=False)
    feed_key = boto.s3.key.Key(connection.get_bucket('mbafford-static'))
    feed_key.key = 'diecast.xml'

    # Serialize only after the bucket lookup, matching the original order of
    # operations.
    payload = rssxml.toprettyxml(encoding='utf-8')
    feed_key.set_contents_from_string(
        payload,
        headers={'Content-Type' : 'application/rss+xml'},
        policy='public-read',
    )
def write_xml_to_file( rssxml, filename ):
    """Write the pretty-printed document to *filename* as UTF-8 text.

    Args:
        rssxml: The DOM document to serialize.
        filename: Path of the file to create or overwrite.
    """
    # The serialized document declares encoding="utf-8", so the file must be
    # written as UTF-8 regardless of the platform's default text encoding.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write( rssxml.toprettyxml(encoding='utf-8').decode('utf-8') )
# Script body: download the feed, keep an untouched copy on disk, convert the
# document in place, then save the converted feed locally and upload it.
rssxml = fetch_rss_feed_xml()
write_xml_to_file(rssxml, 'original-rss.xml')

podcastify_xml(rssxml)
write_xml_to_file(rssxml, 'diecast.xml')
upload_xml(rssxml)
Copy link

Merged and re-run. The updated feed works with iTunes 11.0.2 now. Thank you for the patch!

It's a shame that iTunes requires a length parameter that it's going to then ignore. I remember having to do this a long time ago with another similar project, but I'd forgotten all about that requirement.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment