@teffalump
Created May 13, 2011 21:58
Extract the XML feed from an iTunes subscribe page. That is, get the RSS podcast feed out of some bullshit iTunes crap.
#!/usr/bin/python3
#Extract xml file from iTunes subscribe bullshit
'''
That is, take a subscribe page
(e.g., http://itunes.apple.com/us/podcast/a-state-trance-official-podcast/id260190086)
and find the original XML feed, so you don't have to have iShit or whatever.
'''
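# Usage sketch (the script takes the iTunes podcast page URL as its only argument;
# the filename below is arbitrary):
#   python3 extract_feed.py 'http://itunes.apple.com/us/podcast/a-state-trance-official-podcast/id260190086'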
import sys
import urllib.parse
import urllib.request
import xml.parsers.expat
#spoof the iTunes user agent
heads = {
    "User-Agent": "iTunes/10.2.2"
}
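# (Assumption: the subscribe endpoint expects an iTunes client, so a plain
# browser User-Agent may not get the same XML response back.)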
#url for the iTunes podcast subscribe endpoint
base_itunes_url = "https://buy.itunes.apple.com/WebObjects/MZFinance.woa/wa/com.apple.jingle.app.finance.DirectAction/subscribePodcast"
param = {
    "id": None,  # we'll need to find this from the page URL
    "wasWarnedAboutPodcasts": "true"
}
#initial subscribe/general page, passed as the first command-line argument
url = sys.argv[1]
url_info = urllib.parse.urlparse(url)
#extract the numeric podcast id from the last path segment,
#e.g. ".../id260190086" -> "260190086" (drop the leading "id")
t = url_info.path.split('/')[-1][2:]
#insert the id into the request parameters
param["id"] = t
#create new url with podcast id
p = urllib.parse.urlencode(param)
itunes_url = '?'.join([base_itunes_url,p])
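# Resulting request URL looks like (parameter order may vary with the Python version's dict ordering):
#   .../subscribePodcast?id=260190086&wasWarnedAboutPodcasts=true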
#open the url with the spoofed headers
req = urllib.request.Request(itunes_url, headers=heads)
resp = urllib.request.urlopen(req).read()
#parse xml to find <key>feedURL</key><string>URL_WE_WANT</string>
desired_tag = "feedURL"
current_tag = False
urls = []
def startag(name, attrs):
    pass
def endtag(name):
    pass
def chardata(data):
    #the feed URL is the text node that follows the "feedURL" key text
    global current_tag
    if current_tag:
        urls.append(data)
        current_tag = False
    elif data == desired_tag:
        current_tag = True
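# Note: expat can split one text node across several CharacterDataHandler calls;
# these plist strings are short, so a single call per node is assumed here.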
p = xml.parsers.expat.ParserCreate()
p.StartElementHandler = startag
p.EndElementHandler = endtag
p.CharacterDataHandler = chardata
p.Parse(resp, True)  # True marks this as the final (and only) chunk
#urls we found (should be only one, but w/e); bail out if parsing found nothing
if urls:
    print(urls[0])
else:
    sys.exit("no feedURL found in the response")