Skip to content

Instantly share code, notes, and snippets.

@andres-erbsen
Created October 23, 2011 18:35
Show Gist options
  • Save andres-erbsen/1307686 to your computer and use it in GitHub Desktop.
Save andres-erbsen/1307686 to your computer and use it in GitHub Desktop.
Vikerraadio arhiivist sarja allalaadimine - download an entire series from the Vikerraadio archive
from datetime import datetime
import xml.dom.minidom
import re
import urllib
archive_url = 'http://vikerraadio.err.ee/kuularhiiv?saade=25&kid=123'
kid = re.findall('kid=\d+',archive_url)[0].replace('kid=','')
rss_base_url = 'http://vikerraadio.err.ee/gfx/rss2.php?id='
rss_url = rss_base_url + kid
rss = xml.dom.minidom.parse(urllib.urlopen(rss_url))
items = rss.getElementsByTagName('item')
for i,item in enumerate(items):
print 'Parsing item %d of %d' % (i+1, len(items))
title = item.getElementsByTagName('title')[0].firstChild.data
url = item.getElementsByTagName('link')[0].firstChild.data
rawdate = item.getElementsByTagName('pubDate')[0].firstChild.data
date = datetime.strftime(datetime.strptime(rawdate.split(' +')[0],"%a, %d %b %Y %H:%M:%S"),'%Y-%m-%d')
ext = '.'+url.split('.')[-1]
filename = '%s %s%s' % (date, title, ext)
print 'Saving "%s" to "%s".' % (url, filename)
urllib.urlretrieve(url,filename)
print '"%s" saved.' % filename
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment