Skip to content

Instantly share code, notes, and snippets.

@hamilton
Created September 22, 2011 06:31
Show Gist options
  • Save hamilton/1234186 to your computer and use it in GitHub Desktop.
Save hamilton/1234186 to your computer and use it in GitHub Desktop.
Downloads every track from "History of Electronic / Electroacoustic Music, 1937-2000"
import urllib2
import os
import re
from sgmllib import SGMLParser
# Start-up banner: a rule, the collection title, a rule, a blank line, and a
# one-line description of what the script is about to do.
rule = '-' * 80
for banner_line in (
    rule,
    ' History of Electronic / Electroacoustic Music, 1937-2000 ',
    rule,
    '',
    'This small script will download every track from "History of Electronic / Electroacoustic Music, 1937-2000"',
):
    print(banner_line)

# Open the post page; its body is read further down, when links are extracted.
link = 'http://norbertschiegl.tumblr.com/post/5870608201/history-of-electronic-electroacoustic-music-1937-2001'
page = urllib2.urlopen(link)

# existing_files holds the names already present in the download directory
# (empty when the directory had to be created) so they can be skipped later.
if os.path.exists('electronic_music'):
    existing_files = set(os.listdir('electronic_music'))
else:
    os.mkdir('electronic_music')
    existing_files = set()
class URLLister(SGMLParser):
def reset(self):
SGMLParser.reset(self)
self.urls = []
def start_a(self, attrs):
href = [v for k, v in attrs if k=='href']
if href:
self.urls.extend(href)
# Extract every link from the page and keep those that mention '.mp3'.
parser = URLLister()
parser.feed(page.read())
# close() forces the parser to process any data it is still buffering.
parser.close()
pieces = [url for url in parser.urls if '.mp3' in url]
total = len(pieces)

# Download each track that is not already in the target directory.
for i, piece in enumerate(pieces):
    link = piece
    # The local file name is the last path component of the URL.
    filename = link.split('/')[-1]
    if filename not in existing_files:
        # Data is written under a '_'-prefixed temporary name and renamed only
        # after the transfer completes, so an interrupted download is never
        # mistaken for a finished track on the next run.
        tmp_path = 'electronic_music/_%s' % filename
        final_path = 'electronic_music/%s' % filename

        # Fetch first: if the server refuses the request, no local file is
        # created and no file handle is left open.
        try:
            f = urllib2.urlopen(link)
        except urllib2.HTTPError:
            f = None

        if f is not None:
            try:
                with open(tmp_path, 'wb') as a:
                    # Stream in chunks rather than holding a whole track in
                    # memory at once.
                    for chunk in iter(lambda: f.read(64 * 1024), b''):
                        a.write(chunk)
            finally:
                f.close()
            os.rename(tmp_path, final_path)
            # Remember the name so a track linked twice is fetched only once.
            existing_files.add(filename)
        else:
            print('%s / %s was not available - 404 error. Sheesh.' % (i + 1, total))
    # NOTE(review): the nesting level of this progress line was reconstructed;
    # it is assumed to run once per track, including skipped ones - confirm.
    print('finished %s / %s (%s)' % (i + 1, total, filename))
@capital-G
Copy link

I updated this to use a different URL and reduced it to a single command line:

wget --accept mp3,MP3 --convert-links --random-wait -r -p -E --no-directories -e robots=off https://ubu.com/sound/electronic.html

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment