Created
September 22, 2011 06:31
-
-
Save hamilton/1234186 to your computer and use it in GitHub Desktop.
Downloads every track from "History of Electronic / Electroacoustic Music, 1937-2000"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import re
import shutil
import urllib2
from sgmllib import SGMLParser
# Announce what the script is about to do.
banner_rule = '-' * 80
print(banner_rule)
print(' History of Electronic / Electroacoustic Music, 1937-2000 ')
print(banner_rule)
print('This small script will download every track from "History of Electronic / Electroacoustic Music, 1937-2000"')

# Open the page that is later scanned for track links.
link = 'http://norbertschiegl.tumblr.com/post/5870608201/history-of-electronic-electroacoustic-music-1937-2001'
page = urllib2.urlopen(link)

# Names already present in the download directory; the directory is created
# (and the set left empty) when it does not exist yet.
if os.path.exists('electronic_music'):
    existing_files = set(os.listdir('electronic_music'))
else:
    os.mkdir('electronic_music')
    existing_files = set()
class URLLister(SGMLParser):
    """SGML parser that collects the ``href`` value of every ``<a>`` tag.

    After feeding markup to the parser, the collected values are available
    in document order on the ``urls`` attribute.
    """

    def reset(self):
        """Reset the underlying parser and start with an empty ``urls`` list."""
        SGMLParser.reset(self)
        self.urls = []

    def start_a(self, attrs):
        """Record the ``href`` attribute(s) of an opening ``<a>`` tag.

        ``attrs`` is an iterable of ``(name, value)`` pairs; anchors without
        an ``href`` contribute nothing.
        """
        for attr_name, attr_value in attrs:
            if attr_name == 'href':
                self.urls.append(attr_value)
# Collect every anchor href on the fetched page and keep the ones that
# mention an MP3 file.
parser = URLLister()
parser.feed(page.read())
# A distinct loop name is used here because Python 2 list comprehensions leak
# their variable into the enclosing scope, which would overwrite ``link``.
pieces = [url for url in parser.urls if '.mp3' in url]
total = len(pieces)

for i, piece in enumerate(pieces):
    # The last path component of the URL doubles as the local file name.
    filename = piece.split('/')[-1]

    if filename not in existing_files:
        try:
            response = urllib2.urlopen(piece)
        except urllib2.HTTPError:
            # Nothing has been created on disk yet, so there is nothing to
            # clean up; report the miss and move on to the next track.
            print('%s / %s was not available - 404 error. Sheesh.' % (i + 1, total))
            continue

        # Download under a '_'-prefixed temporary name and rename only once
        # the transfer completed, so an interrupted run never leaves a
        # truncated file under the final name.
        temp_path = 'electronic_music/_%s' % filename
        final_path = 'electronic_music/%s' % filename
        try:
            with open(temp_path, 'wb') as out_file:
                # Stream in chunks instead of holding the whole track in memory.
                shutil.copyfileobj(response, out_file)
        finally:
            response.close()
        os.rename(temp_path, final_path)

        # Remember the file so a second link to the same track is skipped.
        existing_files.add(filename)

    # NOTE(review): the original nesting of this progress line is not
    # recoverable from the flattened source; it is emitted here for every
    # track that is on disk (freshly downloaded or already present).
    print('finished %s / %s (%s)' % (i + 1, total, filename))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I updated this to use another URL and condensed it into a single command line