Skip to content

Instantly share code, notes, and snippets.

Created May 9, 2014
Embed
What would you like to do?
All of Bach media extraction
#!/usr/bin/python
# Audio extraction script for All of Bach:
# http://allofbach.com/en/
import re
import urllib2
from BeautifulSoup import BeautifulSoup
# Shell-quoting helper: shlex.quote on Python 3.3+, pipes.quote on Python 2
# (the rest of this script targets Python 2: urllib2 / BeautifulSoup 3).
try:
    from shlex import quote as _shell_quote
except ImportError:
    from pipes import quote as _shell_quote

BASE_URL = 'http://allofbach.com'
INDEX_URL = BASE_URL + '/en/bwv/'

# Raw string so that \d is a regex digit class rather than a string escape.
pattern = re.compile(r'/en/bwv/bwv-(\d+)')
command = 'youtube-dl -k -x --audio-format=mp3 --audio-quality=0'


def format_command(bwv, title, url):
    """Return one youtube-dl shell command line for a single work.

    bwv   -- catalogue number as captured from the link (string of digits)
    title -- human-readable title scraped from the page; treated as
             untrusted text
    url   -- address of the work's detail page

    The output file name is "BWV<bwv> - <title>.<ext>", where the extension
    is left as a youtube-dl template field.  Both the file-name template and
    the URL are shell-quoted, so quotes, `$`, backticks and similar
    characters in a title cannot break or alter the generated command.
    """
    # A literal '%' in the title must be doubled, otherwise youtube-dl would
    # try to interpret it as part of an output-template field.
    safe_title = title.replace('%', '%%')
    output_template = 'BWV%s - %s.%%(ext)s' % (bwv, safe_title)
    return '%s -o %s %s' % (
        command, _shell_quote(output_template), _shell_quote(url))


def main():
    """Fetch the BWV index page and print one download command per work.

    Nothing is downloaded here; the commands are written to standard output
    so they can be inspected or fed to a shell.
    """
    page = urllib2.urlopen(INDEX_URL)
    try:
        soup = BeautifulSoup(page)
    finally:
        # Release the HTTP connection even if parsing fails.
        page.close()

    section = soup.find('section', 'number-block')
    ul = section.find('ul') if section is not None else None
    if ul is None:
        raise RuntimeError('could not find the work list on ' + INDEX_URL)

    for li in ul.findAll('li'):
        heading = li.find('h4')
        link = li.find('a')
        if heading is None or link is None:
            # Not a work entry (e.g. a decorative list item): skip it.
            continue
        href = link.get('href')
        match = pattern.match(href) if href else None
        if match is None:
            # The link does not point at a /en/bwv/bwv-<n> page.
            continue
        # .string is None when the heading holds nested markup; fall back to
        # joining all of its text nodes.
        title = heading.string or ''.join(heading.findAll(text=True))
        url = BASE_URL + href + 'detail/'
        print(format_command(match.group(1), title.strip(), url))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment