# cwharland/studs.py

## studs.py
import os
import re
import urllib2

import wget
from bs4 import BeautifulSoup as bs

# Download every interview mp3 linked from the index page below, saving
# each one under the name shown next to its link.

# Where do you want to download the files?
# '%s' is replaced with each interview's file name. A leading '~' is
# expanded to the user's home directory before anything is written.
path = '~/studs_terkel/%s'

# Page with all the mp3 links
url = 'http://conversations.studsterkel.org/htimes.php'

# Parse the page, then release the HTTP response instead of leaking it.
page = urllib2.urlopen(url)
try:
    soup = bs(page)
finally:
    page.close()

# URLs for each interview: every anchor whose href contains 'mp3'
mp3s = [x['href'] for x in soup.find_all('a', href=re.compile(r'mp3'))]
# Matching file names: the stripped text of every white <font> element
file_names = [
    x.text.strip() for x in soup.find_all('font', color='#FFFFFF')
]

# Names and links are paired purely by position, so a count mismatch means
# the parsing above went wrong. Raise explicitly instead of using `assert`:
# asserts are stripped under `python -O`, after which zip() would silently
# truncate and pair names with the wrong links.
if len(file_names) != len(mp3s):
    raise RuntimeError(
        'Found %d file names but %d mp3 links; the page layout may have '
        'changed' % (len(file_names), len(mp3s)))

# '~' is only special to the shell, so expand it ourselves and make sure
# the target directory exists before the first download.
download_dir = os.path.dirname(os.path.expanduser(path))
if download_dir and not os.path.isdir(download_dir):
    os.makedirs(download_dir)

# Get to downloading
for f, m in zip(file_names, mp3s):
    # A path separator inside a scraped title would otherwise be treated
    # as a (non-existent) sub-directory.
    safe_name = f.replace(os.sep, '-')
    formatted_name = os.path.expanduser(path % (safe_name + '.mp3'))
    wget.download(m, formatted_name)