Skip to content

Instantly share code, notes, and snippets.

@jasti
Last active June 5, 2016 18:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jasti/cf5188b411d185a4f27a to your computer and use it in GitHub Desktop.
Save jasti/cf5188b411d185a4f27a to your computer and use it in GitHub Desktop.
'''
Created on Aug 27, 2014
@author: vj
'''
import re
import sys

try:  # Python 2
    import urllib2
except ImportError:  # Python 3: urllib.request provides the same urlopen()
    import urllib.request as urllib2

from bs4 import BeautifulSoup
# Takes a URL list and downloads music from each URL iteratively
def downloadMusic(urlList, block_sz=8192):
    """Download every URL in ``urlList`` into the current working directory.

    Each file is saved under the last path segment of its URL and streamed in
    chunks of ``block_sz`` bytes. A running byte count (plus a percentage when
    the server reports a usable Content-Length) is written to stdout.

    Args:
        urlList: Iterable of URL strings to fetch.
        block_sz: Number of bytes requested per read.

    Raises:
        Any exception raised by ``urllib2.urlopen`` or ``open`` propagates to
        the caller; the response and the output file are closed first.
    """
    for url in urlList:
        file_name = url.split('/')[-1]
        u = urllib2.urlopen(url)
        try:
            # The header may be absent (e.g. chunked responses) or malformed;
            # treat the size as unknown instead of failing before the download.
            length_header = u.info().get("Content-Length")
            try:
                file_size = int(length_header) if length_header else 0
            except ValueError:
                file_size = 0
            print("Downloading: %s Bytes: %s"
                  % (file_name, file_size if file_size > 0 else "unknown"))

            file_size_dl = 0
            with open(file_name, 'wb') as f:
                while True:
                    buffer_var = u.read(block_sz)
                    if not buffer_var:
                        break
                    file_size_dl += len(buffer_var)
                    f.write(buffer_var)
                    if file_size > 0:
                        status = r"%10d [%3.2f%%]" % (
                            file_size_dl, file_size_dl * 100. / file_size)
                    else:
                        # No total available, so only the byte count is shown.
                        status = "%10d" % file_size_dl
                    # Backspaces move the cursor back to the start of the
                    # status text so the next update overwrites it in place.
                    sys.stdout.write(status + chr(8) * len(status))
                    sys.stdout.flush()
            # Leave the final status visible and start the next file on a new line.
            sys.stdout.write("\n")
        finally:
            u.close()
# Given a URL, finds all the links on the page. Optionally, an argument can be
# passed to only get links that end with a certain string, e.g. ".mp3".
def findUrls(main_url, optionalEndsWith=None):
    """Return the ``href`` value of every anchor on the page at ``main_url``.

    Args:
        main_url: URL of the HTML page to fetch and scan.
        optionalEndsWith: When not ``None``, only hrefs that end with this
            string (e.g. ``"mp3"``) are returned.

    Returns:
        A list of href strings in document order. The list is empty when
        nothing matches; ``None`` is never returned.
    """
    page = urllib2.urlopen(main_url)
    try:
        # Naming the parser keeps results the same regardless of which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(page, "html.parser")
    finally:
        page.close()

    pureLinks = []
    for link in soup.find_all("a"):
        href = link.get('href')
        if href is None:
            # Anchors without an href (e.g. named anchors) carry no link.
            continue
        if optionalEndsWith is None or href.endswith(optionalEndsWith):
            pureLinks.append(href)
    return pureLinks
# Main script begins
# Main script begins
if __name__ == '__main__':
    print("Starting Process...")
    main_url = "http://musicforprogramming.net"

    # Get all links, keeping only site-relative ones and making them absolute.
    localLinks = [main_url + link
                  for link in findUrls(main_url, None)
                  if link.startswith('/')]
    print("Fetched all links to scan :")
    print(localLinks)

    # Scan each page for mp3 links. The same file can be linked from several
    # pages, so duplicates are dropped while the discovery order is kept.
    mp3Links = []
    seen = set()
    for pageLink in localLinks:
        for mp3Link in findUrls(pageLink, "mp3"):
            if mp3Link not in seen:
                seen.add(mp3Link)
                mp3Links.append(mp3Link)

    # Download all files
    downloadMusic(mp3Links)
@Kriptonium
Copy link

Not working on python 3

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment