This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Created on Aug 27, 2014 | |
@author: vj | |
''' | |
import re
import sys

try:
    import urllib2
except ImportError:
    # Python 3 has no urllib2 module; urllib.request provides the urlopen()
    # entry point this file uses, so bind it to the same name.
    import urllib.request as urllib2

from bs4 import BeautifulSoup
# Takes a URL list and downloads music from each URL iteratively
def downloadMusic(urlList):
    """Download every URL in ``urlList`` into the current working directory.

    Each file is saved under the last ``/``-separated segment of its URL and
    is streamed to disk in 8 KiB blocks while a progress counter is written
    to standard output.

    Args:
        urlList: iterable of URL strings to fetch.

    Returns:
        None.

    Raises:
        Whatever ``urllib2.urlopen`` or ``open`` raise for a URL that cannot
        be fetched or a file that cannot be created; the response and the
        output file are closed before the exception propagates.
    """
    block_sz = 8192
    for url in urlList:
        file_name = url.split('/')[-1]
        u = urllib2.urlopen(url)
        try:
            # Content-Length is optional (e.g. chunked responses) and may be
            # malformed; 0 means "size unknown" from here on.
            try:
                file_size = int(u.info().get("Content-Length") or 0)
            except ValueError:
                file_size = 0
            print("Downloading: %s Bytes: %s"
                  % (file_name, file_size if file_size else "unknown"))

            file_size_dl = 0
            with open(file_name, 'wb') as f:
                while True:
                    buffer_var = u.read(block_sz)
                    if not buffer_var:
                        break
                    file_size_dl += len(buffer_var)
                    f.write(buffer_var)

                    if file_size:
                        status = r"%10d [%3.2f%%]" % (
                            file_size_dl, file_size_dl * 100. / file_size)
                    else:
                        # No total to compare against: show bytes only
                        # instead of dividing by zero.
                        status = "%10d" % file_size_dl
                    # Trailing backspaces move the cursor back to the start
                    # of the status text so the next update overwrites it.
                    sys.stdout.write(status + chr(8) * len(status))
                    sys.stdout.flush()
            # Leave the final progress value visible on its own line.
            sys.stdout.write("\n")
        finally:
            u.close()
# Given a URL, finds all the links on the page. Optionally can pass in an
# argument to only keep links whose href contains a certain string, e.g. "mp3".
def findUrls(main_url, optionalEndsWith=None):
    """Return the ``href`` values of the ``<a>`` elements found at ``main_url``.

    Args:
        main_url: URL of the page to fetch and parse.
        optionalEndsWith: when not ``None``, only hrefs that contain this
            string are returned. Despite the parameter name, the match is a
            substring test anywhere in the href, not a suffix test.

    Returns:
        A list of href strings in document order. Anchors that have no
        ``href`` attribute are skipped.

    Raises:
        Whatever ``urllib2.urlopen`` raises when the page cannot be fetched.
    """
    page = urllib2.urlopen(main_url)
    try:
        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries happen to be installed.
        soup = BeautifulSoup(page, "html.parser")
    finally:
        page.close()

    pureLinks = []
    for link in soup.find_all("a"):
        # Tag.get returns None for a missing attribute, where link['href']
        # would raise KeyError on anchors such as <a name="top">.
        href = link.get('href')
        if href is None:
            continue
        if optionalEndsWith is None or optionalEndsWith in href:
            pureLinks.append(href)
    return pureLinks
# Main script begins
if __name__ == '__main__':
    print("Starting Process...")
    main_url = "http://musicforprogramming.net"

    # Get all links on the front page and keep the site-relative ones,
    # turned into absolute URLs. Links starting with '//' are
    # protocol-relative URLs pointing at another host, so they are not
    # treated as local. Repeated links are kept only once so no page is
    # scanned twice.
    localLinks = []
    for link in findUrls(main_url, None):
        if not link.startswith('/') or link.startswith('//'):
            continue
        absolute = main_url + link
        if absolute not in localLinks:
            localLinks.append(absolute)
    print("Fetched all links to scan :")
    print(localLinks)

    # Iterate over the pages and collect every link containing "mp3".
    # findUrls always returns a list (possibly empty), so the results are
    # flattened into one list of URLs, each kept only once.
    mp3Links = []
    for pageLink in localLinks:
        for mp3Link in findUrls(pageLink, "mp3"):
            if mp3Link not in mp3Links:
                mp3Links.append(mp3Link)

    # Download all files
    downloadMusic(mp3Links)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Not working on python 3