Skip to content

Instantly share code, notes, and snippets.

Last active June 5, 2016 18:49
Show Gist options
  • Save jasti/cf5188b411d185a4f27a to your computer and use it in GitHub Desktop.
Save jasti/cf5188b411d185a4f27a to your computer and use it in GitHub Desktop.
Created on Aug 27, 2014
@author: vj
import urllib2
import re
from bs4 import BeautifulSoup
# Takes a URL list and downloads music from each URL iteratively
def downloadMusic(urlList):
for url in urlList:
file_name = url.split('/')[-1]
u = urllib2.urlopen(url)
f = open(file_name, 'wb')
meta =
file_size = int(meta.getheaders("Content-Length")[0])
print "Downloading: %s Bytes: %s" % (file_name, file_size)
file_size_dl = 0
block_sz = 8192
while True:
buffer_var =
if not buffer_var:
file_size_dl += len(buffer_var)
status = r"%10d [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size)
status = status + chr(8)*(len(status)+1)
print status,
# Given a URL, finds all the links on the page. Optionally can pass in an argument
# to only get links that end with a certain string, e.g. .mp3
def findUrls(main_url, optionalEndsWith):
    """Return a list of href values from all <a> tags on main_url.

    If optionalEndsWith is None, every href is returned; otherwise only
    hrefs containing that substring are returned (original used a
    substring test, not endswith — kept as-is).
    NOTE: Python 2 code; requires BeautifulSoup (bs4).
    """
    page = urllib2.urlopen(main_url)
    soup = BeautifulSoup(page)
    links = soup.find_all("a")
    pureLinks = []
    for link in links:
        # Anchors without an href would raise KeyError; skip them.
        if not link.has_attr('href'):
            continue
        if(optionalEndsWith is None):
            pureLinks.append(link['href'])
        elif optionalEndsWith in link['href']:
            pureLinks.append(link['href'])
    return pureLinks
# Main script begins
if __name__ == '__main__':
print "Starting Process..."
main_url= ""
# Get all links
links = findUrls(main_url, None)
localLinks = [];
for link in links:
print "Fetched all links to scan :"
print localLinks
# Iterate over links and save the ones ending with .mp3
mp3Links = []
for pageLink in localLinks:
mp3Link = findUrls(pageLink,"mp3")
if(mp3Link != None):
# Download all files
for mp3l in mp3Links:
Copy link

This does not work on Python 3: it uses the removed `urllib2` module and Python 2 `print` statements.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment