# jasti/Beats.py

## Beats.py
'''
Created on Aug 27, 2014

@author: vj
'''

import urllib2
import re
from bs4 import BeautifulSoup

# Takes a URL list and downloads music from each URL iteratively
def downloadMusic(urlList):
    """Download every URL in *urlList* to a file in the working directory.

    Each file is named after the last '/'-separated segment of its URL and
    is streamed to disk in 8 KiB blocks. A running byte count (plus a
    percentage when the server reports a Content-Length) is written to
    stdout while the transfer is in progress.

    Args:
        urlList: iterable of URL strings to fetch.

    Raises:
        Whatever urllib2.urlopen / open raise on network or file errors;
        the response and the output file are closed before propagating.
    """
    import sys  # local import: only used for the in-place progress output

    block_sz = 8192

    for url in urlList:
        file_name = url.split('/')[-1]
        u = urllib2.urlopen(url)
        try:
            # Content-Length is optional (e.g. chunked transfers); treat a
            # missing or malformed header as "size unknown" instead of
            # failing with IndexError / ValueError.
            lengths = u.info().getheaders("Content-Length")
            try:
                file_size = int(lengths[0])
            except (IndexError, ValueError):
                file_size = None

            if file_size is None:
                size_label = "unknown"
            else:
                size_label = file_size
            print("Downloading: %s Bytes: %s" % (file_name, size_label))

            file_size_dl = 0
            # Open the output only after the headers were read, and let the
            # with-block close it even when a read or write fails.
            with open(file_name, 'wb') as f:
                while True:
                    buffer_var = u.read(block_sz)
                    if not buffer_var:
                        break

                    file_size_dl += len(buffer_var)
                    f.write(buffer_var)

                    if file_size:
                        status = r"%10d  [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size)
                    else:
                        # No usable total: show the byte count only (also
                        # avoids dividing by a reported size of 0).
                        status = r"%10d" % file_size_dl
                    # Trailing backspaces move the cursor back so the next
                    # update overwrites this one on the same line.
                    status = status + chr(8) * (len(status) + 1)
                    sys.stdout.write(status)
                    sys.stdout.flush()
        finally:
            u.close()


# Given a URL, finds all the links on the page. Optionally, an argument can be
# passed to only get links that end with a certain string, e.g. .mp3
def findUrls(main_url, optionalEndsWith=None):
    """Return the href of every <a> tag found on the page at *main_url*.

    Args:
        main_url: URL of the page to fetch and scan.
        optionalEndsWith: when not None, only hrefs that end with this
            string (e.g. ".mp3") are returned. Defaults to None (no filter).

    Returns:
        A list of href values in the order BeautifulSoup yields the
        anchors. Hrefs are returned exactly as written in the page; they
        are not resolved against *main_url*.
    """
    page = urllib2.urlopen(main_url)
    try:
        soup = BeautifulSoup(page)
    finally:
        # Release the connection even if parsing fails.
        page.close()

    # href=True skips anchors that carry no href attribute (for example
    # <a name="top">), which would raise KeyError on link['href'].
    hrefs = [link['href'] for link in soup.find_all("a", href=True)]

    if optionalEndsWith is None:
        return hrefs

    # Suffix match, as the parameter name promises; a substring test would
    # also accept links that merely contain the text somewhere in the path.
    return [href for href in hrefs if href.endswith(optionalEndsWith)]

# Main script begins
if __name__ == '__main__':
    # Crawl the site's front page, follow every site-relative link, collect
    # the mp3 links found on those pages and download them.

    print("Starting Process...")

    main_url = "http://musicforprogramming.net"

    # Keep only site-relative links (those starting with '/') and make them
    # absolute by prefixing the site root.
    localLinks = [main_url + link
                  for link in findUrls(main_url, None)
                  if link.startswith('/')]
    print("Fetched all links to scan :")
    print(localLinks)

    # One list of mp3 links per scanned page.
    mp3Links = []
    for pageLink in localLinks:
        mp3Link = findUrls(pageLink, "mp3")
        # findUrls always returns a list, never None, so test for emptiness
        # rather than comparing against None.
        if mp3Link:
            mp3Links.append(mp3Link)

    # Download all files, one page's worth of links at a time.
    for mp3l in mp3Links:
        downloadMusic(mp3l)
	'''
	Created on Aug 27, 2014

	@author: vj
	'''

	import urllib2
	import re
	from bs4 import BeautifulSoup

	# Takes a URL list and downloads music from each URL iteratively
	def downloadMusic(urlList):
		"""Download every URL in *urlList* to a file in the working directory.

		Each file is named after the last '/'-separated segment of its URL
		and is streamed to disk in 8 KiB blocks. A running byte count (plus
		a percentage when the server reports a Content-Length) is written
		to stdout while the transfer is in progress.

		Args:
			urlList: iterable of URL strings to fetch.

		Raises:
			Whatever urllib2.urlopen / open raise on network or file errors;
			the response and the output file are closed before propagating.
		"""
		import sys  # local import: only used for the in-place progress output

		block_sz = 8192

		for url in urlList:
			file_name = url.split('/')[-1]
			u = urllib2.urlopen(url)
			try:
				# Content-Length is optional (e.g. chunked transfers); treat
				# a missing or malformed header as "size unknown" instead of
				# failing with IndexError / ValueError.
				lengths = u.info().getheaders("Content-Length")
				try:
					file_size = int(lengths[0])
				except (IndexError, ValueError):
					file_size = None

				if file_size is None:
					size_label = "unknown"
				else:
					size_label = file_size
				print("Downloading: %s Bytes: %s" % (file_name, size_label))

				file_size_dl = 0
				# Open the output only after the headers were read, and let
				# the with-block close it even when a read or write fails.
				with open(file_name, 'wb') as f:
					while True:
						buffer_var = u.read(block_sz)
						if not buffer_var:
							break

						file_size_dl += len(buffer_var)
						f.write(buffer_var)

						if file_size:
							status = r"%10d [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size)
						else:
							# No usable total: show the byte count only (also
							# avoids dividing by a reported size of 0).
							status = r"%10d" % file_size_dl
						# Trailing backspaces move the cursor back so the
						# next update overwrites this one on the same line.
						status = status + chr(8) * (len(status) + 1)
						sys.stdout.write(status)
						sys.stdout.flush()
			finally:
				u.close()


	# Given a URL, finds all the links on the page. Optionally, an argument can be
	# passed to only get links that end with a certain string, e.g. .mp3
	def findUrls(main_url, optionalEndsWith=None):
		"""Return the href of every <a> tag found on the page at *main_url*.

		Args:
			main_url: URL of the page to fetch and scan.
			optionalEndsWith: when not None, only hrefs that end with this
				string (e.g. ".mp3") are returned. Defaults to None (no filter).

		Returns:
			A list of href values in the order BeautifulSoup yields the
			anchors. Hrefs are returned exactly as written in the page; they
			are not resolved against *main_url*.
		"""
		page = urllib2.urlopen(main_url)
		try:
			soup = BeautifulSoup(page)
		finally:
			# Release the connection even if parsing fails.
			page.close()

		# href=True skips anchors that carry no href attribute (for example
		# <a name="top">), which would raise KeyError on link['href'].
		hrefs = [link['href'] for link in soup.find_all("a", href=True)]

		if optionalEndsWith is None:
			return hrefs

		# Suffix match, as the parameter name promises; a substring test
		# would also accept links that merely contain the text somewhere.
		return [href for href in hrefs if href.endswith(optionalEndsWith)]

	# Main script begins
	if __name__ == '__main__':
		# Crawl the site's front page, follow every site-relative link,
		# collect the mp3 links found on those pages and download them.

		print("Starting Process...")

		main_url = "http://musicforprogramming.net"

		# Keep only site-relative links (those starting with '/') and make
		# them absolute by prefixing the site root.
		localLinks = [main_url + link
			for link in findUrls(main_url, None)
			if link.startswith('/')]
		print("Fetched all links to scan :")
		print(localLinks)

		# One list of mp3 links per scanned page.
		mp3Links = []
		for pageLink in localLinks:
			mp3Link = findUrls(pageLink, "mp3")
			# findUrls always returns a list, never None, so test for
			# emptiness rather than comparing against None.
			if mp3Link:
				mp3Links.append(mp3Link)

		# Download all files, one page's worth of links at a time.
		for mp3l in mp3Links:
			downloadMusic(mp3l)