Last active
October 25, 2018 05:14
-
-
Save nickzuck/0e92372a2319c342b2817749af01ac35 to your computer and use it in GitHub Desktop.
Program to download mp3 songs from Index of directories
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Author: Nikhil Kumar Singh (nickzuck_007)
# Date: 6th May 2018
# Updated: 25th Oct 2018
# Description: Download every MP3 linked from each index page whose URL is
#              listed, one per line, in the file named "urls".
import os
import time
from urlparse import urljoin
from urlparse import urlparse

import requests
from bs4 import BeautifulSoup
def ismp3(url):
    """Return True when *url* points at a file with an ``.mp3`` extension.

    The comparison is case-insensitive, and a trailing query string or
    fragment (``song.mp3?dl=1``, ``song.mp3#t=30``) does not hide the
    extension.  A string with no dot at all, such as ``"mp3"``, is not
    treated as an MP3 link.
    """
    lowered = url.lower()
    # Drop "#fragment" first, then "?query", leaving scheme/host/path.
    path_part = lowered.split("#", 1)[0].split("?", 1)[0]
    # The whole-string check keeps accepting links whose query value itself
    # ends in ".mp3" (e.g. "get?file=a.mp3").
    return lowered.endswith(".mp3") or path_part.endswith(".mp3")
def downloadMP3(name, url):
    """Download *url* and save the response body as the local file *name*.

    Args:
        name: Path of the file to create.
        url: Address to fetch with a GET request.

    Returns:
        True when the file was written; False when *name* already exists
        or the server answered with an error status.
    """
    # Single-argument print(...) calls emit the same text under Python 2
    # and Python 3.
    print("Downloading  %s" % name)
    if os.path.exists(name):
        print("File already exists ")
        print(100 * "-")
        return False

    # NOTE(review): verify=False turns off TLS certificate checking, so the
    # payload could be altered in transit -- confirm this is intended.
    response = requests.get(url, verify=False)
    if not response.ok:
        # Do not store an HTML error page under a song's file name.
        print("Download failed (HTTP %d) for %s" % (response.status_code, url))
        print(100 * "-")
        return False

    print("Writing to file  %s" % name)
    # Binary mode: response.content is raw bytes.  Text mode corrupts it on
    # platforms that translate newlines and raises TypeError on Python 3.
    with open(name, "wb") as out_file:
        out_file.write(response.content)
    print("Write completed  %s" % name)
    print(100 * "-")
    return True
if __name__ == '__main__':
    # Read the index-page URLs, one per line, from the file named "urls".
    try:
        with open("urls") as url_file:
            # Blank lines are skipped so they never reach requests.get().
            urls = [line.strip() for line in url_file if line.strip()]
    except IOError as e:
        # The with-statement closes the file on success; when open() itself
        # fails there is no handle to close, so no finally clause is needed.
        print("Could not read the urls file: %s" % e)
        urls = []

    count = 0
    start_time = time.time()
    try:
        for url in urls:
            print("Donwloading for url  %s" % url)
            try:
                # NOTE(review): verify=False disables TLS certificate
                # checking -- confirm this is intended.
                response = requests.get(url, verify=False)
            except requests.RequestException as e:
                # One unreachable index page should not abort the others.
                print("Could not fetch %s: %s" % (url, e))
                print(100 * "=")
                continue

            soup = BeautifulSoup(response.content, "lxml")
            for result in soup.select('ul li a'):
                href_link = result.get('href')
                if not href_link:
                    # Anchor without an href: nothing to download.
                    continue

                # Resolve against the URL actually served (after redirects)
                # so relative, root-relative and absolute hrefs all work.
                final_url = urljoin(response.url, href_link)

                # The link text comes from remote HTML: keep only its last
                # path component so it cannot write outside the working
                # directory, and fall back to the URL's file name if empty.
                name = os.path.basename(result.text.strip())
                if not name:
                    name = os.path.basename(urlparse(final_url).path)

                print("%s  --  %s" % (name, final_url))
                if not (name and ismp3(final_url)):
                    continue
                try:
                    if downloadMP3(name, final_url):
                        count += 1
                except requests.RequestException as e:
                    # Skip this song and carry on with the rest.
                    print("Could not download %s: %s" % (final_url, e))
            print("Download complete for url  %s" % url)
            print(100 * "=")
    except KeyboardInterrupt:
        # Ctrl-C stops early; the summary below is still printed.
        pass
    end_time = time.time()
    print("Downloaded %d song(s) in %s seconds" % (count, end_time - start_time))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment