Skip to content

Instantly share code, notes, and snippets.

@nickzuck
Last active October 25, 2018 05:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nickzuck/0e92372a2319c342b2817749af01ac35 to your computer and use it in GitHub Desktop.
Save nickzuck/0e92372a2319c342b2817749af01ac35 to your computer and use it in GitHub Desktop.
Program to download MP3 songs from "Index of" directory listings
# Author: Nikhil Kumar Singh(nickzuck_007)
# Date : 6th May 2018
# Updated: 25th Oct 2018
# Description: Download every MP3 linked from the index pages whose URLs are listed, one per line, in a file named "urls"
from bs4 import BeautifulSoup
from urlparse import urlparse
import requests
import os
import time
def ismp3(url):
    """Return True if *url* points at an ``.mp3`` file.

    Only the path portion of the URL is inspected: any fragment (``#...``)
    and query string (``?...``) are discarded first, and the extension is
    compared case-insensitively, so ``Track.MP3`` and ``track.mp3?dl=1``
    both count as MP3 links.

    :param url: absolute or relative URL (or bare file name) as a string.
    :returns: ``True`` when the path ends in ``.mp3``, ``False`` otherwise.
    """
    # Strip the fragment before the query so that a "?" appearing inside a
    # fragment cannot be mistaken for the start of the query string.
    path = url.split("#", 1)[0].split("?", 1)[0]
    return path.lower().endswith(".mp3")
def downloadMP3(name, url):
    """Download *url* and save the response body to the local file *name*.

    Nothing is fetched when *name* already exists, and nothing is written
    when the server answers with an HTTP error status, so an error page is
    never stored under an ``.mp3`` name. Exceptions raised by
    ``requests.get`` or by opening/writing the file are not caught here and
    propagate to the caller.

    :param name: path of the file to create.
    :param url: URL of the MP3 to fetch.
    :returns: ``True`` if the file was written, ``False`` if the download
        was skipped (file already present, or HTTP error status).
    """
    # Every message is a single pre-formatted string so the output is the
    # same whether ``print`` is a statement (Python 2) or a function.
    print("Downloading  %s" % name)
    if os.path.exists(name):
        print("File already exists ")
        print(100 * "-")
        return False

    # NOTE(review): verify=False disables TLS certificate verification.
    # Kept as in the original call; confirm this is still wanted.
    response = requests.get(url, verify=False)
    if not response.ok:
        # A 4xx/5xx body is an error page, not audio: do not save it.
        print("Download failed with HTTP status %d" % response.status_code)
        print(100 * "-")
        return False

    print("Writing to file  %s" % name)
    # Binary mode: response.content is raw bytes, and text mode would apply
    # newline translation on some platforms and corrupt the audio data.
    with open(name, "wb") as mp3_file:
        mp3_file.write(response.content)
    print("Write completed  %s" % name)
    print(100 * "-")
    return True
def _read_urls(path="urls"):
    """Return the stripped, non-blank lines of the file at *path*.

    A file that cannot be opened yields an empty list instead of an error.
    """
    try:
        with open(path) as url_file:
            stripped = [line.strip() for line in url_file]
    except IOError:
        # No readable list simply means there is nothing to download.
        return []
    # Blank lines would otherwise be handed to requests as empty URLs.
    return [line for line in stripped if line]


def main():
    """Download every MP3 linked from the index pages listed in ``urls``.

    Reads index-page URLs (one per line) from a file named ``urls`` in the
    current working directory, fetches each page, and for every ``ul li a``
    link whose resolved URL passes ``ismp3`` calls ``downloadMP3`` with the
    link text as the file name. Pressing Ctrl-C stops the run early; the
    summary line is printed either way.

    :returns: number of files for which ``downloadMP3`` returned ``True``.
    """
    # Imported locally so this block does not depend on any change to the
    # file-level imports; the module was renamed between Python 2 and 3.
    try:
        from urlparse import urljoin  # Python 2
    except ImportError:
        from urllib.parse import urljoin  # Python 3

    urls = _read_urls("urls")
    count = 0
    start_time = time.time()
    try:
        for url in urls:
            print("Donwloading for url  %s" % url)
            # NOTE(review): verify=False disables TLS certificate
            # verification. Kept as in the original call.
            response = requests.get(url, verify=False)
            soup = BeautifulSoup(response.content, "lxml")
            for link in soup.select('ul li a'):
                href_link = link.get('href')
                if not href_link:
                    # Anchor without a target: nothing to download.
                    continue
                # Resolve against the URL that was actually served (after
                # redirects) so relative, root-relative and absolute links
                # all end up as correct absolute URLs.
                final_url = urljoin(response.url, href_link)
                # NOTE(review): the link text is used verbatim as the local
                # file name; confirm index pages never put path separators
                # in it.
                name = link.text
                print("%s  --  %s" % (name, final_url))
                if ismp3(final_url) and downloadMP3(name, final_url):
                    count += 1
            print("Download complete for url  %s" % url)
            print(100 * "=")
    except KeyboardInterrupt:
        # Deliberate: let the user stop early and still see the summary.
        pass
    end_time = time.time()
    print("Downloaded %d song(s) in %s seconds" % (count, end_time - start_time))
    return count


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment