iam4722202468/youtube-downloader.py

## youtube-downloader.py
#!/usr/bin/python
import commands
import json
import os
import subprocess
import sys
import unicodedata
import urllib2
from threading import Thread

import wikipedia
from BeautifulSoup import BeautifulSoup

# The band to download is given as the first command-line argument, e.g.
#   ./youtube-downloader.py "Pink Floyd"
if len(sys.argv) < 2:
	sys.exit('usage: ' + sys.argv[0] + ' <band name>')
bandname = sys.argv[1]

# Extra words appended to each YouTube search; '' means "no extra word".
keywords = ['', 'Lyrics', 'Official']

try:
	wikipage = wikipedia.page(bandname)
except wikipedia.exceptions.WikipediaException:
	# Nothing can be scraped without the band's page; stop here instead of
	# failing a few lines later with a NameError on `wikipage`.
	print("Page not found")
	sys.exit(1)

page = wikipage.html()

# Cut out the "Studio albums" row of the band's navbox: it starts at the
# first "Studio albums" after the navbox group header and ends at the next
# closing </div>.
start = page.find('Studio albums', page.find('<th scope="row" class="navbox-group" style="background: #EEEEEE;">'))
if start < 0:
	# str.find() returned -1: slicing with it would silently yield an empty
	# album list, so say what happened instead.
	print("Studio albums section not found")
	sys.exit(1)
end = page.find('</div>', start)

# Reuse the HTML fetched above rather than calling wikipage.html() again.
parsedpage = page[start:end]
soup = BeautifulSoup(parsedpage)

def conversong(album, bandname, song, song_index, song_title):
	"""Convert a downloaded track to MP3, tag it and delete the download.

	The script starts this on a worker thread after youtube-dl has
	finished. Every external tool is launched with an argument list and
	``cwd=album`` rather than a shell command string, so quotes, ``$`` or
	``;`` in titles scraped from Wikipedia are passed through literally
	instead of being interpreted by a shell.

	Args:
		album: Directory holding the download and ``Cover.jpg``; the same
			string is written to the MP3 as its album tag.
		bandname: Value for the artist tag.
		song: Value for the title tag.
		song_index: Track number as a string (e.g. ``'03'``).
		song_title: File name of the track without its extension.
	"""
	def run(argv):
		# A missing tool (or a vanished album directory) is reported and
		# skipped so the remaining steps still get their chance to run.
		try:
			return subprocess.call(argv, cwd=album)
		except OSError as err:
			sys.stderr.write('conversong: cannot run %s: %s\n' % (argv[0], err))
			return -1

	mp3_name = song_title + '.mp3'

	# The download's extension is not known in advance, so look for any file
	# whose name starts with the track's base name (the MP3 itself excluded).
	try:
		candidates = sorted(
			name for name in os.listdir(album)
			if name.startswith(song_title) and name != mp3_name
		)
	except OSError as err:
		sys.stderr.write('conversong: cannot list %s: %s\n' % (album, err))
		return
	if not candidates:
		sys.stderr.write('conversong: no download found for %s\n' % song_title)
		return

	# "./" keeps a file name that begins with "-" from being read as an option.
	source = './' + candidates[0]
	target = './' + mp3_name

	run(['ffmpeg', '-loglevel', 'panic', '-i', source, '-acodec', 'libmp3lame', target])
	run([
		'eyeD3', '--remove-all',
		'-a', bandname,
		'-A', album,
		'-t', song,
		'-n', song_index,
		'--add-image', 'Cover.jpg:FRONT_COVER',
		target,
	])

	# Remove the containers/streams youtube-dl may have left behind.
	for ext in ('.mp4', '.mkv', '.f141.m4a', '.f140.m4a'):
		try:
			os.remove(os.path.join(album, song_title + ext))
		except OSError:
			# Usually only one of these exists; a missing file is expected.
			pass

# Upper bound for a single download; mirrors the "50m" given to youtube-dl.
MAX_FILESIZE_BYTES = 50000000


def _run(argv, cwd=None):
	"""Run an external tool without a shell.

	Returns the tool's exit status, or -1 when it could not be started.
	"""
	try:
		return subprocess.call(argv, cwd=cwd)
	except OSError as err:
		print("Could not run " + argv[0] + ": " + str(err))
		return -1


def _search_term(song, keyword):
	"""Build the youtube-dl search term for one song of the band."""
	return 'ytsearch:' + song + ' ' + bandname + ' ' + keyword


def _probe(term):
	"""Return ``(views, filesize)`` for youtube-dl's hit for `term`, or None.

	Runs ``youtube-dl -s --print-json`` (simulate only) and reads the JSON
	it prints on stdout. None is returned when the tool cannot be started,
	prints no valid JSON, or the JSON lacks a usable view count or size.
	"""
	try:
		proc = subprocess.Popen(['youtube-dl', '-s', '--print-json', term], stdout=subprocess.PIPE)
		output = proc.communicate()[0]
	except OSError as err:
		print("Could not run youtube-dl: " + str(err))
		return None

	try:
		info = json.loads(output)
	except ValueError:
		print("No JSON object could be decoded")
		return None

	try:
		views = int(info['view_count'])
		# Only the first requested format is inspected.
		file_size = int(info['requested_formats'][0]['filesize'])
	except (KeyError, IndexError, TypeError, ValueError):
		# A hit whose popularity or size is unknown cannot be compared, so
		# it is dropped instead of reusing a value from an earlier keyword.
		print("something went wrong...")
		return None
	return views, file_size


def _best_keyword(song):
	"""Return the entry of `keywords` whose hit has the most views while
	staying under MAX_FILESIZE_BYTES ('' when no hit qualifies)."""
	highest_views = 0
	highest_keyword = ""
	for place in keywords:
		hit = _probe(_search_term(song, place))
		if hit is None:
			continue
		views, file_size = hit
		if views > highest_views and file_size < MAX_FILESIZE_BYTES:
			highest_views = views
			highest_keyword = place
	return highest_keyword


# One iteration per album link found in the band's "Studio albums" row.
for album_link in soup.findAll('a'):
	href = album_link.get('href')
	if not href:
		# An anchor without a target cannot be followed.
		continue

	response = urllib2.urlopen('http://en.wikipedia.org' + href)
	try:
		album_page = response.read()
	finally:
		response.close()

	# The track listing is taken to run from the "Length" column header to
	# the next <b> tag.
	track_start = album_page.find('right; background-color: #eee">Length</th>')
	track_end = album_page.find("<b>", track_start)
	track_soup = BeautifulSoup(album_page[track_start:track_end])

	album = album_link.text.replace('amp;','')

	# Release year: the four characters right after the infobox date marker.
	date_marker = 'bday dtstart published updated">'
	year_find = album_page.find(date_marker)
	if year_find != -1:
		year_release = album_page[year_find + len(date_marker): year_find + len(date_marker) + 4]
	else:
		# No marker: fall back to the text after the first comma in the
		# "published" cell.
		year_find = album_page.find(',', album_page.find('<td class="published">'))
		year_release = album_page[year_find+2: year_find+6]

	album_dir = year_release + " - " + album
	if not os.path.exists(album_dir):
		os.makedirs(album_dir)

	# Cover art: take the infobox thumbnail URL, drop the "/thumb" component
	# and the trailing size-specific file name.
	start_img = album_page.find('src="', album_page.find('class="image"><img alt="', album_page.find('<td colspan="2" style="text-align:center">')))
	end_img = album_page.find('"', start_img+5)
	image_url = album_page[start_img+5:end_img]
	image_url = image_url.replace('/thumb','')
	image_url = '/'.join(image_url.split('/')[:-1])

	_run(['wget', 'http:' + image_url, '-O', 'Cover.jpg'], cwd=album_dir)

	songnum = 0

	for cell in track_soup.findAll('td'):
		song = cell.text.replace('amp;','')

		# Only cells containing a double quote are treated as song titles.
		if '"' not in song:
			continue

		songnum += 1
		song_index = '%02d' % songnum

		# Keep the text between the first pair of quotes, reduced to ASCII.
		song = unicodedata.normalize('NFKD', song.split('"')[1]).encode('ascii','ignore')
		song_title = bandname + " - " + album + " - " + song_index + " - " + song.replace("/","|")

		if os.path.isfile(os.path.join(album_dir, song_title + ".mp3")):
			# Already converted on an earlier run.
			continue

		highest_keyword = _best_keyword(song)

		_run([
			'youtube-dl', '--max-filesize', '50m',
			_search_term(song, highest_keyword),
			'--prefer-ffmpeg',
			'-o', song_title + '.%(ext)s',
		], cwd=album_dir)

		# Convert and tag in the background while the next track downloads.
		t = Thread(target=conversong, args=(album_dir, bandname, song, song_index, song_title,))
		t.start()
	#!/usr/bin/python
	import sys
	import wikipedia
	import urllib2
	from BeautifulSoup import BeautifulSoup
	import os
	from threading import Thread
	import unicodedata
	import commands
	import json

	# NOTE(review): from the second "#!/usr/bin/python" line onward the file
	# repeats the script above with every line flattened to one tab. The
	# bodies of try/except (and, further down, def/for/if) are no longer
	# indented under their headers, so this copy does not parse as written.
	# Confirm whether it is a paste artifact that should be removed.

	# Placeholder, not valid Python: replace with a quoted band name.
	bandname = <band name as string>
	# Extra words appended to each YouTube search further down; '' adds none.
	keywords = ['', 'Lyrics', 'Official']

	# Look up the band's Wikipedia page.
	try:
	wikipage = wikipedia.page(bandname)
	except wikipedia.exceptions.WikipediaException:
	print "Page not found"

	page = wikipage.html()

	# Locate the first "Studio albums" after the navbox group header and the
	# next closing </div> after it.
	start = page.find('Studio albums', page.find('<th scope="row" class="navbox-group" style="background: #EEEEEE;">'))
	end = page.find('</div>', start)

	# Parse only that slice; the loop below reads the album links from it.
	parsedpage = wikipage.html()[start:end]
	soup = BeautifulSoup(parsedpage)

	# NOTE(review): flattened repeat of conversong() from earlier in the file;
	# the body is not indented under the def, so it does not parse as written.
	#
	# Converts the downloaded file for `song_title` inside the `album`
	# directory to MP3 with ffmpeg, tags it with eyeD3 (artist, album, title,
	# track number, Cover.jpg as front cover), then removes the .mp4, .mkv,
	# .f141.m4a and .f140.m4a files of the same base name.
	# Each step is a shell command string built by concatenating the arguments.
	def conversong(album, bandname, song, song_index, song_title):
	# The input file is whatever `find` reports for names starting with song_title.
	os.system('cd "' + album + '" && ffmpeg -loglevel panic -i "$(find . -name "' + song_title + '"\\*)" -acodec libmp3lame "' + song_title + '.mp3"')
	os.system('cd "' + album + '" && eyeD3 --remove-all -a "' + bandname + '" -A "' + album + '" -t "' + song + '" -n ' + song_index + ' --add-image Cover.jpg:FRONT_COVER "' + song_title + '.mp3"')
	os.system('cd "' + album + '" && rm "' + song_title + '.mp4"')
	os.system('cd "' + album + '" && rm "' + song_title + '.mkv"')
	os.system('cd "' + album + '" && rm "' + song_title + '.f141.m4a"')
	os.system('cd "' + album + '" && rm "' + song_title + '.f140.m4a"')
	return

	# NOTE(review): flattened repeat of the album/track download loop from
	# earlier in the file; loop and if bodies are not indented under their
	# headers, so this copy does not parse as written.

	# For every link in the parsed "Studio albums" slice: fetch the album page.
	for link_tag in soup.findAll('a'):
	href = link_tag.get('href')

	response = urllib2.urlopen('http://en.wikipedia.org' + href)
	page = response.read()
	# Slice from the "Length" column header to the next <b> and parse it.
	start = page.find('right; background-color: #eee">Length</th>')
	end = page.find("<b>", start)
	parsedpage = page[start:end]
	soup = BeautifulSoup(parsedpage)

	album = link_tag.text.replace('amp;','')

	# Release year: 4 characters following the 32-character date marker.
	year_find = page.find('bday dtstart published updated">')
	year_release = page[year_find + 32: year_find + 32 + 4]

	# When find() returned -1 the slice above is page[31:35]; ' dir' is
	# presumably what the page header holds there - TODO confirm. In that
	# case the year is read after the first comma in the "published" cell.
	if year_release == ' dir':
	year_find = page.find(',',page.find('<td class="published">'));
	year_release = page[year_find+2: year_find+6]

	# One directory per album, named "<year> - <album>".
	if not os.path.exists(year_release + " - " + album):
	os.makedirs(year_release + " - " + album)

	# Cover image URL: the src attribute of the image in the centered cell.
	start_img = page.find('src="', page.find('class="image"><img alt="', page.find('<td colspan="2" style="text-align:center">')))
	end_img = page.find('"',start_img+5)
	image_url = page[start_img+5:end_img]

	# Drop the '/thumb' component and the last path segment of the URL.
	image_url = image_url.replace('/thumb','')
	image_url = '/'.join(image_url.split('/')[:-1])

	os.system('cd "' + year_release + " - " + album + '" && wget http:' + image_url + ' -O Cover.jpg')

	songnum = 0

	# Table cells whose text contains a double quote are treated as songs.
	for link_tag in soup.findAll('td'):
	song = link_tag.text.replace('amp;','')

	if song.find("\"") >= 0:

	# Track number as a string, zero-padded below 10.
	songnum += 1
	song_index = str(songnum)

	if songnum < 10:
	song_index = '0' + song_index

	# Text between the first pair of quotes, normalized and reduced to ASCII.
	song = unicodedata.normalize('NFKD', song.split('"')[1]).encode('ascii','ignore')
	# NOTE(review): '/' is replaced with "\|" (backslash + pipe) here, while
	# the first copy of the script uses "|" - confirm which is intended.
	song_title = bandname + " - " + album + " - " + song_index + " - " + song.replace("/","\|")

	# Skip tracks whose MP3 already exists.
	if not os.path.isfile("./" + year_release + " - " + album + "/" + song_title + ".mp3"):

	highest_views = 0
	highest_keyword = ""

	# Simulate a search per keyword and remember the keyword whose hit has
	# the most views while being smaller than 50000000 bytes.
	for place in keywords:
	try:
	json_ = commands.getstatusoutput('youtube-dl -s --print-json "ytsearch:' + song + ' ' + bandname + ' ' + place + '"')
	views = int(json.loads(json_[1])['view_count'])

	# NOTE(review): when this KeyError branch is taken, file_size is not
	# assigned for this keyword, yet it is still read in the comparison below.
	try:
	file_size = int(json.loads(json_[1])['requested_formats'][0]['filesize']);
	except KeyError:
	print "something went wrong..."

	if(views > highest_views and file_size < 50000000):
	highest_views = views
	highest_keyword = place
	except ValueError:
	print "No JSON object could be decoded"

	# Download with the chosen keyword into the album directory.
	os.system('cd "' + year_release + " - " + album + '" && youtube-dl --max-filesize 50m "ytsearch:' + song + ' ' + bandname + ' ' + highest_keyword + '" --prefer-ffmpeg -o "' + song_title + '.%(ext)s"')

	# Convert and tag on a separate thread.
	t = Thread(target=conversong, args=(year_release + " - " + album, bandname, song, song_index, song_title,))
	t.start()