Skip to content

Instantly share code, notes, and snippets.

@iam4722202468
Created July 31, 2015 04:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save iam4722202468/ae7f58833bc147e00a50 to your computer and use it in GitHub Desktop.
Save iam4722202468/ae7f58833bc147e00a50 to your computer and use it in GitHub Desktop.
Python youtube downloader by artist
#!/usr/bin/python
import commands
import json
import os
import subprocess
import sys
import unicodedata
import urllib2
from threading import Thread

import wikipedia
from BeautifulSoup import BeautifulSoup
# Script setup: pick the artist, fetch its Wikipedia article and isolate the
# "Studio albums" part of the navigation box, whose links are walked later.

# The artist is taken from the first command-line argument.
if len(sys.argv) < 2:
    sys.exit("usage: " + sys.argv[0] + " <band name>")
bandname = sys.argv[1]

# Extra search terms tried for every song; '' means "no extra term".
keywords = ['', 'Lyrics', 'Official']

try:
    wikipage = wikipedia.page(bandname)
except wikipedia.exceptions.WikipediaException:
    print("Page not found")
    # Nothing below can work without the article, so stop here instead of
    # failing later with a NameError on `wikipage`.
    sys.exit(1)

page = wikipage.html()
start = page.find('Studio albums', page.find('<th scope="row" class="navbox-group" style="background: #EEEEEE;">'))
if start == -1:
    # With start == -1 the slice below would always be empty, so report the
    # problem rather than silently doing nothing.
    sys.exit("No 'Studio albums' section found for " + bandname)
end = page.find('</div>', start)
# Reuse the HTML that was already retrieved instead of asking for it again.
parsedpage = page[start:end]
soup = BeautifulSoup(parsedpage)
def conversong(album, bandname, song, song_index, song_title):
    """Convert a downloaded track to MP3, tag it and delete the source files.

    ``ffmpeg`` and ``eyeD3`` are run with *album* as their working directory.
    Commands are passed as argument lists (no shell), so quotes, ``$`` or
    backticks inside titles are treated as plain text.

    Args:
        album: Directory holding the download; also written as the album tag.
        bandname: Written as the artist tag.
        song: Written as the title tag.
        song_index: Track number given to eyeD3's ``-n`` option.
        song_title: File name stem shared by the download and the MP3.

    Returns:
        None. When no source file is found or the conversion does not produce
        an MP3, the function reports it and leaves the directory untouched.
    """
    def run(argv):
        # A missing executable must not kill the worker thread with a
        # traceback; report it and signal failure through the exit status.
        try:
            return subprocess.call(argv, cwd=album)
        except OSError:
            print("Could not run " + argv[0])
            return 1

    mp3_name = song_title + '.mp3'

    # Any file starting with the stem (except the MP3 itself) is a candidate
    # input, whatever container youtube-dl ended up writing.
    sources = sorted(name for name in os.listdir(album)
                     if name.startswith(song_title) and name != mp3_name)
    if not sources:
        print("No downloaded file found for " + song_title)
        return

    run(['ffmpeg', '-loglevel', 'panic', '-i', sources[0],
         '-acodec', 'libmp3lame', mp3_name])
    if not os.path.isfile(os.path.join(album, mp3_name)):
        # Keep the download so a failed conversion does not lose it.
        print("Conversion failed for " + song_title)
        return

    run(['eyeD3', '--remove-all', '-a', bandname, '-A', album, '-t', song,
         '-n', song_index, '--add-image', 'Cover.jpg:FRONT_COVER', mp3_name])

    # Remove the intermediate files; most of them will not exist for a given
    # track, which is fine.
    for suffix in ('.mp4', '.mkv', '.f141.m4a', '.f140.m4a'):
        leftover = os.path.join(album, song_title + suffix)
        try:
            os.remove(leftover)
        except OSError:
            pass
    return
# Main loop: for every studio-album link, scrape the album page for its
# release year, cover image and track listing, then download each track that
# has no MP3 yet and hand it to conversong() on a worker thread.

# Candidates whose reported size reaches this many bytes are never chosen.
MAX_FILESIZE_BYTES = 50000000


def _run_in_dir(directory, argv):
    """Run *argv* (an argument list, no shell) with *directory* as cwd.

    Returns the exit status, or 1 when the executable cannot be started.
    """
    try:
        return subprocess.call(argv, cwd=directory)
    except OSError:
        print("Could not run " + argv[0])
        return 1


def _probe_candidate(query):
    """Ask youtube-dl, in simulate mode, about the top search hit for *query*.

    Returns a ``(view_count, filesize)`` tuple of ints, or ``None`` when
    youtube-dl cannot be run, prints no valid JSON, or omits either field.
    """
    try:
        proc = subprocess.Popen(
            ['youtube-dl', '-s', '--print-json', 'ytsearch:' + query],
            stdout=subprocess.PIPE)
        # Only stdout is parsed, so warnings on stderr cannot corrupt the JSON.
        output = proc.communicate()[0]
    except OSError:
        print("Could not run youtube-dl")
        return None
    try:
        info = json.loads(output)
    except ValueError:
        print("No JSON object could be decoded")
        return None
    try:
        views = int(info['view_count'])
        file_size = int(info['requested_formats'][0]['filesize'])
    except (KeyError, IndexError, TypeError, ValueError):
        # A field is missing or null: the candidate cannot be judged.
        print("something went wrong...")
        return None
    return views, file_size


for album_link in soup.findAll('a'):
    href = album_link.get('href')
    if not href:
        # Anchors without a target cannot be followed.
        continue
    try:
        response = urllib2.urlopen('http://en.wikipedia.org' + href)
        try:
            page = response.read()
        finally:
            response.close()
    except urllib2.URLError:
        # One unreachable album page should not abort the remaining albums.
        print("Could not fetch album page " + href)
        continue

    # The track listing is taken to be the markup between the "Length" column
    # header and the next <b> tag.
    start = page.find('right; background-color: #eee">Length</th>')
    end = page.find("<b>", start)
    track_cells = BeautifulSoup(page[start:end]).findAll('td')
    album = album_link.text.replace('amp;', '')

    # Release year: the four characters after the 32-character marker if it
    # is present, otherwise the four characters following ", " in the
    # "published" table cell. The -1 result of find() is tested directly
    # instead of inspecting whatever text the slice would have produced.
    year_find = page.find('bday dtstart published updated">')
    if year_find != -1:
        year_release = page[year_find + 32:year_find + 36]
    else:
        year_find = page.find(',', page.find('<td class="published">'))
        year_release = page[year_find + 2:year_find + 6]

    album_dir = year_release + " - " + album
    if not os.path.exists(album_dir):
        os.makedirs(album_dir)

    # Cover art: take the infobox thumbnail URL, drop the '/thumb' path part
    # and the final path segment, and save the result as Cover.jpg.
    start_img = page.find('src="', page.find('class="image"><img alt="', page.find('<td colspan="2" style="text-align:center">')))
    end_img = page.find('"', start_img + 5)
    image_url = page[start_img + 5:end_img].replace('/thumb', '')
    image_url = '/'.join(image_url.split('/')[:-1])
    _run_in_dir(album_dir, ['wget', 'http:' + image_url, '-O', 'Cover.jpg'])

    songnum = 0
    for cell in track_cells:
        song = cell.text.replace('amp;', '')
        if '"' not in song:
            # Only cells containing a quoted title count as tracks.
            continue
        songnum += 1
        song_index = '%02d' % songnum
        song = unicodedata.normalize('NFKD', song.split('"')[1]).encode('ascii', 'ignore')
        # '/' anywhere in the stem (band, album or song) would split the file
        # path, so it is replaced across the whole title.
        song_title = (bandname + " - " + album + " - " + song_index + " - " + song).replace("/", "|")
        if os.path.isfile(os.path.join(album_dir, song_title + ".mp3")):
            continue

        # Try each extra search term and keep the one whose top hit has the
        # most views while staying under the size limit.
        highest_views = 0
        highest_keyword = ""
        for place in keywords:
            candidate = _probe_candidate(song + ' ' + bandname + ' ' + place)
            if candidate is None:
                continue
            views, file_size = candidate
            if views > highest_views and file_size < MAX_FILESIZE_BYTES:
                highest_views = views
                highest_keyword = place

        _run_in_dir(album_dir, [
            'youtube-dl', '--max-filesize', '50m',
            'ytsearch:' + song + ' ' + bandname + ' ' + highest_keyword,
            '--prefer-ffmpeg',
            # A literal '%' must be doubled inside youtube-dl's -o template.
            '-o', song_title.replace('%', '%%') + '.%(ext)s',
        ])
        # Convert and tag in the background while the next track downloads.
        t = Thread(target=conversong, args=(album_dir, bandname, song, song_index, song_title,))
        t.start()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment