Skip to content

Instantly share code, notes, and snippets.

Created July 31, 2015 04:01
Show Gist options
  • Save iam4722202468/ae7f58833bc147e00a50 to your computer and use it in GitHub Desktop.
Save iam4722202468/ae7f58833bc147e00a50 to your computer and use it in GitHub Desktop.
Python youtube downloader by artist
import sys
import wikipedia
import urllib2
from BeautifulSoup import BeautifulSoup
import os
from threading import Thread
import unicodedata
import commands
import json
# --- Configuration and discography lookup -----------------------------------
# NOTE(review): this copy of the script has lost its indentation and some
# tokens (see the individual notes below); it will not run until restored.

# Artist name. The text after "=" is a placeholder that must be replaced by a
# real string literal; the value is reused below in search queries, file names
# and the artist tag.
bandname = <band name as string>
# Extra search terms tried one at a time when looking a song up on YouTube;
# the empty string means "no extra term".
keywords = ['', 'Lyrics', 'Official']
# NOTE(review): the right-hand side of this assignment is missing in this copy.
# Later code calls wikipage.html(), so it presumably was a lookup through the
# `wikipedia` package -- restore from the original script.
wikipage =
# NOTE(review): no matching "try:" is visible for this handler; presumably it
# wrapped the assignment above.
except wikipedia.exceptions.WikipediaException:
print "Page not found"
# Full HTML of the looked-up page.
page = wikipage.html()
# Find the text 'Studio albums' that follows the first navbox group header
# cell, then cut the page from there up to the next closing </div>.
start = page.find('Studio albums', page.find('<th scope="row" class="navbox-group" style="background: #EEEEEE;">'))
end = page.find('</div>', start)
# Calls wikipage.html() a second time rather than reusing `page`.
parsedpage = wikipage.html()[start:end]
# Parse only that fragment; the main loop below walks its <a> tags.
soup = BeautifulSoup(parsedpage)
def conversong(album, bandname, song, song_index, song_title):
    """Convert a downloaded track to MP3, tag it, and delete the leftovers.

    All work happens inside the ``album`` directory:

    1. ``ffmpeg`` transcodes the first file whose name starts with
       ``song_title`` into ``<song_title>.mp3`` using ``libmp3lame``.
    2. ``eyeD3`` removes any existing tags, then writes artist, album, title,
       track number and ``Cover.jpg`` as the front cover.
    3. The ``.mp4``, ``.mkv``, ``.f141.m4a`` and ``.f140.m4a`` files named
       after ``song_title`` are deleted if they exist.

    Args:
        album: Directory holding the download; also written as the album tag.
        bandname: Value for the artist tag.
        song: Value for the title tag.
        song_index: Track number handed to ``eyeD3 -n``.
        song_title: File name stem shared by the download and the MP3.

    Every step is best-effort: a missing tool, a failed conversion or a
    missing directory is reported on stderr or skipped, never raised.
    """
    import subprocess

    def run(argv):
        # Argument lists bypass the shell, so quotes, "$" or backticks in
        # scraped titles can neither break nor inject into the command.
        try:
            return subprocess.call(argv, cwd=album)
        except OSError as err:
            # Tool not installed or album directory missing: keep going,
            # just like a failed os.system() call would have.
            sys.stderr.write('%s: %s\n' % (argv[0], err))
            return None

    mp3_name = song_title + '.mp3'
    # A leading "./" keeps a title that starts with "-" from being read as
    # a command-line option.
    mp3_arg = os.path.join(os.curdir, mp3_name)

    # Literal prefix match (the shell version let `find -name` interpret the
    # title as a glob pattern). The MP3 itself is never a conversion source.
    downloads = sorted(
        os.path.relpath(os.path.join(root, name), album)
        for root, _dirs, names in os.walk(album)
        for name in names
        if name.startswith(song_title) and name != mp3_name
    )

    if downloads:
        source = os.path.join(os.curdir, downloads[0])
        run(['ffmpeg', '-loglevel', 'panic', '-i', source,
             '-acodec', 'libmp3lame', mp3_arg])

    # Only tag when the conversion actually produced a file.
    if os.path.isfile(os.path.join(album, mp3_name)):
        run(['eyeD3', '--remove-all',
             '-a', bandname,
             '-A', album,
             '-t', song,
             '-n', str(song_index),
             '--add-image', 'Cover.jpg:FRONT_COVER',
             mp3_arg])

    # Remove whichever container files the download left behind.
    for suffix in ('.mp4', '.mkv', '.f141.m4a', '.f140.m4a'):
        try:
            os.remove(os.path.join(album, song_title + suffix))
        except OSError:
            pass  # that particular file was not produced for this download
# --- Main loop: one iteration per link in the studio-albums fragment ---------
# NOTE(review): indentation was lost in this copy, so the nesting of the loops
# and conditionals below has to be restored before the script can run.
for link_tag in soup.findAll('a'):
href = link_tag.get('href')
# NOTE(review): the URL prefix is an empty string here; the href is presumably
# site-relative, so a scheme/host prefix looks to be missing -- confirm.
response = urllib2.urlopen('' + href)
# NOTE(review): right-hand side missing in this copy; the code below treats
# `page` as the HTML text of the album page (presumably the response body).
page =
# Keep only the part of the page between the 'Length' header cell and the
# next "<b>", and parse that fragment.
start = page.find('right; background-color: #eee">Length</th>')
end = page.find("<b>", start)
parsedpage = page[start:end]
# Rebinds `soup`; the outer loop is unaffected because its findAll('a') call
# was already evaluated when the loop started.
soup = BeautifulSoup(parsedpage)
# Album name is the link text with 'amp;' removed (so "&amp;" becomes "&").
album = link_tag.text.replace('amp;','')
# Release year: the 4 characters right after the 32-character marker below.
year_find = page.find('bday dtstart published updated">')
year_release = page[year_find + 32: year_find + 32 + 4]
# If the marker is absent, find() returns -1 and the slice above is
# page[31:35]. NOTE(review): the ' dir' comparison presumably recognises that
# case from text near the top of the HTML -- confirm.
if year_release == ' dir':
# Fallback: take 4 characters starting 2 past the first comma that follows
# the "published" table cell.
year_find = page.find(',',page.find('<td class="published">'));
year_release = page[year_find+2: year_find+6]
# One directory per album, named "<year> - <album>".
if not os.path.exists(year_release + " - " + album):
os.makedirs(year_release + " - " + album)
# Cover art: locate the src="..." of the image inside the centred two-column
# cell, then drop '/thumb' and the last path segment from that URL.
# NOTE(review): presumably this turns a thumbnail URL into the full-size one.
start_img = page.find('src="', page.find('class="image"><img alt="', page.find('<td colspan="2" style="text-align:center">')))
end_img = page.find('"',start_img+5)
image_url = page[start_img+5:end_img]
image_url = image_url.replace('/thumb','')
image_url = '/'.join(image_url.split('/')[:-1])
# Download the image as Cover.jpg inside the album directory; 'http:' is
# prepended to the extracted URL. The command is a shell string built from
# scraped text.
os.system('cd "' + year_release + " - " + album + '" && wget http:' + image_url + ' -O Cover.jpg')
# Running track counter for this album.
songnum = 0
# Walk the table cells of the track-list fragment. This reuses the name
# `link_tag` from the outer loop; `album` was already read from it above.
for link_tag in soup.findAll('td'):
song = link_tag.text.replace('amp;','')
# Only cells containing a double quote are treated as song titles.
if song.find("\"") >= 0:
songnum += 1
# Two-digit, zero-padded track number as a string.
song_index = str(songnum)
if songnum < 10:
song_index = '0' + song_index
# Take the text between the first pair of double quotes, decompose it with
# NFKD and drop every non-ASCII character.
song = unicodedata.normalize('NFKD', song.split('"')[1]).encode('ascii','ignore')
# File name stem; "/" is replaced with "|" so the title cannot add a path
# separator.
song_title = bandname + " - " + album + " - " + song_index + " - " + song.replace("/","|")
# Skip tracks whose MP3 already exists.
if not os.path.isfile("./" + year_release + " - " + album + "/" + song_title + ".mp3"):
# Pick the search keyword whose top result has the most views while staying
# under 50,000,000 bytes.
highest_views = 0
highest_keyword = ""
for place in keywords:
# Simulated (-s) youtube-dl search that prints the result's metadata as JSON;
# getstatusoutput returns (status, output), so json_[1] is the output text.
json_ = commands.getstatusoutput('youtube-dl -s --print-json "ytsearch:' + song + ' ' + bandname + ' ' + place + '"')
views = int(json.loads(json_[1])['view_count'])
file_size = int(json.loads(json_[1])['requested_formats'][0]['filesize']);
# NOTE(review): no matching "try:" is visible for this handler in this copy.
except KeyError:
print "something went wrong..."
if(views > highest_views and file_size < 50000000):
highest_views = views
highest_keyword = place
# NOTE(review): no matching "try:" is visible for this handler either.
except ValueError:
print "No JSON object could be decoded"
# Real download using the winning keyword, capped at 50m, saved in the album
# directory as "<song_title>.<ext>".
os.system('cd "' + year_release + " - " + album + '" && youtube-dl --max-filesize 50m "ytsearch:' + song + ' ' + bandname + ' ' + highest_keyword + '" --prefer-ffmpeg -o "' + song_title + '.%(ext)s"')
# Prepare a worker thread that converts and tags the download.
# NOTE(review): no t.start() is visible in this copy, so as shown the thread
# is created but never run.
t = Thread(target=conversong, args=(year_release + " - " + album, bandname, song, song_index, song_title,))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment