# Created March 14, 2017 20:57
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy
import json
import urllib
import urllib.request
import urllib.parse
from bs4 import BeautifulSoup
import re
from unidecode import unidecode
import youtube_dl
def spotify_get_playlist_tracks(uri):
    """Yield an "Artist, Artist - Title" string for each track of a playlist.

    Args:
        uri: Playlist URI made of five colon-separated fields; the third
            field is used as the user name and the fifth as the playlist id
            (presumably ``spotify:user:<name>:playlist:<id>`` -- confirm
            against the URIs actually passed in).

    Yields:
        str: The artist names joined by ", ", followed by " - " and the
        track name.

    Raises:
        ValueError: If ``uri`` does not consist of exactly five fields.
    """
    parts = uri.split(':')
    if len(parts) != 5:
        raise ValueError(
            'expected a 5-field playlist URI, got {!r}'.format(uri))
    _, _, username, _, playlist_id = parts

    client_credentials_manager = SpotifyClientCredentials()
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

    page = sp.user_playlist(username, playlist_id)['tracks']
    while page:
        for item in page['items']:
            track = item['track']
            # An entry may carry no track object at all, or an object that is
            # not a track; neither can be turned into a search string.
            if not track or track['type'] != 'track':
                continue
            artists = [
                a['name'] for a in track['artists'] if a['type'] == 'artist'
            ]
            yield ', '.join(artists) + ' - ' + track['name']
        # Playlist items are delivered in pages; follow the 'next' link so
        # long playlists are not silently cut off after the first page.
        page = sp.next(page) if page['next'] else None
def ytsearch(txt):
    """Search YouTube for ``txt`` and yield ``(url, title)`` for each hit.

    Args:
        txt: Free-text search query; it is URL-quoted before being sent.

    Yields:
        tuple: ``(absolute video URL, link text)`` for every element of the
        result page that has the ``yt-uix-tile-link`` class and an ``href``.
    """
    url = "https://www.youtube.com/results?search_query={}".format(
        urllib.parse.quote(txt))
    # Read inside a context manager so the HTTP response is always closed.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')
    # NOTE(review): this depends on the result page marking video links with
    # the 'yt-uix-tile-link' class -- confirm the served markup still does.
    for vid in soup.find_all(attrs={'class': 'yt-uix-tile-link'}):
        href = vid.get('href')
        if href:
            # hrefs are site-relative, so prefix the host to get a full URL.
            yield 'https://www.youtube.com' + href, vid.text
def jaccard_similarity(t1, t2):
    """Return the Jaccard similarity of the word sets of two titles.

    Each title is transliterated to ASCII, lower-cased, stripped of every
    character outside ``a-z`` and split into words; the words "hd" and "vs"
    are ignored.

    Args:
        t1: First title.
        t2: Second title.

    Returns:
        float: ``|A & B| / |A | B|`` for the two word sets, in ``[0, 1]``.
        ``0.0`` when neither title contains any usable word.
    """
    stopwords = frozenset(('hd', 'vs'))

    def normalize_title(t):
        # Lower-case AFTER transliterating: unidecode can emit capital
        # letters, which the a-z filter below would otherwise blank out.
        t = unidecode(t).lower()
        t = re.sub(r'[^a-z]', ' ', t)
        return {token for token in t.split() if token not in stopwords}

    s1, s2 = normalize_title(t1), normalize_title(t2)
    union = s1 | s2
    if not union:
        # Nothing to compare (e.g. empty or digits-only titles); avoid 0/0.
        return 0.0
    return len(s1 & s2) / len(union)
# Options handed to youtube_dl.YoutubeDL: pick the best audio stream (falling
# back to the best overall format) and post-process it with the
# FFmpegExtractAudio postprocessor into mp3 at the '320' quality setting.
ydl_opts = {
    'format': 'bestaudio/best',
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '320',
    }],
}
# For every playlist track, search YouTube, pick the best-matching result and
# download it with the options in ydl_opts.
for t in spotify_get_playlist_tracks('...'):
    # Each candidate is keyed by negated similarity, then by search rank, so
    # the smallest tuple is the most similar title, with ties going to the
    # earlier search result.
    res = [
        (-jaccard_similarity(t, title), rank, url, title)
        for rank, (url, title) in enumerate(ytsearch(t))
    ]
    if not res:
        print('WARNING: no search results for "{}"'.format(t))
        continue
    j, rank, url, title = min(res)
    # j == -1 means the word sets were identical; anything else is suspect.
    if j > -1:
        print('WARNING: possible bad match: "{}" <--> "{}"'.format(t, title))
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
