Skip to content

Instantly share code, notes, and snippets.

@fferri
Created March 14, 2017 20:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fferri/526d85eb23392d6eb29c2485b988726f to your computer and use it in GitHub Desktop.
Save fferri/526d85eb23392d6eb29c2485b988726f to your computer and use it in GitHub Desktop.
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy
import json
import urllib
import urllib.request
import urllib.parse
from bs4 import BeautifulSoup
import re
from unidecode import unidecode
import youtube_dl
def spotify_get_playlist_tracks(uri):
    """Yield one "Artist[, Artist...] - Title" string per track of a playlist.

    Args:
        uri: Playlist URI made of exactly five colon-separated fields, with
            the user name in the third field and the playlist id in the
            fifth (e.g. ``spotify:user:<username>:playlist:<playlist_id>``).

    Yields:
        str: The artist names joined by ", ", then " - ", then the track name.

    Raises:
        ValueError: If ``uri`` does not split into exactly five fields.
            Because this is a generator, the error surfaces on first
            iteration rather than at call time.
    """
    # No explicit credentials are passed, so spotipy falls back to its own
    # defaults (presumably environment variables -- see the spotipy docs).
    client_credentials_manager = SpotifyClientCredentials()
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
    _, _, username, _, playlist_id = uri.split(':')

    # The playlist object only embeds the first page of tracks; keep
    # following the paging object's "next" link so that long playlists are
    # not silently truncated.
    page = sp.user_playlist(username, playlist_id)['tracks']
    while page:
        for item in page['items']:
            track = item['track']
            # The API may hand back a null track for an entry (e.g. one that
            # is no longer available); skip it instead of crashing.
            if not track or track['type'] != 'track':
                continue
            artists = [
                a['name'] for a in track['artists'] if a['type'] == 'artist'
            ]
            yield ', '.join(artists) + ' - ' + track['name']
        page = sp.next(page) if page.get('next') else None
def ytsearch(txt):
    """Yield ``(url, title)`` pairs scraped from a YouTube search for *txt*.

    Args:
        txt: Free-text search query; it is percent-encoded before being put
            into the request URL.

    Yields:
        tuple: ``(absolute_video_url, link_text)`` for every element of the
        results page that carries the ``yt-uix-tile-link`` CSS class, in
        document order.

    Raises:
        urllib.error.URLError: If the results page cannot be fetched.
    """
    url = "https://www.youtube.com/results?search_query={}".format(
        urllib.parse.quote(txt)
    )
    # Read the body inside a context manager so the HTTP connection is
    # released even if the caller abandons this generator early.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')
    # NOTE(review): this depends on YouTube's server-rendered markup; if the
    # class name disappears from the page the generator simply yields nothing.
    for vid in soup.find_all(attrs={'class': 'yt-uix-tile-link'}):
        yield 'https://www.youtube.com' + vid['href'], vid.text
def jaccard_similarity(t1, t2):
    """Return the Jaccard similarity of the word sets of two titles.

    Each title is transliterated to ASCII, lowercased, stripped of every
    character outside ``a-z``, split on whitespace, and filtered against a
    small stopword list. The score is ``|A & B| / |A | B|`` over the two
    resulting token sets.

    Args:
        t1: First title.
        t2: Second title.

    Returns:
        float: A value in ``[0.0, 1.0]``; ``0.0`` when neither title
        contains any usable token (instead of dividing by zero).
    """
    stopwords = {'hd', 'vs'}

    def normalize_title(t):
        # Lowercase *after* transliteration: unidecode can emit uppercase
        # ASCII for some scripts, and the [^a-z] filter below would
        # otherwise erase those letters.
        t = unidecode(t).lower()
        t = re.sub('[^a-z]', ' ', t)
        return {token for token in t.split() if token not in stopwords}

    s1 = normalize_title(t1)
    s2 = normalize_title(t2)
    union = s1 | s2
    if not union:
        # Both titles normalised to nothing: no evidence of a match.
        return 0.0
    return len(s1 & s2) / len(union)
# Post-processing step handed to youtube_dl: extract the audio stream with
# FFmpeg and store it as MP3 using the preferred quality setting '320'.
_MP3_POSTPROCESSOR = {
    'key': 'FFmpegExtractAudio',
    'preferredcodec': 'mp3',
    'preferredquality': '320',
}

# Options passed to every youtube_dl.YoutubeDL instance: pick the best
# audio-only format when one exists, otherwise the best overall format,
# then run the MP3 extraction step above.
ydl_opts = {
    'format': 'bestaudio/best',
    'postprocessors': [_MP3_POSTPROCESSOR],
}
# For every playlist track: search YouTube, keep the result whose title is
# most similar to "artists - title", and download it as audio.
for t in spotify_get_playlist_tracks('...'):
    print(t)
    # Similarity is stored negated so that the smallest tuple is the best
    # match; the search rank comes second, so ties go to the result YouTube
    # listed first (ranks are unique, so url/title are never compared).
    res = [
        (-jaccard_similarity(t, title), rank, url, title)
        for rank, (url, title) in enumerate(ytsearch(t))
    ]
    if not res:
        # An empty result list would otherwise raise IndexError and abort
        # the whole run; report the track and move on to the next one.
        print('WARNING: no YouTube results for "{}"; skipping'.format(t))
        continue
    j, rank, url, title = min(res)
    # j is the negated similarity: anything above -1 is not a perfect
    # token-set match between the track and the video title.
    if j > -1:
        print('WARNING: possible bad match: "{}" <--> "{}"'.format(t, title))
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment