# Spotify Playlist Joy
#
# GitHub gist by @rvanbruggen (rvanbruggen/e78ef1002f2123822c1787c1e697acce),
# last active October 16, 2020.
import spotipy
from neo4j import GraphDatabase
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
# ------------------------------------ Configuration parameters ------------------------------------ #

# --- Spotify account / application credentials ---
# Spotify user ID.
user_id = "YOUR USER_ID"
# Spotify client ID.
client = "YOUR CLIENT"
# Spotify client secret.
secret = "YOUR SECRET"

# --- Source playlist ---
# Original public playlist with the songs to be sorted.
#   LONG alternative: "spotify:playlist:1eCqsRrwBAFc2lf5ZLGa5m"
#   SHORT (active):   "spotify:playlist:1BTunw40NV9HgFpLXQ7hpm"
playlist_uri = "spotify:playlist:1BTunw40NV9HgFpLXQ7hpm"

# --- neo4j connection ---
# URL of the neo4j database.
neo4j_url = "neo4j://localhost:7687"
# neo4j username. Defaults to 'neo4j'.
neo4j_username = "neo4j"
# neo4j password.
neo4j_password = "changeme"

# --- Spotify OAuth settings ---
# Spotify scope required to manage playlists.
scope = "playlist-modify-private"
# Spotify callback url. Set to localhost for development.
redirect_uri = "http://localhost:8888/callback"
# Where spotify caches the session variables.
cache_path = "spotify_cache.tmp"

# --- Feature switches ---
# Whether to create constraints.
create_constraints = True
# Whether to write back the generated playlists to spotify.
write_to_spotify = False
# Whether to plot the kmeans clusters used for playlists.
plot_kmeans_clusters = False

# --- Playlist generation tuning ---
# Cut off for playlists to be grouped as 'misc'.
min_playlist_size = 40
# Min size for playlists to be chopped up in smaller ones.
playlist_split_limit = 160
# Description of the generated playlists.
playlist_desc = "Generated using neo4j-playlist-builder."
# Number of keywords to use in dynamic playlist names.
playlist_keywords_count = 3
# Prefix to put in front of your spotify playlists.
playlist_prefix = "[NPB]"
# Generic keywords to not include in playlist names (kept as one pre-quoted, comma-separated string).
filtered_keywords = '"pop", "mellow", "new", "rock", "folk"'

# Shared Spotify client, authenticated with the client-credentials flow using the id/secret above.
spotify = spotipy.Spotify(
    client_credentials_manager=SpotifyClientCredentials(
        client_id=client,
        client_secret=secret,
    )
)
def load_graph_using_spotify_api():
    """Rebuild the neo4j graph from the configured Spotify playlist.

    Steps, in order:
      1. open a neo4j session and reset constraints and data;
      2. fetch tracks, albums, artists, related artists and genres from
         Spotify and write them as nodes;
      3. link the nodes with relationships;
      4. run the GDS similarity, PageRank and Louvain procedures on artists.

    The session is closed when the import finishes or fails.
    """
    neo4j = create_neo4j_session(url=neo4j_url, user=neo4j_username, password=neo4j_password)
    try:
        print("dropping and creating constraints...")
        recreate_contraints(neo4j)
        _create_nodes(neo4j)
        _create_relationships(neo4j)
        _run_graph_algorithms(neo4j)
    finally:
        # The original never released the session; close it on success and on error.
        neo4j.close()
    print("Done!")


def _create_nodes(neo4j):
    """Fetch playlist data from Spotify and create Track/Album/Artist/Genre nodes."""
    print("creating tracks...")
    tracks = get_tracks()
    tracks = get_track_audio_features(tracks)
    neo4j.run("UNWIND $tracks as track CREATE (t:Track{id: track.id}) SET t = track",
              parameters={'tracks': list(tracks.values())})

    print("creating albums...")
    albums = get_album_info(tracks)
    # The id must come from the UNWIND row (`album`), not from the node being created (`a`).
    neo4j.run("UNWIND $albums as album CREATE (a:Album{id: album.id}) SET a = album",
              parameters={'albums': list(albums.values())})

    print("creating artists...")
    artists = get_artist_info(tracks)
    # Same fix as for albums: read the id from the `artist` row.
    neo4j.run("UNWIND $artists as artist CREATE (a:Artist{id: artist.id}) SET a = artist",
              parameters={'artists': list(artists.values())})

    print("finding related artists..")
    related_artists = get_related_artists(artists)
    # MERGE because a related artist may already exist as a playlist artist.
    neo4j.run("""UNWIND $relatedartists as artist MERGE (a:Artist {id: artist.id}) SET a = artist """,
              parameters={'relatedartists': list(related_artists.values())})
    neo4j.run(_RELATE_ARTISTS_QUERY)

    print("creating genres..")
    genres = get_genres(albums, artists)
    neo4j.run("UNWIND $genres as genre MERGE (g:Genre{name: genre})",
              parameters={'genres': list(genres)})


def _create_relationships(neo4j):
    """Connect tracks, albums, artists and genres using the ids stored on the nodes."""
    print("Linking tracks to albums, genres, and artists...")
    neo4j.run("MATCH (t:Track), (a:Album{id: t.album}) CREATE (t)-[:TRACK_IN_ALBUM]->(a);")
    neo4j.run("MATCH (t:Track) UNWIND t.artists as artist MATCH (a:Artist{id: artist}) CREATE (t)-[:TRACK_HAS_ARTIST]->(a)")
    neo4j.run("MATCH (a:Artist) UNWIND a.genres as genre MATCH (g:Genre{name: genre}) CREATE (a)-[:ARTIST_HAS_GENRE]->(g)")
    neo4j.run("MATCH (a1:Artist)<--(t:Track)-->(a2:Artist) WHERE id(a1)<id(a2) MERGE (a1)-[:WORKED_WITH {track:t.uri}]->(a2)")
    neo4j.run("MATCH (ar:Artist)<--(t:Track)-->(al:Album) MERGE (al)-[:ALBUM_HAS_ARTIST]->(ar)")


def _run_graph_algorithms(neo4j):
    """Run the GDS procedures that write similarity, PageRank and community data for artists."""
    print("Calculate artist similarity using GDS..")
    neo4j.run(_ARTIST_SIMILARITY_QUERY)
    print("Calculate artist pagerank-spotify using GDS..")
    neo4j.run(_PAGERANK_SPOTIFY_QUERY)
    print("Calculate artist pagerank-workedwith using GDS..")
    neo4j.run(_PAGERANK_WORKED_WITH_QUERY)
    print("Calculate artist pagerank-similarity using GDS..")
    neo4j.run(_PAGERANK_SIMILARITY_QUERY)
    print("Calculate artist Louvain community using GDS..")
    neo4j.run(_LOUVAIN_QUERY)


# Links every artist carrying an `original_artist` id to that artist.
_RELATE_ARTISTS_QUERY = """MATCH (a:Artist) WHERE EXISTS (a.original_artist) WITH a
MATCH (a2:Artist{id: a.original_artist})
MERGE (a)-[:SPOTIFY_RELATES_TO]->(a2)"""

# Overlap similarity between artists based on the genres they share.
_ARTIST_SIMILARITY_QUERY = """
MATCH (item:`Artist`)-[:`ARTIST_HAS_GENRE`]->(category:`Genre`)
WITH {item:id(item), categories: collect(distinct id(category))} as userData
WITH collect(userData) as dataset
CALL gds.alpha.similarity.overlap.write({
data: dataset,
weightproperty: null,
nodeProjection: '*',
writeProperty: 'score',
writeRelationshipType: 'GDS_ARTIST_SIMILAR_OVERLAP',
similarityCutoff: 0.05,
degreeCutoff: 0 })
YIELD nodes, similarityPairs, writeRelationshipType, writeProperty, min, max, mean, stdDev, p25, p50, p75, p90, p95, p99, p999, p100
RETURN nodes, similarityPairs, writeRelationshipType, writeProperty, min, max, mean, p95"""

# PageRank over the directed SPOTIFY_RELATES_TO relationships.
_PAGERANK_SPOTIFY_QUERY = """
CALL gds.pageRank.write({
nodeProjection: 'Artist',
relationshipProjection: {
relType: {
type: 'SPOTIFY_RELATES_TO',
orientation: 'NATURAL',
properties: {}
}
},
relationshipWeightProperty: null,
dampingFactor: 0.85,
maxIterations: 20,
writeProperty: 'pagerank-spotify'})
"""

# PageRank over WORKED_WITH, treated as undirected.
_PAGERANK_WORKED_WITH_QUERY = """
CALL gds.pageRank.write({
nodeProjection: 'Artist',
relationshipProjection: {
relType: {
type: 'WORKED_WITH',
orientation: 'UNDIRECTED',
properties: {}
}
},
relationshipWeightProperty: null,
dampingFactor: 0.85,
maxIterations: 20,
writeProperty: 'pagerank-workedwith'
})
"""

# PageRank over the similarity relationships written by _ARTIST_SIMILARITY_QUERY.
_PAGERANK_SIMILARITY_QUERY = """
CALL gds.pageRank.write({
nodeProjection: 'Artist',
relationshipProjection: {
relType: {
type: 'GDS_ARTIST_SIMILAR_OVERLAP',
orientation: 'UNDIRECTED',
properties: {}
}
},
relationshipWeightProperty: null,
dampingFactor: 0.85,
maxIterations: 20,
writeProperty: 'pagerank-similarity'
})
"""

# Louvain community detection over SPOTIFY_RELATES_TO, treated as undirected.
# NOTE(review): seedProperty/nodeProperties use 'valence', which the code in this file never
# sets on Artist nodes explicitly (presumably it is a track audio feature) - confirm intent.
_LOUVAIN_QUERY = """
CALL gds.louvain.write({
nodeProjection: 'Artist',
relationshipProjection: {
relType: {
type: 'SPOTIFY_RELATES_TO',
orientation: 'UNDIRECTED',
properties: {}
}
},
relationshipWeightProperty: null,
includeIntermediateCommunities: false,
seedProperty: 'valence',
nodeProperties: [
'valence'
],
writeProperty: 'louvain-community'})
"""
def recreate_contraints(neo4j):
    """Reset the database: drop all constraints, delete all data, recreate the unique constraints.

    Args:
        neo4j: An object with a ``run(query)`` method (a neo4j session in this
            file). ``run("CALL db.constraints")`` must yield records that
            expose a ``'description'`` entry.
    """
    # Read the whole listing first so that no result is still being iterated
    # while further statements run on the same session.
    descriptions = [record['description'] for record in neo4j.run("CALL db.constraints")]
    for description in descriptions:
        neo4j.run("DROP " + description)

    # Wipe existing data before the constraints are (re)created, so that
    # leftover duplicate values cannot make constraint creation fail.
    neo4j.run("MATCH (n) DETACH DELETE n;")

    unique_constraints = (
        "CREATE CONSTRAINT ON (g:Genre) ASSERT g.name IS UNIQUE",
        "CREATE CONSTRAINT ON (a:Album) ASSERT a.id IS UNIQUE",
        "CREATE CONSTRAINT ON (a:Artist) ASSERT a.id IS UNIQUE",
        "CREATE CONSTRAINT ON (t:Track) ASSERT t.id IS UNIQUE",
    )
    for statement in unique_constraints:
        neo4j.run(statement)
def get_tracks():
    """Download every track of ``playlist_uri`` and flatten it for storage.

    For each kept track, ``artists`` becomes a list of artist ids, ``album``
    becomes the album id, and every remaining dict-valued field is set to
    None.

    All result pages are processed. The previous loop condition
    (``results['next'] or results['previous'] is None``) stopped before the
    final page whenever the playlist spanned more than one page.

    Returns:
        dict mapping track id -> flattened track dict. Entries without a
        track object or without an id are skipped.
    """
    items = {}
    page = spotify.playlist(playlist_uri)['tracks']
    while page:
        for entry in page['items']:
            track = entry.get('track') if entry else None
            # Nothing to key on without a track object or an id.
            if not track or not track.get('id'):
                continue

            track['artists'] = [
                artist if isinstance(artist, str) else artist['id']
                for artist in track['artists']
            ]
            album = track.get('album')
            if isinstance(album, dict):
                track['album'] = album.get('id')

            # Blank any nested objects that are left after flattening.
            for field in list(track):
                if isinstance(track[field], dict):
                    track[field] = None

            items[track['id']] = track

        # Follow the paging link until there is no further page.
        page = spotify.next(page) if page['next'] else None
    return items
def get_track_audio_features(tracks, page_size=100):
    """Merge Spotify audio features into the given track dicts.

    Args:
        tracks: dict mapping track id -> track dict; the dicts are updated in
            place.
        page_size: number of track ids sent per ``spotify.audio_features``
            call.

    Returns:
        The same ``tracks`` dict. Every feature key except ``'type'`` is
        copied onto the matching track.
    """
    # Build the id list once instead of once per page.
    track_ids = list(tracks)
    for start in range(0, len(track_ids), page_size):
        batch = track_ids[start:start + page_size]
        audio_features = spotify.audio_features(tracks=batch)
        for track_features in audio_features or ():
            # Entries can be None; there is nothing to merge for those.
            if track_features is None:
                continue
            target = tracks.get(track_features['id'])
            if target is None:
                continue
            for feature, value in track_features.items():
                # 'type' is skipped so it does not overwrite the track's own 'type' field.
                if feature != 'type':
                    target[feature] = value
    return tracks
def get_album_info(tracks, page_size=20):
    """Fetch the albums referenced by ``tracks`` and flatten them for storage.

    Args:
        tracks: dict mapping track id -> track dict whose ``'album'`` entry is
            an album id.
        page_size: number of album ids sent per ``spotify.albums`` call.

    Returns:
        dict mapping album id -> album dict with these fields rewritten:
          - ``artists``: list of artist ids;
          - ``images``: one image URL, or None when the album has no images;
          - ``external_ids`` / ``external_urls``: None;
          - ``tracks``: the track count;
          - ``copyrights``: the number of copyright entries.
    """
    # De-duplicate while keeping a stable order; skip tracks without an album id.
    album_ids = list(dict.fromkeys(
        track['album'] for track in tracks.values() if track.get('album')
    ))

    all_albums = {}
    # Stepping through the list never produces an empty batch, so the API is
    # not called with zero ids (which happened when the count was 0 or an
    # exact multiple of page_size).
    for start in range(0, len(album_ids), page_size):
        results = spotify.albums(album_ids[start:start + page_size])
        for album in results['albums']:
            if album is None:
                continue

            album['artists'] = [artist['id'] for artist in album['artists']]

            # Prefer the second image as before; fall back to the only image.
            images = album.get('images') or []
            album['images'] = images[min(1, len(images) - 1)]['url'] if images else None

            album['external_ids'] = None
            album['external_urls'] = None

            # 'tracks' is a paging dict; len() of it counts its keys, not the tracks.
            track_page = album['tracks']
            if isinstance(track_page, dict) and 'total' in track_page:
                album['tracks'] = track_page['total']
            else:
                album['tracks'] = len(track_page)

            album['copyrights'] = len(album['copyrights'])
            all_albums[album['id']] = album
    return all_albums
def get_artist_info(items, page_size=50):
    """Fetch details for every artist referenced by the given tracks.

    Args:
        items: dict mapping track id -> track dict whose ``'artists'`` entry
            is a list of artist ids.
        page_size: number of artist ids sent per ``spotify.artists`` call.

    Returns:
        dict mapping artist id -> artist dict with these fields rewritten:
          - ``images``: one image URL (left unchanged when the list is empty);
          - ``followers``: the follower total;
          - ``external_urls``: None.
    """
    # Collect the distinct artist ids in a stable order.
    artist_ids = []
    seen = set()
    for track in items.values():
        for artist_id in track['artists']:
            if artist_id and artist_id not in seen:
                seen.add(artist_id)
                artist_ids.append(artist_id)

    all_artists = {}
    # Stepping through the list never produces an empty batch, so the API is
    # not called with zero ids.
    for start in range(0, len(artist_ids), page_size):
        results = spotify.artists(artist_ids[start:start + page_size])
        for artist in results['artists']:
            if artist is None:
                continue
            images = artist['images']
            if images:
                # Prefer the second image as before; fall back to the only image.
                artist['images'] = images[min(1, len(images) - 1)]['url']
            artist['followers'] = artist['followers']['total']
            artist['external_urls'] = None
            all_artists[artist['id']] = artist
    return all_artists
def get_related_artists(items, page_size=50):
    """Fetch Spotify's related artists for every artist id in ``items``.

    Args:
        items: iterable of artist ids (a dict keyed by artist id in this file).
        page_size: unused. The lookup is one API call per artist, so batching
            has no effect; the parameter is kept so existing callers still
            work.

    Returns:
        dict mapping related-artist id -> artist dict with these changes:
          - ``original_artist``: id of the artist it was found through (if it
            is related to several, the last one processed wins);
          - ``images``: one image URL (left unchanged when the list is empty);
          - ``followers``: the follower total;
          - ``external_urls``: None.
    """
    new_artists = {}
    for artist_id in items:
        related = spotify.artist_related_artists(artist_id)
        for related_artist in related['artists']:
            related_artist['original_artist'] = artist_id
            images = related_artist['images']
            if images:
                # Prefer the second image as before; fall back to the only image.
                related_artist['images'] = images[min(1, len(images) - 1)]['url']
            related_artist['followers'] = related_artist['followers']['total']
            related_artist['external_urls'] = None
            new_artists[related_artist['id']] = related_artist
    return new_artists
def get_genres(albums, artists):
    """Collect the distinct genre names used by the given albums and artists.

    Args:
        albums: dict mapping album id -> album dict with a ``'genres'`` list.
        artists: dict mapping artist id -> artist dict with a ``'genres'`` list.

    Returns:
        set of genre names. Entries whose ``'genres'`` is missing or None
        contribute nothing.
    """
    genres = set()
    for collection in (albums, artists):
        for entry in collection.values():
            # Treat a missing entry or missing/None genre list as "no genres".
            genres.update((entry or {}).get('genres') or ())
    return genres
def create_neo4j_session(url, user, password):
    """Open a neo4j driver for ``url`` and return a new session from it.

    Args:
        url: connection URL passed to ``GraphDatabase.driver``.
        user: username for basic authentication.
        password: password for basic authentication.

    Returns:
        The session created by the driver. The driver object itself is not
        returned to the caller.
    """
    credentials = (user, password)
    return GraphDatabase.driver(url, auth=credentials).session()
# Run the full import only when this file is executed as a script.
if __name__ == "__main__":
    load_graph_using_spotify_api()
# End of gist (GitHub page footer omitted).