Skip to content

Instantly share code, notes, and snippets.

@rvanbruggen
Last active July 13, 2023 11:32
Show Gist options
  • Save rvanbruggen/23a0eccbffa663a9203b8fea5be58468 to your computer and use it in GitHub Desktop.
Save rvanbruggen/23a0eccbffa663a9203b8fea5be58468 to your computer and use it in GitHub Desktop.
Spotify Playlist importer, queries, and dashboard
import spotipy
from neo4j import GraphDatabase
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
# ------------------------------------ Configuration parameters ------------------------------------ #
# Replace every "<<...>>" placeholder with your own value before running the script.
user_id = "<<YOUR SPOTIFY USER ID>>" # Spotify user ID. NOTE(review): not read by any code visible in this file.
client = "<<YOUR SPOTIFY CLIENT ID>>" # Spotify client ID; used to build the `spotify` client below.
secret = "<<YOUR SPOTIFY CLIENT SECRET>>" # Spotify client secret; used to build the `spotify` client below.
playlist_uri = "spotify:playlist:1eCqsRrwBAFc2lf5ZLGa5m" # Playlist whose tracks are imported (read by get_tracks).
neo4j_url = "neo4j://localhost:7687" # Connection URL of the Neo4j database (handed to GraphDatabase.driver).
neo4j_username = "<<YOUR NEO4J USERNAME>>" # Neo4j username (presumably 'neo4j' on a default install; confirm).
neo4j_password = "<<YOUR NEO4J DB PASSWORD>>" # Neo4j password.
scope = 'playlist-modify-private' # Spotify OAuth scope. NOTE(review): not read by any code visible in this file.
redirect_uri = 'http://localhost:8888/callback' # Spotify OAuth callback URL. NOTE(review): not read by any visible code.
cache_path = "spotify_cache.tmp" # Spotify session cache file. NOTE(review): not read by any visible code.
create_constraints = True # Whether to create constraints. NOTE(review): never checked; constraints are always recreated.
# Module-level Spotify client shared by all get_* helpers; authenticates with the client-credentials flow.
spotify = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(client_id=client, client_secret=secret))
# ------------------------------------ Configuration parameters end --------------------------------- #
def load_graph_using_spotify_api():
    """Rebuild the Neo4j graph from the configured Spotify playlist.

    Steps, in order:
      1. open a session with the module-level ``neo4j_*`` settings;
      2. drop/recreate constraints and wipe the database;
      3. create Track, Album, Artist and Genre nodes from the Spotify API;
      4. link the nodes to each other;
      5. run the Graph Data Science procedures that score the artists.

    The session is always closed, even when one of the steps raises.
    """
    session = create_neo4j_session(url=neo4j_url, user=neo4j_username, password=neo4j_password)
    try:
        print("dropping and creating constraints...")
        recreate_contraints(session)
        _import_nodes(session)
        _link_nodes(session)
        _run_graph_algorithms(session)
    finally:
        # Release the session instead of relying on interpreter shutdown.
        session.close()


def _import_nodes(session):
    """Fetch the playlist data from Spotify and create Track/Album/Artist/Genre nodes."""
    print("creating tracks...")
    tracks = get_tracks()
    tracks = get_track_audio_features(tracks)
    session.run("UNWIND $tracks as track CREATE (t:Track{id: track.id}) SET t = track",
                parameters={'tracks': list(tracks.values())})

    print("creating albums...")
    albums = get_album_info(tracks)
    # The id must come from the UNWIND variable: a node cannot reference itself
    # (`a.id`) inside the CREATE clause that introduces it.
    session.run("UNWIND $albums as album CREATE (a:Album{id: album.id}) SET a = album",
                parameters={'albums': list(albums.values())})

    print("creating artists...")
    artists = get_artist_info(tracks)
    session.run("UNWIND $artists as artist CREATE (a:Artist{id: artist.id}) SET a = artist",
                parameters={'artists': list(artists.values())})

    print("finding related artists..")
    related_artists = get_related_artists(artists)
    # MERGE (not CREATE): a related artist may already exist as a playlist artist.
    session.run("""UNWIND $relatedartists as artist MERGE (a:Artist {id: artist.id}) SET a = artist """,
                parameters={'relatedartists': list(related_artists.values())})
    # `IS NOT NULL` replaces the deprecated `EXISTS(a.original_artist)` property check.
    session.run("""MATCH (a:Artist) WHERE a.original_artist IS NOT NULL WITH a
        MATCH (a2:Artist{id: a.original_artist})
        MERGE (a)-[:SPOTIFY_RELATES_TO]->(a2)""")

    print("creating genres..")
    genres = get_genres(albums, artists)
    session.run("UNWIND $genres as genre MERGE (g:Genre{name: genre})",
                parameters={'genres': list(genres)})


def _link_nodes(session):
    """Create the relationships between tracks, albums, artists and genres."""
    print("Linking tracks to albums, genres, and artists...")
    session.run("MATCH (t:Track), (a:Album{id: t.album}) CREATE (t)-[:TRACK_IN_ALBUM]->(a);")
    session.run("MATCH (t:Track) UNWIND t.artists as artist MATCH (a:Artist{id: artist}) CREATE (t)-[:TRACK_HAS_ARTIST]->(a)")
    session.run("MATCH (a:Artist) UNWIND a.genres as genre MATCH (g:Genre{name: genre}) CREATE (a)-[:ARTIST_HAS_GENRE]->(g)")
    # id(a1) < id(a2) keeps one direction per artist pair.
    session.run("MATCH (a1:Artist)<--(t:Track)-->(a2:Artist) WHERE id(a1)<id(a2) MERGE (a1)-[:WORKED_WITH {track:t.uri}]->(a2)")
    session.run("MATCH (ar:Artist)<--(t:Track)-->(al:Album) MERGE (al)-[:ALBUM_HAS_ARTIST]->(ar)")


def _run_graph_algorithms(session):
    """Run the GDS similarity, PageRank and Louvain procedures on the artists."""
    print("Calculate artist similarity using GDS..")
    session.run("""
        MATCH (item:`Artist`)-[:`ARTIST_HAS_GENRE`]->(category:`Genre`)
        WITH {item:id(item), categories: collect(distinct id(category))} as userData
        WITH collect(userData) as dataset
        CALL gds.alpha.similarity.overlap.write({
        data: dataset,
        weightproperty: null,
        nodeProjection: '*',
        writeProperty: 'score',
        writeRelationshipType: 'GDS_ARTIST_SIMILAR_OVERLAP',
        similarityCutoff: 0.05,
        degreeCutoff: 0 })
        YIELD nodes, similarityPairs, writeRelationshipType, writeProperty, min, max, mean, stdDev, p25, p50, p75, p90, p95, p99, p999, p100
        RETURN nodes, similarityPairs, writeRelationshipType, writeProperty, min, max, mean, p95""")

    print("Calculate artist pagerank-spotify using GDS..")
    session.run("""
        CALL gds.pageRank.write({
        nodeProjection: 'Artist',
        relationshipProjection: {
        relType: {
        type: 'SPOTIFY_RELATES_TO',
        orientation: 'NATURAL',
        properties: {}
        }
        },
        relationshipWeightProperty: null,
        dampingFactor: 0.85,
        maxIterations: 20,
        writeProperty: 'pagerank-spotify'})
        """)

    print("Calculate artist pagerank-workedwith using GDS..")
    session.run("""
        CALL gds.pageRank.write({
        nodeProjection: 'Artist',
        relationshipProjection: {
        relType: {
        type: 'WORKED_WITH',
        orientation: 'UNDIRECTED',
        properties: {}
        }
        },
        relationshipWeightProperty: null,
        dampingFactor: 0.85,
        maxIterations: 20,
        writeProperty: 'pagerank-workedwith'
        })
        """)

    print("Calculate artist pagerank-similarity using GDS..")
    session.run("""
        CALL gds.pageRank.write({
        nodeProjection: 'Artist',
        relationshipProjection: {
        relType: {
        type: 'GDS_ARTIST_SIMILAR_OVERLAP',
        orientation: 'UNDIRECTED',
        properties: {}
        }
        },
        relationshipWeightProperty: null,
        dampingFactor: 0.85,
        maxIterations: 20,
        writeProperty: 'pagerank-similarity'
        })
        """)

    print("Calculate artist Louvain community using GDS..")
    # NOTE(review): no statement visible in this file writes a 'valence'
    # property on Artist nodes; confirm the seed/node property is intended.
    session.run("""
        CALL gds.louvain.write({
        nodeProjection: 'Artist',
        relationshipProjection: {
        relType: {
        type: 'SPOTIFY_RELATES_TO',
        orientation: 'UNDIRECTED',
        properties: {}
        }
        },
        relationshipWeightProperty: null,
        includeIntermediateCommunities: false,
        seedProperty: 'valence',
        nodeProperties: [
        'valence'
        ],
        writeProperty: 'louvain-community'})
        """)
# NOTE(review): indentation was lost in this copy of the file, so the scope of
# this statement is ambiguous. It is assumed to be module-level (printed once
# when the script starts, before the import runs) - confirm against the original.
print("Starting the loading!")
def recreate_contraints(neo4j):
    """Reset the database: drop all constraints, delete all data, recreate constraints.

    Args:
        neo4j: An open session-like object exposing ``run(query)``.

    The data is wiped *before* the uniqueness constraints are created, so that
    leftover duplicate nodes from an earlier run cannot make the constraint
    creation fail.
    """
    # Read the whole listing first so the DROP statements are not issued while
    # the listing result is still being iterated on the same session.
    existing_constraints = list(neo4j.run("CALL db.constraints"))
    for constraint in existing_constraints:
        # The 'description' column holds the constraint definition, which is
        # appended verbatim to DROP.
        neo4j.run("DROP " + constraint['description'])

    neo4j.run("MATCH (n) DETACH DELETE n;")

    for statement in (
        "CREATE CONSTRAINT ON (g:Genre) ASSERT g.name IS UNIQUE",
        "CREATE CONSTRAINT ON (a:Album) ASSERT a.id IS UNIQUE",
        "CREATE CONSTRAINT ON (a:Artist) ASSERT a.id IS UNIQUE",
        "CREATE CONSTRAINT ON (t:Track) ASSERT t.id IS UNIQUE",
    ):
        neo4j.run(statement)
def get_tracks():
    """Return the tracks of the configured playlist, keyed by track id.

    Reads the module-level ``spotify`` client and ``playlist_uri`` and walks
    every page of the playlist. Each stored track is flattened so it can be
    written as node properties:

    * ``artists`` becomes a list of artist ids,
    * ``album`` becomes the album id,
    * any remaining dict-valued field is replaced by ``None``.

    Entries without a track object or without a track id are skipped.

    Returns:
        dict: ``{track_id: flattened_track_dict}``.
    """
    page = spotify.playlist(playlist_uri)['tracks']
    items = {}
    # Walk pages until there is no 'next' link. The previous loop condition
    # (`next or previous is None`) was false on the final page of a multi-page
    # playlist, so that page was never processed.
    while page:
        for entry in page["items"]:
            track = entry['track']
            if not track or not track['id']:
                continue
            track['artists'] = [artist if isinstance(artist, str) else artist['id']
                                for artist in track['artists']]
            album = track['album']
            track['album'] = album if isinstance(album, str) else album['id']
            # Collect the keys first, then overwrite, so the dict is not
            # modified while it is being iterated.
            nested_fields = [field for field, value in track.items() if isinstance(value, dict)]
            for field in nested_fields:
                track[field] = None
            items[track['id']] = track
        page = spotify.next(page) if page['next'] else None
    return items
def get_track_audio_features(tracks, page_size=100):
    """Merge Spotify audio features into the given track dicts, in place.

    Args:
        tracks: ``{track_id: track_dict}``; every track dict is updated with
            the feature values returned for its id.
        page_size: Number of track ids sent per ``spotify.audio_features`` call.

    Returns:
        dict: The same ``tracks`` mapping that was passed in.
    """
    # Build the id list once instead of on every page.
    track_ids = list(tracks)
    for start in range(0, len(track_ids), page_size):
        batch = track_ids[start:start + page_size]
        # `or []` tolerates a response of None for the whole batch.
        for track_features in spotify.audio_features(tracks=batch) or []:
            if track_features is None:
                # No features available for this particular track.
                continue
            target = tracks[track_features['id']]
            for feature, value in track_features.items():
                # 'type' is left out so it does not overwrite the track's own 'type'.
                if feature != 'type':
                    target[feature] = value
    return tracks
def get_album_info(tracks, page_size=20):
    """Fetch and flatten the albums referenced by the given tracks.

    Args:
        tracks: ``{track_id: track_dict}`` where ``track_dict['album']`` is an
            album id.
        page_size: Number of album ids sent per ``spotify.albums`` call.

    Returns:
        dict: ``{album_id: album_dict}`` with nested values reduced to
        property-friendly ones: ``artists`` -> list of artist ids,
        ``images`` -> a single image URL (or ``None``), ``tracks`` -> track
        count, ``copyrights`` -> number of copyright entries,
        ``external_ids`` / ``external_urls`` -> ``None``.
    """
    album_ids = list({track['album'] for track in tracks.values()})
    all_albums = {}
    # Integer stepping never yields an empty batch, so no request is sent with
    # an empty id list when the album count is 0 or a multiple of page_size.
    for start in range(0, len(album_ids), page_size):
        results = spotify.albums(album_ids[start:start + page_size])
        for album in results['albums']:
            if album is None:
                # Skip ids the API returned no album for.
                continue
            album['artists'] = [artist['id'] for artist in album['artists']]
            images = album['images']
            # Prefer the second image as before; fall back to the first when
            # only one exists, and to None when there are no images at all.
            album['images'] = images[min(1, len(images) - 1)]['url'] if images else None
            album['external_ids'] = None
            album['external_urls'] = None
            track_page = album['tracks']
            # When 'tracks' is a paging dict, len() would count its keys, not
            # the tracks, so use its 'total' field instead.
            if isinstance(track_page, dict) and 'total' in track_page:
                album['tracks'] = track_page['total']
            else:
                album['tracks'] = len(track_page)
            album['copyrights'] = len(album['copyrights'])
            all_albums[album['id']] = album
    return all_albums
def get_artist_info(items, page_size=50):
    """Fetch and flatten the artists referenced by the given tracks.

    Args:
        items: ``{track_id: track_dict}`` where ``track_dict['artists']`` is a
            list of artist ids.
        page_size: Number of artist ids sent per ``spotify.artists`` call.

    Returns:
        dict: ``{artist_id: artist_dict}`` where ``images`` is reduced to one
        image URL (left untouched when the artist has no images),
        ``followers`` to the follower total and ``external_urls`` to ``None``.
    """
    artist_ids = set()
    for track in items.values():
        artist_ids.update(track['artists'])
    artist_ids = list(artist_ids)

    all_artists = {}
    # Integer stepping never yields an empty batch, so no request is sent with
    # an empty id list when the artist count is 0 or a multiple of page_size.
    for start in range(0, len(artist_ids), page_size):
        results = spotify.artists(artist_ids[start:start + page_size])
        for artist in results['artists']:
            if artist is None:
                # Skip ids the API returned no artist for.
                continue
            images = artist['images']
            if images:
                # Prefer the second image as before; use the first when it is
                # the only one instead of raising IndexError.
                artist['images'] = images[min(1, len(images) - 1)]['url']
            artist['followers'] = artist['followers']['total']
            artist['external_urls'] = None
            all_artists[artist['id']] = artist
    return all_artists
def get_related_artists(items, page_size=50):
    """Fetch the artists Spotify lists as related to each given artist.

    Args:
        items: Iterable of artist ids (a dict keyed by artist id works).
        page_size: Unused; kept for backward compatibility. The lookup is done
            one artist at a time, so batching has no effect.

    Returns:
        dict: ``{related_artist_id: artist_dict}``. Each dict records the
        artist it was found through in ``original_artist``; when a related
        artist is returned for several originals, the last one wins.
    """
    new_artists = {}
    for artist_id in items:
        response = spotify.artist_related_artists(artist_id)
        for related_artist in response['artists']:
            related_artist['original_artist'] = artist_id
            images = related_artist['images']
            if images:
                # Prefer the second image as before; use the first when it is
                # the only one instead of raising IndexError.
                related_artist['images'] = images[min(1, len(images) - 1)]['url']
            related_artist['followers'] = related_artist['followers']['total']
            related_artist['external_urls'] = None
            new_artists[related_artist['id']] = related_artist
    return new_artists
def get_genres(albums, artists):
    """Return the set of distinct genre names found on the albums and artists.

    Args:
        albums: ``{album_id: album_dict}``; each dict has a ``'genres'`` list.
        artists: ``{artist_id: artist_dict}``; each dict has a ``'genres'`` list.

    Returns:
        set: Every genre name that occurs at least once.
    """
    found = set()
    for collection in (albums, artists):
        for key in collection:
            found.update(collection[key]['genres'])
    return found
def create_neo4j_session(url, user, password):
    """Open a new Neo4j session.

    Args:
        url: Connection URL handed to ``GraphDatabase.driver``.
        user: Username for authentication.
        password: Password for authentication.

    Returns:
        A session created from a freshly built driver. The driver itself is
        not returned, so the caller can only close the session.
    """
    credentials = (user, password)
    return GraphDatabase.driver(url, auth=credentials).session()
# Script entry point: run the full import only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    load_graph_using_spotify_api()
    print("Done!")
// Structure of the graph: number of nodes per label set and number of
// relationships per type, combined into one table.
match (n)
return "Node" as Type,labels(n) as Name,count(n) as Count
union
match ()-[r]->()
return "Relationship" as Type,type(r) as Name, count(r) as Count;

// Links between two artists: all shortest paths (any relationship type, any
// direction) between artists whose names contain "BRUCE" and "TOM".
match (a1:Artist), (a2:Artist),
path = allshortestpaths ((a1)-[*]-(a2))
where toUpper(a1.name) contains "BRUCE"
and toUpper(a2.name) contains "TOM"
return path
limit 10;

// Table of interesting artists: top 10 by the 'pagerank-spotify' score,
// with the two other PageRank scores shown alongside.
match (a:Artist)
return a.name as ArtistName, a.`pagerank-spotify` as SpotifyPagerank, a.`pagerank-workedwith` as WorkedWithPageRank, a.`pagerank-similarity` as PageRankSimilarity
order by a.`pagerank-spotify` desc
limit 10;

// Neighborhood of the most important artists: paths of up to two hops,
// ordered by the 'pagerank-spotify' score of the starting artist.
match path = ((a:Artist)-[*..2]-(conn))
return path
order by a.`pagerank-spotify` desc
limit 10;

// Some stats about the number of tracks per artist.
match (a:Artist)<--(t:Track)
return a.name as Artist, count(t) as NumberOfTracks
order by NumberOfTracks desc
limit 10;

// Some stats about the number of tracks per artist and album.
// NOTE(review): the trailing `-->(a)` matches once per ALBUM_HAS_ARTIST
// relationship of the album, so count(t) is multiplied by the number of
// artists linked to that album - confirm this is intended.
match (ar:Artist)<--(t:Track)-->(al:Album)-->(a)
return ar.name, al.name, count(t)
order by count(t) desc
limit 10;
{
"title": "Welcome to my Spotify Dashboard!",
"version": "1.0",
"editable": true,
"reports": [
{
"title": "Based on the following projects",
"width": 12,
"height": 4,
"type": "text",
"query": "* [Niels' Spotify Playlist Builder](https://nielsdejong.nl/neo4j%20projects/2020/09/23/spotify-playlist-builder.html)\n* [Niels' NeoDash project](https://nielsdejong.nl/neo4j%20projects/2020/11/16/neodash.html) - which enables this page!\n\n\n![Spotify logo](https://developer.spotify.com/assets/branding-guidelines/logo@2x.png)\n\nIn this Dashboard, we will show you how you can take a look at the small Spotify Graph that we created!\nYou will find all the code on [GitHub, of course](https://gist.github.com/rvanbruggen/23a0eccbffa663a9203b8fea5be58468).\n",
"page": 1,
"properties": [],
"parameters": "",
"refresh": 1
},
{
"title": "Structure of the graph - queries:",
"width": 12,
"height": 4,
"type": "text",
"query": "Graphically:\n\n```\ncall db.schema.visualization()\n```\n\nOr as a table:\n```\nmatch (n)\nreturn \"Node\" as Type,labels(n) as Name,count(n) as Count\nunion\nmatch ()-[r]->()\nreturn \"Relationship\" as Type,type(r) as Name, count(r) as Count\n```",
"page": 1,
"properties": [],
"parameters": "",
"refresh": 0
},
{
"title": "Structure of the graph",
"width": 6,
"height": 4,
"type": "graph",
"query": "call db.schema.visualization()",
"page": 24,
"properties": [
"name",
"name",
"name",
"name"
],
"parameters": "",
"refresh": 0
},
{
"title": "Structure of the graph",
"width": 6,
"height": 4,
"type": "table",
"query": "// structure of the graph\nmatch (n)\nreturn \"Node\" as Type,labels(n) as Name,count(n) as Count\nunion\nmatch ()-[r]->()\nreturn \"Relationship\" as Type,type(r) as Name, count(r) as Count\n",
"page": 1,
"properties": [],
"parameters": "",
"refresh": 0
},
{
"title": "Cypher queries:",
"width": 12,
"height": 8,
"type": "text",
"query": "#### Links between \"BRUCE\" and \"TOM\":\n```\nmatch (a1:Artist), (a2:Artist),\npath = allshortestpaths ((a1)-[*]-(a2))\nwhere toUpper(a1.name) contains \"BRUCE\"\nand toUpper(a2.name) contains \"TOM\"\nreturn path\nlimit 10;\n```\n\n#### Table of interesting artists\n```\nmatch (a:Artist)\nreturn a.name as ArtistName, a.`pagerank-spotify` as SpotifyPagerank, a.`pagerank-workedwith` as WorkedWithPageRank, a.`pagerank-similarity` as PageRankSimilarity\norder by a.`pagerank-spotify` desc\nlimit 10\n```\n\n#### Neighborhood of most important artists\n```\nmatch path = ((a:Artist)-[*..2]-(conn))\nreturn path\norder by a.`pagerank-spotify` desc\nlimit 10\n```\n\nSee below for all the results!\n",
"page": 1,
"properties": [],
"parameters": "",
"refresh": 0
},
{
"title": "Links between \"BRUCE\" and \"TOM\"",
"width": 12,
"height": 4,
"type": "graph",
"query": "match (a1:Artist), (a2:Artist), path = allshortestpaths((a1)-[*]-(a2))\nwhere toUpper(a1.name) contains \"BRUCE\"\nand toUpper(a2.name) contains \"TOM\"\nreturn path\nlimit 10",
"page": 38,
"properties": [
"name",
"name"
],
"parameters": "",
"refresh": 0
},
{
"title": "Table of Interesting Artists",
"width": 12,
"height": 4,
"type": "table",
"query": "match (a:Artist)\nreturn a.name as ArtistName, a.`pagerank-spotify` as SpotifyPagerank, a.`pagerank-workedwith` as WorkedWithPageRank, a.`pagerank-similarity` as PageRankSimilarity\norder by a.`pagerank-spotify` desc\nlimit 10",
"page": 1,
"properties": [],
"parameters": "",
"refresh": 0
},
{
"title": "Neighborhood of important artists",
"width": 12,
"height": 4,
"type": "graph",
"query": "match path = ((a:Artist)-[*..2]-(conn))\nreturn path\norder by a.`pagerank-spotify` desc\nlimit 10",
"page": 11,
"properties": [
"name"
],
"parameters": "",
"refresh": 0
},
{
"title": "How many songs per Artist?",
"width": 12,
"height": 8,
"type": "bar",
"query": "match (a:Artist)<--(t:Track)\nreturn a.name as Artist, count(t) as NumberOfTracks\norder by NumberOfTracks desc\nlimit 10;\n",
"page": 18,
"properties": [],
"parameters": "{\"x\":\"Artist\",\"y\":10}",
"refresh": 0
},
{
"title": "Number of Songs in an Album",
"width": 12,
"height": 8,
"type": "bar",
"query": "match (ar:Artist)<--(t:Track)-->(al:Album)-->(a)\nreturn ar.name as Artist, al.name as Album, count(t) as NumberOfSongs\norder by count(t) desc\nlimit 10\n",
"page": 7,
"properties": [
"Album",
"NumberOfSongs"
],
"parameters": "{\"x\":\"Album\",\"y\":10}",
"refresh": 0
},
{}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment