Skip to content

Instantly share code, notes, and snippets.

@rvanbruggen
Last active July 13, 2023 11:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rvanbruggen/23a0eccbffa663a9203b8fea5be58468 to your computer and use it in GitHub Desktop.
Save rvanbruggen/23a0eccbffa663a9203b8fea5be58468 to your computer and use it in GitHub Desktop.
Spotify Playlist importer, queries, and dashboard
import spotipy
from neo4j import GraphDatabase
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
# ------------------------------------ Configuration parameters ------------------------------------ #
# Replace every "<<...>>" placeholder below before running the importer.
user_id = "<<YOUR SPOTIFY USER ID>>" # Spotify user ID. NOTE(review): not referenced by the loader code visible here.
client = "<<YOUR SPOTIFY CLIENT ID>>" # Spotify client ID.
secret = "<<YOUR SPOTIFY CLIENT SECRET>>" # Spotify client secret.
playlist_uri = "spotify:playlist:1eCqsRrwBAFc2lf5ZLGa5m" # Public playlist whose tracks are loaded into the graph.
neo4j_url = "neo4j://localhost:7687" # Connection URI of the Neo4j database.
neo4j_username = "<<YOUR NEO4J USERNAME>>" # neo4j username. defaults to 'neo4j'.
neo4j_password = "<<YOUR NEO4J DB PASSWORD>>" # neo4j password.
scope = 'playlist-modify-private' # Spotify scope required to manage playlists. NOTE(review): unused by the visible loader code.
redirect_uri = 'http://localhost:8888/callback' # Spotify callback url. Set to localhost for development. NOTE(review): unused by the visible loader code.
cache_path = "spotify_cache.tmp" # Where spotify caches the session variables. NOTE(review): unused by the visible loader code.
create_constraints = True # Whether to create constraints. NOTE(review): the visible loader recreates constraints without consulting this flag.
# Shared Spotify API client used by every get_* helper; built from the client ID and secret above.
spotify = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(client_id=client, client_secret=secret))
# ------------------------------------ Configuration parameters end --------------------------------- #
def load_graph_using_spotify_api():
    """Rebuild the Neo4j graph from the configured Spotify playlist.

    Steps, in order:
      1. drop/recreate constraints and wipe the database,
      2. create Track, Album and Artist nodes from the Spotify API results,
      3. merge related artists and genres,
      4. link tracks, albums, artists and genres together,
      5. run the GDS overlap-similarity, PageRank and Louvain procedures
         over the Artist nodes.

    Connection settings and the playlist come from the module-level
    configuration values. Returns nothing; the result is the graph itself.
    """
    # Announce the run before any work starts (the flattened original
    # emitted this message after the last GDS call).
    print("Starting the loading!")
    neo4j = create_neo4j_session(url=neo4j_url, user=neo4j_username, password=neo4j_password)
    try:
        print("dropping and creating constraints...")
        recreate_contraints(neo4j)

        print("creating tracks...")
        tracks = get_tracks()
        tracks = get_track_audio_features(tracks)
        neo4j.run("UNWIND $tracks as track CREATE (t:Track{id: track.id}) SET t = track",
                  parameters={'tracks': list(tracks.values())})

        print("creating albums...")
        albums = get_album_info(tracks)
        # The id must come from the unwound map; the node being created has
        # no properties yet, so `a.id` cannot supply it.
        neo4j.run("UNWIND $albums as album CREATE (a:Album{id: album.id}) SET a = album",
                  parameters={'albums': list(albums.values())})

        print("creating artists...")
        artists = get_artist_info(tracks)
        neo4j.run("UNWIND $artists as artist CREATE (a:Artist{id: artist.id}) SET a = artist",
                  parameters={'artists': list(artists.values())})

        print("finding related artists..")
        related_artists = get_related_artists(artists)
        # MERGE (not CREATE): a related artist may already exist as a playlist artist.
        neo4j.run("""UNWIND $relatedartists as artist MERGE (a:Artist {id: artist.id}) SET a = artist """,
                  parameters={'relatedartists': list(related_artists.values())})
        neo4j.run("""MATCH (a:Artist) WHERE EXISTS (a.original_artist) WITH a
            MATCH (a2:Artist{id: a.original_artist})
            MERGE (a)-[:SPOTIFY_RELATES_TO]->(a2)""")

        print("creating genres..")
        genres = get_genres(albums, artists)
        neo4j.run("UNWIND $genres as genre MERGE (g:Genre{name: genre})",
                  parameters={'genres': list(genres)})

        print("Linking tracks to albums, genres, and artists...")
        neo4j.run("MATCH (t:Track), (a:Album{id: t.album}) CREATE (t)-[:TRACK_IN_ALBUM]->(a);")
        neo4j.run("MATCH (t:Track) UNWIND t.artists as artist MATCH (a:Artist{id: artist}) CREATE (t)-[:TRACK_HAS_ARTIST]->(a)")
        neo4j.run("MATCH (a:Artist) UNWIND a.genres as genre MATCH (g:Genre{name: genre}) CREATE (a)-[:ARTIST_HAS_GENRE]->(g)")
        # id(a1)<id(a2) keeps one WORKED_WITH relationship per artist pair and track.
        neo4j.run("MATCH (a1:Artist)<--(t:Track)-->(a2:Artist) WHERE id(a1)<id(a2) MERGE (a1)-[:WORKED_WITH {track:t.uri}]->(a2)")
        neo4j.run("MATCH (ar:Artist)<--(t:Track)-->(al:Album) MERGE (al)-[:ALBUM_HAS_ARTIST]->(ar)")

        print("Calculate artist similarity using GDS..")
        neo4j.run("""
            MATCH (item:`Artist`)-[:`ARTIST_HAS_GENRE`]->(category:`Genre`)
            WITH {item:id(item), categories: collect(distinct id(category))} as userData
            WITH collect(userData) as dataset
            CALL gds.alpha.similarity.overlap.write({
            data: dataset,
            weightproperty: null,
            nodeProjection: '*',
            writeProperty: 'score',
            writeRelationshipType: 'GDS_ARTIST_SIMILAR_OVERLAP',
            similarityCutoff: 0.05,
            degreeCutoff: 0 })
            YIELD nodes, similarityPairs, writeRelationshipType, writeProperty, min, max, mean, stdDev, p25, p50, p75, p90, p95, p99, p999, p100
            RETURN nodes, similarityPairs, writeRelationshipType, writeProperty, min, max, mean, p95""")

        print("Calculate artist pagerank-spotify using GDS..")
        neo4j.run("""
            CALL gds.pageRank.write({
            nodeProjection: 'Artist',
            relationshipProjection: {
            relType: {
            type: 'SPOTIFY_RELATES_TO',
            orientation: 'NATURAL',
            properties: {}
            }
            },
            relationshipWeightProperty: null,
            dampingFactor: 0.85,
            maxIterations: 20,
            writeProperty: 'pagerank-spotify'})
            """)

        print("Calculate artist pagerank-workedwith using GDS..")
        neo4j.run("""
            CALL gds.pageRank.write({
            nodeProjection: 'Artist',
            relationshipProjection: {
            relType: {
            type: 'WORKED_WITH',
            orientation: 'UNDIRECTED',
            properties: {}
            }
            },
            relationshipWeightProperty: null,
            dampingFactor: 0.85,
            maxIterations: 20,
            writeProperty: 'pagerank-workedwith'
            })
            """)

        print("Calculate artist pagerank-similarity using GDS..")
        neo4j.run("""
            CALL gds.pageRank.write({
            nodeProjection: 'Artist',
            relationshipProjection: {
            relType: {
            type: 'GDS_ARTIST_SIMILAR_OVERLAP',
            orientation: 'UNDIRECTED',
            properties: {}
            }
            },
            relationshipWeightProperty: null,
            dampingFactor: 0.85,
            maxIterations: 20,
            writeProperty: 'pagerank-similarity'
            })
            """)

        print("Calculate artist Louvain community using GDS..")
        # NOTE(review): 'valence' is written onto Track nodes by the audio
        # features step; confirm it is meaningful as a seed on Artist nodes.
        neo4j.run("""
            CALL gds.louvain.write({
            nodeProjection: 'Artist',
            relationshipProjection: {
            relType: {
            type: 'SPOTIFY_RELATES_TO',
            orientation: 'UNDIRECTED',
            properties: {}
            }
            },
            relationshipWeightProperty: null,
            includeIntermediateCommunities: false,
            seedProperty: 'valence',
            nodeProperties: [
            'valence'
            ],
            writeProperty: 'louvain-community'})
            """)
    finally:
        # Always release the session, even when one of the steps fails.
        neo4j.close()
def recreate_contraints(neo4j):
    """Reset the schema and the data of the target database.

    Drops every constraint reported by ``db.constraints``, recreates the
    four uniqueness constraints the importer relies on, and finally deletes
    all nodes together with their relationships.

    Args:
        neo4j: an object exposing ``run(query)``; the loader passes its
            Neo4j session here.
    """
    # Drop whatever constraints the database currently reports.
    for existing in neo4j.run("CALL db.constraints"):
        neo4j.run("DROP " + existing['description'])

    # One uniqueness constraint per node label created by the importer.
    uniqueness_rules = (
        "CREATE CONSTRAINT ON (g:Genre) ASSERT g.name IS UNIQUE",
        "CREATE CONSTRAINT ON (a:Album) ASSERT a.id IS UNIQUE",
        "CREATE CONSTRAINT ON (a:Artist) ASSERT a.id IS UNIQUE",
        "CREATE CONSTRAINT ON (t:Track) ASSERT t.id IS UNIQUE",
    )
    for statement in uniqueness_rules:
        neo4j.run(statement)

    # Start from an empty graph.
    neo4j.run("MATCH (n) DETACH DELETE n;")
def get_tracks(spotify_client=None, playlist=None):
    """Fetch every track of a playlist, keyed by Spotify track id.

    Each stored track is flattened so it can be written as node properties:
    ``artists`` becomes a list of artist ids, ``album`` becomes the album
    id, and any remaining dict-valued field is replaced by ``None``.

    Args:
        spotify_client: object exposing ``playlist(uri)`` and ``next(page)``;
            defaults to the module-level ``spotify`` client.
        playlist: playlist URI to read; defaults to the module-level
            ``playlist_uri``.

    Returns:
        dict mapping track id to the flattened track dict. Entries without
        a track or without a track id are skipped.
    """
    api = spotify if spotify_client is None else spotify_client
    uri = playlist_uri if playlist is None else playlist

    page = api.playlist(uri)['tracks']
    items = {}
    # Process the current page first and only then decide whether to go on;
    # testing 'next' before processing would drop the final page.
    while True:
        for entry in page["items"]:
            track = entry.get('track')
            if not track or not track['id']:
                continue
            track['artists'] = [artist if isinstance(artist, str) else artist['id']
                                for artist in track['artists']]
            album = track['album']
            track['album'] = album if isinstance(album, str) else album['id']
            # Nested maps cannot be stored as properties; blank them out.
            for field in track:
                if isinstance(track[field], dict):
                    track[field] = None
            items[track['id']] = track
        if not page['next']:
            break
        page = api.next(page)
    return items
def get_track_audio_features(tracks, page_size=100):
    """Merge Spotify audio features into the given track dicts, in place.

    Track ids are sent to ``spotify.audio_features`` in batches of
    ``page_size``. Every key of a returned feature record except ``type``
    is copied onto the matching track; records that come back as ``None``
    are skipped.

    Args:
        tracks: dict mapping track id to track dict.
        page_size: number of ids per API request.

    Returns:
        The same ``tracks`` dict, with its values updated.
    """
    track_ids = list(tracks.keys())
    page_total = int(len(tracks) / page_size) + 1
    for page in range(page_total):
        start = page * page_size
        batch = track_ids[start:start + page_size]
        if not batch:
            break
        for features in spotify.audio_features(tracks=batch):
            if features is not None:
                target = tracks[features['id']]
                for name, value in features.items():
                    # 'type' is the one feature key that is not copied over.
                    if name != 'type':
                        target[name] = value
    return tracks
def get_album_info(tracks, page_size=20, spotify_client=None):
    """Fetch and flatten the albums referenced by the given tracks.

    Album ids are read from each track's ``album`` field and requested in
    batches of ``page_size``. Each album is flattened for storage as node
    properties: ``artists`` becomes a list of artist ids, ``images`` a
    single image URL, ``tracks`` the track count, ``copyrights`` the number
    of copyright entries, and the ``external_*`` maps are cleared.

    Args:
        tracks: dict mapping track id to track dict (with ``album`` as id).
        page_size: number of album ids per API request.
        spotify_client: object exposing ``albums(ids)``; defaults to the
            module-level ``spotify`` client.

    Returns:
        dict mapping album id to the flattened album dict.
    """
    api = spotify if spotify_client is None else spotify_client
    album_ids = list({track['album'] for track in tracks.values()})
    all_albums = {}
    # Stepping by page_size never yields an empty batch, so no request is
    # sent with zero ids (also when there are no albums at all).
    for start in range(0, len(album_ids), page_size):
        results = api.albums(album_ids[start:start + page_size])
        for album in results['albums']:
            if album is None:
                continue
            album['artists'] = [artist['id'] for artist in album['artists']]
            images = album['images']
            # Prefer the second image as before, but fall back to the only
            # one available and tolerate albums without artwork.
            album['images'] = images[min(1, len(images) - 1)]['url'] if images else None
            album['external_ids'] = None
            album['external_urls'] = None
            track_page = album['tracks']
            # For a paging dict, len() would count its keys, not the tracks.
            if isinstance(track_page, dict) and 'total' in track_page:
                album['tracks'] = track_page['total']
            else:
                album['tracks'] = len(track_page)
            album['copyrights'] = len(album['copyrights'])
            all_albums[album['id']] = album
    return all_albums
def get_artist_info(items, page_size=50, spotify_client=None):
    """Fetch and flatten the artists referenced by the given tracks.

    Artist ids are collected from each track's ``artists`` list and
    requested in batches of ``page_size``. For each artist, ``images`` is
    reduced to one image URL (left untouched when empty), ``followers`` to
    the follower total, and ``external_urls`` is cleared.

    Args:
        items: dict mapping track id to track dict (``artists`` as ids).
        page_size: number of artist ids per API request.
        spotify_client: object exposing ``artists(ids)``; defaults to the
            module-level ``spotify`` client.

    Returns:
        dict mapping artist id to the flattened artist dict.
    """
    api = spotify if spotify_client is None else spotify_client
    artist_ids = list({artist_id
                       for track in items.values()
                       for artist_id in track['artists']})
    all_artists = {}
    # Stepping by page_size never yields an empty batch, so no request is
    # sent with zero ids.
    for start in range(0, len(artist_ids), page_size):
        results = api.artists(artist_ids[start:start + page_size])
        for artist in results['artists']:
            if artist is None:
                continue
            images = artist['images']
            if images:
                # Prefer the second image; fall back to the only one available.
                artist['images'] = images[min(1, len(images) - 1)]['url']
            artist['followers'] = artist['followers']['total']
            artist['external_urls'] = None
            all_artists[artist['id']] = artist
    return all_artists
def get_related_artists(items, page_size=50, spotify_client=None):
    """Fetch the artists Spotify lists as related to each given artist.

    One request is made per artist id in ``items``. Every related artist is
    tagged with ``original_artist`` (the id it was found through) and
    flattened like the regular artists: ``images`` becomes one image URL
    (left untouched when empty), ``followers`` the follower total, and
    ``external_urls`` is cleared. A related artist reached through several
    originals keeps the data of the last one processed.

    Args:
        items: iterable of artist ids (the loader passes the artist dict).
        page_size: kept for backward compatibility; requests are made one
            artist at a time, so it does not affect the result.
        spotify_client: object exposing ``artist_related_artists(id)``;
            defaults to the module-level ``spotify`` client.

    Returns:
        dict mapping related-artist id to the flattened artist dict.
    """
    api = spotify if spotify_client is None else spotify_client
    new_artists = {}
    for artist_id in list(items):
        related_artists = api.artist_related_artists(artist_id)
        for related_artist in related_artists['artists']:
            related_artist['original_artist'] = artist_id
            images = related_artist["images"]
            if images:
                # Prefer the second image; fall back to the only one available.
                related_artist['images'] = images[min(1, len(images) - 1)]['url']
            related_artist['followers'] = related_artist['followers']['total']
            related_artist['external_urls'] = None
            new_artists[related_artist['id']] = related_artist
    return new_artists
def get_genres(albums, artists):
    """Return the set of all genre names found on the albums and artists.

    Args:
        albums: dict mapping album id to a dict with a ``genres`` list.
        artists: dict mapping artist id to a dict with a ``genres`` list.

    Returns:
        set of distinct genre names from both inputs.
    """
    album_genres = {name for key in albums for name in albums[key]['genres']}
    artist_genres = {name for key in artists for name in artists[key]['genres']}
    return album_genres | artist_genres
def create_neo4j_session(url, user, password):
    """Open a Neo4j session for the given connection URI and credentials.

    Args:
        url: connection URI passed to ``GraphDatabase.driver``.
        user: username for authentication.
        password: password for authentication.

    Returns:
        A new session obtained from a freshly created driver.
    """
    credentials = (user, password)
    return GraphDatabase.driver(url, auth=credentials).session()
# Script entry point: run the full import, then report completion.
if __name__ == '__main__':
    load_graph_using_spotify_api()
    print("Done!")
// Structure of the graph: number of nodes per label set, followed by the
// number of relationships per relationship type.
match (n)
return "Node" as Type,labels(n) as Name,count(n) as Count
union
match ()-[r]->()
return "Relationship" as Type,type(r) as Name, count(r) as Count
// Links between two artists: all shortest paths (any relationship type,
// either direction) between artists whose upper-cased names contain
// "BRUCE" and "TOM"; at most 10 paths are returned.
match (a1:Artist), (a2:Artist),
path = allshortestpaths ((a1)-[*]-(a2))
where toUpper(a1.name) contains "BRUCE"
and toUpper(a2.name) contains "TOM"
return path
limit 10;
// Table of interesting artists: the three PageRank scores written by the
// importer, for the 10 artists with the highest `pagerank-spotify`.
match (a:Artist)
return a.name as ArtistName, a.`pagerank-spotify` as SpotifyPagerank, a.`pagerank-workedwith` as WorkedWithPageRank, a.`pagerank-similarity` as PageRankSimilarity
order by a.`pagerank-spotify` desc
limit 10
// Neighborhood of the most important artists: paths of up to 2 hops from
// an artist, ordered by that artist's `pagerank-spotify`; the limit of 10
// applies to paths, not to artists.
match path = ((a:Artist)-[*..2]-(conn))
return path
order by a.`pagerank-spotify` desc
limit 10
// Number of tracks per artist (top 10).
match (a:Artist)<--(t:Track)
return a.name as Artist, count(t) as NumberOfTracks
order by NumberOfTracks desc
limit 10;
// Number of tracks per artist and album (top 10).
// NOTE(review): the trailing -->(a) hop yields one row per outgoing
// relationship of the album, so count(t) may be inflated; confirm intended.
match (ar:Artist)<--(t:Track)-->(al:Album)-->(a)
return ar.name, al.name, count(t)
order by count(t) desc
limit 10
{
"title": "Welcome to my Spotify Dashboard!",
"version": "1.0",
"editable": true,
"reports": [
{
"title": "Based on the following projects",
"width": 12,
"height": 4,
"type": "text",
"query": "* [Niels' Spotify Playlist Builder](https://nielsdejong.nl/neo4j%20projects/2020/09/23/spotify-playlist-builder.html)\n* [Niels' NeoDash project](https://nielsdejong.nl/neo4j%20projects/2020/11/16/neodash.html) - which enables this page!\n\n\n![Spotify logo](https://developer.spotify.com/assets/branding-guidelines/logo@2x.png)\n\nIn this Dashboard, we will show you how you can take a look at the small Spotify Graph that we created!\nYou will find all the code on [GitHub, of course](https://gist.github.com/rvanbruggen/23a0eccbffa663a9203b8fea5be58468).\n",
"page": 1,
"properties": [],
"parameters": "",
"refresh": 1
},
{
"title": "Structure of the graph - queries:",
"width": 12,
"height": 4,
"type": "text",
"query": "Graphically:\n\n```\ncall db.schema.visualization()\n```\n\nOr as a table:\n```\nmatch (n)\nreturn \"Node\" as Type,labels(n) as Name,count(n) as Count\nunion\nmatch ()-[r]->()\nreturn \"Relationship\" as Type,type(r) as Name, count(r) as Count\n```",
"page": 1,
"properties": [],
"parameters": "",
"refresh": 0
},
{
"title": "Structure of the graph",
"width": 6,
"height": 4,
"type": "graph",
"query": "call db.schema.visualization()",
"page": 24,
"properties": [
"name",
"name",
"name",
"name"
],
"parameters": "",
"refresh": 0
},
{
"title": "Structure of the graph",
"width": 6,
"height": 4,
"type": "table",
"query": "// structure of the graph\nmatch (n)\nreturn \"Node\" as Type,labels(n) as Name,count(n) as Count\nunion\nmatch ()-[r]->()\nreturn \"Relationship\" as Type,type(r) as Name, count(r) as Count\n",
"page": 1,
"properties": [],
"parameters": "",
"refresh": 0
},
{
"title": "Cypher queries:",
"width": 12,
"height": 8,
"type": "text",
"query": "#### Links between \"BRUCE\" and \"TOM\":\n```\nmatch (a1:Artist), (a2:Artist),\npath = allshortestpaths ((a1)-[*]-(a2))\nwhere toUpper(a1.name) contains \"BRUCE\"\nand toUpper(a2.name) contains \"TOM\"\nreturn path\nlimit 10;\n```\n\n#### Table of interesting artists\n```\nmatch (a:Artist)\nreturn a.name as ArtistName, a.`pagerank-spotify` as SpotifyPagerank, a.`pagerank-workedwith` as WorkedWithPageRank, a.`pagerank-similarity` as PageRankSimilarity\norder by a.`pagerank-spotify` desc\nlimit 10\n```\n\n#### Neighborhood of most important artists\n```\nmatch path = ((a:Artist)-[*..2]-(conn))\nreturn path\norder by a.`pagerank-spotify` desc\nlimit 10\n```\n\nSee below for all the results!\n",
"page": 1,
"properties": [],
"parameters": "",
"refresh": 0
},
{
"title": "Links between \"BRUCE\" and \"TOM\"",
"width": 12,
"height": 4,
"type": "graph",
"query": "match (a1:Artist), (a2:Artist), path = allshortestpaths((a1)-[*]-(a2))\nwhere toUpper(a1.name) contains \"BRUCE\"\nand toUpper(a2.name) contains \"TOM\"\nreturn path\nlimit 10",
"page": 38,
"properties": [
"name",
"name"
],
"parameters": "",
"refresh": 0
},
{
"title": "Table of Interesting Artists",
"width": 12,
"height": 4,
"type": "table",
"query": "match (a:Artist)\nreturn a.name as ArtistName, a.`pagerank-spotify` as SpotifyPagerank, a.`pagerank-workedwith` as WorkedWithPageRank, a.`pagerank-similarity` as PageRankSimilarity\norder by a.`pagerank-spotify` desc\nlimit 10",
"page": 1,
"properties": [],
"parameters": "",
"refresh": 0
},
{
"title": "Neighborhood of important artists",
"width": 12,
"height": 4,
"type": "graph",
"query": "match path = ((a:Artist)-[*..2]-(conn))\nreturn path\norder by a.`pagerank-spotify` desc\nlimit 10",
"page": 11,
"properties": [
"name"
],
"parameters": "",
"refresh": 0
},
{
"title": "How many songs per Artist?",
"width": 12,
"height": 8,
"type": "bar",
"query": "match (a:Artist)<--(t:Track)\nreturn a.name as Artist, count(t) as NumberOfTracks\norder by NumberOfTracks desc\nlimit 10;\n",
"page": 18,
"properties": [],
"parameters": "{\"x\":\"Artist\",\"y\":10}",
"refresh": 0
},
{
"title": "Number of Songs in an Album",
"width": 12,
"height": 8,
"type": "bar",
"query": "match (ar:Artist)<--(t:Track)-->(al:Album)-->(a)\nreturn ar.name as Artist, al.name as Album, count(t) as NumberOfSongs\norder by count(t) desc\nlimit 10\n",
"page": 7,
"properties": [
"Album",
"NumberOfSongs"
],
"parameters": "{\"x\":\"Album\",\"y\":10}",
"refresh": 0
},
{}
]
}
@peterjohnson427
Copy link

MusConv Tool is a powerful music transfer tool that simplifies the process of moving music playlists and tracks between different streaming services.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment