Forked from nielsdejong/music-graph-clustering.cypher
Last active
June 23, 2025 08:25
-
-
Save alexnb/950b3bf9c335fe264cca6ef05114423c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// 1. Add placeholder properties onto non-song entities (needed for FastRP).
//    Artist, Genre and Album nodes receive a 0.0 value for each numeric
//    property listed below, so they expose the same property names that the
//    projection in step 2 reads from the target nodes.
//    NOTE(review): this overwrites any existing value of these properties on
//    Artist/Genre/Album nodes (e.g. a genuine `popularity`) - confirm none exist.
MATCH (n)
WHERE n:Artist OR n:Genre OR n:Album
SET n.instrumentalness = 0.0,
    n.speechiness = 0.0,
    n.tempo = 0.0,
    n.danceability = 0.0,
    n.acousticness = 0.0,
    n.liveness = 0.0,
    n.valence = 0.0,
    n.loudness = 0.0,
    n.energy = 0.0,
    n.popularity = 0.0;
// 2. Project graph into session.
//    Every (Song)-->(target) pair is aggregated into the in-memory graph
//    'musicgraph'. Songs are labelled "Song"; targets keep their own labels.
//    The numeric properties of both endpoints are carried along.
//    NOTE(review): the Aura API client id is hard-coded here - consider
//    passing it (and the secret) as query parameters instead.
WITH gds.aura.api.credentials("aTCwhbP2B7BkZMenb1K2Opunlx0vQYnk", "...") AS credentials
MATCH (source:Song)-->(target)
WITH gds.graph.project(
  'musicgraph',
  source,
  target,
  {
    sourceNodeLabels: "Song",
    targetNodeLabels: labels(target),
    // Songs additionally carry durationMs; targets do not.
    sourceNodeProperties: source {
      .instrumentalness,
      .speechiness,
      .tempo,
      .danceability,
      .acousticness,
      .liveness,
      .durationMs,
      .valence,
      .loudness,
      .energy,
      .popularity
    },
    targetNodeProperties: target {
      .instrumentalness,
      .speechiness,
      .tempo,
      .danceability,
      .acousticness,
      .liveness,
      .valence,
      .loudness,
      .energy,
      .popularity
    },
    relationshipProperties: {}
  },
  {
    // Session sizing / lifetime settings.
    memory: '4GB',
    ttl: duration({minutes: 5})
  }
) AS g
RETURN g.graphName, g.nodeCount, g.relationshipCount;
// 3. Run FastRP.
//    Computes an 8-dimensional embedding for every node of 'musicgraph' and
//    stores it in the in-memory graph under the property 'embedding'.
//    randomSeed is fixed so repeated runs use the same random projection.
WITH gds.aura.api.credentials("aTCwhbP2B7BkZMenb1K2Opunlx0vQYnk", "...") AS credentials
CALL gds.fastRP.mutate('musicgraph', {
  randomSeed: 42,
  embeddingDimension: 8,
  mutateProperty: 'embedding',
  iterationWeights: [0.8, 1, 1, 1],
  normalizationStrength: 0.5,
  // propertyRatio defaults to 0.0, in which case no embedding dimensions are
  // allotted to featureProperties and they have no effect. Reserve half of
  // the dimensions for them; tune this value as needed.
  propertyRatio: 0.5,
  // NOTE(review): 'speechiness' is projected in step 2 but not listed here -
  // confirm the omission is intentional.
  featureProperties: [
    'instrumentalness', 'tempo', 'danceability', 'acousticness',
    'liveness', 'valence', 'loudness',
    'energy', 'popularity'
  ]
})
YIELD nodePropertiesWritten
RETURN nodePropertiesWritten;
// 4. Filter the graph down to Song nodes.
//    Creates the new in-memory graph 'musicgraph2' from the existing
//    'musicgraph', using the node filter 'n:Song' and the filter '*' for
//    relationships. The Song nodes carry the embedding mutated in step 3.
WITH gds.aura.api.credentials("aTCwhbP2B7BkZMenb1K2Opunlx0vQYnk", "...") AS credentials
CALL gds.graph.filter('musicgraph2', 'musicgraph', 'n:Song', '*')
YIELD graphName, nodeCount, relationshipCount
RETURN graphName, nodeCount, relationshipCount;
// 5. Run KMeans to cluster songs based on embedding.
//    Groups the nodes of 'musicgraph2' into 40 clusters by their 'embedding'
//    property and writes each node's cluster id to the property 'cluster'.
WITH gds.aura.api.credentials("aTCwhbP2B7BkZMenb1K2Opunlx0vQYnk", "...") AS credentials
CALL gds.kmeans.write('musicgraph2', {
  nodeProperty: 'embedding',
  writeProperty: 'cluster',
  k: 40,
  // Seeded like FastRP in step 3 so that cluster ids (which become Playlist
  // ids in step 6) are reproducible between runs.
  randomSeed: 42
})
YIELD computeMillis
RETURN computeMillis;
// 6. Turn each cluster into its own Playlist node.
//    For every Song that has a cluster id, find-or-create the Playlist with
//    that id and find-or-create the IN_PLAYLIST relationship to it.
MATCH (song:Song)
WHERE song.cluster IS NOT NULL
MERGE (playlist:Playlist {id: song.cluster})
MERGE (song)-[:IN_PLAYLIST]->(playlist);
// 7. Clean up temporary properties.
//    Strips the numeric properties from Artist, Genre and Album nodes again.
//    NOTE(review): durationMs, key, timeSignature and embedding are removed
//    here although step 1 does not set them - confirm that no genuine data
//    lives under those names on these labels.
MATCH (entity)
WHERE entity:Artist OR entity:Genre OR entity:Album
REMOVE entity.instrumentalness, entity.speechiness, entity.tempo,
       entity.danceability, entity.acousticness, entity.liveness,
       entity.durationMs, entity.key, entity.timeSignature,
       entity.valence, entity.loudness, entity.energy,
       entity.embedding, entity.popularity;
// 8. Drop projection.
//    Removes the in-memory graph 'musicgraph' and returns its schema.
//    Only `schema` is returned: RETURN * would also emit the in-scope
//    `credentials` value in the result set.
WITH gds.aura.api.credentials("aTCwhbP2B7BkZMenb1K2Opunlx0vQYnk", "...") AS credentials
CALL gds.graph.drop("musicgraph")
YIELD schema
RETURN schema;
// Drop the filtered Song-only in-memory graph 'musicgraph2' as well.
// Only `schema` is returned: RETURN * would also emit the in-scope
// `credentials` value in the result set.
WITH gds.aura.api.credentials("aTCwhbP2B7BkZMenb1K2Opunlx0vQYnk", "...") AS credentials
CALL gds.graph.drop("musicgraph2")
YIELD schema
RETURN schema;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment