Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save alexnb/950b3bf9c335fe264cca6ef05114423c to your computer and use it in GitHub Desktop.
Save alexnb/950b3bf9c335fe264cca6ef05114423c to your computer and use it in GitHub Desktop.
// 1. Add placeholder audio-feature properties onto non-song entities.
//    Artist, Genre and Album nodes receive 0.0 for each feature key that the
//    projection in step 2 reads from target nodes and that FastRP in step 3
//    lists in featureProperties.
//    Each key is written exactly once through a single map merge; `+=` only
//    touches the listed keys and leaves every other property on the node as is.
// NOTE(review): any existing value stored under these keys on Artist/Genre/Album
//    nodes is overwritten here (and removed again in step 7) - confirm none of
//    them carries real data.
MATCH (n)
WHERE n:Artist OR n:Genre OR n:Album
SET n += {
  instrumentalness: 0.0,
  speechiness: 0.0,
  tempo: 0.0,
  danceability: 0.0,
  acousticness: 0.0,
  liveness: 0.0,
  valence: 0.0,
  loudness: 0.0,
  energy: 0.0,
  popularity: 0.0
};
// 2. Project the graph into a session.
//    Every outgoing relationship of a Song node contributes one
//    (Song, neighbour) pair to the projection named 'musicgraph'.
//    The first map describes labels and node properties for both endpoints;
//    the second map requests '4GB' of memory and a ttl of 5 minutes.
// NOTE(review): the client ID is hard-coded in each credentials call of this
//    script - consider passing it (and the secret) as query parameters.
WITH gds.aura.api.credentials("aTCwhbP2B7BkZMenb1K2Opunlx0vQYnk", "...") AS credentials
MATCH (song:Song)-->(neighbor)
WITH gds.graph.project(
  'musicgraph',
  song,
  neighbor,
  {
    sourceNodeLabels: "Song",
    targetNodeLabels: labels(neighbor),
    // Song endpoints additionally expose durationMs.
    sourceNodeProperties: song {
      .instrumentalness,
      .speechiness,
      .tempo,
      .danceability,
      .acousticness,
      .liveness,
      .durationMs,
      .valence,
      .loudness,
      .energy,
      .popularity
    },
    targetNodeProperties: neighbor {
      .instrumentalness,
      .speechiness,
      .tempo,
      .danceability,
      .acousticness,
      .liveness,
      .valence,
      .loudness,
      .energy,
      .popularity
    },
    relationshipProperties: {}
  },
  {
    memory: '4GB',
    ttl: duration({minutes: 5})
  }
) AS g
RETURN g.graphName, g.nodeCount, g.relationshipCount;
// 3. Run FastRP on 'musicgraph' and store the result in the in-memory graph
//    under the node property 'embedding' (mutate mode).
//    Returns the number of node properties written.
WITH gds.aura.api.credentials("aTCwhbP2B7BkZMenb1K2Opunlx0vQYnk", "...") AS credentials
CALL gds.fastRP.mutate('musicgraph', {
  randomSeed: 42,
  embeddingDimension: 8,
  mutateProperty: 'embedding',
  iterationWeights: [0.8, 1, 1, 1],
  normalizationStrength: 0.5,
  // propertyRatio defaults to 0.0, in which case featureProperties do not
  // contribute to the embedding at all. A positive ratio reserves that share
  // of embeddingDimension for the listed features. 0.5 is a starting point;
  // tune it for the desired balance between topology and audio features.
  propertyRatio: 0.5,
  // NOTE(review): 'speechiness' is projected in step 2 but not listed here -
  // confirm whether that omission is intentional.
  featureProperties: [
    'instrumentalness', 'tempo', 'danceability', 'acousticness',
    'liveness', 'valence', 'loudness',
    'energy', 'popularity'
  ]
})
YIELD nodePropertiesWritten
RETURN nodePropertiesWritten;
// 4. Derive a Song-only graph from the projection.
//    Positional arguments, in order:
//      'musicgraph2' - name of the new graph
//      'musicgraph'  - name of the existing in-memory graph
//      'n:Song'      - node filter: keep only nodes labelled Song
//      '*'           - relationship filter
WITH gds.aura.api.credentials("aTCwhbP2B7BkZMenb1K2Opunlx0vQYnk", "...") AS credentials
CALL gds.graph.filter('musicgraph2', 'musicgraph', 'n:Song', '*')
YIELD graphName, nodeCount, relationshipCount
RETURN graphName, nodeCount, relationshipCount;
// 5. Cluster the songs with K-Means.
//    Operates on 'musicgraph2', reads the 'embedding' node property, requests
//    40 clusters and writes each node's assignment to the 'cluster' property.
// NOTE(review): no randomSeed is passed here, unlike the FastRP call in
//    step 3 - confirm whether reproducible cluster ids are required.
WITH gds.aura.api.credentials("aTCwhbP2B7BkZMenb1K2Opunlx0vQYnk", "...") AS credentials
CALL gds.kmeans.write(
  'musicgraph2',
  {
    k: 40,
    nodeProperty: 'embedding',
    writeProperty: 'cluster'
  }
)
YIELD computeMillis
RETURN computeMillis;
// 6. Materialise each cluster as its own Playlist node.
//    For every Song that has a cluster value, find-or-create the Playlist
//    whose id equals that value, then find-or-create the IN_PLAYLIST
//    relationship from the song to it.
MATCH (song:Song)
WHERE song.cluster IS NOT NULL
WITH song, song.cluster AS clusterId
MERGE (playlist:Playlist {id: clusterId})
MERGE (song)-[:IN_PLAYLIST]->(playlist);
// 7. Clean up temporary properties.
//    Removes from Artist, Genre and Album nodes exactly the placeholder keys
//    that step 1 adds, and nothing else. Keys that step 1 never writes
//    (durationMs, key, timeSignature, embedding) are deliberately not listed,
//    so that any genuine data stored under those names is left untouched.
MATCH (n)
WHERE n:Artist OR n:Genre OR n:Album
REMOVE n.instrumentalness,
       n.speechiness,
       n.tempo,
       n.danceability,
       n.acousticness,
       n.liveness,
       n.valence,
       n.loudness,
       n.energy,
       n.popularity;
// 8. Drop both in-memory graphs created by this script: the full projection
//    'musicgraph' and the Song-only graph 'musicgraph2'.
//    Each statement returns the yielded `schema` column by name rather than
//    `RETURN *`, so the `credentials` binding that is also in scope is not
//    echoed back in the result.
WITH gds.aura.api.credentials("aTCwhbP2B7BkZMenb1K2Opunlx0vQYnk", "...") AS credentials
CALL gds.graph.drop("musicgraph")
YIELD schema
RETURN schema;
WITH gds.aura.api.credentials("aTCwhbP2B7BkZMenb1K2Opunlx0vQYnk", "...") AS credentials
CALL gds.graph.drop("musicgraph2")
YIELD schema
RETURN schema;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment