Created
January 12, 2015 17:45
-
-
Save rvanbruggen/cd9cfe3e31c92bf69a8b to your computer and use it in GitHub Desktop.
Graph Karaoke - Billie Jean
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Create the karaoke graph.
// Reads the lyrics spreadsheet (columns Songpart, Songpartsentence, Songsentence) as CSV,
// normalizes every sentence, and links each pair of consecutive words with a NEXT
// relationship that records the song part and the sentence number within that part.
load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1DLu2bl1ZO7Zm8zU1UXNCDZGxsnBkicAJD4J-FSbVXLE/export?format=csv&id=1DLu2bl1ZO7Zm8zU1UXNCDZGxsnBkicAJD4J-FSbVXLE&gid=0" as csv
with csv.Songpart as songpart, csv.Songpartsentence as songpartsentence, csv.Songsentence as row
// row is a single sentence string, so this yields one `text` per CSV row
// (NOTE(review): UNWIND of a null value is documented to yield no rows, which would skip blank sentences - confirm on your version)
unwind row as text
// lower-case the sentence and strip punctuation so "Jean," and "jean" become the same word
with songpart, songpartsentence, reduce(t=tolower(text), delim in [",",".","!","?",'"',":",";","'","-"] | replace(t,delim,"")) as normalized
// split on spaces; drop empty tokens (left behind by double spaces or a removed " - ")
// so that no Word node with an empty name is created
with songpart, songpartsentence, [w in split(normalized," ") where trim(w) <> "" | trim(w)] as words
// idx walks over every adjacent word pair (idx, idx+1); sentences with fewer than two words yield no pairs
unwind range(0,size(words)-2) as idx
MERGE (w1:Word {name:words[idx]})
MERGE (w2:Word {name:words[idx+1]})
// one NEXT relationship per (word pair, song part, sentence); repeats only bump the counter
// NOTE: toInt() was renamed toInteger() in later Neo4j releases - switch if your server rejects toInt
MERGE (w1)-[r:NEXT {songpart:toInt(songpart), songpartsentence:toInt(songpartsentence)}]->(w2)
ON CREATE SET r.count = 1 ON MATCH SET r.count = r.count +1;
// Find the words: list the name of every Word node in the graph.
match (w:Word) return w.name;
// Find a song part: every word that starts a NEXT relationship tagged with song part 1,
// together with that relationship.
match (w:Word)-[n:NEXT {songpart:1}]->()
return w,n;
// Find a sentence: all word pairs linked by NEXT relationships tagged with
// song part 5, sentence 1. Endpoints are restricted to :Word, matching the other
// queries in this script and avoiding a scan over unrelated nodes.
match (start:Word)-[n:NEXT {songpart:5, songpartsentence:1}]->(end:Word)
return start, n, end;
// Find the central words.
// Degree centrality: for each word, count the relationships (of any type, in either
// direction) that connect it to another Word node, highest first.
match (n:Word)-[r]-(m:Word)
return n.name, count(r) as degree
order by degree desc;
// Betweenness centrality query using UNWIND.
// Enumerates all shortest paths between every pair of words and counts how often each
// word appears as an interior node. This is a raw count: it is not normalized by the
// number of shortest paths that exist between a given pair.
MATCH p=allShortestPaths((source:Word)-[*]-(target:Word))
// id(source) < id(target) visits each unordered pair once; length(p) > 1 keeps only paths that have interior nodes
WHERE id(source) < id(target) and length(p) > 1
// nodes(p)[1..-1] drops the two endpoints, leaving the interior nodes
UNWIND nodes(p)[1..-1] as n
RETURN n.name, count(*) as betweenness
ORDER BY betweenness DESC;
// Store betweenness centrality on the nodes.
// Same computation as the betweenness query, but the count is written to the
// `betweenness` property. Only words that lie on the interior of at least one shortest
// path are updated; all other words are left untouched.
MATCH p=allShortestPaths((source:Word)-[*]-(target:Word))
WHERE id(source) < id(target) and length(p) > 1
// interior nodes only: the endpoints are sliced off
UNWIND nodes(p)[1..-1] as n
with n, count(*) as betweenness
set n.betweenness=betweenness;
// Query betweenness by property: read back the stored `betweenness` value,
// skipping words that have none, highest first.
match (n:Word)
where n.betweenness is not null
return n.name, n.betweenness order by n.betweenness desc;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment