rvanbruggen/billiejean.cql

## billiejean.cql
//create the karaoke graph
// Each CSV row carries one sentence of the song. Every pair of adjacent words in a
// sentence becomes (:Word)-[:NEXT]->(:Word), keyed by song part and sentence number;
// r.count records how often that exact pair was merged.
LOAD CSV WITH HEADERS FROM "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1DLu2bl1ZO7Zm8zU1UXNCDZGxsnBkicAJD4J-FSbVXLE/export?format=csv&id=1DLu2bl1ZO7Zm8zU1UXNCDZGxsnBkicAJD4J-FSbVXLE&gid=0" AS csv
WITH csv.Songpart AS songpart, csv.Songpartsentence AS songpartsentence, csv.Songsentence AS row
// UNWIND passes a single value through unchanged and yields no rows for null,
// so rows without a sentence are skipped here
UNWIND row AS text
// lower-case the sentence and strip punctuation before splitting it into words
WITH songpart, songpartsentence, reduce(t=toLower(text), delim IN [",",".","!","?",'"',":",";","'","-"] | replace(t,delim,"")) AS normalized
// discard empty tokens (repeated, leading or trailing spaces) so that no Word
// node with an empty name is created
WITH songpart, songpartsentence, [w IN split(normalized," ") WHERE trim(w) <> "" | trim(w)] AS words
// idx walks the adjacent word pairs; a sentence of fewer than two words gives an empty range
UNWIND range(0,size(words)-2) AS idx
MERGE (w1:Word {name:words[idx]})
MERGE (w2:Word {name:words[idx+1]})
// toInteger is used instead of toInt, which newer Neo4j releases no longer provide
MERGE (w1)-[r:NEXT {songpart:toInteger(songpart), songpartsentence:toInteger(songpartsentence)}]->(w2)
  ON CREATE SET r.count = 1 ON MATCH SET r.count = r.count +1;

// list the name of every Word node
MATCH (w:Word)
RETURN w.name;

//find the songpart
// every word that starts a NEXT relationship belonging to song part 1,
// together with that relationship
MATCH (w:Word)-[n:NEXT {songpart:1}]->()
RETURN w, n;

//find the sentences
// all word pairs linked by a NEXT relationship of sentence 1 in song part 5
MATCH (start)-[n:NEXT {songpart:5, songpartsentence:1}]->(end)
RETURN start, n, end;

// central words, part 1: degree centrality
// counts, per word, the relationships (either direction) that link it to a Word node
MATCH (n:Word)-[r]-(:Word)
RETURN n.name, count(r) AS degree
ORDER BY degree DESC;

// betweenness centrality via UNWIND:
// for each unordered pair of words, take all shortest paths between them and
// count how often each word appears strictly inside such a path
MATCH path = allShortestPaths((a:Word)-[*]-(b:Word))
WHERE length(path) > 1 AND id(a) < id(b)
// the slice [1..-1] drops the two endpoints and keeps the intermediate nodes
UNWIND nodes(path)[1..-1] AS n
RETURN n.name, count(*) AS betweenness
ORDER BY betweenness DESC;

// persist the betweenness score as a property on each word
// only words that sit strictly inside at least one shortest path receive the property
MATCH path = allShortestPaths((a:Word)-[*]-(b:Word))
WHERE length(path) > 1 AND id(a) < id(b)
// the slice [1..-1] drops the two endpoints and keeps the intermediate nodes
UNWIND nodes(path)[1..-1] AS inner
WITH inner, count(*) AS score
SET inner.betweenness = score;

//query betweenness by property
// only words that carry a betweenness property are listed, highest score first
MATCH (n:Word)
WHERE n.betweenness IS NOT NULL
RETURN n.name, n.betweenness
ORDER BY n.betweenness DESC;
	//create the karaoke graph
	// Each CSV row carries one sentence of the song. Every pair of adjacent words in a
	// sentence becomes (:Word)-[:NEXT]->(:Word), keyed by song part and sentence number;
	// r.count records how often that exact pair was merged.
	LOAD CSV WITH HEADERS FROM "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1DLu2bl1ZO7Zm8zU1UXNCDZGxsnBkicAJD4J-FSbVXLE/export?format=csv&id=1DLu2bl1ZO7Zm8zU1UXNCDZGxsnBkicAJD4J-FSbVXLE&gid=0" AS csv
	WITH csv.Songpart AS songpart, csv.Songpartsentence AS songpartsentence, csv.Songsentence AS row
	// UNWIND passes a single value through unchanged and yields no rows for null,
	// so rows without a sentence are skipped here
	UNWIND row AS text
	// lower-case the sentence and strip punctuation before splitting it into words
	// (the separator before replace(...) must be a plain pipe, not an escaped one)
	WITH songpart, songpartsentence, reduce(t=toLower(text), delim IN [",",".","!","?",'"',":",";","'","-"] | replace(t,delim,"")) AS normalized
	// discard empty tokens (repeated, leading or trailing spaces) so that no Word
	// node with an empty name is created
	WITH songpart, songpartsentence, [w IN split(normalized," ") WHERE trim(w) <> "" | trim(w)] AS words
	// idx walks the adjacent word pairs; a sentence of fewer than two words gives an empty range
	UNWIND range(0,size(words)-2) AS idx
	MERGE (w1:Word {name:words[idx]})
	MERGE (w2:Word {name:words[idx+1]})
	// toInteger is used instead of toInt, which newer Neo4j releases no longer provide
	MERGE (w1)-[r:NEXT {songpart:toInteger(songpart), songpartsentence:toInteger(songpartsentence)}]->(w2)
	ON CREATE SET r.count = 1 ON MATCH SET r.count = r.count +1;

	// list the name of every Word node
	MATCH (w:Word)
	RETURN w.name;

	//find the songpart
	// every word that starts a NEXT relationship belonging to song part 1,
	// together with that relationship
	MATCH (w:Word)-[n:NEXT {songpart:1}]->()
	RETURN w, n;

	//find the sentences
	// all word pairs linked by a NEXT relationship of sentence 1 in song part 5
	MATCH (start)-[n:NEXT {songpart:5, songpartsentence:1}]->(end)
	RETURN start, n, end;

	// central words, part 1: degree centrality
	// counts, per word, the relationships (either direction) that link it to a Word node
	MATCH (n:Word)-[r]-(:Word)
	RETURN n.name, count(r) AS degree
	ORDER BY degree DESC;

	// betweenness centrality via UNWIND:
	// for each unordered pair of words, take all shortest paths between them and
	// count how often each word appears strictly inside such a path
	MATCH path = allShortestPaths((a:Word)-[*]-(b:Word))
	WHERE length(path) > 1 AND id(a) < id(b)
	// the slice [1..-1] drops the two endpoints and keeps the intermediate nodes
	UNWIND nodes(path)[1..-1] AS n
	RETURN n.name, count(*) AS betweenness
	ORDER BY betweenness DESC;

	// persist the betweenness score as a property on each word
	// only words that sit strictly inside at least one shortest path receive the property
	MATCH path = allShortestPaths((a:Word)-[*]-(b:Word))
	WHERE length(path) > 1 AND id(a) < id(b)
	// the slice [1..-1] drops the two endpoints and keeps the intermediate nodes
	UNWIND nodes(path)[1..-1] AS inner
	WITH inner, count(*) AS score
	SET inner.betweenness = score;

	//query betweenness by property
	// only words that carry a betweenness property are listed, highest score first
	MATCH (n:Word)
	WHERE n.betweenness IS NOT NULL
	RETURN n.name, n.betweenness
	ORDER BY n.betweenness DESC;