Last active
February 24, 2016 21:52
-
-
Save rvanbruggen/6c6028d34d9a4473acf1 to your computer and use it in GitHub Desktop.
Multilingual graph karaoke - Dansmuziek, Doe Maar!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Dansmuziek, Doe Maar!
// Index on SongSentence.seq; the statements further down look sentences up by seq.
create index on :SongSentence(seq);

//load the sentences
// Creates one SongSentence node per CSV row, keyed by the integer value of the
// "Sequence" column. This uses CREATE, so re-running the script adds duplicates.
load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/17AOq4dQKL-Sl0qxmoevnmEVZO5yAR44lA2nh1x0hE0o/export?format=csv&id=17AOq4dQKL-Sl0qxmoevnmEVZO5yAR44lA2nh1x0hE0o&gid=363092519" as csv
create (s:SongSentence {seq: toInt(csv.Sequence)});

// Chain consecutive sentences: the sentence with seq n PRECEDES the one with seq n+1.
match (s1:SongSentence), (s2:SongSentence)
where s2.seq = s1.seq + 1
merge (s1)-[:PRECEDES]->(s2);
//load the sentences in English
// Builds a per-sentence word chain from the "English" column:
//   1. lower-case the text and remove each punctuation character in the delimiter list,
//   2. split on single spaces and trim every piece,
//   3. MERGE one :English node per (word, seq) and link adjacent words with NEXT.
// Because nodes are merged on (name, seq), a word repeated within one sentence
// maps to a single node.
load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/17AOq4dQKL-Sl0qxmoevnmEVZO5yAR44lA2nh1x0hE0o/export?format=csv&id=17AOq4dQKL-Sl0qxmoevnmEVZO5yAR44lA2nh1x0hE0o&gid=363092519" as csv
with csv.Sequence as seq, csv.English as row
// NOTE(review): unwinding a single string looks like a pass-through that also drops
// null cells -- confirm before removing it.
unwind row as text
with seq, reduce(t=tolower(text), delim in [",",".","!","?",'"',":",";","'","-"] | replace(t,delim,"")) as normalized
with seq, [w in split(normalized," ") | trim(w)] as words
// idx walks the adjacent pairs (idx, idx+1); a one-word sentence gives an empty range.
unwind range(0,size(words)-2) as idx
MERGE (w1:English {name:words[idx], seq:toInt(seq), language:"English"})
MERGE (w2:English {name:words[idx+1], seq:toInt(seq), language:"English"})
MERGE (w1)-[r:NEXT {seq:toInt(seq)}]->(w2);
//load the sentences in Dutch
// Builds a per-sentence word chain from the "Dutch" column:
//   1. lower-case the text and remove each punctuation character in the delimiter list,
//   2. split on single spaces and trim every piece,
//   3. MERGE one :Dutch node per (word, seq) and link adjacent words with NEXT.
// Because nodes are merged on (name, seq), a word repeated within one sentence
// maps to a single node.
load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/17AOq4dQKL-Sl0qxmoevnmEVZO5yAR44lA2nh1x0hE0o/export?format=csv&id=17AOq4dQKL-Sl0qxmoevnmEVZO5yAR44lA2nh1x0hE0o&gid=363092519" as csv
with csv.Sequence as seq, csv.Dutch as row
// NOTE(review): unwinding a single string looks like a pass-through that also drops
// null cells -- confirm before removing it.
unwind row as text
with seq, reduce(t=tolower(text), delim in [",",".","!","?",'"',":",";","'","-"] | replace(t,delim,"")) as normalized
with seq, [w in split(normalized," ") | trim(w)] as words
// idx walks the adjacent pairs (idx, idx+1); a one-word sentence gives an empty range.
unwind range(0,size(words)-2) as idx
MERGE (w1:Dutch {name:words[idx], seq:toInt(seq), language:"Dutch"})
MERGE (w2:Dutch {name:words[idx+1], seq:toInt(seq), language:"Dutch"})
MERGE (w1)-[r:NEXT {seq:toInt(seq)}]->(w2);
//load the sentences in French
// Builds a per-sentence word chain from the "French" column:
//   1. lower-case the text and remove each punctuation character in the delimiter list,
//   2. split on single spaces and trim every piece,
//   3. MERGE one :French node per (word, seq) and link adjacent words with NEXT.
// Because nodes are merged on (name, seq), a word repeated within one sentence
// maps to a single node.
load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/17AOq4dQKL-Sl0qxmoevnmEVZO5yAR44lA2nh1x0hE0o/export?format=csv&id=17AOq4dQKL-Sl0qxmoevnmEVZO5yAR44lA2nh1x0hE0o&gid=363092519" as csv
with csv.Sequence as seq, csv.French as row
// NOTE(review): unwinding a single string looks like a pass-through that also drops
// null cells -- confirm before removing it.
unwind row as text
with seq, reduce(t=tolower(text), delim in [",",".","!","?",'"',":",";","'","-"] | replace(t,delim,"")) as normalized
with seq, [w in split(normalized," ") | trim(w)] as words
// idx walks the adjacent pairs (idx, idx+1); a one-word sentence gives an empty range.
unwind range(0,size(words)-2) as idx
MERGE (w1:French {name:words[idx], seq:toInt(seq), language:"French"})
MERGE (w2:French {name:words[idx+1], seq:toInt(seq), language:"French"})
MERGE (w1)-[r:NEXT {seq:toInt(seq)}]->(w2);
//load the sentences in German
// Builds a per-sentence word chain from the "German" column:
//   1. lower-case the text and remove each punctuation character in the delimiter list,
//   2. split on single spaces and trim every piece,
//   3. MERGE one :German node per (word, seq) and link adjacent words with NEXT.
// Because nodes are merged on (name, seq), a word repeated within one sentence
// maps to a single node.
load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/17AOq4dQKL-Sl0qxmoevnmEVZO5yAR44lA2nh1x0hE0o/export?format=csv&id=17AOq4dQKL-Sl0qxmoevnmEVZO5yAR44lA2nh1x0hE0o&gid=363092519" as csv
with csv.Sequence as seq, csv.German as row
// NOTE(review): unwinding a single string looks like a pass-through that also drops
// null cells -- confirm before removing it.
unwind row as text
with seq, reduce(t=tolower(text), delim in [",",".","!","?",'"',":",";","'","-"] | replace(t,delim,"")) as normalized
with seq, [w in split(normalized," ") | trim(w)] as words
// idx walks the adjacent pairs (idx, idx+1); a one-word sentence gives an empty range.
unwind range(0,size(words)-2) as idx
// The language property must agree with the :German label (it previously said "French").
MERGE (w1:German {name:words[idx], seq:toInt(seq), language:"German"})
MERGE (w2:German {name:words[idx+1], seq:toInt(seq), language:"German"})
MERGE (w1)-[r:NEXT {seq:toInt(seq)}]->(w2);
//connect the sentences
// A word with an outgoing NEXT but no incoming NEXT is treated as the first word
// of its sentence; link it from the SongSentence that carries the same seq.
match (w)-[:NEXT]->()
where not( ()-[:NEXT]->(w) )
// A first word may have several outgoing NEXT rels; process each word once.
with distinct w
match (s:SongSentence)
where s.seq=w.seq
merge (s)-[:STARTS_WITH]->(w);
//delete the SongSentences (optional cleanup, left disabled)
// match (s:SongSentence)
// detach delete s;
//find the sentences in different languages
// Paths from sentence 1 through its first word(s), following NEXT forwards until
// a word with no outgoing NEXT (the last word of the sentence).
match p = ((s:SongSentence {seq:1})-[:STARTS_WITH]->()-[:NEXT*]->(w))
where not ((w)-[:NEXT]->())
return p;

// Every node whose seq is 1. There is no label filter, so the SongSentence node
// is returned along with the word nodes.
match (w {seq:1})
return w;
// Words of sentence 4 grouped by label set (one row per language), excluding the
// SongSentence node itself.
// NOTE(review): collect() takes words in whatever order the match yields them --
// confirm whether sentence order is required here.
match (w {seq:4})
where not w:SongSentence
return w.seq as Sequence, labels(w) as Language, collect(w.name) as Sentence
order by Language[0];
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment