Last active
January 12, 2022 07:27
-
-
Save rvanbruggen/fb27a8dcda5e5be6aad499e370b900a7 to your computer and use it in GitHub Desktop.
HRC email corpus as a graph - with NLP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//setting up the indexes
// Person nodes are keyed by alias and Email nodes by id (both unique);
// Email.subject additionally gets a plain index.
CREATE CONSTRAINT ON (person:Person) ASSERT person.alias IS UNIQUE;
CREATE CONSTRAINT ON (email:Email) ASSERT email.id IS UNIQUE;
CREATE INDEX ON :Email(subject);
// Creating the graph
// Loads one Email node per CSV row together with its sender and recipient Person nodes.
// - A Person alias is taken from the Metadata* column, falling back to the Extracted*
//   column; rows that have neither land on a single Person whose alias is ''.
// - TO and FROM are merged as two separate patterns. A single MERGE over the combined
//   pattern re-creates the WHOLE pattern whenever any part of it is missing, which would
//   duplicate relationships if an Email id ever appears on more than one row.
// - HAS_EMAILED is the Person-to-Person shortcut; r.count is incremented once per row
//   for the same sender/recipient pair.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "https://s3-us-west-2.amazonaws.com/neo4j-datasets-public/Emails-refined.csv" AS line
MERGE (fr:Person {alias: COALESCE(line.MetadataFrom, line.ExtractedFrom, '')})
MERGE (to:Person {alias: COALESCE(line.MetadataTo, line.ExtractedTo, '')})
MERGE (em:Email {id: line.Id})
  ON CREATE SET
    em.foia_doc = line.DocNumber,
    em.subject = line.MetadataSubject,
    em.to = line.MetadataTo,
    em.from = line.MetadataFrom,
    em.text = line.RawText,
    em.ex_to = line.ExtractedTo,
    em.ex_from = line.ExtractedFrom
MERGE (em)-[:TO]->(to)
MERGE (em)-[:FROM]->(fr)
MERGE (fr)-[r:HAS_EMAILED]->(to)
  ON CREATE SET r.count = 1
  ON MATCH SET r.count = r.count + 1;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//refactoring email addresses
// Step 1 (inspection only): list every Person alias that contains ".gov".
MATCH (p:Person)
WHERE p.alias CONTAINS ".gov"
RETURN p.alias
ORDER BY p.alias;
// Step 2: copy each of those aliases into a separate email_address property.
MATCH (person:Person)
WHERE person.alias CONTAINS ".gov"
SET person.email_address = person.alias;
//merge duplicate Person nodes
// One data-driven query replaces the former per-person copy of the same template.
// The alias groups were originally generated with
// https://docs.google.com/spreadsheets/d/1W3Co_cGhi6kTSkOUSXUtv9CpdarcafuZGILg-8SNSZk/edit#gid=0
//
// Each inner list describes ONE person: the first entry is the canonical alias the
// merged node ends up with, the remaining entries are the variants folded into it.
//
// The groups must stay disjoint: collect() is an aggregation, so all groups are matched
// before the first merge runs, and an alias listed in two groups would be handed to
// apoc.refactor.mergeNodes twice. For that reason the two former "Rooney, Megan" groups
// are combined, and the "cmarshall"/"capriciamarshall" variants are folded into the
// "Marshall, Capricia P" group (as a separate, later statement they produced a second
// "Marshall, Capricia" node, because that alias had already been merged away).
//
// properties:"discard" keeps the property values of the first collected node;
// mergeRels:true merges relationships that become parallel after the node merge.
// A group without any matching Person produces no row and is skipped.
UNWIND [
  ["Axelrod, David M", "Axelrod_D"],
  ["Berger, Samuel R", "Berger, Samuel", "SBerger"],
  ["Campbell, Kurt M", "CampbellKM@state.gov"],
  ["Mills, Cheryl D", "Mills, Cherlyl D", "Cheryl", "Cheryl.mills", "Mills, Cheryl"],
  ["Crowley, Philip J", "Crowley, Philip", "CrowleyPJ@state.gov", "crowleypj@state.gov"],
  ["Fuchs, Michael H", "FuchsMH@state.gov"],
  ["Hanley, Monica R", "HanleyMR@state.gov"],
  ["Abedin, Huma", "Huma@clintonemail.com"],
  ["Sullivan, Jacob H", "Jake.Sullivan", "Jake.sullivan", "Sullivan, Jake"],
  ["Jiloty, Lauren C", "Jiloty, Lauren", "Jjiloty, Lauren C", "JilotyLC@state.gov"],
  ["Kelly, Craig A", "KellyC@state.gov"],
  ["Koh, Harold H", "Koh, Harold Hongju", "KohHH@state.gov", "kohhh@state.gov"],
  ["Lew, Jacob J", "Lew, Jacob", "lewjj@state.gov", "LewJJ@state.gov", "Lew", "jacobjlew"],
  ["McHale, Judith A", "McHale, Judith", "McHaleJA@state.gov", "mchaleja@state.gov", "mhcaleja@state.gov"],
  ["Mikulski, Bam", "Mikulski, Bam (Mitkulski)", "Mikulski, BAM", "bam@mikulski.senate.gov", "BAM@Mikulski.senate.gov"],
  ["Muscatine, Lissa", "MuscatineL@state.gov"],
  ["Posner, Michael H", "Posner, Michael"],
  ["Rice, Susan E", "Rice, Susan E."],
  ["Rooney, Megan", "RooneyM@state.gov", "rooneym@state.gov"],
  ["Russo, Robert V", "Russorv@state.gov"],
  ["Schwerin, Daniel B", "SchwerinDB@state.gov"],
  ["Sherman, Wendy R", "Sherman, Wendy"],
  ["Slaughter, Anne-Marie", "SlaughterA@state.gov", "slaughtera@state.gov"],
  ["Steinberg, James B", "SteinbergJB@state.gov"],
  ["Stern, Todd D", "Stern, Todd", "Stern, Todd D (S/SECC)", "sterntd@state.gov"],
  ["Tillemann, Tomicah S", "TillemannTS@state.gov", "Tillemann, Tomicah"],
  ["Toiv, Nora F", "Toiv, Nora", "toivnf@state.gov"],
  ["Valmoro, Lona", "ValmoroLJ@state.gov"],
  ["Verma, Richard R", "Verma, Richard", "VermaRR@state.gov"],
  ["Verveer, Melanne S", "Verveer, Melanne", "VerveerMS@state.gov", "verveerms@state.gov"],
  ["Carson, Johnnie", "carsonj@state.gov"],
  ["Chollet, Derek H", "cholletdh@state.gov"],
  ["Feltman, Jeffrey D", "feltmanjd@state.gov"],
  ["Podesta, John", "jpodesta"],
  ["Marshall, Capricia P", "Marshall, Capricia", "marshallcp@state.gov", "cmarshall", "capriciamarshall"],
  ["Reines, Philippe", "reinesp@state.gov"],
  ["Rodriguez, Miguel E", "rodriguezme@state.gov"],
  ["Shannon, Thomas A", "shannonta@state.gov"],
  ["Shapiro, Andrew J", "shapiroa@state.gov"],
  ["Tauscher, Ellen O", "tauschereo@state.gov"],
  ["Valenzuela, Arturo A", "valenzuelaaa@state.gov"],
  ["Balderston, Kris M", "Balderston, Kris", "BaldersonKM@state.gov"],
  ["Ebeling, Betsy", "Betsy.Ebeling"],
  ["Burns, William J", "Burnswj@state.gov"],
  ["Blair, Cherie", "CHERIE BLAIR"],
  ["Garten, David (Lautenberg)", "David_Garten@lautenberg.senate.gov"],
  ["Clinton, Hillary R", "HRC", "Clinton, Hillary"],
  ["Hill, Christopher R (Baghdad)", "Hill"],
  ["Graham, Lindsey", "LGraham"],
  ["Baer, Daniel", "baer.daniel", "daniel.baer"],
  ["Pally, Maura M", "ECA:Pally, Maura"],
  ["Talbott, Strobe", "STALBOTT", "stalbott"]
] AS list
MATCH (a:Person)
WHERE a.alias IN list
WITH list[0] AS tobeAlias, collect(a) AS nodes, count(a) AS matchedAliases
CALL apoc.refactor.mergeNodes(nodes, {
  properties: "discard",
  mergeRels: true
})
YIELD node
// The surviving node may have been any of the variants, so force the canonical alias.
SET node.alias = tobeAlias
RETURN tobeAlias AS alias, matchedAliases AS mergedNodes
ORDER BY alias;
//cleanup mail with faulty alias
// One recipient alias is really a semicolon-separated list of seven people. For every
// email addressed to that pseudo-person, attach the email to the seven real Person nodes
// and then remove the pseudo-person.
// - Relationship types are spelled out (TO / FROM) so the sender cannot be confused with
//   another node the email points at.
// - MERGE is used instead of CREATE so an already existing HAS_EMAILED relationship is
//   reused and its count incremented (same convention as the import) rather than
//   duplicated.
// - The six MATCHed people must already exist (they carry the canonical aliases produced
//   by the duplicate merge); if one is missing the statement changes nothing.
//   "Ross, Alec J" is created on demand.
MATCH (bogus:Person {alias: "Mills, Cheryl D; Toiv, Nora F; Sullivan, Jacob 3; Ross, Alec J; Slaughter, Anne-Marie; ChoIlet, Derek H; Crowley, Philip"})<-[:TO]-(e:Email)-[:FROM]->(sender:Person)
MATCH (cheryl:Person {alias: "Mills, Cheryl D"})
MATCH (nora:Person {alias: "Toiv, Nora F"})
MATCH (jacob:Person {alias: "Sullivan, Jacob H"})
MATCH (am:Person {alias: "Slaughter, Anne-Marie"})
MATCH (derek:Person {alias: "Chollet, Derek H"})
MATCH (philip:Person {alias: "Crowley, Philip J"})
MERGE (alec:Person {alias: "Ross, Alec J"})
WITH bogus, e, sender, [cheryl, nora, jacob, alec, am, derek, philip] AS recipients
UNWIND recipients AS recipient
MERGE (e)-[:TO]->(recipient)
MERGE (sender)-[r:HAS_EMAILED]->(recipient)
  ON CREATE SET r.count = 1
  ON MATCH SET r.count = r.count + 1
WITH DISTINCT bogus
DETACH DELETE bogus;
//add the counts
// Runs after the cleanup so the rewired TO relationships (and the newly created
// "Ross, Alec J" node) are included. count = number of relationships, of any type and
// direction, between the Person and Email nodes.
MATCH (a:Person)-[r]-(:Email)
WITH a, count(r) AS emailRelCount
SET a.count = emailRelCount;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//calculate pagerank
// Neo4j Browser parameters for the algo.pageRank call below. The score is written to the
// 'pagerank' property (config.writeProperty). $limit is defined here but not referenced
// by the statements in this section.
:param label => ('Person');
:param relationshipType => ('HAS_EMAILED');
:param limit => ( 100);
:param config => ({concurrency: 8, direction: 'Outgoing', weightProperty: null, defaultValue: 1, dampingFactor: 0.85, iterations: 25, writeProperty: 'pagerank'});
CALL algo.pageRank($label, $relationshipType, $config);
//query pagerank
// Highest PageRank first.
MATCH (n:Person)
RETURN n.alias, n.pagerank
ORDER BY n.pagerank DESC;
//calculate betweenness
// Neo4j Browser parameters for the algo.betweenness call below. The score is written to
// the 'betweenness' property (config.writeProperty). $limit is defined here but not
// referenced by the statements in this section.
:param label => ('Person');
:param relationshipType => ('HAS_EMAILED');
:param limit => ( 100);
:param config => ({concurrency: 8, direction: 'Outgoing', writeProperty: 'betweenness'});
CALL algo.betweenness($label, $relationshipType, $config);
//query betweenness
// Highest betweenness first.
MATCH (n:Person)
RETURN n.alias, n.betweenness
ORDER BY n.betweenness DESC;
//emails sidechannel
// Finds pairs of people who are both linked via HAS_EMAILED to a Person whose alias
// contains "Hillary" and to each other, then returns emails that connect the pair
// directly.
// - The "not Hillary" filter on p1/p2 is applied before the second MATCH so excluded
//   pairs are never expanded.
// - id(p1) < id(p2) keeps each unordered pair once (and implies p1 <> p2); DISTINCT drops
//   repeats of a pair reached through several hub paths. Without both, the same path
//   comes back mirrored and multiplied, wasting the LIMIT.
MATCH (p1:Person)-[:HAS_EMAILED]-(hub:Person)-[:HAS_EMAILED]-(p2:Person)-[:HAS_EMAILED]-(p1)
WHERE hub.alias CONTAINS "Hillary"
  AND NOT p1.alias CONTAINS "Hillary"
  AND NOT p2.alias CONTAINS "Hillary"
  AND id(p1) < id(p2)
WITH DISTINCT p1, p2
MATCH path = (p1)--(e:Email)--(p2)
RETURN path
LIMIT 10;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//do fulltext query
//create the fulltext index
// Index "fullEmails" covers the text and subject properties of Email nodes.
CALL db.index.fulltext.createNodeIndex("fullEmails", ["Email"], ["text", "subject"]);
//query the fulltext index
// Return the text of the five best-scoring emails for the search string.
CALL db.index.fulltext.queryNodes("fullEmails", "trump corruption")
YIELD node, score
RETURN node.text, score
ORDER BY score DESC
LIMIT 5;
//do the Natural Language Processing (NLP) with the Graphaware plugins
//create the NLP schema
CALL ga.nlp.createSchema();
//set the default language
CALL ga.nlp.config.setDefaultLanguage('en');
//define my email text analysis pipeline
// Pipeline 'emailanalyser': Stanford text processor with tokenization, NER and
// dependency parsing enabled, a custom stop-word string, and 20 threads.
CALL ga.nlp.processor.addPipeline({
  textProcessor: 'com.graphaware.nlp.processor.stanford.StanfordTextProcessor',
  name: 'emailanalyser',
  processingSteps: {tokenize: true, ner: true, dependency: true},
  stopWords: '+,result, all, during',
  threadNumber: 20
});
//make the pipeline the default
CALL ga.nlp.processor.pipeline.default("emailanalyser");
//text information extraction with annotations
// apoc.periodic.iterate arguments:
//   1. driving query - every Email that has no HAS_ANNOTATED_TEXT relationship yet, so
//      already annotated emails are not selected again
//   2. action query  - annotate the first 10000 characters of the email text and link the
//      result to the email
//   3. config        - batchSize 1, iterateList true
CALL apoc.periodic.iterate(
  "MATCH (e:Email)
where not (e)-[:HAS_ANNOTATED_TEXT]->()
return e",
  "
CALL ga.nlp.annotate({text: left(e.text,10000), id: id(e)})
YIELD result
MERGE (e)-[:HAS_ANNOTATED_TEXT]->(result)
RETURN result",
  {batchSize: 1, iterateList: true}
);
//keyword extraction from annotated texts
// Run TextRank on every AnnotatedText node, with extra stop words and dependency
// information enabled.
MATCH (a:AnnotatedText)
CALL ga.nlp.ml.textRank({annotatedText: a, stopwords: '+,other,email', useDependencies: true})
YIELD result
RETURN result;
//postprocessing of keywords
// Important note: create the following indexes first to optimise the performance of the
// post-process method
CREATE INDEX ON :Keyword(numTerms);
CREATE INDEX ON :Keyword(value);
CALL ga.nlp.ml.textRank.postprocess({keywordLabel: "Keyword", method: "subgroups"})
YIELD result
RETURN result;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment