Skip to content

Instantly share code, notes, and snippets.

@rvanbruggen
Last active January 12, 2022 07:27
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save rvanbruggen/fb27a8dcda5e5be6aad499e370b900a7 to your computer and use it in GitHub Desktop.
Save rvanbruggen/fb27a8dcda5e5be6aad499e370b900a7 to your computer and use it in GitHub Desktop.
HRC email corpus as a graph - with NLP
// Schema setup: uniqueness constraints on the two keys the loader MERGEs on
// (Person.alias, Email.id) and a plain index for looking emails up by subject.
CREATE CONSTRAINT ON (person:Person) ASSERT person.alias IS UNIQUE;
CREATE CONSTRAINT ON (email:Email) ASSERT email.id IS UNIQUE;
CREATE INDEX ON :Email(subject);
// Build the graph from the public CSV, one row per email:
//   * sender / recipient Person nodes keyed by alias (metadata field first,
//     extracted field as fallback, '' when both are missing so MERGE never
//     receives a null key),
//   * one Email node keyed by id, with its descriptive properties written only
//     when the node is first created,
//   * (recipient)<-[:TO]-(email)-[:FROM]->(sender),
//   * a sender-[:HAS_EMAILED]->recipient relationship whose `count` property
//     is incremented once per CSV row.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "https://s3-us-west-2.amazonaws.com/neo4j-datasets-public/Emails-refined.csv" AS row
MERGE (sender:Person {alias: COALESCE(row.MetadataFrom, row.ExtractedFrom, '')})
MERGE (recipient:Person {alias: COALESCE(row.MetadataTo, row.ExtractedTo, '')})
MERGE (mail:Email {id: row.Id})
  ON CREATE SET
    mail.foia_doc = row.DocNumber,
    mail.subject  = row.MetadataSubject,
    mail.to       = row.MetadataTo,
    mail.from     = row.MetadataFrom,
    mail.text     = row.RawText,
    mail.ex_to    = row.ExtractedTo,
    mail.ex_from  = row.ExtractedFrom
MERGE (recipient)<-[:TO]-(mail)-[:FROM]->(sender)
MERGE (sender)-[emailed:HAS_EMAILED]->(recipient)
  ON CREATE SET emailed.count = 1
  ON MATCH SET emailed.count = emailed.count + 1;
// Refactoring email addresses.
// Step 1 (read-only): list, alphabetically, every alias containing ".gov".
MATCH (p:Person)
WHERE p.alias CONTAINS ".gov"
RETURN p.alias
ORDER BY p.alias;
// Step 2: for those same nodes, copy the alias into an email_address property.
MATCH (person:Person)
WHERE person.alias CONTAINS ".gov"
SET person.email_address = person.alias;
//merge duplicate Person nodes
// One data-driven statement replaces the former copy-pasted query-per-person
// (those were generated from
// https://docs.google.com/spreadsheets/d/1W3Co_cGhi6kTSkOUSXUtv9CpdarcafuZGILg-8SNSZk/edit#gid=0).
//
// Each inner list describes ONE real person:
//   * element 0 is the canonical alias the surviving node ends up with,
//   * the remaining elements are spelling / e-mail variants found in the data.
// To add a person or a variant, edit the list below - nothing else.
//
// The groups MUST stay disjoint (no alias in two lists): all matching happens
// before any merge, so a node listed twice would be merged after it had
// already been merged away. For that reason the two former "Rooney, Megan"
// lists and the two former Marshall lists are each combined into one group.
// Combining the Marshall lists also fixes a defect: the second list used
// "Marshall, Capricia" as canonical alias although the first merge had already
// renamed that node to "Marshall, Capricia P", leaving two nodes for one person.
//
// Groups for which no Person node exists produce no row and are skipped.
with [
  ["Axelrod, David M", "Axelrod_D"],
  ["Berger, Samuel R", "Berger, Samuel", "SBerger"],
  ["Campbell, Kurt M", "CampbellKM@state.gov"],
  ["Mills, Cheryl D", "Mills, Cherlyl D", "Cheryl", "Cheryl.mills", "Mills, Cheryl"],
  ["Crowley, Philip J", "Crowley, Philip", "CrowleyPJ@state.gov", "crowleypj@state.gov"],
  ["Fuchs, Michael H", "FuchsMH@state.gov"],
  ["Hanley, Monica R", "HanleyMR@state.gov"],
  ["Abedin, Huma", "Huma@clintonemail.com"],
  ["Sullivan, Jacob H", "Jake.Sullivan", "Jake.sullivan", "Sullivan, Jake"],
  ["Jiloty, Lauren C", "Jiloty, Lauren", "Jjiloty, Lauren C", "JilotyLC@state.gov"],
  ["Kelly, Craig A", "KellyC@state.gov"],
  ["Koh, Harold H", "Koh, Harold Hongju", "KohHH@state.gov", "kohhh@state.gov"],
  ["Lew, Jacob J", "Lew, Jacob", "lewjj@state.gov", "LewJJ@state.gov", "Lew", "jacobjlew"],
  ["McHale, Judith A", "McHale, Judith", "McHaleJA@state.gov", "mchaleja@state.gov", "mhcaleja@state.gov"],
  ["Mikulski, Bam", "Mikulski, Bam (Mitkulski)", "Mikulski, BAM", "bam@mikulski.senate.gov", "BAM@Mikulski.senate.gov"],
  ["Muscatine, Lissa", "MuscatineL@state.gov"],
  ["Posner, Michael H", "Posner, Michael"],
  ["Rice, Susan E", "Rice, Susan E."],
  ["Rooney, Megan", "RooneyM@state.gov", "rooneym@state.gov"],
  ["Russo, Robert V", "Russorv@state.gov"],
  ["Schwerin, Daniel B", "SchwerinDB@state.gov"],
  ["Sherman, Wendy R", "Sherman, Wendy"],
  ["Slaughter, Anne-Marie", "SlaughterA@state.gov", "slaughtera@state.gov"],
  ["Steinberg, James B", "SteinbergJB@state.gov"],
  ["Stern, Todd D", "Stern, Todd", "Stern, Todd D (S/SECC)", "sterntd@state.gov"],
  ["Tillemann, Tomicah S", "TillemannTS@state.gov", "Tillemann, Tomicah"],
  ["Toiv, Nora F", "Toiv, Nora", "toivnf@state.gov"],
  ["Valmoro, Lona", "ValmoroLJ@state.gov"],
  ["Verma, Richard R", "Verma, Richard", "VermaRR@state.gov"],
  ["Verveer, Melanne S", "Verveer, Melanne", "VerveerMS@state.gov", "verveerms@state.gov"],
  ["Carson, Johnnie", "carsonj@state.gov"],
  ["Chollet, Derek H", "cholletdh@state.gov"],
  ["Feltman, Jeffrey D", "feltmanjd@state.gov"],
  ["Podesta, John", "jpodesta"],
  ["Marshall, Capricia P", "Marshall, Capricia", "marshallcp@state.gov", "cmarshall", "capriciamarshall"],
  ["Reines, Philippe", "reinesp@state.gov"],
  ["Rodriguez, Miguel E", "rodriguezme@state.gov"],
  ["Shannon, Thomas A", "shannonta@state.gov"],
  ["Shapiro, Andrew J", "shapiroa@state.gov"],
  ["Tauscher, Ellen O", "tauschereo@state.gov"],
  ["Valenzuela, Arturo A", "valenzuelaaa@state.gov"],
  ["Balderston, Kris M", "Balderston, Kris", "BaldersonKM@state.gov"],
  ["Ebeling, Betsy", "Betsy.Ebeling"],
  ["Burns, William J", "Burnswj@state.gov"],
  ["Blair, Cherie", "CHERIE BLAIR"],
  ["Garten, David (Lautenberg)", "David_Garten@lautenberg.senate.gov"],
  ["Clinton, Hillary R", "HRC", "Clinton, Hillary"],
  ["Hill, Christopher R (Baghdad)", "Hill"],
  ["Graham, Lindsey", "LGraham"],
  ["Baer, Daniel", "baer.daniel", "daniel.baer"],
  ["Pally, Maura M", "ECA:Pally, Maura"],
  ["Talbott, Strobe", "STALBOTT", "stalbott"]
] as groups
unwind groups as list
match (a:Person)
where a.alias in list
// one row per person: canonical alias + every existing node that carries a variant
with list[0] as tobeAlias, collect(a) as nodes
call apoc.refactor.mergeNodes(nodes, {
  properties: "discard",
  mergeRels: true
})
yield node
// whichever node survived the merge, give it the canonical alias
set node.alias = tobeAlias
return tobeAlias, "Nodes merged.";
// Add the counts: store on every Person the number of relationships
// (any type, either direction) that connect it to Email nodes.
MATCH (person:Person)-[link]-(:Email)
WITH person, count(link) AS email_links
SET person.count = email_links;
//cleanup mail with faulty alias
// One Person node carries a whole recipient list as its alias. For every email
// addressed to that bogus node, re-attach the email to each real recipient and
// then remove the bogus node.
//
// Differences from a plain CREATE-based fix-up, on purpose:
//   * HAS_EMAILED is MERGEd with the same counter logic the CSV loader uses, so
//     an existing sender->recipient relationship has its count incremented
//     instead of gaining a second, count-less relationship; coalesce() covers
//     relationships that exist without a count.
//   * The relationship types (:TO / :FROM) are spelled out so only the real
//     sender of the email is picked up.
//   * Recipients are MERGEd, so a canonical Person that happens to be missing
//     is created rather than silently turning the whole statement into a no-op.
//   * The recipient aliases are the canonical ones (e.g. "Sullivan, Jacob H",
//     "Chollet, Derek H"), not the garbled spellings inside the bogus alias.
match (bogus:Person {alias: "Mills, Cheryl D; Toiv, Nora F; Sullivan, Jacob 3; Ross, Alec J; Slaughter, Anne-Marie; ChoIlet, Derek H; Crowley, Philip"})<-[:TO]-(e:Email)-[:FROM]->(sender:Person)
unwind [
  "Mills, Cheryl D",
  "Toiv, Nora F",
  "Sullivan, Jacob H",
  "Ross, Alec J",
  "Slaughter, Anne-Marie",
  "Chollet, Derek H",
  "Crowley, Philip J"
] as recipientAlias
merge (recipient:Person {alias: recipientAlias})
merge (sender)-[r:HAS_EMAILED]->(recipient)
  on create set r.count = 1
  on match set r.count = coalesce(r.count, 0) + 1
merge (recipient)<-[:TO]-(e)
// collapse the (email x recipient) rows back to the single bogus node
with distinct bogus
detach delete bogus;
// Calculate PageRank.
// The `:param` lines are Neo4j Browser commands that set the query parameters
// consumed by the algo.pageRank call; the score is written to the property
// named in config.writeProperty ('pagerank').
:param label => 'Person';
:param relationshipType => 'HAS_EMAILED';
:param limit => 100;
:param config => ({writeProperty: 'pagerank', direction: 'Outgoing', dampingFactor: 0.85, iterations: 25, weightProperty: null, defaultValue: 1, concurrency: 8});
CALL algo.pageRank($label, $relationshipType, $config);
// Query PageRank: every Person with its score, highest first.
MATCH (n:Person)
RETURN n.alias, n.pagerank
ORDER BY n.pagerank DESC;
// Calculate betweenness.
// The `:param` lines are Neo4j Browser commands that set the query parameters
// consumed by the algo.betweenness call; the score is written to the property
// named in config.writeProperty ('betweenness').
:param label => 'Person';
:param relationshipType => 'HAS_EMAILED';
:param limit => 100;
:param config => ({writeProperty: 'betweenness', direction: 'Outgoing', concurrency: 8});
CALL algo.betweenness($label, $relationshipType, $config);
// Query betweenness: every Person with its score, highest first.
MATCH (n:Person)
RETURN n.alias, n.betweenness
ORDER BY n.betweenness DESC;
// Emails "side channel": find two different people who are each connected by
// HAS_EMAILED to a Person whose alias contains "Hillary" and also to each
// other, then return up to 10 paths that join such a pair through an Email
// node. Neither end of the returned path may itself be a "Hillary" alias.
MATCH (a:Person)-[:HAS_EMAILED]-(hub:Person)-[:HAS_EMAILED]-(b:Person)-[:HAS_EMAILED]-(a)
WHERE a <> b
  AND hub.alias CONTAINS "Hillary"
WITH a, b
MATCH path = (a)--(mail:Email)--(b)
WHERE NOT (a.alias CONTAINS "Hillary" OR b.alias CONTAINS "Hillary")
RETURN path
LIMIT 10;
// Full-text search over the emails.
// 1) Create a full-text index named "fullEmails" covering the text and subject
//    properties of Email nodes.
CALL db.index.fulltext.createNodeIndex("fullEmails", ["Email"], ["text", "subject"]);
// 2) Query that index and show the text of the five best-scoring emails.
CALL db.index.fulltext.queryNodes("fullEmails", "trump corruption")
YIELD node, score
RETURN node.text, score
ORDER BY score DESC
LIMIT 5;
// Natural Language Processing (NLP) with the GraphAware plugins - setup.
// Create the NLP schema.
CALL ga.nlp.createSchema();
// Set the default language.
CALL ga.nlp.config.setDefaultLanguage('en');
// Define the email text-analysis pipeline ("emailanalyser").
CALL ga.nlp.processor.addPipeline({
  name: 'emailanalyser',
  textProcessor: 'com.graphaware.nlp.processor.stanford.StanfordTextProcessor',
  processingSteps: {tokenize: true, ner: true, dependency: true},
  stopWords: '+,result, all, during',
  threadNumber: 20
});
// Make that pipeline the default one.
CALL ga.nlp.processor.pipeline.default("emailanalyser");
//text information extraction with annotations
// Runs through apoc.periodic.iterate with two Cypher strings:
//   * outer query: every Email that has no outgoing HAS_ANNOTATED_TEXT
//     relationship yet, so emails that are already linked are not selected again;
//   * inner query: annotate the first 10000 characters of e.text
//     (left(e.text,10000)), passing the node's internal id as the annotation id,
//     and link the email to the result with HAS_ANNOTATED_TEXT.
// batchSize:1 hands the inner query one email per batch.
CALL apoc.periodic.iterate(
"MATCH (e:Email)
where not (e)-[:HAS_ANNOTATED_TEXT]->()
return e", "
CALL ga.nlp.annotate({text: left(e.text,10000), id: id(e)})
YIELD result
MERGE (e)-[:HAS_ANNOTATED_TEXT]->(result)
RETURN result", {batchSize:1, iterateList:true});
// Keyword extraction: call TextRank once for every AnnotatedText node.
MATCH (annotated:AnnotatedText)
CALL ga.nlp.ml.textRank({
  annotatedText: annotated,
  stopwords: '+,other,email',
  useDependencies: true
})
YIELD result
RETURN result;
// Post-processing of keywords.
// Important: create the two Keyword indexes first - they optimise the
// performance of the post-process method that follows.
CREATE INDEX ON :Keyword(numTerms);
CREATE INDEX ON :Keyword(value);
CALL ga.nlp.ml.textRank.postprocess({
  method: "subgroups",
  keywordLabel: "Keyword"
})
YIELD result
RETURN result;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment