Skip to content

Instantly share code, notes, and snippets.

@rvanbruggen
Last active August 8, 2017 18:38
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rvanbruggen/0174e5d86cf9553657470e63565dce96 to your computer and use it in GitHub Desktop.
Save rvanbruggen/0174e5d86cf9553657470e63565dce96 to your computer and use it in GitHub Desktop.
Importing and querying the web of Belgian Public companies and their ceo's/chairmen
//Importing from the Google Spreadsheet
//import the Person nodes
load csv with headers from
"https://docs.google.com/spreadsheets/d/1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc/export?format=csv&id=1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc&gid=0" as persons
create (n:Node:Person)
set n = persons;
//import the Company nodes
load csv with headers from
"https://docs.google.com/spreadsheets/d/1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc/export?format=csv&id=1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc&gid=2040965723" as companies
create (n:Node:Company)
set n = companies;
/////////////////////////////////////////////
//IMPORTING THE RELATIONSHIPS: 2 ALTERNATIVES
/////////////////////////////////////////////
//IF YOU WANT GENERIC RELATIONSHIP TYPE AND JUST WANT TO USE STANDARD CYPHER, USE THIS
//import the relationships
load csv with headers from "https://docs.google.com/spreadsheets/d/1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc/export?format=csv&id=1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc&gid=773066509" as csv
match (source:Node {ID: csv.source}), (target:Node {ID: csv.target})
create (source)-[:RELATED_TO {type: csv.mandate}]->(target);
//IF YOU WANT SPECIFIC RELATIONSHIP TYPES AND HAVE APOCs ENABLED, USE THIS
//specific rels
load csv with headers from "https://docs.google.com/spreadsheets/d/1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc/export?format=csv&id=1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc&gid=773066509" as csv
match (source:Node {ID: csv.source}), (target:Node {ID: csv.target})
CALL apoc.create.relationship(source,csv.mandate,{},target) yield rel
return count(*);
//Colour males and females with new labels
match (n:Person)
where n.gender = "M"
set n:Male;
match (n:Person)
where n.gender = "V"
set n:Female;
//remove the old labels
match (n:Node)
remove n:Node;
//create the INDEXES
create index on :Male(name);
create index on :Female(name);
create index on :Company(name);
//import from original files
//import the Person nodes
load csv with headers from
"http://multimedia.tijd.be/bestuurders/data/nodesdef.csv" as nodes
create (n:Node)
set n = nodes;
//import the relationships
load csv with headers from "http://multimedia.tijd.be/bestuurders/data/linksdef.csv" as rels
match (source:Node {ID: rels.source}), (target:Node {ID: rels.target})
create (source)-[:RELATED_TO {type: rels.mandate}]->(target);
//Queries on the Belgian Public Company Dataset
// degree of the Persons
match (p:Person)
return p.name, size( (p)--() ) as degree
order by degree desc
limit 10
//3-hop network around Luc Bertrand
match path = (m:Male)-[r*..3]-(n)
where m.name contains "Bertrand"
return path
//explore the links between highly connected nodes
//between Philippe Vlerick and Luc Bertrand
match (vlerick:Person {name:"Philippe Vlerick"}), (bertrand:Person {name:"Luc Bertrand"}),
path = allshortestpaths ((vlerick)-[*]-(bertrand))
return path;
//Between Bert De Graeve and Luc Bertrand
match (degraeve:Person {name:"Bert De Graeve"}), (bertrand:Person {name:"Luc Bertrand"}),
path = allshortestpaths ((degraeve)-[*]-(bertrand))
return path;
//Between Bert De Graeve and Frank Donck
match (degraeve:Person {name:"Bert De Graeve"}), (donck:Person {name:"Frank Donck"}),
path = allshortestpaths ((degraeve)-[*]-(donck))
return path;
//explore some links between companies
match (kbc:Company {name:"KBC"}), (li:Company {name:"AB INBEV"}),
path = allshortestpaths ((kbc)-[*]-(li))
return path;
//what is the maximum diameter of the graph
//maximum diameter as text
MATCH (a:Person), (b:Person) WHERE id(a) > id(b)
MATCH p=shortestPath((a)-[:RELATED_TO*]-(b))
with length(p) AS len, extract(x IN nodes(p) | x.name) AS path
ORDER BY len DESC LIMIT 1
return path
//maximum diameter as a graph
MATCH (a:Person), (b:Person) WHERE id(a) > id(b)
MATCH p=shortestPath((a)-[:RELATED_TO*]-(b))
with length(p) AS len, p
ORDER BY len DESC LIMIT 1
return p
//explore some graph algos with APOC
//algos
//betweenness centrality
MATCH (node:Person)
WHERE id(node) %2 = 0
WITH collect(node) AS nodes
CALL apoc.algo.betweenness(['RELATED_TO'],nodes,'BOTH') YIELD node, score
RETURN node.name, score
ORDER BY score DESC
//closeness centrality
MATCH (node:Person)
WHERE id(node) %2 = 0
WITH collect(node) AS nodes
CALL apoc.algo.closeness(['RELATED_TO'],nodes,'INCOMING') YIELD node, score
RETURN node.name, score
ORDER BY score DESC
//pageRank for Companies
MATCH (node:Company)
WHERE id(node) %2 = 0
WITH collect(node) AS nodes
// compute over relationships of all types
CALL apoc.algo.pageRank(nodes) YIELD node, score
RETURN node.name, score
ORDER BY score DESC
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment