Importing and querying the web of Belgian Public companies and their ceo's/chairmen
//Importing from the Google Spreadsheet | |
//import the Person nodes | |
load csv with headers from | |
"https://docs.google.com/spreadsheets/d/1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc/export?format=csv&id=1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc&gid=0" as persons | |
create (n:Node:Person) | |
set n = persons; | |
//import the Company nodes | |
load csv with headers from | |
"https://docs.google.com/spreadsheets/d/1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc/export?format=csv&id=1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc&gid=2040965723" as companies | |
create (n:Node:Company) | |
set n = companies; | |
///////////////////////////////////////////// | |
//IMPORTING THE RELATIONSHIPS: 2 ALTERNATIVES | |
///////////////////////////////////////////// | |
//IF YOU WANT GENERIC RELATIONSHIP TYPE AND JUST WANT TO USE STANDARD CYPHER, USE THIS | |
//import the relationships | |
load csv with headers from "https://docs.google.com/spreadsheets/d/1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc/export?format=csv&id=1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc&gid=773066509" as csv | |
match (source:Node {ID: csv.source}), (target:Node {ID: csv.target}) | |
create (source)-[:RELATED_TO {type: csv.mandate}]->(target); | |
//IF YOU WANT SPECIFIC RELATIONSHIP TYPES AND HAVE APOCs ENABLED, USE THIS | |
//specific rels | |
load csv with headers from "https://docs.google.com/spreadsheets/d/1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc/export?format=csv&id=1_X628w_2Lx8ZAIPQQUAGhoDTuf31MRxY821E5D3u2Nc&gid=773066509" as csv | |
match (source:Node {ID: csv.source}), (target:Node {ID: csv.target}) | |
CALL apoc.create.relationship(source,csv.mandate,{},target) yield rel | |
return count(*); | |
//Colour males and females with new labels | |
match (n:Person) | |
where n.gender = "M" | |
set n:Male; | |
match (n:Person) | |
where n.gender = "V" | |
set n:Female; | |
//remove the old labels | |
match (n:Node) | |
remove n:Node; | |
//create the INDEXES | |
create index on :Male(name); | |
create index on :Female(name); | |
create index on :Company(name); |
//import from original files | |
//import the Person nodes | |
load csv with headers from | |
"http://multimedia.tijd.be/bestuurders/data/nodesdef.csv" as nodes | |
create (n:Node) | |
set n = nodes; | |
//import the relationships | |
load csv with headers from "http://multimedia.tijd.be/bestuurders/data/linksdef.csv" as rels | |
match (source:Node {ID: rels.source}), (target:Node {ID: rels.target}) | |
create (source)-[:RELATED_TO {type: rels.mandate}]->(target); |
//Queries on the Belgian Public Company Dataset | |
// degree of the Persons | |
match (p:Person) | |
return p.name, size( (p)--() ) as degree | |
order by degree desc | |
limit 10 | |
//3-hop network around Luc Bertrand | |
match path = (m:Male)-[r*..3]-(n) | |
where m.name contains "Bertrand" | |
return path | |
//explore the links between highly connected nodes | |
//between Philippe Vlerick and Luc Bertrand | |
match (vlerick:Person {name:"Philippe Vlerick"}), (bertrand:Person {name:"Luc Bertrand"}), | |
path = allshortestpaths ((vlerick)-[*]-(bertrand)) | |
return path; | |
//Between Bert De Graeve and Luc Bertrand | |
match (degraeve:Person {name:"Bert De Graeve"}), (bertrand:Person {name:"Luc Bertrand"}), | |
path = allshortestpaths ((degraeve)-[*]-(bertrand)) | |
return path; | |
//Between Bert De Graeve and Frank Donck | |
match (degraeve:Person {name:"Bert De Graeve"}), (donck:Person {name:"Frank Donck"}), | |
path = allshortestpaths ((degraeve)-[*]-(donck)) | |
return path; | |
//explore some links between companies | |
match (kbc:Company {name:"KBC"}), (li:Company {name:"AB INBEV"}), | |
path = allshortestpaths ((kbc)-[*]-(li)) | |
return path; | |
//what is the maximum diameter of the graph | |
//maximum diameter as text | |
MATCH (a:Person), (b:Person) WHERE id(a) > id(b) | |
MATCH p=shortestPath((a)-[:RELATED_TO*]-(b)) | |
with length(p) AS len, extract(x IN nodes(p) | x.name) AS path | |
ORDER BY len DESC LIMIT 1 | |
return path | |
//maximum diameter as a graph | |
MATCH (a:Person), (b:Person) WHERE id(a) > id(b) | |
MATCH p=shortestPath((a)-[:RELATED_TO*]-(b)) | |
with length(p) AS len, p | |
ORDER BY len DESC LIMIT 1 | |
return p | |
//explore some graph algos with APOC | |
//algos | |
//betweenness centrality | |
MATCH (node:Person) | |
WHERE id(node) %2 = 0 | |
WITH collect(node) AS nodes | |
CALL apoc.algo.betweenness(['RELATED_TO'],nodes,'BOTH') YIELD node, score | |
RETURN node.name, score | |
ORDER BY score DESC | |
//closeness centrality | |
MATCH (node:Person) | |
WHERE id(node) %2 = 0 | |
WITH collect(node) AS nodes | |
CALL apoc.algo.closeness(['RELATED_TO'],nodes,'INCOMING') YIELD node, score | |
RETURN node.name, score | |
ORDER BY score DESC | |
//pageRank for Companies | |
MATCH (node:Company) | |
WHERE id(node) %2 = 0 | |
WITH collect(node) AS nodes | |
// compute over relationships of all types | |
CALL apoc.algo.pageRank(nodes) YIELD node, score | |
RETURN node.name, score | |
ORDER BY score DESC |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment