amaaradji/panama_papers_csv.cypher

## panama_papers_csv.cypher
//You can download the Panama Paper CSV files here https://offshoreleaks-data.icij.org/offshoreleaks/csv/csv_panama_papers.2018-02-14.zip
//Unzip and drop the CSV files in the 'import' folder of your Neo4j install folder

//Note that the panama_papers.nodes.address.csv and panama_papers.nodes.officer.csv contain two small errors you need to fix in order to be able to import them
//to fix panama_papers.nodes.address.csv go line 63038 col 89 remove the \ (escape character)
//to fix panama_papers.nodes.officer.csv go line 199625 col 20 remove the \ (escape character)

//It is required to install the APOC plug-in in Neo4j (refer to documentation here https://neo4j.com/labs/apoc/4.1/installation/)

//-----------------import CSV files----------------------
LOAD CSV WITH HEADERS FROM "file:///panama_papers.nodes.intermediary.csv" AS row
CREATE (n:Intermediary)
SET n=row;

LOAD CSV WITH HEADERS FROM "file:///panama_papers.nodes.address.csv" AS row
CREATE (n:Address)
SET n=row;

LOAD CSV WITH HEADERS FROM "file:///panama_papers.nodes.officer.csv" AS row
CREATE (n:Officer)
SET n=row;

LOAD CSV WITH HEADERS FROM "file:///panama_papers.nodes.entity.csv" AS row
CREATE (n:Entity)
SET n=row;

LOAD CSV WITH HEADERS FROM "file:///panama_papers.edges.csv" AS row
CREATE (n:Edge)
SET n=row;


//--------------create parameterized relationtype from the imported panama_papers.edges.csv
MATCH (n1 {node_id:e.START_ID}),(e:Edge),(n2 {node_id:e.END_ID})
CALL apoc.create.relationship(n1,toUpper(e.TYPE),{sourceID: e.sourceID, end_date: e.end_date, valid_until: e.valid_until, link: e.link, start_date: e.start_date},n2) YIELD rel
Return n1,rel,n2;

// you can delete the Edge nodes

//*****************************************************************************************
//-----------Query Snippets---------------------------------------------------------------
//*****************************************************************************************
CALL db.schema.visualization();
//explore (reconsider) relations
MATCH (n:Edge) RETURN DISTINCT n.link, count(*);
//create new relations
MATCH (n1)-[r:OFFICER_OF {link:'beneficiary, shareholder and director of'}]->(n2)
CREATE (n1)-[b:BENEFICIARY]->(n2) SET b=r
CREATE (n1)-[s:SHAREHOLDER_OF]->(n2) SET s=r
CREATE (n1)-[d:DIRECTOR_OF]->(n2) SET d=r

//explore connection with 2 hops
MATCH(a:Officer),(b:Officer) WHERE a.name CONTAINS 'Smith' AND b.name CONTAINS 'Grant' MATCH p=allShortestPaths((a)-[*..2]-(b)) RETURN p LIMIT 50
//any VIP
MATCH p=(o:Officer)-->(:Address)<--(:Officer)-->(:Entity)<--(:Intermediary)
WHERE o.name CONTAINS 'Gunnlaugsson'
RETURN p LIMIT 100

////**********************************************************************

//DUMP
./neo4j-admin dump --database=panama --to=/home/abdu/Dropbox/Cergy/M1-BDA+/Project-20210215/panama_edge_REL_props.dump

//LOAD
./neo4j-admin load --force --database=panama --from=/home/abdu/neo4j-community-4.2.3/import/panama_edge_REL_props.dump

////**********************************************************************


//selectig random nodes and creating new node
call apoc.cypher.run("MATCH (i:Intermediary) return i.node_id as id, rand() as rand ORDER BY rand ASC Limit 1",{} ) YIELD value as i1
call apoc.cypher.run("MATCH (i:Intermediary) return i.node_id as id, rand() as rand ORDER BY rand ASC Limit 1",{} ) YIELD value as i2
CREATE (e:Edge)
SET e.START_ID= i1.id, e.END_ID = i2.id, e.TYPE = "thisisnewrelation"


MATCH (n1 {node_id:e.START_ID}),(e:Edge),(n2 {node_id:e.END_ID})
CALL apoc.create.relationship(n1,e.TYPE,{},n2) YIELD rel
Return n1,rel,n2

MATCH (i1:Intermediary {node_id:e.START_ID}),(e:Edge),(i2:Intermediary {node_id:e.END_ID})
CALL apoc.create.relationship(i1,e.TYPE,{},i2) YIELD rel
Return i1,rel,i2

MATCH (i1:Intermediary),(e:Edge),(i2:Intermediary)
WHERE i1.node_id=e.START_ID and i2.node_id = e.END_ID
CALL apoc.create.relationship(i1,e.TYPE,{},i2) YIELD rel
Return i1,rel,i2

//it is recommended to use community
//activate apoc by moving lab folder apoc jar file into plugin folder + add dbms.security.procedures.unrestricted=apoc.* to config file
//curate the csv files (removing the escape character \ before " or ,)
//import csv with headers
//add unique contrainst and index


//to explore use :play https://guides.neo4j.com/sandbox/icij-panama-papers/datashape.html
//           and https://guides.neo4j.com/sandbox/icij-panama-papers


MATCH (n1 {node_id:e.START_ID}),(e:Edge),(n2 {node_id:e.END_ID})
CALL apoc.create.relationship(n1,e.TYPE,{sourceID: e.sourceID, end_date: e.end_date, valid_until: e.valid_until, link: e.link, start_date: e.start_date},n2) YIELD rel
Return n1,rel,n2
	//You can download the Panama Paper CSV files here https://offshoreleaks-data.icij.org/offshoreleaks/csv/csv_panama_papers.2018-02-14.zip
	//Unzip and drop the CSV files in the 'import' folder of your Neo4j install folder

	//Note that the panama_papers.nodes.address.csv and panama_papers.nodes.officer.csv contain two small errors you need to fix in order to be able to import them
	//to fix panama_papers.nodes.address.csv go line 63038 col 89 remove the \ (escape character)
	//to fix panama_papers.nodes.officer.csv go line 199625 col 20 remove the \ (escape character)

	//It is required to install the APOC plug-in in Neo4j (refer to documentation here https://neo4j.com/labs/apoc/4.1/installation/)

	//-----------------import CSV files----------------------
	LOAD CSV WITH HEADERS FROM "file:///panama_papers.nodes.intermediary.csv" AS row
	CREATE (n:Intermediary)
	SET n=row;

	LOAD CSV WITH HEADERS FROM "file:///panama_papers.nodes.address.csv" AS row
	CREATE (n:Address)
	SET n=row;

	LOAD CSV WITH HEADERS FROM "file:///panama_papers.nodes.officer.csv" AS row
	CREATE (n:Officer)
	SET n=row;

	LOAD CSV WITH HEADERS FROM "file:///panama_papers.nodes.entity.csv" AS row
	CREATE (n:Entity)
	SET n=row;

	LOAD CSV WITH HEADERS FROM "file:///panama_papers.edges.csv" AS row
	CREATE (n:Edge)
	SET n=row;


	//--------------create parameterized relationtype from the imported panama_papers.edges.csv
	MATCH (n1 {node_id:e.START_ID}),(e:Edge),(n2 {node_id:e.END_ID})
	CALL apoc.create.relationship(n1,toUpper(e.TYPE),{sourceID: e.sourceID, end_date: e.end_date, valid_until: e.valid_until, link: e.link, start_date: e.start_date},n2) YIELD rel
	Return n1,rel,n2;

	// you can delete the Edge nodes

	//*****************************************************************************************
	//-----------Query Snippets---------------------------------------------------------------
	//*****************************************************************************************
	CALL db.schema.visualization();
	//explore (reconsider) relations
	MATCH (n:Edge) RETURN DISTINCT n.link, count(*);
	//create new relations
	MATCH (n1)-[r:OFFICER_OF {link:'beneficiary, shareholder and director of'}]->(n2)
	CREATE (n1)-[b:BENEFICIARY]->(n2) SET b=r
	CREATE (n1)-[s:SHAREHOLDER_OF]->(n2) SET s=r
	CREATE (n1)-[d:DIRECTOR_OF]->(n2) SET d=r

	//explore connection with 2 hops
	MATCH(a:Officer),(b:Officer) WHERE a.name CONTAINS 'Smith' AND b.name CONTAINS 'Grant' MATCH p=allShortestPaths((a)-[*..2]-(b)) RETURN p LIMIT 50
	//any VIP
	MATCH p=(o:Officer)-->(:Address)<--(:Officer)-->(:Entity)<--(:Intermediary)
	WHERE o.name CONTAINS 'Gunnlaugsson'
	RETURN p LIMIT 100

	////**********************************************************************

	//DUMP
	./neo4j-admin dump --database=panama --to=/home/abdu/Dropbox/Cergy/M1-BDA+/Project-20210215/panama_edge_REL_props.dump

	//LOAD
	./neo4j-admin load --force --database=panama --from=/home/abdu/neo4j-community-4.2.3/import/panama_edge_REL_props.dump

	////**********************************************************************



	//selectig random nodes and creating new node
	call apoc.cypher.run("MATCH (i:Intermediary) return i.node_id as id, rand() as rand ORDER BY rand ASC Limit 1",{} ) YIELD value as i1
	call apoc.cypher.run("MATCH (i:Intermediary) return i.node_id as id, rand() as rand ORDER BY rand ASC Limit 1",{} ) YIELD value as i2
	CREATE (e:Edge)
	SET e.START_ID= i1.id, e.END_ID = i2.id, e.TYPE = "thisisnewrelation"





	MATCH (n1 {node_id:e.START_ID}),(e:Edge),(n2 {node_id:e.END_ID})
	CALL apoc.create.relationship(n1,e.TYPE,{},n2) YIELD rel
	Return n1,rel,n2

	MATCH (i1:Intermediary {node_id:e.START_ID}),(e:Edge),(i2:Intermediary {node_id:e.END_ID})
	CALL apoc.create.relationship(i1,e.TYPE,{},i2) YIELD rel
	Return i1,rel,i2

	MATCH (i1:Intermediary),(e:Edge),(i2:Intermediary)
	WHERE i1.node_id=e.START_ID and i2.node_id = e.END_ID
	CALL apoc.create.relationship(i1,e.TYPE,{},i2) YIELD rel
	Return i1,rel,i2

	//it is recommended to use community
	//activate apoc by moving lab folder apoc jar file into plugin folder + add dbms.security.procedures.unrestricted=apoc.* to config file
	//curate the csv files (removing the escape character \ before " or ,)
	//import csv with headers
	//add unique contrainst and index


	//to explore use :play https://guides.neo4j.com/sandbox/icij-panama-papers/datashape.html
	// and https://guides.neo4j.com/sandbox/icij-panama-papers


	MATCH (n1 {node_id:e.START_ID}),(e:Edge),(n2 {node_id:e.END_ID})
	CALL apoc.create.relationship(n1,e.TYPE,{sourceID: e.sourceID, end_date: e.end_date, valid_until: e.valid_until, link: e.link, start_date: e.start_date},n2) YIELD rel
	Return n1,rel,n2