Before you go any further in exploring the data, take a look at these documents to give you more context
- the ICIJ Investigation — an explanation of what Suspicious Activity Reports (SARs) actually are
- Michael Hunger's work with the data
- the raw data to download
and for background reading:
- An explanation of what correspondent banks are
- An illustration of the humongous costs / transaction fees that correspondent banks charge their clients
If you want to run this, please manually copy the below block into the statement editor above.
// Schema setup (Neo4j 3.x syntax): uniqueness constraints on the node keys
// used by the MERGE statements below, so re-running the import is idempotent.
create constraint on (c:Country) assert c.code is unique;
create constraint on (e:Entity) assert e.id is unique;
create constraint on (f:Filing) assert f.id is unique;
// Lookup indexes for the properties the import and analysis queries filter on.
create index on :Filing(icij_sar_id);
create index on :Entity(name);
create index on :Filing(begin);
create index on :Filing(end);
create index on :Filing(amount);
create index on :Country(name);
// Import one Filing node per row of the FinCEN transactions CSV.
// `set s += value` copies every CSV column onto the node verbatim; LOAD CSV
// delivers all values as strings, so numeric/date fields are converted in the
// post-processing pass further down.
load csv with headers from "https://raw.githubusercontent.com/jexp/fincen/main/download_transactions_map.csv" as value
merge (s:Filing {id:value.id}) set s += value;
// Link each Filing to its filer and the "other" entity named in the SAR,
// creating Entity and Country nodes on demand.
load csv with headers from "https://raw.githubusercontent.com/jexp/fincen/main/download_bank_connections.csv" as value
// Rows whose icij_sar_id has no matching Filing are silently skipped by MATCH.
match (f:Filing {icij_sar_id:value.icij_sar_id})
// Filing bank; geo-coordinates stored as a point for map visualisations.
merge (filer:Entity {id:value.filer_org_name_id}) on create set filer.name = value.filer_org_name,
filer.location = point({latitude:toFloat(value.filer_org_lat),longitude:toFloat(value.filer_org_lng)})
// Counterparty entity mentioned in the filing.
merge (other:Entity {id:value.entity_b_id}) on create set other.name = value.entity_b,
other.location = point({latitude:toFloat(value.entity_b_lat),longitude:toFloat(value.entity_b_lng)}),
other.country = value.entity_b_iso_code
merge (c:Country {code:value.entity_b_iso_code}) on create set c.name = value.entity_b_country
merge (f)<-[:FILED]-(filer)
merge (f)-[:CONCERNS]->(other)
merge (other)-[:COUNTRY]->(c);
// Post-process every Filing: convert string properties imported from CSV into
// typed values, then materialise the beneficiary / filer / originator
// entities and their country links.
// NOTE(review): MERGE on a null key (e.g. a Filing without
// beneficiary_bank_id) would abort the statement — assumes all rows carry
// these ids; verify against the source CSV.
match (f:Filing)
set f.transactions = toInteger(f.number_transactions)
set f.amount = toFloat(f.amount_transactions)
// Dates arrive as e.g. "Sep 01, 2014"; parse via APOC and keep the date part.
set f.end=date(apoc.temporal.toZonedTemporal(f.end_date,"MMM dd, yyyy"))
set f.begin=date(apoc.temporal.toZonedTemporal(f.begin_date,"MMM dd, yyyy"))
merge (ben:Entity {id:f.beneficiary_bank_id})
on create set ben.name = f.beneficiary_bank, ben.location = point({latitude:toFloat(f.beneficiary_lat), longitude:toFloat(f.beneficiary_lng)})
merge (cben:Country {code:f.beneficiary_iso})
merge (ben)-[:COUNTRY]->(cben)
merge (f)-[:BENEFITS]->(ben)
merge (filer:Entity {id:f.filer_org_name_id})
on create set filer.name = f.filer_org_name, filer.location = point({latitude:toFloat(f.filer_org_lat), longitude:toFloat(f.filer_org_lng)})
merge (f)<-[:FILED]-(filer)
merge (org:Entity {id:f.originator_bank_id})
on create set org.name = f.originator_bank, org.location = point({latitude:toFloat(f.origin_lat), longitude:toFloat(f.origin_lng)})
merge (corg:Country {code:f.originator_iso})
merge (org)-[:COUNTRY]->(corg)
merge (f)-[:ORIGINATOR]->(org)
;
If you want to run this, please manually copy the below block into the statement editor above.
// Same schema setup as the CSV variant above — this second section is the
// alternative JSON-based import path for the same dataset.
create constraint on (c:Country) assert c.code is unique;
create constraint on (e:Entity) assert e.id is unique;
create constraint on (f:Filing) assert f.id is unique;
create index on :Filing(icij_sar_id);
create index on :Entity(name);
create index on :Filing(begin);
create index on :Filing(end);
create index on :Filing(amount);
create index on :Country(name);
// Load country reference data (ISO code, name, TLD, centroid coordinates)
// and flag countries that appear in at least one transaction.
call apoc.load.json("https://raw.githubusercontent.com/jexp/fincen/main/countries.json") yield value
merge (c:Country {code:value.iso3}) set c.name = value.name, c.tld = value.iso2, c.location = point({latitude:toFloat(value.lat), longitude:toFloat(value.lng)})
// `is not null` replaces the original double-negative `not ... is null` —
// identical semantics, idiomatic Cypher.
with * where value.exist_transaction is not null set c:ExistTransactions;
// Import one Filing node per SAR record; `+=` copies all JSON fields onto the
// node as-is.
call apoc.load.json("https://raw.githubusercontent.com/jexp/fincen/main/sar-data.json") yield value
merge (s:Filing {id:value.id}) set s += value;
// JSON counterpart of the bank-connections CSV load: attach filer and
// counterparty entities to each already-imported Filing.
call apoc.load.json("https://raw.githubusercontent.com/jexp/fincen/main/sar-details.json") yield value
// Records without a matching Filing are skipped by MATCH.
match (f:Filing {icij_sar_id:value.icij_sar_id})
merge (filer:Entity {id:value.filer_org_name_id}) on create set filer.name = value.filer_org_name,
filer.location = point({latitude:toFloat(value.filer_org_lat),longitude:toFloat(value.filer_org_lng)})
merge (other:Entity {id:value.entity_b_id}) on create set other.name = value.entity_b,
other.location = point({latitude:toFloat(value.entity_b_lat),longitude:toFloat(value.entity_b_lng)}),
other.country = value.entity_b_iso_code
// Country names come from countries.json above, so only the code is merged here.
merge (c:Country {code:value.entity_b_iso_code})
merge (f)<-[:FILED]-(filer)
merge (f)-[:CONCERNS]->(other)
merge (other)-[:COUNTRY]->(c);
// Post-process every Filing from the JSON import: type the date fields, then
// materialise beneficiary / filer / originator entities and country links.
// Unlike the CSV path, the JSON already carries ISO-formatted dates
// (`*_date_format`), so plain datetime() suffices.
// NOTE(review): MERGE on a null id property would abort the statement —
// assumes every record carries these ids; verify against the source JSON.
match (f:Filing)
set f.end=datetime(f.end_date_format)
set f.begin=datetime(f.begin_date_format)
merge (ben:Entity {id:f.beneficiary_bank_id})
on create set ben.name = f.beneficiary_bank, ben.location = point({latitude:toFloat(f.beneficiary_lat), longitude:toFloat(f.beneficiary_lng)})
merge (cben:Country {code:f.beneficiary_iso})
merge (ben)-[:COUNTRY]->(cben)
merge (f)-[:BENEFITS]->(ben)
merge (filer:Entity {id:f.filer_org_name_id})
on create set filer.name = f.filer_org_name, filer.location = point({latitude:toFloat(f.filer_org_lat), longitude:toFloat(f.filer_org_lng)})
merge (f)<-[:FILED]-(filer)
merge (org:Entity {id:f.originator_bank_id})
on create set org.name = f.originator_bank, org.location = point({latitude:toFloat(f.origin_lat), longitude:toFloat(f.origin_lng)})
merge (corg:Country {code:f.originator_iso})
merge (org)-[:COUNTRY]->(corg)
merge (f)-[:ORIGINATOR]->(org)
;
Then we can start querying the data.
// Number of entities linked to each country, most frequent first.
// The original `return distinct` was redundant: count() already aggregates,
// grouping by c.name. Aliasing the count also avoids repeating the
// expression in ORDER BY.
match (e:Entity)--(c:Country)
return c.name, count(e) as entities
order by entities desc;
// The ten filings with the largest aggregated transaction amount.
MATCH (f:Filing)
WITH f
ORDER BY f.amount DESC
LIMIT 10
RETURN f;
// Top 10 filings by amount, together with every entity connected to them
// (filer, originator, beneficiary, concerned parties) — for graph display.
MATCH (f:Filing)
WITH f ORDER BY f.amount DESC LIMIT 10
MATCH (f)-[r]-(e:Entity)
RETURN *;
// Ten entities moving the most money across all their filings, with their
// country (OPTIONAL so entities without a country link still appear).
MATCH (e:Entity)--(f:Filing)
WITH e, round(sum(f.amount)) AS total
ORDER BY total DESC
LIMIT 10
OPTIONAL MATCH (e)-[:COUNTRY]-(c:Country)
RETURN e.name, c.name, total;
// Graph version of the previous query: returns the node/relationship objects
// for the ten highest-volume entities instead of a tabular summary.
MATCH (e:Entity)--(f:Filing)
WITH e, round(sum(f.amount)) AS total
ORDER BY total DESC
LIMIT 10
OPTIONAL MATCH (e)-[r:COUNTRY]-(c:Country)
RETURN e, r, c;
// Ten largest originator → beneficiary money flows, summed over all filings
// between the pair.
MATCH (source:Entity)<-[:ORIGINATOR]-(f:Filing)-[:BENEFITS]->(target:Entity)
WITH source, target, round(sum(f.amount)) AS total
ORDER BY total DESC
LIMIT 10
RETURN source.name, target.name, total;
// Busiest originator → beneficiary pairs by number of filings.
// FIX: the alias was written as `total$` — `$` is not legal in an unquoted
// Cypher identifier, so the statement failed to parse.
MATCH (source:Entity)<-[:ORIGINATOR]-(f:Filing)-[:BENEFITS]->(target:Entity)
RETURN source.name, target.name, count(*) as NumberOfFilings, round(sum(f.amount)) as total
ORDER BY NumberOfFilings DESC
LIMIT 10;
Many of the GDS algorithms require a mono-partite graph - all nodes need to have the same label. Therefore, we want to infer a new relationship type between Entity
nodes - the TRANSFERRED_TO
relationship.
// Collapse all filings between an originator and a beneficiary into a single
// TRANSFERRED_TO relationship carrying the summed amount — the mono-partite
// Entity graph required by the GDS algorithms below.
// (Renamed the local alias so it no longer shadows the sum() function name.)
match (from:Entity)<-[:ORIGINATOR]-(f:Filing)-[:BENEFITS]->(to:Entity)
with from, to, sum(f.amount) as totalAmount
merge (from)-[t:TRANSFERRED_TO]->(to)
set t.amount = totalAmount;
// Neo4j Browser `:param` commands configuring a weighted Louvain community
// detection over the TRANSFERRED_TO graph.
:param limit => ( 1000);
// Anonymous projection: Entity nodes; TRANSFERRED_TO relationships weighted
// by the aggregated transfer amount (defaulting to 1 where missing).
:param config => ({
nodeProjection: 'Entity',
relationshipProjection: {
relType: {
type: 'TRANSFERRED_TO',
orientation: 'NATURAL',
properties: {
amount: {
property: 'amount',
defaultValue: 1
}
}
}
},
relationshipWeightProperty: 'amount',
includeIntermediateCommunities: false,
seedProperty: '',
writeProperty: 'louvain_cluster'
});
:param communityNodeLimit => ( 10);
// Writes each entity's community id into the `louvain_cluster` property.
CALL gds.louvain.write($config);
And then show the results:
// List Louvain communities, largest first, with up to $communityNodeLimit
// sample nodes each.
// FIX: exists(<property>) is deprecated (removed in Neo4j 5);
// `IS NOT NULL` is the supported, version-portable equivalent.
MATCH (node:`Entity`)
WHERE node.`louvain_cluster` IS NOT NULL
WITH node, node.`louvain_cluster` AS community
// If intermediate communities were written the property is a long[]; group
// by the final community id and surface the full list separately.
WITH collect(node) AS allNodes,
CASE WHEN apoc.meta.type(community) = "long[]" THEN community[-1] ELSE community END AS community,
CASE WHEN apoc.meta.type(community) = "long[]" THEN community ELSE null END as communities
RETURN community, communities, allNodes[0..$communityNodeLimit] AS nodes, size(allNodes) AS size
ORDER BY size DESC
LIMIT toInteger($limit);
// Browser `:param` commands for an unweighted PageRank run over the same
// TRANSFERRED_TO projection (relationshipWeightProperty: null).
:param limit => ( 1000);
:param config => ({
nodeProjection: 'Entity',
relationshipProjection: {
relType: {
type: 'TRANSFERRED_TO',
orientation: 'NATURAL',
properties: {}
}
},
relationshipWeightProperty: null,
dampingFactor: 0.85,
maxIterations: 20,
writeProperty: 'pagerank'
});
:param communityNodeLimit => ( 10);
// Writes each entity's score into the `pagerank` property.
CALL gds.pageRank.write($config);
And then show the results
// Entities ranked by PageRank score, highest first.
// FIX: exists(<property>) is deprecated (removed in Neo4j 5);
// `IS NOT NULL` is the supported, version-portable equivalent.
MATCH (node:`Entity`)
WHERE node.`pagerank` IS NOT NULL
RETURN node, node.`pagerank` AS score
ORDER BY score DESC
LIMIT toInteger($limit);
// Browser `:param` commands for betweenness centrality on the TRANSFERRED_TO
// projection — highlights entities sitting on many shortest transfer paths.
:param limit => ( 1000);
:param config => ({
nodeProjection: 'Entity',
relationshipProjection: {
relType: {
type: 'TRANSFERRED_TO',
orientation: 'NATURAL',
properties: {}
}
},
writeProperty: 'betweenness'
});
:param communityNodeLimit => ( 10);
// Writes each entity's centrality into the `betweenness` property.
CALL gds.betweenness.write($config);
And then show the results:
// Entities ranked by betweenness centrality, highest first.
// FIX: exists(<property>) is deprecated (removed in Neo4j 5);
// `IS NOT NULL` is the supported, version-portable equivalent.
MATCH (node:`Entity`)
WHERE node.`betweenness` IS NOT NULL
RETURN node, node.`betweenness` AS score
ORDER BY score DESC
LIMIT toInteger($limit);