
ICIJ FinCEN Files in Neo4j

Before you go any further in exploring the data, take a look at these documents to give you more context and background reading:


Importing the data

From CSV files - this does not include LOCATION DATA!

If you want to run this, please copy the block below into the Cypher query editor and run it.

create constraint on (c:Country) assert c.code is unique;
create constraint on (e:Entity) assert e.id is unique;
create constraint on (f:Filing) assert f.id is unique;
create index on :Filing(icij_sar_id);
create index on :Entity(name);
create index on :Filing(begin);
create index on :Filing(end);
create index on :Filing(amount);
create index on :Country(name);

load csv with headers from "https://raw.githubusercontent.com/jexp/fincen/main/download_transactions_map.csv" as value
merge (s:Filing {id:value.id}) set s += value;

load csv with headers from "https://raw.githubusercontent.com/jexp/fincen/main/download_bank_connections.csv" as value
match (f:Filing {icij_sar_id:value.icij_sar_id})
merge (filer:Entity {id:value.filer_org_name_id}) on create set filer.name = value.filer_org_name, 
filer.location = point({latitude:toFloat(value.filer_org_lat),longitude:toFloat(value.filer_org_lng)})
merge (other:Entity {id:value.entity_b_id}) on create set other.name = value.entity_b, 
other.location = point({latitude:toFloat(value.entity_b_lat),longitude:toFloat(value.entity_b_lng)}),
other.country = value.entity_b_iso_code
merge (c:Country {code:value.entity_b_iso_code}) on create set c.name = value.entity_b_country
merge (f)<-[:FILED]-(filer)
merge (f)-[:CONCERNS]->(other)
merge (other)-[:COUNTRY]->(c);

match (f:Filing)
set f.transactions = toInteger(f.number_transactions)
set f.amount = toFloat(f.amount_transactions)
set f.end=date(apoc.temporal.toZonedTemporal(f.end_date,"MMM dd, yyyy"))
set f.begin=date(apoc.temporal.toZonedTemporal(f.begin_date,"MMM dd, yyyy"))

merge (ben:Entity {id:f.beneficiary_bank_id})
on create set ben.name = f.beneficiary_bank, ben.location = point({latitude:toFloat(f.beneficiary_lat), longitude:toFloat(f.beneficiary_lng)})
merge (cben:Country {code:f.beneficiary_iso})
merge (ben)-[:COUNTRY]->(cben)
merge (f)-[:BENEFITS]->(ben)

merge (filer:Entity {id:f.filer_org_name_id})
on create set filer.name = f.filer_org_name, filer.location = point({latitude:toFloat(f.filer_org_lat), longitude:toFloat(f.filer_org_lng)})
merge (f)<-[:FILED]-(filer)

merge (org:Entity {id:f.originator_bank_id})
on create set org.name = f.originator_bank, org.location = point({latitude:toFloat(f.origin_lat), longitude:toFloat(f.origin_lng)})
merge (corg:Country {code:f.originator_iso})
merge (org)-[:COUNTRY]->(corg)
merge (f)-[:ORIGINATOR]->(org)
;
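
After the import finishes, a quick sanity check is to count the nodes per label and the relationships per type; this only uses the labels and relationship types created above:

// nodes per label
match (n)
return labels(n) as labels, count(*) as nodes
order by nodes desc;

// relationships per type
match ()-[r]->()
return type(r) as relType, count(*) as rels
order by rels desc;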

From the JSON files - this does include LOCATION DATA

If you want to run this, please copy the block below into the Cypher query editor and run it.

create constraint on (c:Country) assert c.code is unique;
create constraint on (e:Entity) assert e.id is unique;
create constraint on (f:Filing) assert f.id is unique;
create index on :Filing(icij_sar_id);
create index on :Entity(name);
create index on :Filing(begin);
create index on :Filing(end);
create index on :Filing(amount);
create index on :Country(name);

call apoc.load.json("https://raw.githubusercontent.com/jexp/fincen/main/countries.json") yield value
merge (c:Country {code:value.iso3}) set c.name = value.name, c.tld = value.iso2, c.location = point({latitude:toFloat(value.lat), longitude:toFloat(value.lng)})
with * where not value.exist_transaction is null set c:ExistTransactions;

call apoc.load.json("https://raw.githubusercontent.com/jexp/fincen/main/sar-data.json") yield value
merge (s:Filing {id:value.id}) set s += value;

call apoc.load.json("https://raw.githubusercontent.com/jexp/fincen/main/sar-details.json") yield value
match (f:Filing {icij_sar_id:value.icij_sar_id})
merge (filer:Entity {id:value.filer_org_name_id}) on create set filer.name = value.filer_org_name, 
filer.location = point({latitude:toFloat(value.filer_org_lat),longitude:toFloat(value.filer_org_lng)})
merge (other:Entity {id:value.entity_b_id}) on create set other.name = value.entity_b, 
other.location = point({latitude:toFloat(value.entity_b_lat),longitude:toFloat(value.entity_b_lng)}),
other.country = value.entity_b_iso_code
merge (c:Country {code:value.entity_b_iso_code})
merge (f)<-[:FILED]-(filer)
merge (f)-[:CONCERNS]->(other)
merge (other)-[:COUNTRY]->(c);

match (f:Filing)
set f.end=datetime(f.end_date_format)
set f.begin=datetime(f.begin_date_format)

merge (ben:Entity {id:f.beneficiary_bank_id})
on create set ben.name = f.beneficiary_bank, ben.location = point({latitude:toFloat(f.beneficiary_lat), longitude:toFloat(f.beneficiary_lng)})
merge (cben:Country {code:f.beneficiary_iso})
merge (ben)-[:COUNTRY]->(cben)
merge (f)-[:BENEFITS]->(ben)

merge (filer:Entity {id:f.filer_org_name_id})
on create set filer.name = f.filer_org_name, filer.location = point({latitude:toFloat(f.filer_org_lat), longitude:toFloat(f.filer_org_lng)})
merge (f)<-[:FILED]-(filer)

merge (org:Entity {id:f.originator_bank_id})
on create set org.name = f.originator_bank, org.location = point({latitude:toFloat(f.origin_lat), longitude:toFloat(f.origin_lng)})
merge (corg:Country {code:f.originator_iso})
merge (org)-[:COUNTRY]->(corg)
merge (f)-[:ORIGINATOR]->(org)
;
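
To verify that the country locations from countries.json were loaded, count how many Country nodes ended up with a location property (count() skips nulls):

match (c:Country)
return count(c) as countries, count(c.location) as withLocation;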

Then we can start querying the data.


How many entities in which countries

match (e:Entity)--(c:Country)
return c.name, count(e) as entities
order by entities desc;
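
To drill into a single country, you can filter on its ISO code; 'CHE' below is just a placeholder, use any code that is present in the data:

// 'CHE' is a placeholder country code
match (e:Entity)-[:COUNTRY]->(c:Country {code:'CHE'})
return e.name
limit 25;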

Biggest Filings

MATCH (f:Filing)
RETURN f ORDER BY f.amount DESC LIMIT 10;
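
If you prefer a table instead of raw nodes, the same ranking can return selected Filing properties (originator_bank, beneficiary_bank and amount are all set during the import above):

MATCH (f:Filing)
RETURN f.originator_bank, f.beneficiary_bank, f.amount
ORDER BY f.amount DESC LIMIT 10;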

Biggest Filing with its participants

MATCH (f:Filing)
WITH f ORDER BY f.amount DESC LIMIT 10
MATCH (f)-[r]-(e:Entity)
RETURN *;

Entities with highest transaction volume

MATCH (e:Entity)--(f:Filing)
WITH e, round(sum(f.amount)) as total
WITH e, total ORDER BY total DESC LIMIT 10
OPTIONAL MATCH (e)-[:COUNTRY]-(c:Country)
RETURN e.name, c.name, total;

Entities with highest transaction volume - as a graph

MATCH (e:Entity)--(f:Filing)
WITH e, round(sum(f.amount)) as total
WITH e, total ORDER BY total DESC LIMIT 10
OPTIONAL MATCH (e)-[r:COUNTRY]-(c:Country)
RETURN e, r, c;

Money flows between banks - total worth of transactions

MATCH (source:Entity)<-[:ORIGINATOR]-(f:Filing)-[:BENEFITS]->(target:Entity)
WITH source, target, round(sum(f.amount)) as total ORDER BY total DESC LIMIT 10
RETURN source.name, target.name, total;

Money flows between banks - number of transactions

MATCH (source:Entity)<-[:ORIGINATOR]-(f:Filing)-[:BENEFITS]->(target:Entity)
RETURN source.name, target.name, count(*) as NumberOfFilings, round(sum(f.amount)) as total
ORDER BY NumberOfFilings DESC
LIMIT 10;
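
The same pattern can be rolled up one level, showing money flows between countries instead of individual banks, via the COUNTRY relationships created during import:

MATCH (sc:Country)<-[:COUNTRY]-(source:Entity)<-[:ORIGINATOR]-(f:Filing)-[:BENEFITS]->(target:Entity)-[:COUNTRY]->(tc:Country)
RETURN sc.name AS fromCountry, tc.name AS toCountry, count(*) AS filings, round(sum(f.amount)) AS total
ORDER BY total DESC
LIMIT 10;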

Running Graph Data Science queries

Infer new relationship (needed for GDS)

Many of the GDS algorithms require a monopartite graph, i.e. all nodes need to have the same label. Therefore, we infer a new relationship type between Entity nodes: the TRANSFERRED_TO relationship.

match (from:Entity)<-[:ORIGINATOR]-(f:Filing)-[:BENEFITS]->(to:Entity)
with from, to, sum(f.amount) as total
merge (from)-[t:TRANSFERRED_TO]->(to)
set t.amount = total;
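
To check the inferred relationships before running any algorithms, you can list the largest transfers; this only reads the TRANSFERRED_TO relationships created above:

match (from:Entity)-[t:TRANSFERRED_TO]->(to:Entity)
return from.name, to.name, round(t.amount) as amount
order by amount desc
limit 10;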

Run Louvain clustering

:param limit => ( 1000);
:param config => ({
  nodeProjection: 'Entity',
  relationshipProjection: {
    relType: {
      type: 'TRANSFERRED_TO',
      orientation: 'NATURAL',
      properties: {
        amount: {
          property: 'amount',
          defaultValue: 1
        }
      }
    }
  },
  relationshipWeightProperty: 'amount',
  includeIntermediateCommunities: false,
  seedProperty: '',
  writeProperty: 'louvain_cluster'
});
:param communityNodeLimit => ( 10);

CALL gds.louvain.write($config);

And then show the results:

MATCH (node:`Entity`)
WHERE exists(node.`louvain_cluster`)
WITH node, node.`louvain_cluster` AS community
WITH collect(node) AS allNodes,
CASE WHEN apoc.meta.type(community) = "long[]" THEN community[-1] ELSE community END AS community,
CASE WHEN apoc.meta.type(community) = "long[]" THEN community ELSE null END as communities
RETURN community, communities, allNodes[0..$communityNodeLimit] AS nodes, size(allNodes) AS size
ORDER BY size DESC
LIMIT toInteger($limit);
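
To see which entities ended up in the biggest communities, you can also group directly by the louvain_cluster property written above and show a few member names per community:

MATCH (e:Entity)
WHERE exists(e.louvain_cluster)
RETURN e.louvain_cluster AS community, count(*) AS members, collect(e.name)[0..5] AS sample
ORDER BY members DESC
LIMIT 10;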

Run PageRank

:param limit => ( 1000);
:param config => ({
  nodeProjection: 'Entity',
  relationshipProjection: {
    relType: {
      type: 'TRANSFERRED_TO',
      orientation: 'NATURAL',
      properties: {}
    }
  },
  relationshipWeightProperty: null,
  dampingFactor: 0.85,
  maxIterations: 20,
  writeProperty: 'pagerank'
});
:param communityNodeLimit => ( 10);

CALL gds.pageRank.write($config);

And then show the results:

MATCH (node:`Entity`)
WHERE exists(node.`pagerank`)
RETURN node, node.`pagerank` AS score
ORDER BY score DESC
LIMIT toInteger($limit);
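
For a more readable table, the top-ranked entities can be returned with their names and countries, reusing the pagerank property written above:

MATCH (e:Entity)
WHERE exists(e.pagerank)
OPTIONAL MATCH (e)-[:COUNTRY]->(c:Country)
RETURN e.name, c.name AS country, e.pagerank AS score
ORDER BY score DESC
LIMIT 10;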

Run Betweenness centrality

:param limit => ( 1000);
:param config => ({
  nodeProjection: 'Entity',
  relationshipProjection: {
    relType: {
      type: 'TRANSFERRED_TO',
      orientation: 'NATURAL',
      properties: {}
    }
  },
  writeProperty: 'betweenness'
});
:param communityNodeLimit => ( 10);

CALL gds.betweenness.write($config);

And then show the results:

MATCH (node:`Entity`)
WHERE exists(node.`betweenness`)
RETURN node, node.`betweenness` AS score
ORDER BY score DESC
LIMIT toInteger($limit);
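
Finally, assuming all three algorithms above have been run and written their properties, the scores can be combined into one overview table:

MATCH (e:Entity)
WHERE exists(e.pagerank) AND exists(e.betweenness)
RETURN e.name, e.louvain_cluster AS community, e.pagerank AS pagerank, e.betweenness AS betweenness
ORDER BY betweenness DESC
LIMIT 10;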