Skip to content

Instantly share code, notes, and snippets.

@rvanbruggen
Created September 24, 2020 07:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rvanbruggen/35e3d1e5279945b7456e5e8411037f67 to your computer and use it in GitHub Desktop.
Save rvanbruggen/35e3d1e5279945b7456e5e8411037f67 to your computer and use it in GitHub Desktop.
FinCen Queries

ICIJ FinCEN Files in Neo4j

Before you go any further in exploring the data, take a look at these documents for more context and background reading.


Importing the data

From CSV files - this does not include LOCATION DATA!

If you want to run this, please manually copy the below block into the statement editor above.

// Schema setup: uniqueness constraints (which also create backing indexes)
// plus lookup indexes for the properties the import and queries below filter on.
// NOTE(review): this is the pre-Neo4j-4.4 syntax ("create constraint on ... assert" /
// "create index on :Label(prop)"); newer servers require
// "CREATE CONSTRAINT FOR (n:Label) REQUIRE n.prop IS UNIQUE" — confirm target version.
create constraint on (c:Country) assert c.code is unique;
create constraint on (e:Entity) assert e.id is unique;
create constraint on (f:Filing) assert f.id is unique;
// icij_sar_id is the join key used by the detail/connection import below.
create index on :Filing(icij_sar_id);
create index on :Entity(name);
create index on :Filing(begin);
create index on :Filing(end);
create index on :Filing(amount);
create index on :Country(name);

// Load the raw SAR filings from CSV: one Filing node per row,
// with every CSV column copied onto the node as a string property.
load csv with headers from "https://raw.githubusercontent.com/jexp/fincen/main/download_transactions_map.csv" as row
merge (filing:Filing {id:row.id})
set filing += row;

// Load filer/counterparty connections for each SAR filing.
// Rows whose icij_sar_id has no matching Filing are silently skipped by MATCH.
load csv with headers from "https://raw.githubusercontent.com/jexp/fincen/main/download_bank_connections.csv" as value
match (f:Filing {icij_sar_id:value.icij_sar_id})
// The filing bank: created once, located on first sight only (ON CREATE).
merge (filer:Entity {id:value.filer_org_name_id}) on create set filer.name = value.filer_org_name, 
filer.location = point({latitude:toFloat(value.filer_org_lat),longitude:toFloat(value.filer_org_lng)})
// The counterparty ("entity b") mentioned in the filing.
merge (other:Entity {id:value.entity_b_id}) on create set other.name = value.entity_b, 
other.location = point({latitude:toFloat(value.entity_b_lat),longitude:toFloat(value.entity_b_lng)}),
other.country = value.entity_b_iso_code
// Country nodes are keyed by ISO code; the name is only set on creation.
merge (c:Country {code:value.entity_b_iso_code}) on create set c.name = value.entity_b_country
merge (f)<-[:FILED]-(filer)
merge (f)-[:CONCERNS]->(other)
merge (other)-[:COUNTRY]->(c);

// Post-process every Filing loaded from CSV: type-convert the string properties
// and materialize the beneficiary / filer / originator entities and relationships.
// NOTE(review): if any of the id properties used in MERGE below are null for a
// filing, the whole statement fails — assumes the CSV always provides them; verify.
match (f:Filing)
set f.transactions = toInteger(f.number_transactions)
set f.amount = toFloat(f.amount_transactions)
// Dates arrive as e.g. "Sep 24, 2015"; parse via APOC and keep only the date part.
set f.end=date(apoc.temporal.toZonedTemporal(f.end_date,"MMM dd, yyyy"))
set f.begin=date(apoc.temporal.toZonedTemporal(f.begin_date,"MMM dd, yyyy"))

// Beneficiary bank and its country.
merge (ben:Entity {id:f.beneficiary_bank_id})
on create set ben.name = f.beneficiary_bank, ben.location = point({latitude:toFloat(f.beneficiary_lat), longitude:toFloat(f.beneficiary_lng)})
merge (cben:Country {code:f.beneficiary_iso})
merge (ben)-[:COUNTRY]->(cben)
merge (f)-[:BENEFITS]->(ben)

// Filing bank (may already exist from the bank-connections load).
merge (filer:Entity {id:f.filer_org_name_id})
on create set filer.name = f.filer_org_name, filer.location = point({latitude:toFloat(f.filer_org_lat), longitude:toFloat(f.filer_org_lng)})
merge (f)<-[:FILED]-(filer)

// Originator bank and its country.
merge (org:Entity {id:f.originator_bank_id})
on create set org.name = f.originator_bank, org.location = point({latitude:toFloat(f.origin_lat), longitude:toFloat(f.origin_lng)})
merge (corg:Country {code:f.originator_iso})
merge (org)-[:COUNTRY]->(corg)
merge (f)-[:ORIGINATOR]->(org)
;

From the JSON files - this does include LOCATION DATA

If you want to run this, please manually copy the below block into the statement editor above.

// Schema setup for the JSON import path — identical to the CSV variant:
// uniqueness constraints plus lookup indexes for the import join key and
// the query filters used further down.
// NOTE(review): pre-Neo4j-4.4 syntax; newer servers require
// "CREATE CONSTRAINT FOR ... REQUIRE ..." — confirm target version.
create constraint on (c:Country) assert c.code is unique;
create constraint on (e:Entity) assert e.id is unique;
create constraint on (f:Filing) assert f.id is unique;
create index on :Filing(icij_sar_id);
create index on :Entity(name);
create index on :Filing(begin);
create index on :Filing(end);
create index on :Filing(amount);
create index on :Country(name);

// Load the country reference data (names, TLDs, lat/lng locations) from JSON.
call apoc.load.json("https://raw.githubusercontent.com/jexp/fincen/main/countries.json") yield value as row
merge (country:Country {code:row.iso3})
set country.name = row.name,
    country.tld = row.iso2,
    country.location = point({latitude:toFloat(row.lat), longitude:toFloat(row.lng)})
// Countries that appear in at least one transaction get an extra label.
with *
where row.exist_transaction is not null
set country:ExistTransactions;

// Load the raw SAR filings from JSON: one Filing node per document,
// with every top-level field copied onto the node.
call apoc.load.json("https://raw.githubusercontent.com/jexp/fincen/main/sar-data.json") yield value as row
merge (filing:Filing {id:row.id})
set filing += row;

// Load filer/counterparty connections per SAR filing from JSON
// (mirrors the CSV bank-connections load above, including location data).
call apoc.load.json("https://raw.githubusercontent.com/jexp/fincen/main/sar-details.json") yield value
// Documents whose icij_sar_id has no matching Filing are silently skipped.
match (f:Filing {icij_sar_id:value.icij_sar_id})
merge (filer:Entity {id:value.filer_org_name_id}) on create set filer.name = value.filer_org_name, 
filer.location = point({latitude:toFloat(value.filer_org_lat),longitude:toFloat(value.filer_org_lng)})
merge (other:Entity {id:value.entity_b_id}) on create set other.name = value.entity_b, 
other.location = point({latitude:toFloat(value.entity_b_lat),longitude:toFloat(value.entity_b_lng)}),
other.country = value.entity_b_iso_code
// Country names were already set by the countries.json load, so no ON CREATE here.
merge (c:Country {code:value.entity_b_iso_code})
merge (f)<-[:FILED]-(filer)
merge (f)-[:CONCERNS]->(other)
merge (other)-[:COUNTRY]->(c);

// Post-process every Filing loaded from JSON: convert the pre-formatted date
// strings and materialize beneficiary / filer / originator entities.
// NOTE(review): assumes *_date_format are ISO-8601 strings parseable by datetime();
// a null id in any MERGE key below fails the whole statement — verify the data.
match (f:Filing)
set f.end=datetime(f.end_date_format)
set f.begin=datetime(f.begin_date_format)

// Beneficiary bank and its country.
merge (ben:Entity {id:f.beneficiary_bank_id})
on create set ben.name = f.beneficiary_bank, ben.location = point({latitude:toFloat(f.beneficiary_lat), longitude:toFloat(f.beneficiary_lng)})
merge (cben:Country {code:f.beneficiary_iso})
merge (ben)-[:COUNTRY]->(cben)
merge (f)-[:BENEFITS]->(ben)

// Filing bank (may already exist from the sar-details load).
merge (filer:Entity {id:f.filer_org_name_id})
on create set filer.name = f.filer_org_name, filer.location = point({latitude:toFloat(f.filer_org_lat), longitude:toFloat(f.filer_org_lng)})
merge (f)<-[:FILED]-(filer)

// Originator bank and its country.
merge (org:Entity {id:f.originator_bank_id})
on create set org.name = f.originator_bank, org.location = point({latitude:toFloat(f.origin_lat), longitude:toFloat(f.origin_lng)})
merge (corg:Country {code:f.originator_iso})
merge (org)-[:COUNTRY]->(corg)
merge (f)-[:ORIGINATOR]->(org)
;

Then we can start querying the data.


How many entities in which countries

// Count entities per country, most populous country first.
// Fixed: DISTINCT was redundant (the count(e) aggregation already groups by
// c.name), and the aggregate is now aliased so ORDER BY can reference it
// instead of repeating the expression.
match (e:Entity)--(c:Country)
return c.name as country, count(e) as entities
order by entities desc;

Biggest Filings

// The ten filings with the largest transaction amounts.
MATCH (f:Filing)
WITH f
ORDER BY f.amount DESC
LIMIT 10
RETURN f;

Biggest Filing with its participants

// The ten biggest filings together with every Entity connected to them
// (filer, originator, beneficiary, counterparty).
MATCH (f:Filing)
WITH f
ORDER BY f.amount DESC
LIMIT 10
MATCH (f)-[r]-(e:Entity)
RETURN *;

Entities with highest transaction volume

// Top ten entities by total transaction volume across all their filings,
// together with the country they belong to (if any).
MATCH (e:Entity)--(f:Filing)
WITH e, round(sum(f.amount)) as total
ORDER BY total DESC
LIMIT 10
OPTIONAL MATCH (e)-[:COUNTRY]-(c:Country)
RETURN e.name, c.name, total;

Entities with highest transaction volume - as a graph

// Same top-ten-by-volume entities, but returned as a graph
// (entity node, its COUNTRY relationship, and the country node).
MATCH (e:Entity)--(f:Filing)
WITH e, round(sum(f.amount)) as total
ORDER BY total DESC
LIMIT 10
OPTIONAL MATCH (e)-[r:COUNTRY]-(c:Country)
RETURN e, r, c;

Money flows between banks - total worth of transactions

// Top ten originator -> beneficiary bank pairs by total filed amount.
MATCH (source:Entity)<-[:ORIGINATOR]-(f:Filing)-[:BENEFITS]->(target:Entity)
WITH source, target, round(sum(f.amount)) as total
ORDER BY total DESC
LIMIT 10
RETURN source.name, target.name, total;

Money flows between banks - number of transactions

// Top ten originator -> beneficiary bank pairs by number of filings,
// with the total filed amount alongside.
// Fixed: the alias "total$" contained a stray "$", which is not a valid
// character in an unquoted Cypher identifier and makes the statement fail.
MATCH (source:Entity)<-[:ORIGINATOR]-(f:Filing)-[:BENEFITS]->(target:Entity)
RETURN source.name, target.name, count(*) as NumberOfFilings, round(sum(f.amount)) as total
ORDER BY NumberOfFilings DESC
LIMIT 10;

Running Graph Data Science queries

Infer new relationship (needed for GDS)

Many of the GDS algorithms require a mono-partite graph - all nodes need to have the same label. Therefore, we want to infer a new relationship type between Entity nodes - the TRANSFERRED_TO relationship.

// Infer a mono-partite TRANSFERRED_TO relationship between Entity nodes,
// carrying the total filed amount per originator/beneficiary pair, for use
// by the GDS algorithms below. Re-running is idempotent (MERGE + SET).
// Fixed: the alias "sum" shadowed the built-in sum() aggregate function;
// renamed to totalAmount for clarity.
match (from:Entity)<-[:ORIGINATOR]-(f:Filing)-[:BENEFITS]->(to:Entity)
with from, to, sum(f.amount) as totalAmount
merge (from)-[t:TRANSFERRED_TO]->(to)
set t.amount = totalAmount;

Run Louvain clustering

// Neo4j Browser parameters for the Louvain run
// (":param" only works in Browser / cypher-shell, not in plain Cypher).
:param limit => ( 1000);
// Anonymous graph projection: all Entity nodes, the inferred TRANSFERRED_TO
// relationships, weighted by the aggregated amount (defaulting to 1).
:param config => ({
  nodeProjection: 'Entity',
  relationshipProjection: {
    relType: {
      type: 'TRANSFERRED_TO',
      orientation: 'NATURAL',
      properties: {
        amount: {
          property: 'amount',
          defaultValue: 1
        }
      }
    }
  },
  relationshipWeightProperty: 'amount',
  includeIntermediateCommunities: false,
  seedProperty: '',
  writeProperty: 'louvain_cluster'
});
:param communityNodeLimit => ( 10);

// Run weighted Louvain community detection; writes each node's community id
// to the "louvain_cluster" property.
// NOTE(review): this anonymous-projection call signature is from GDS 1.x;
// GDS 2.x requires a named graph (gds.graph.project) — confirm library version.
CALL gds.louvain.write($config);

And then show the results:

// Show the Louvain communities, largest first, with up to $communityNodeLimit
// sample nodes per community.
// Fixed: exists(node.prop) on a property is deprecated (and removed in Neo4j 5);
// "IS NOT NULL" is equivalent and valid in all versions.
MATCH (node:`Entity`)
WHERE node.`louvain_cluster` IS NOT NULL
WITH node, node.`louvain_cluster` AS community
// When intermediate communities were requested the property is a long[];
// in that case report the final (last) community plus the full list.
WITH collect(node) AS allNodes,
CASE WHEN apoc.meta.type(community) = "long[]" THEN community[-1] ELSE community END AS community,
CASE WHEN apoc.meta.type(community) = "long[]" THEN community ELSE null END as communities
RETURN community, communities, allNodes[0..$communityNodeLimit] AS nodes, size(allNodes) AS size
ORDER BY size DESC
LIMIT toInteger($limit);

Run Pagerank

// Neo4j Browser parameters for the PageRank run.
:param limit => ( 1000);
// Anonymous projection over Entity nodes and TRANSFERRED_TO relationships;
// unweighted (relationshipWeightProperty is null).
:param config => ({
  nodeProjection: 'Entity',
  relationshipProjection: {
    relType: {
      type: 'TRANSFERRED_TO',
      orientation: 'NATURAL',
      properties: {}
    }
  },
  relationshipWeightProperty: null,
  dampingFactor: 0.85,
  maxIterations: 20,
  writeProperty: 'pagerank'
});
:param communityNodeLimit => ( 10);

// Run PageRank; writes each node's score to the "pagerank" property.
// NOTE(review): anonymous-projection signature is GDS 1.x — confirm library version.
CALL gds.pageRank.write($config);

And then show the results

// Show the highest-PageRank entities.
// Fixed: exists(node.prop) on a property is deprecated (and removed in Neo4j 5);
// "IS NOT NULL" is equivalent and valid in all versions.
MATCH (node:`Entity`)
WHERE node.`pagerank` IS NOT NULL
RETURN node, node.`pagerank` AS score
ORDER BY score DESC
LIMIT toInteger($limit);

Run Betweenness centrality

// Neo4j Browser parameters for the Betweenness-centrality run.
:param limit => ( 1000);
// Anonymous projection over Entity nodes and TRANSFERRED_TO relationships.
:param config => ({
  nodeProjection: 'Entity',
  relationshipProjection: {
    relType: {
      type: 'TRANSFERRED_TO',
      orientation: 'NATURAL',
      properties: {}
    }
  },
  writeProperty: 'betweenness'
});
:param communityNodeLimit => ( 10);

// Run betweenness centrality; writes each node's score to "betweenness".
// NOTE(review): anonymous-projection signature is GDS 1.x — confirm library version.
CALL gds.betweenness.write($config);

And then show the results:

// Show the entities with the highest betweenness centrality.
// Fixed: exists(node.prop) on a property is deprecated (and removed in Neo4j 5);
// "IS NOT NULL" is equivalent and valid in all versions.
MATCH (node:`Entity`)
WHERE node.`betweenness` IS NOT NULL
RETURN node, node.`betweenness` AS score
ORDER BY score DESC
LIMIT toInteger($limit);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment