Skip to content

Instantly share code, notes, and snippets.

@rvanbruggen
Last active November 28, 2018 10:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rvanbruggen/2cbf833e6313e6db4925b8dd2bc0ff6e to your computer and use it in GitHub Desktop.
Save rvanbruggen/2cbf833e6313e6db4925b8dd2bc0ff6e to your computer and use it in GitHub Desktop.
ICIJ Medical Devices Dataset
//import the 3 csv files
USING PERIODIC COMMIT
LOAD CSV with headers from "https://docs.google.com/uc?export=download&id=1hNtMWHjrqOiMXU_xN7digO74ruzgCDeC" as line
CREATE (d:Device)
set d = line;
USING PERIODIC COMMIT
LOAD CSV with headers from "https://docs.google.com/uc?export=download&id=1lrMQNAF9k2ZeJJgfite85cvlHYpC7Ax8" as line
CREATE (e:Event)
set e = line;
USING PERIODIC COMMIT
LOAD CSV with headers from "https://docs.google.com/uc?export=download&id=1GDv0zV889m6LTZpJWdElNhZ3M_rQycoQ" as line
CREATE (m:Manufacturer)
set m = line;
CREATE index on :Event(id);
CREATE index on :Device(id);
CREATE index on :Manufacturer(id);
CREATE index on :Manufacturer(name);
//connect events to devices
MATCH (e:Event), (d:Device)
WHERE d.id = e.device_id
CREATE (d)-[:RELATED_TO]->(e);
//connect devices to manufacturers
MATCH (d:Device), (m:Manufacturer)
WHERE d.manufacturer_id = m.id
CREATE (m)-[:MANUFACTURES]->(d);
//normalise the countries of events
CREATE index on :Country(name);
MATCH (e:Event)
MERGE (c:Country {name: e.country});
CALL apoc.periodic.iterate("
MATCH (e:Event), (c:Country) WHERE e.country = c.name RETURN e,c", "
MERGE (e)-[:LOCATED_IN]->(c)",
{batchSize:5000, parallel:false});
//normalise the EventTypes of Events
CREATE index on :EventType(name);
MATCH (e:Event)
MERGE (et:EventType {name: e.type});
CALL apoc.periodic.iterate("
MATCH (e:Event), (et:EventType) WHERE e.type = et.name RETURN e, et", "
MERGE (e)-[:OF_TYPE]->(et)",
{batchSize:5000, parallel:false});
//normalise the device classification
MATCH (d:Device)
MERGE (dc:DeviceClassification {name: d.classification});
CREATE index on :DeviceClassification(name);
CALL apoc.periodic.iterate("
MATCH (d:Device), (dc:DeviceClassification) WHERE d.classification = dc.name RETURN d, dc", "
MERGE (d)-[:HAS_CLASSIFICATION]->(dc)",
{batchSize:5000, parallel:false});
//normalise the source or manufacturer info
MATCH (m:Manufacturer)
MERGE (s:Source {name: m.source});
CREATE index on :Source(name);
CALL apoc.periodic.iterate("
MATCH (m:Manufacturer), (s:Source) WHERE m.source = s.name RETURN m, s", "
MERGE (s)-[:IS_SOURCE_FOR]->(m)",
{batchSize:5000, parallel:false});
//normalise the source or Event info
MATCH (e:Event)
MERGE (s:Source {name: e.source});
CALL apoc.periodic.iterate("
MATCH (e:Event), (s:Source) WHERE e.source = s.name RETURN e, s", "
MERGE (s)-[:IS_SOURCE_FOR]->(e)",
{batchSize:5000, parallel:false});
//Normalise the parent companies
Create index on :ParentCompany(name);
MATCH (m:Manufacturer)
WHERE m.parent_company is not null AND m.parent_company <> ""
with m
MERGE (pc:ParentCompany {name: m.parent_company});
CALL apoc.periodic.iterate("
MATCH (pc:ParentCompany), (m:Manufacturer) WHERE pc.name = m.parent_company RETURN pc, m", "
MERGE (pc)-[:PARENT_OF]->(m)",
{batchSize:5000, parallel:false});
//cleanup action classification
CREATE index on :Event(action_classification);
MATCH (e:Event)
WHERE e.action_classification = "I" OR e.action_classification = "Class I"
SET e.action_classification = "Class 1";
MATCH (e:Event)
WHERE e.action_classification = "II" OR e.action_classification = "Class II"
SET e.action_classification = "Class 2";
MATCH (e:Event)
WHERE e.action_classification = "III" OR e.action_classification = "Class III"
SET e.action_classification = "Class 3";
//cleanup device risk class
MATCH (d:Device)
WHERE d.risk_class = "II"
SET d.risk_class = "2";
MATCH (d:Device)
WHERE d.risk_class = "III"
SET d.risk_class = 3;
MATCH (d:Device)
WHERE d.risk_class = "Unclassified"
SET d.risk_class = "Not Classified";
//Parent companies with most events by class
MATCH (pc:ParentCompany)-->(m:Manufacturer)-->(d:Device)-->(e:Event)
RETURN pc.name, e.action_classification, count(e)
ORDER BY count(e) desc
LIMIT 10;
//devices with lots of events
MATCH (d:Device)--(e:Event)
WHERE d.quantity_in_commerce is not null
RETURN d.name, d.quantity_in_commerce, e.action_classification, count(e)
ORDER BY count(e) desc
LIMIT 10;
//the graph of breast implants
MATCH path = (m:Manufacturer)--(d:Device)--(e:Event)--(conn)
WHERE m.name contains "Allergan" or m.name contains "Mentor"
RETURN path;
//the graph of heart valves
MATCH path = (m:Manufacturer)--(d:Device)--(e:Event)--(conn)
WHERE m.name contains "Medtronic"
RETURN path;
MATCH path = (m:Manufacturer)--(d:Device)--(e:Event)--(conn)
WHERE m.name contains "Edwards"
RETURN path;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment