Last active
November 28, 2018 10:48
-
-
Save rvanbruggen/2cbf833e6313e6db4925b8dd2bc0ff6e to your computer and use it in GitHub Desktop.
ICIJ Medical Devices Dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//import the 3 csv files | |
USING PERIODIC COMMIT | |
LOAD CSV with headers from "https://docs.google.com/uc?export=download&id=1hNtMWHjrqOiMXU_xN7digO74ruzgCDeC" as line | |
CREATE (d:Device) | |
set d = line; | |
USING PERIODIC COMMIT | |
LOAD CSV with headers from "https://docs.google.com/uc?export=download&id=1lrMQNAF9k2ZeJJgfite85cvlHYpC7Ax8" as line | |
CREATE (e:Event) | |
set e = line; | |
USING PERIODIC COMMIT | |
LOAD CSV with headers from "https://docs.google.com/uc?export=download&id=1GDv0zV889m6LTZpJWdElNhZ3M_rQycoQ" as line | |
CREATE (m:Manufacturer) | |
set m = line; | |
CREATE index on :Event(id); | |
CREATE index on :Device(id); | |
CREATE index on :Manufacturer(id); | |
CREATE index on :Manufacturer(name); | |
//connect events to devices | |
MATCH (e:Event), (d:Device) | |
WHERE d.id = e.device_id | |
CREATE (d)-[:RELATED_TO]->(e); | |
//connect devices to manufacturers | |
MATCH (d:Device), (m:Manufacturer) | |
WHERE d.manufacturer_id = m.id | |
CREATE (m)-[:MANUFACTURES]->(d); | |
//normalise the countries of events | |
CREATE index on :Country(name); | |
MATCH (e:Event) | |
MERGE (c:Country {name: e.country}); | |
CALL apoc.periodic.iterate(" | |
MATCH (e:Event), (c:Country) WHERE e.country = c.name RETURN e,c", " | |
MERGE (e)-[:LOCATED_IN]->(c)", | |
{batchSize:5000, parallel:false}); | |
//normalise the EventTypes of Events | |
CREATE index on :EventType(name); | |
MATCH (e:Event) | |
MERGE (et:EventType {name: e.type}); | |
CALL apoc.periodic.iterate(" | |
MATCH (e:Event), (et:EventType) WHERE e.type = et.name RETURN e, et", " | |
MERGE (e)-[:OF_TYPE]->(et)", | |
{batchSize:5000, parallel:false}); | |
//normalise the device classification | |
MATCH (d:Device) | |
MERGE (dc:DeviceClassification {name: d.classification}); | |
CREATE index on :DeviceClassification(name); | |
CALL apoc.periodic.iterate(" | |
MATCH (d:Device), (dc:DeviceClassification) WHERE d.classification = dc.name RETURN d, dc", " | |
MERGE (d)-[:HAS_CLASSIFICATION]->(dc)", | |
{batchSize:5000, parallel:false}); | |
//normalise the source or manufacturer info | |
MATCH (m:Manufacturer) | |
MERGE (s:Source {name: m.source}); | |
CREATE index on :Source(name); | |
CALL apoc.periodic.iterate(" | |
MATCH (m:Manufacturer), (s:Source) WHERE m.source = s.name RETURN m, s", " | |
MERGE (s)-[:IS_SOURCE_FOR]->(m)", | |
{batchSize:5000, parallel:false}); | |
//normalise the source or Event info | |
MATCH (e:Event) | |
MERGE (s:Source {name: e.source}); | |
CALL apoc.periodic.iterate(" | |
MATCH (e:Event), (s:Source) WHERE e.source = s.name RETURN e, s", " | |
MERGE (s)-[:IS_SOURCE_FOR]->(e)", | |
{batchSize:5000, parallel:false}); | |
//Normalise the parent companies | |
Create index on :ParentCompany(name); | |
MATCH (m:Manufacturer) | |
WHERE m.parent_company is not null AND m.parent_company <> "" | |
with m | |
MERGE (pc:ParentCompany {name: m.parent_company}); | |
CALL apoc.periodic.iterate(" | |
MATCH (pc:ParentCompany), (m:Manufacturer) WHERE pc.name = m.parent_company RETURN pc, m", " | |
MERGE (pc)-[:PARENT_OF]->(m)", | |
{batchSize:5000, parallel:false}); | |
//cleanup action classification | |
CREATE index on :Event(action_classification); | |
MATCH (e:Event) | |
WHERE e.action_classification = "I" OR e.action_classification = "Class I" | |
SET e.action_classification = "Class 1"; | |
MATCH (e:Event) | |
WHERE e.action_classification = "II" OR e.action_classification = "Class II" | |
SET e.action_classification = "Class 2"; | |
MATCH (e:Event) | |
WHERE e.action_classification = "III" OR e.action_classification = "Class III" | |
SET e.action_classification = "Class 3"; | |
//cleanup device risk class | |
MATCH (d:Device) | |
WHERE d.risk_class = "II" | |
SET d.risk_class = "2"; | |
MATCH (d:Device) | |
WHERE d.risk_class = "III" | |
SET d.risk_class = 3; | |
MATCH (d:Device) | |
WHERE d.risk_class = "Unclassified" | |
SET d.risk_class = "Not Classified"; | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Parent companies with most events by class | |
MATCH (pc:ParentCompany)-->(m:Manufacturer)-->(d:Device)-->(e:Event) | |
RETURN pc.name, e.action_classification, count(e) | |
ORDER BY count(e) desc | |
LIMIT 10; | |
//devices with lots of events | |
MATCH (d:Device)--(e:Event) | |
WHERE d.quantity_in_commerce is not null | |
RETURN d.name, d.quantity_in_commerce, e.action_classification, count(e) | |
ORDER BY count(e) desc | |
LIMIT 10; | |
//the graph of breast implants | |
MATCH path = (m:Manufacturer)--(d:Device)--(e:Event)--(conn) | |
WHERE m.name contains "Allergan" or m.name contains "Mentor" | |
RETURN path; | |
//the graph of heart valves | |
MATCH path = (m:Manufacturer)--(d:Device)--(e:Event)--(conn) | |
WHERE m.name contains "Medtronic" | |
RETURN path; | |
MATCH path = (m:Manufacturer)--(d:Device)--(e:Event)--(conn) | |
WHERE m.name contains "Edwards" | |
RETURN path; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment