Skip to content

Instantly share code, notes, and snippets.

@rvanbruggen
Last active May 24, 2024 06:16
Show Gist options
  • Save rvanbruggen/896e5c7d599869ad08bf74712ad0100e to your computer and use it in GitHub Desktop.
Save rvanbruggen/896e5c7d599869ad08bf74712ad0100e to your computer and use it in GitHub Desktop.
Demonstration of how to use the Neo4j Graph Database for Data Lineage
//load nodes
LOAD CSV WITH HEADERS FROM "https://docs.google.com/spreadsheets/u/0/d/1eL3IrbzgvZzkNnQUwDCZ1mwVfA6sypZYvDHBoeq48IM/export?format=csv&id=1eL3IrbzgvZzkNnQUwDCZ1mwVfA6sypZYvDHBoeq48IM&gid=0" AS csv
CALL apoc.create.nodes([csv.Label], [{id: csv.ID, name: csv.Name}]) YIELD node
RETURN count(node);
MATCH (n)
SET n:Node;
CREATE INDEX ON :Node(id);
//load rels
load csv with headers from
"https://docs.google.com/spreadsheets/u/0/d/1eL3IrbzgvZzkNnQUwDCZ1mwVfA6sypZYvDHBoeq48IM/export?format=csv&id=1eL3IrbzgvZzkNnQUwDCZ1mwVfA6sypZYvDHBoeq48IM&gid=1634629608" as csv
MATCH (from:Node {id: csv.From}), (to:Node {id: csv.To})
CALL apoc.create.relationship(from, csv.Type,null, to) yield rel
RETURN count(rel);
MATCH (n)
REMOVE n:Node;
DROP INDEX ON :Node(id);
//Data Lineage Queries
//user list of a system
match (s:System)
with s
limit 1
match ((s:System)-[*2..2]-(u:User))
return s.name, u.name
//user graph of a system
match (s:System)
with s
limit 1
match path = ((s:System)-[*2..2]-(u:User))
return path
//link between users
match (u1:User), (u2:User)
where id(u1)<id(u2)
with u1, u2
limit 2
match path = shortestpath( (u1)-[*]-(u2) )
return path
//lineage of a report
MATCH (rep:Report)
with rep LIMIT 1
match path = (source)-[r*..25]->(rep)
where not( ()-->(source) )
return source,rep, path
limit 10
{"name":"Data Lineage perspective","id":"2cb270b0-52ff-11e8-97ab-abfb3b1ef6b8","categories":[{"id":1525811655527,"name":"Log","color":"#f16667","icon":"9B049892-F085-45D3-BD8F-F4DFAAC27C9A","labels":["Log"],"properties":[{"name":"id","exclude":false,"isCaption":false,"dataType":"string"},{"name":"name","exclude":false,"isCaption":true,"dataType":"string"}],"createdAt":"Tue May 08 2018","lastEditedAt":"Fri Oct 26 2018"},{"id":1525811655531,"name":"System","color":"#f79868","icon":"F7F33C54-8C86-4117-A0BB-795471C0C1B1","labels":["System"],"properties":[{"name":"id","exclude":false,"isCaption":false,"dataType":"string"},{"name":"name","exclude":false,"isCaption":true,"dataType":"string"}],"createdAt":"Tue May 08 2018","lastEditedAt":"Fri Oct 26 2018"},{"id":1525811655534,"name":"User","color":"#ffc554","icon":"DB188874-D25F-4B34-A296-E5E950072319","labels":["User"],"properties":[{"name":"id","exclude":false,"isCaption":false,"dataType":"string"},{"name":"name","exclude":false,"isCaption":true,"dataType":"string"}],"createdAt":"Tue May 08 2018","lastEditedAt":"Fri Oct 26 2018"},{"id":1525811655539,"name":"Process","color":"#fee474","icon":"15A6E865-6071-4CDA-9B1E-BA2B7FBD856D","labels":["Process"],"properties":[{"name":"id","exclude":false,"isCaption":false,"dataType":"string"},{"name":"name","exclude":false,"isCaption":true,"dataType":"string"}],"createdAt":"Tue May 08 2018","lastEditedAt":"Fri Oct 26 2018"},{"id":1525811655542,"name":"Dataset","color":"#cade71","icon":"6EC9724E-84CD-46C4-B71F-64EB2BC417E8","labels":["Dataset"],"properties":[{"name":"id","exclude":false,"isCaption":false,"dataType":"string"},{"name":"name","exclude":false,"isCaption":true,"dataType":"string"}],"createdAt":"Tue May 08 2018","lastEditedAt":"Fri Oct 26 2018"},{"id":1525811655545,"name":"Report","color":"#8dcc93","icon":"B49DB7FB-1218-4E25-AA51-2D94030D8F12","labels":["Report"],"properties":[{"name":"id","exclude":false,"isCaption":false,"dataType":"string"},{"name":"name","exclude":false,"isCaption":true,"dataType":"string"}],"createdAt":"Tue May 08 2018","lastEditedAt":"Fri Oct 26 2018"},{"id":1525811655546,"name":"Database","color":"#60c5bb","icon":"DFDC8C7C-C65E-4957-B80C-89D8753AC4FC","labels":["Database"],"properties":[{"name":"id","exclude":false,"isCaption":false,"dataType":"string"},{"name":"name","exclude":false,"isCaption":true,"dataType":"string"}],"createdAt":"Tue May 08 2018","lastEditedAt":"Fri Oct 26 2018"},{"id":0,"name":"Other","color":"#6B6B6B","icon":"no-icon","labels":[],"properties":[],"caption":[""]}],"palette":{"colors":["#f16667","#f79868","#ffc554","#fee474","#cade71","#8dcc93","#60c5bb","#58c8e3","#9bafda","#c990c0","#DB7295","#efb5c9"],"currentIndex":7},"createdAt":"Tue May 08 2018","lastEditedAt":"Tue May 08 2018","templates":[{"name":"Lineage of a report","id":"tmpl:1525811701398","createdAt":"Tue May 08 2018","text":"Find the source of $report","cypher":"MATCH (rep:Report {name: $report})\nwith rep\nmatch path = (source)-[*]->(rep)\nwhere not( ()-->(source) )\nreturn source,rep, path\nlimit 10","params":[{"name":"$report","suggestionLabel":null,"suggestionProp":null,"cypher":"match (n:Report) return n.name"}],"hasCypherErrors":false},{"name":"User graph of a system","id":"tmpl:1525811898663","createdAt":"Tue May 08 2018","text":"User graph of $system","cypher":"match path = ((s:System {name: $system})-[*2..2]-(u:User))\nreturn path","params":[{"name":"$system","cypher":"match (n:System) return n.name"}]},{"name":"Link between users ","id":"tmpl:1525812037453","createdAt":"Tue May 08 2018","text":"Find links between $user1 and $user2","cypher":"match path = allshortestpaths( (u1:User {name: $user1})-[*]-(u2:User {name: $user2}) )\nreturn path","params":[{"name":"$user1","cypher":"match (n:User) return n.name"},{"name":"$user2","cypher":"match (n:User) return n.name"}]}]}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment