Skip to content

Instantly share code, notes, and snippets.

@rvanbruggen
Last active October 14, 2022 07:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save rvanbruggen/8188af225bb7eccd3e92360a55c3e312 to your computer and use it in GitHub Desktop.
Save rvanbruggen/8188af225bb7eccd3e92360a55c3e312 to your computer and use it in GitHub Desktop.

How to convert a contact tracing graph into a recommendation graph

Creating the contact tracing graph with faker

The data below is generated with the Neo4j "faker" plugin, which provides the fkr.* functions and procedures used to create fake data (the plugin is linked from the original gist).

Persons using faker

// Create 5000 Person nodes with sequential ids and faker-generated attributes.
FOREACH (personId IN range(1, 5000) |
    CREATE (person:Person {id: personId})
    // Merge the map returned by fkr.person into the node. The statements below
    // expect it to have supplied `birthDate` and `fullName`.
    // NOTE(review): the two date arguments look like a birth-date range - confirm against the plugin docs.
    SET person += fkr.person('1940-01-01', '2020-05-15')
    // healthstatus is one of the two listed values; confirmedtime is a random
    // moment up to about 100 days and 10 hours before now; addresslocation is a
    // point jittered by at most 0.01 on each axis from a fixed base coordinate.
    SET person.healthstatus = fkr.stringElement("Sick,Healthy"),
        person.confirmedtime = datetime() - duration("P" + toInteger(round(rand() * 100)) + "DT" + toInteger(round(rand() * 10)) + "H"),
        person.addresslocation = point({x: toFloat(51.210197 + rand() / 100), y: toFloat(4.402771 + rand() / 100)})
    // Re-store the faker birth date as a datetime value.
    SET person.birthDate = datetime(person.birthDate)
    // Expose the faker full name under `name`, then drop the original key.
    SET person.name = person.fullName
    REMOVE person.fullName
);

Places using faker

// Create 100 Place nodes named "Place nr <id>", each with a random type and a
// location jittered by at most 0.01 on each axis from a fixed base coordinate.
FOREACH (placeId IN range(1, 100) |
    CREATE (place:Place {id: placeId, name: "Place nr " + placeId})
    SET place.type = fkr.stringElement("Grocery shop,Theater,Restaurant,School,Hospital,Mall,Bar,Park"),
        place.location = point({x: toFloat(51.210197 + rand() / 100), y: toFloat(4.402771 + rand() / 100)})
);

Adding indexes

// Index the Place properties set during generation.
CREATE INDEX ON :Place(id);
CREATE INDEX ON :Place(location);
CREATE INDEX ON :Place(name);
// Index the Person properties used for lookups and filtering.
CREATE INDEX ON :Person(id);
CREATE INDEX ON :Person(name);
CREATE INDEX ON :Person(healthstatus);
CREATE INDEX ON :Person(confirmedtime);

Adding the visits

// Run 5000 iterations, each picking a random Person id and a random Place id
// and recording a visit between the matched nodes. Every visit is stored twice:
// as a Visit node (PERFORMS_VISIT / LOCATED_AT) and as a direct VISITS relationship.
// NOTE(review): person ids are drawn from 1..500 only, although 5000 persons exist - confirm this is intended.
UNWIND range(1, 5000) AS visitId
MATCH (person:Person {id: toInteger(rand() * 500) + 1}),
      (place:Place {id: toInteger(rand() * 100) + 1})
CREATE (person)-[:PERFORMS_VISIT]->(visit:Visit {id: visitId})-[:LOCATED_AT]->(place)
CREATE (person)-[visitRel:VISITS]->(place)
// The visit starts at a random moment up to about 100 days and 10 hours ago ...
SET visit.starttime = datetime() - duration("P" + toInteger(round(rand() * 100)) + "DT" + toInteger(round(rand() * 10)) + "H")
// ... and lasts a random 0-10 hours plus 0-60 minutes.
SET visit.endtime = visit.starttime + duration("PT" + toInteger(round(rand() * 10)) + "H" + toInteger(round(rand() * 60)) + "M")
// Mirror both timestamps onto the direct relationship.
SET visitRel.starttime = visit.starttime
SET visitRel.endtime = visit.endtime;

Transform the contact tracing graph into a product recommender graph

Make purchases out of visits

// Relabel every Visit node as Purchase, keeping only its start time (renamed to
// purchasetime). `duration` and `endtime` are dropped.
MATCH (purchase:Visit)
SET purchase:Purchase,
    purchase.purchasetime = purchase.starttime
REMOVE purchase:Visit,
       purchase.duration,
       purchase.endtime,
       purchase.starttime;

Make products out of Places

// Relabel every Place node as Product, rename "Place ..." to "Product ...", and
// drop the place-specific location and type properties.
MATCH (product:Place)
SET product:Product,
    product.name = replace(product.name, 'Place', 'Product')
REMOVE product:Place,
       product.location,
       product.type;

Replace the VISITS relationship with a PURCHASES relationship

// For each VISITS relationship, create a PURCHASES relationship between the same
// two nodes that carries the visit's start time, then delete the original.
MATCH (buyer)-[visit:VISITS]->(item)
CREATE (buyer)-[purchase:PURCHASES]->(item)
SET purchase.purchasetime = visit.starttime
DELETE visit;

Replace the LOCATED_AT relationship with an OF_PRODUCT relationship

// Swap each LOCATED_AT relationship for an OF_PRODUCT relationship between the same nodes.
MATCH (source)-[locatedAt:LOCATED_AT]->(target)
CREATE (source)-[:OF_PRODUCT]->(target)
DELETE locatedAt;

Replace the PERFORMS_VISIT relationship with a PERFORMS_PURCHASE relationship

// Swap each PERFORMS_VISIT relationship for a PERFORMS_PURCHASE relationship between the same nodes.
MATCH (source)-[performsVisit:PERFORMS_VISIT]->(target)
CREATE (source)-[:PERFORMS_PURCHASE]->(target)
DELETE performsVisit;

Replace the indexes

// The Place label no longer exists, so drop its indexes ...
DROP INDEX ON :Place(id);
DROP INDEX ON :Place(name);
DROP INDEX ON :Place(location);
// ... and index the properties the Product nodes kept.
CREATE INDEX ON :Product(id);
CREATE INDEX ON :Product(name);

Some example queries on Product Recommendation graph

Query 1:

Person 1 and person 2 have one product purchase in common. Person 2 has bought something that person 1 has not bought (yet).

// Return up to 10 paths shopper -> shared product <- peer -> suggestion, where
// the shopper has no PURCHASES relationship to the suggested product.
MATCH path = (shopper:Person)-[:PURCHASES]->(shared:Product)<-[:PURCHASES]-(peer:Person)-[:PURCHASES]->(suggestion:Product)
WHERE NOT (shopper)-[:PURCHASES]->(suggestion)
RETURN path
LIMIT 10;

Query 2:

Person 1 and person 2 have purchased 2 products in common - but there’s a 3rd product that person 2 has bought and person 1 has not bought (yet)

// path1: p1 and p2 have both purchased pr1 and pr2.
// path2: p2 has also purchased pr3, which p1 has not purchased.
// The visit generator can create several PURCHASES relationships between the
// same person and product, so relationship uniqueness alone does not stop pr1
// and pr2 from binding to the same product. The explicit pr1 <> pr2 check
// enforces two distinct products in common.
MATCH path1 = (p1:Person)-[:PURCHASES]->(pr1:Product)<-[:PURCHASES]-(p2:Person)-[:PURCHASES]->(pr2:Product)<-[:PURCHASES]-(p1),
      path2 = (p2)-[:PURCHASES]->(pr3:Product)
WHERE pr1 <> pr2
  AND NOT exists( (p1)-[:PURCHASES]->(pr3) )
RETURN path1, path2
LIMIT 10;
// Create the basic contact tracing graph.
// Step 1: 5000 Person nodes with sequential ids and faker-generated attributes.
FOREACH (personId IN range(1, 5000) |
  CREATE (person:Person {id: personId})
  // Merge the map returned by fkr.person into the node. The statements below
  // expect it to have supplied `birthDate` and `fullName`.
  // NOTE(review): the two date arguments look like a birth-date range - confirm against the plugin docs.
  SET person += fkr.person('1940-01-01', '2020-05-15')
  // Random health status, a confirmation time up to about 100 days and 10 hours
  // in the past, and an address point jittered by at most 0.01 per axis.
  SET person.healthstatus = fkr.stringElement("Sick,Healthy"),
      person.confirmedtime = datetime() - duration("P" + toInteger(round(rand() * 100)) + "DT" + toInteger(round(rand() * 10)) + "H"),
      person.addresslocation = point({x: toFloat(51.210197 + rand() / 100), y: toFloat(4.402771 + rand() / 100)})
  // Re-store the faker birth date as a datetime value.
  SET person.birthDate = datetime(person.birthDate)
  // Expose the faker full name under `name`, then drop the original key.
  SET person.name = person.fullName
  REMOVE person.fullName
);
// Step 2: 100 Place nodes named "Place nr <id>", each with a random type and a
// location jittered by at most 0.01 per axis from a fixed base coordinate.
FOREACH (placeId IN range(1, 100) |
  CREATE (place:Place {id: placeId, name: "Place nr " + placeId})
  SET place.type = fkr.stringElement("Grocery shop,Theater,Restaurant,School,Hospital,Mall,Bar,Park"),
      place.location = point({x: toFloat(51.210197 + rand() / 100), y: toFloat(4.402771 + rand() / 100)})
);
// Indexes on the generated Place properties.
CREATE INDEX ON :Place(id);
CREATE INDEX ON :Place(location);
CREATE INDEX ON :Place(name);
// Indexes on the generated Person properties.
CREATE INDEX ON :Person(id);
CREATE INDEX ON :Person(name);
CREATE INDEX ON :Person(healthstatus);
CREATE INDEX ON :Person(confirmedtime);
// Step 3: visits, generated with plain Cypher. Each of the 5000 iterations
// picks a random Person id and a random Place id and records the visit twice:
// as a Visit node (PERFORMS_VISIT / LOCATED_AT) and as a direct VISITS relationship.
// NOTE(review): person ids are drawn from 1..500 only, although 5000 persons exist - confirm this is intended.
UNWIND range(1, 5000) AS visitId
MATCH (person:Person {id: toInteger(rand() * 500) + 1}),
      (place:Place {id: toInteger(rand() * 100) + 1})
CREATE (person)-[:PERFORMS_VISIT]->(visit:Visit {id: visitId})-[:LOCATED_AT]->(place)
CREATE (person)-[visitRel:VISITS]->(place)
// Start: a random moment up to about 100 days and 10 hours ago.
SET visit.starttime = datetime() - duration("P" + toInteger(round(rand() * 100)) + "DT" + toInteger(round(rand() * 10)) + "H")
// End: start plus a random 0-10 hours and 0-60 minutes.
SET visit.endtime = visit.starttime + duration("PT" + toInteger(round(rand() * 10)) + "H" + toInteger(round(rand() * 60)) + "M")
// Mirror both timestamps onto the direct relationship.
SET visitRel.starttime = visit.starttime
SET visitRel.endtime = visit.endtime;
// Transform the contact tracing graph into a product recommender graph.
// Visits become purchases: swap the label and keep only the start time, renamed
// to purchasetime. `duration` and `endtime` are dropped.
MATCH (purchase:Visit)
SET purchase:Purchase,
    purchase.purchasetime = purchase.starttime
REMOVE purchase:Visit,
       purchase.duration,
       purchase.endtime,
       purchase.starttime;
// Places become products: swap the label, rename "Place ..." to "Product ...",
// and drop the place-specific location and type properties.
MATCH (product:Place)
SET product:Product,
    product.name = replace(product.name, 'Place', 'Product')
REMOVE product:Place,
       product.location,
       product.type;
// Delete every MEETS relationship between two persons, in either direction.
// NOTE(review): nothing earlier in this script creates MEETS relationships, so
// this only has an effect if the graph already contained them - confirm.
MATCH (:Person)-[meeting:MEETS]-(:Person)
DELETE meeting;
// Replace each VISITS relationship with a PURCHASES relationship between the
// same nodes, carrying the visit's start time over as purchasetime.
MATCH (buyer)-[visit:VISITS]->(item)
CREATE (buyer)-[purchase:PURCHASES]->(item)
SET purchase.purchasetime = visit.starttime
DELETE visit;
// Replace each LOCATED_AT relationship with an OF_PRODUCT relationship between the same nodes.
MATCH (source)-[locatedAt:LOCATED_AT]->(target)
CREATE (source)-[:OF_PRODUCT]->(target)
DELETE locatedAt;
// Replace each PERFORMS_VISIT relationship with a PERFORMS_PURCHASE relationship between the same nodes.
MATCH (source)-[performsVisit:PERFORMS_VISIT]->(target)
CREATE (source)-[:PERFORMS_PURCHASE]->(target)
DELETE performsVisit;
// Replace the indexes: the Place label is gone, so drop its indexes ...
DROP INDEX ON :Place(id);
DROP INDEX ON :Place(name);
DROP INDEX ON :Place(location);
// ... and index the properties the Product nodes kept.
CREATE INDEX ON :Product(id);
CREATE INDEX ON :Product(name);
// Example queries on the product recommendation graph.
// Query 1: shopper and peer share one purchased product, and the peer has
// bought a further product that the shopper has not bought (yet).
MATCH path = (shopper:Person)-[:PURCHASES]->(shared:Product)<-[:PURCHASES]-(peer:Person)-[:PURCHASES]->(suggestion:Product)
WHERE NOT (shopper)-[:PURCHASES]->(suggestion)
RETURN path
LIMIT 10;
// Query 2: person 1 and person 2 have purchased 2 products in common, and there
// is a 3rd product that person 2 has bought and person 1 has not bought (yet).
// The visit generator can create several PURCHASES relationships between the
// same person and product, so relationship uniqueness alone does not stop pr1
// and pr2 from binding to the same product. The explicit pr1 <> pr2 check
// enforces two distinct products in common.
MATCH path1 = (p1:Person)-[:PURCHASES]->(pr1:Product)<-[:PURCHASES]-(p2:Person)-[:PURCHASES]->(pr2:Product)<-[:PURCHASES]-(p1),
      path2 = (p2)-[:PURCHASES]->(pr3:Product)
WHERE pr1 <> pr2
  AND NOT exists( (p1)-[:PURCHASES]->(pr3) )
RETURN path1, path2
LIMIT 10;
// Transform the contact tracing graph into a VAT fraud graph.
// Start from the faker-generated contact tracing graph.
// Create 5000 Person nodes with sequential ids and faker-generated attributes.
FOREACH (personId IN range(1, 5000) |
  CREATE (person:Person {id: personId})
  // Merge the map returned by fkr.person into the node. The statements below
  // expect it to have supplied `birthDate` and `fullName`.
  // NOTE(review): the two date arguments look like a birth-date range - confirm against the plugin docs.
  SET person += fkr.person('1940-01-01', '2020-05-15')
  // Random health status, a confirmation time up to about 100 days and 10 hours
  // in the past, and an address point jittered by at most 0.01 per axis.
  SET person.healthstatus = fkr.stringElement("Sick,Healthy"),
      person.confirmedtime = datetime() - duration("P" + toInteger(round(rand() * 100)) + "DT" + toInteger(round(rand() * 10)) + "H"),
      person.addresslocation = point({x: toFloat(51.210197 + rand() / 100), y: toFloat(4.402771 + rand() / 100)})
  // Re-store the faker birth date as a datetime value.
  SET person.birthDate = datetime(person.birthDate)
  // Expose the faker full name under `name`, then drop the original key.
  SET person.name = person.fullName
  REMOVE person.fullName
);
// Create MEETS relationships among all persons with three fkr.createRelations
// passes (the original author's note puts the total at 15000), then stamp each
// relationship with timing properties.
// NOTE(review): the exact meaning of the "1-n" cardinality argument is defined by the faker plugin - confirm there.
MATCH (person:Person)
WITH collect(person) AS everyone
CALL fkr.createRelations(everyone, "MEETS", everyone, "1-n") YIELD relationships AS firstPass
CALL fkr.createRelations(everyone, "MEETS", everyone, "1-n") YIELD relationships AS secondPass
CALL fkr.createRelations(everyone, "MEETS", everyone, "1-n") YIELD relationships AS thirdPass
// Process the relationships of all three passes one by one.
UNWIND firstPass + secondPass + thirdPass AS meeting
// Start: a random moment up to about 100 days and 10 hours ago.
SET meeting.starttime = datetime() - duration("P" + toInteger(round(rand() * 100)) + "DT" + toInteger(round(rand() * 10)) + "H")
// End: start plus a random 0-10 hours and 0-60 minutes.
SET meeting.endtime = meeting.starttime + duration("PT" + toInteger(round(rand() * 10)) + "H" + toInteger(round(rand() * 60)) + "M")
// Store the meeting length both as a duration and as its seconds component.
SET meeting.meettime = duration.between(meeting.starttime, meeting.endtime)
SET meeting.meettimeinseconds = meeting.meettime.seconds;
// Set up the indexes. The Company and Invoice labels are only populated by the
// statements that follow.
CREATE INDEX ON :Person(name);
CREATE INDEX ON :Company(vatnumber);
CREATE INDEX ON :Company(name);
CREATE INDEX ON :Invoice(amount);
// The property name `invoicedatettime` (double "t") matches the spelling used
// when the invoices are created below.
CREATE INDEX ON :Invoice(invoicedatettime);
// Persons become companies: swap the label, upper-case the name and append a
// random "Ltd."/"Inc." suffix, add a VAT number made of a random country prefix
// plus a faker-generated code, and drop the health-related properties.
MATCH (company:Person)
SET company:Company,
    company.name = toUpper(company.name) + ', ' + fkr.stringElement("Ltd.,Inc."),
    company.vatnumber = fkr.stringElement("BE,NL,SE,GB,FR,DE") + ' ' + fkr.code(' #### #### ####')
REMOVE company:Person,
       company.confirmedtime,
       company.healthstatus;
// Meetings become invoices. Every MEETS relationship is replaced by
//   - a direct INVOICES relationship with a random amount (1..10001) and a
//     random date up to about 100 days and 10 hours in the past, and
//   - an Invoice node linked via SENDS_INVOICE / RECEIVES_INVOICE that receives
//     a copy of the relationship's properties.
MATCH (sender:Company)-[meeting:MEETS]->(receiver:Company)
CREATE (sender)-[invoiceRel:INVOICES {
         amount: toInteger(round(rand() * 10000) + 1),
         invoicedatettime: datetime() - duration("P" + toInteger(round(rand() * 100)) + "DT" + toInteger(round(rand() * 10)) + "H")
       }]->(receiver)
CREATE (sender)-[:SENDS_INVOICE]->(invoice:Invoice)<-[:RECEIVES_INVOICE]-(receiver)
SET invoice = invoiceRel
DELETE meeting;
// The Person label has been removed from all nodes, so its name index is no longer needed.
DROP INDEX ON :Person(name);
// Find invoice rings.
// Each statement is terminated with ';' so it runs on its own when the file is
// executed as a script. Without the terminator it fuses with the statement
// that follows and fails to parse.

// Simplest ring - 2 hops: c invoices c1, and c1 invoices c back.
MATCH path = (c:Company)-[:INVOICES]->(c1)-[:INVOICES]->(c)
RETURN path
LIMIT 1;

// 5 hop ring: one hop from c to c1, then exactly four more hops back to c.
MATCH path = (c:Company)-[:INVOICES]->(c1)-[:INVOICES*4..4]->(c)
RETURN path
LIMIT 1;
// Find rings of up to 8 hops with apoc.periodic.iterate.
// The first statement supplies the companies; the second runs for each of them,
// looking for a path that leaves the company and returns to it within 1 + up to
// 7 INVOICES hops, and tags the company with `ring` (its own node id) and
// `ringsize` (the length of a matched path). Work is done in batches of 10,
// in parallel.
CALL apoc.periodic.iterate(
  "MATCH (c:Company) return c",
  "match path = (c)-[:INVOICES]->(c2:Company)-[:INVOICES*..7]->(c) set c.ring = id(c) set c.ringsize = length(path)",
  {parallel: true, batchSize: 10}
);
// Find longer rings with the APOC path expander.
// For every company c and each company nb it invoices, expand from nb along
// outgoing INVOICES relationships (2 to 20 hops, depth-first) until c is
// reached again, keeping at most one path per (c, nb) pair.
MATCH (c:Company)-[:INVOICES]->(nb:Company)
WITH c, nb
CALL apoc.path.expandConfig(nb, {
  relationshipFilter: "INVOICES>",
  minLevel: 2,
  maxLevel: 20,
  terminatorNodes: [c],
  bfs: false,
  uniqueness: "NODE_GLOBAL",
  limit: 1
})
YIELD path
SET c.ring = id(c)
// `path` starts at nb, so it does not contain the initial c -> nb hop. Add 1 so
// ringsize counts every hop of the ring, as the pure-Cypher ring queries do
// (their path starts at c).
SET c.ringsize = length(path) + 1
// Terminated with ';' so the statement does not fuse with the next one when the
// file is executed as a script.
RETURN count(*);
// Visualise the longest fraud ring, using Cypher only.
// Pick the single company with the highest recorded ringsize, then return one
// ring path through it.
// NOTE(review): the pattern below allows at most 1 + 4 hops, so if the selected
// company only sits on longer rings this returns no rows - confirm whether the
// bound should be raised.
MATCH (company:Company)
WHERE company.ringsize IS NOT NULL
WITH company
ORDER BY company.ringsize DESC
LIMIT 1
MATCH path = (company)-[:INVOICES]->(:Company)-[:INVOICES*..4]->(company)
RETURN path
LIMIT 1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment