Last active
March 20, 2020 01:50
-
-
Save maxdemarzi/355a9a0c30f748ce54180bcc30037d63 to your computer and use it in GitHub Desktop.
Cypher Queries for Finding Fraud Part 2 Revisited
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Seed one million User nodes. Each username is a first name taken
// round-robin from the list below, followed by "-" and the sequence number.
WITH ["Jennifer","Michelle","Tanya","Julie","Christie",
      "Sophie","Amanda","Khloe","Sarah","Kaylee"] AS names
UNWIND range(1,1000000) AS seq
CREATE (:User {username: names[seq % size(names)] + "-" + seq});
// Create one million Account nodes of type "Savings", numbered 1..1000000.
// balance is a random amount between 0.00 and 10000.00, rounded to cents.
// The closing semicolon matters: without it this statement runs into the
// next one and the script fails to parse when executed as a whole.
FOREACH (r IN range(1,1000000) |
  CREATE (:Account {number: r, balance: round(rand() * 1000000) / 100.0, type:"Savings"})
);
// Give every User a HAS_ACCOUNT relationship to the Account whose internal
// node id is exactly 1000000 higher than the user's own id.
// NOTE(review): this relies on internal ids having been allocated
// sequentially in creation order on a fresh store - confirm before reuse.
// The range covers 0..1000000 and both sides are label-checked, so the
// pairing is complete whether the first node received id 0 or id 1, and a
// node of the wrong label at either end of the range is never linked.
UNWIND range(0,1000000) AS number
MATCH (user:User), (account:Account)
WHERE id(user) = number
  AND id(account) = number + 1000000
CREATE (user)-[:HAS_ACCOUNT]->(account);
// Link each User to the Account whose internal id is the user's id + 1000000.
// The target id is computed once per user and then looked up, instead of
// filtering a User x Account cartesian product.
// NOTE(review): the variable is called "checking", yet offset 1000000 is the
// same pairing the UNWIND-based HAS_ACCOUNT statement in this script uses,
// and no statement creating a second block of Account nodes is visible here.
// Confirm whether a different offset (and a checking-account seed step) was
// intended; as written, running both statements creates duplicate
// HAS_ACCOUNT relationships.
MATCH (user:User)
WITH user, id(user) + 1000000 AS accountId
MATCH (checking:Account)
WHERE id(checking) = accountId
CREATE (user)-[:HAS_ACCOUNT]->(checking);
// 3-3.5
// (presumably the internal-id block, in millions, these nodes are expected
// to occupy - TODO confirm)
// Create 500000 Loan nodes of type "Unsecured Loan", numbered 1..500000,
// each with a random balance between 0.00 and 10000.00.
UNWIND range(1,500000) AS seq
CREATE (:Loan {number: seq, balance: round(rand() * 1000000) / 100.0, type:"Unsecured Loan"});
// 3.5-4
// (presumably the internal-id block, in millions, these nodes are expected
// to occupy - TODO confirm)
// Create 500000 Card nodes with consecutive numbers starting at
// 4111111111111112 and a random balance between 0.00 and 10000.00.
UNWIND range(1,500000) AS seq
CREATE (:Card {number: 4111111111111111 + seq, balance: round(rand() * 1000000) / 100.0});
// Give every User a HAS_ACCOUNT relationship to the node whose internal id
// is the user's id + 3000000 (the block the Loan and Card seed statements
// are annotated as filling).
// NOTE(review): relies on sequential internal ids on a fresh store - confirm.
// The range covers 0..1000000 and both ends are label-checked, so the
// pairing is complete whether ids started at 0 or 1, and only Loan or Card
// nodes can ever be attached.
UNWIND range(0,1000000) AS number
MATCH (user:User), (account)
WHERE id(user) = number
  AND id(account) = number + 3000000
  AND (account:Loan OR account:Card)
CREATE (user)-[:HAS_ACCOUNT]->(account);
// Create one million Identification nodes of type "SSN" with consecutive
// numbers 100000001..101000000.
UNWIND range(1,1000000) AS seq
CREATE (:Identification {number: seq + 100000000, type:"SSN"});
// Give every User a HAS_ID relationship to the Identification whose
// internal id is the user's id + 4000000.
// NOTE(review): relies on sequential internal ids on a fresh store - confirm.
// The range covers 0..1000000 and both sides are label-checked, so the
// pairing is complete whether ids started at 0 or 1 and never attaches a
// node of another label.
UNWIND range(0,1000000) AS number
MATCH (user:User), (identification:Identification)
WHERE id(user) = number
  AND id(identification) = number + 4000000
CREATE (user)-[:HAS_ID]->(identification);
// Pick 1000 random Users, remove their HAS_ID relationship and re-attach
// each of them to a randomly chosen Identification, so that some
// identifications end up shared between users.
MATCH (user:User)
WITH user
ORDER BY rand()
LIMIT 1000
MATCH (user)-[r:HAS_ID]->(identification)
DELETE r
// Draw the target id once per user, before the lookup. With rand() inside
// the WHERE of an unlabelled MATCH the draw can be repeated per candidate
// node, which may attach zero or several nodes to a user.
// toInteger(rand() * 1000000) yields offsets 0..999999; round() could also
// yield 1000000, one past the block.
// NOTE(review): assumes Identification nodes occupy internal ids
// 4000000..4999999 - confirm on the target store. The label check keeps a
// wrong-label node from ever being attached.
WITH user, 4000000 + toInteger(rand() * 1000000) AS targetId
MATCH (identification:Identification)
WHERE id(identification) = targetId
CREATE (user)-[:HAS_ID]->(identification);
// Create one million Phone nodes with consecutive numbers
// 5550000001..5551000000.
UNWIND range(1,1000000) AS seq
CREATE (:Phone {number: seq + 5550000000});
// Give every User a HAS_PHONE relationship to the Phone whose internal id
// is the user's id + 5000000.
// NOTE(review): relies on sequential internal ids on a fresh store - confirm.
// The range covers 0..1000000 and both sides are label-checked, so the
// pairing is complete whether ids started at 0 or 1 and never attaches a
// node of another label.
UNWIND range(0,1000000) AS number
MATCH (user:User), (phone:Phone)
WHERE id(user) = number
  AND id(phone) = number + 5000000
CREATE (user)-[:HAS_PHONE]->(phone);
// Pick 10000 random Users, remove their HAS_PHONE relationship and
// re-attach each of them to a randomly chosen Phone, so that some phones
// end up shared between users.
MATCH (user:User)
WITH user
ORDER BY rand()
LIMIT 10000
MATCH (user)-[r:HAS_PHONE]->(phone)
DELETE r
// Draw the target id once per user, before the lookup. With rand() inside
// the WHERE of an unlabelled MATCH the draw can be repeated per candidate
// node, which may attach zero or several nodes to a user.
// toInteger(rand() * 1000000) yields offsets 0..999999; round() could also
// yield 1000000, one past the block.
// NOTE(review): assumes Phone nodes occupy internal ids 5000000..5999999 -
// confirm on the target store. The label check keeps a wrong-label node
// from ever being attached.
WITH user, 5000000 + toInteger(rand() * 1000000) AS targetId
MATCH (phone:Phone)
WHERE id(phone) = targetId
CREATE (user)-[:HAS_PHONE]->(phone);
// Create one million Address nodes in Illinois. line1 is assembled as
// "<sequence number> <direction> <street name> <street type>", with every
// part picked round-robin from the lists below.
// Fix: "Fourth" and "Fifth" are separate street names. The original wrote
// "Fourth" + "Fifth", which concatenates them into one entry "FourthFifth".
// Fix: terminating semicolon added so the statement does not run into the
// next one.
WITH ["Chicago", "Aurora","Rockford","Joliet",
      "Naperville","Springfield", "Peoria", "Elgin",
      "Waukegan", "Champaign", "Bloomington", "Decatur",
      "Evanston", "Wheaton", "Belleville", "Urbana",
      "Quincy", "Rock Island"] AS cities,
     ["North", "South", "East", "West", "SouthWest",
      "SouthEast", "NorthWest", "NorthEast"] AS direction,
     ["Main", "Park", "Oak", "Pine", "Maple", "Cedar", "Elm",
      "Washington", "Lake", "Hill", "First", "Second",
      "Third", "Fourth", "Fifth"] AS street1,
     ["Drive", "Lane","Avenue", "Way", "Circle", "Square",
      "Court", "Road", "Alley", "Fork","Grove", "Heights",
      "Landing", "Path", "Route", "Trail", "Cove", "Loop",
      "Bend"] AS street2
FOREACH (r IN range(1,1000000) |
  CREATE (:Address {
    line1: r + " " + direction[r % size(direction)] + " " + street1[r % size(street1)] + " " + street2[r % size(street2)],
    city: cities[r % size(cities)],
    state: "IL",
    // One zip per city index: 60400 + index. The trailing % 100 has no
    // effect while the city list has fewer than 100 entries.
    zip: 60400 + (r % size(cities)) % 100
  })
);
// Give every User a HAS_ADDRESS relationship to the Address whose internal
// id is the user's id + 6000000.
// NOTE(review): relies on sequential internal ids on a fresh store - confirm.
// The range covers 0..1000000 and both sides are label-checked, so the
// pairing is complete whether ids started at 0 or 1 and never attaches a
// node of another label.
UNWIND range(0,1000000) AS number
MATCH (user:User), (address:Address)
WHERE id(user) = number
  AND id(address) = number + 6000000
CREATE (user)-[:HAS_ADDRESS]->(address);
// Pick 100000 random Users, remove their HAS_ADDRESS relationship and
// re-attach each of them to a randomly chosen Address, so that some
// addresses end up shared between users.
MATCH (user:User)
WITH user
ORDER BY rand()
LIMIT 100000
MATCH (user)-[r:HAS_ADDRESS]->(address)
DELETE r
// Draw the target id once per user, before the lookup. With rand() inside
// the WHERE of an unlabelled MATCH the draw can be repeated per candidate
// node, which may attach zero or several nodes to a user.
// toInteger(rand() * 1000000) yields offsets 0..999999; round() could also
// yield 1000000, one past the block.
// NOTE(review): assumes Address nodes occupy internal ids 6000000..6999999 -
// confirm on the target store. The label check keeps a wrong-label node
// from ever being attached.
WITH user, 6000000 + toInteger(rand() * 1000000) AS targetId
MATCH (address:Address)
WHERE id(address) = targetId
CREATE (user)-[:HAS_ADDRESS]->(address);
// Step 1: build the in-memory GDS projection 'profiles'.
//   nodes         - every User
//   relationships - two Users are connected when both have an outgoing
//                   relationship to the same intermediate node
CALL gds.graph.create.cypher(
  'profiles',
  'MATCH (n:User) RETURN id(n) AS id',
  'MATCH (p1:User)-->()<--(p2:User)
RETURN id(p1) as source, id(p2) as target'
);

// Step 2: run weakly connected components over that projection and store
// each User's component id in the profilesComponentId property.
CALL gds.wcc.write(
  'profiles',
  { writeProperty: 'profilesComponentId' }
)
YIELD nodePropertiesWritten, componentCount;
// List the 25 largest profile components together with their User counts.
MATCH (n:User)
RETURN n.profilesComponentId AS component, count(*) AS number
ORDER BY number DESC
LIMIT 25;
// Index the component id so that a single component can be looked up
// quickly.
CREATE INDEX ON :User(profilesComponentId);

// Fetch every User of one component together with all of its relationships
// and neighbours.
// 125816 is a literal component id; presumably it was taken from the
// author's own run of the component-size query, so replace it with an id
// from your own output.
MATCH (n:User {profilesComponentId: 125816})-[r]-(n2)
RETURN n, r, n2;
// For 100000 randomly chosen Phones, record that the phone ACCESSED every
// node it is connected to through HAS_PHONE.
MATCH (phone:Phone)
WITH phone
ORDER BY rand()
LIMIT 100000
MATCH (user)-[:HAS_PHONE]-(phone)
CREATE (user)<-[:ACCESSED]-(phone);
// For 10000 randomly chosen Phones, add an ACCESSED relationship to one
// randomly chosen User, simulating a device touching a foreign profile.
MATCH (phone:Phone)
WITH phone
ORDER BY rand()
LIMIT 10000
// Draw the target id once per phone, before the lookup. With rand() inside
// the WHERE of an unlabelled MATCH the draw can be repeated per candidate
// node, which may attach zero or several nodes to a phone.
// toInteger(rand() * 1000000) yields ids 0..999999; round() could also
// yield 1000000.
// NOTE(review): assumes User nodes occupy internal ids 0..999999 - confirm
// on the target store. The :User label keeps a non-User node from being
// picked.
WITH phone, toInteger(rand() * 1000000) AS userId
MATCH (user:User)
WHERE id(user) = userId
CREATE (phone)-[:ACCESSED]->(user);
// Create one million Browser nodes, each carrying a freshly generated UUID
// as its fingerprint.
UNWIND range(1,1000000) AS seq
CREATE (:Browser {fingerprint: randomUUID()});
// Record that each Browser ACCESSED the User whose internal id is the
// browser's id - 7000000 (one browser fingerprint per user).
// NOTE(review): relies on sequential internal ids on a fresh store - confirm.
// The range covers 0..1000000 and both sides are label-checked, so the
// pairing is complete whether ids started at 0 or 1 and never attaches a
// node of another label.
UNWIND range(0,1000000) AS number
MATCH (user:User), (fg:Browser)
WHERE id(user) = number
  AND id(fg) = number + 7000000
CREATE (user)<-[:ACCESSED]-(fg);
// Run it 3 times:
// For 10000 randomly chosen Browsers, add an ACCESSED relationship to one
// randomly chosen User. Each run adds another batch of cross-profile
// accesses.
MATCH (fg:Browser)
WITH fg
ORDER BY rand()
LIMIT 10000
// Draw the target id once per browser, before the lookup. With rand()
// inside the WHERE of an unlabelled MATCH the draw can be repeated per
// candidate node, which may attach zero or several nodes to a browser.
// toInteger(rand() * 1000000) yields ids 0..999999; round() could also
// yield 1000000.
// NOTE(review): assumes User nodes occupy internal ids 0..999999 - confirm
// on the target store. The :User label keeps a non-User node from being
// picked.
WITH fg, toInteger(rand() * 1000000) AS userId
MATCH (user:User)
WHERE id(user) = userId
CREATE (fg)-[:ACCESSED]->(user);
// Step 1: release the previous in-memory projection.
CALL gds.graph.drop('profiles')
YIELD graphName;

// Step 2: build the projection 'access'.
//   nodes         - every User
//   relationships - two Users are connected when they are joined through
//                   one intermediate node by ACCESSED relationships
//                   (direction ignored)
CALL gds.graph.create.cypher(
  'access',
  'MATCH (n:User) RETURN id(n) AS id',
  'MATCH (p1:User)-[:ACCESSED]-()-[:ACCESSED]-(p2:User)
RETURN id(p1) as source, id(p2) as target'
);

// Step 3: run weakly connected components over that projection and store
// each User's component id in the accessComponentId property.
CALL gds.wcc.write(
  'access',
  { writeProperty: 'accessComponentId' }
)
YIELD nodePropertiesWritten, componentCount;
// List the 25 largest access components together with their User counts.
MATCH (n:User)
RETURN n.accessComponentId AS component, count(*) AS number
ORDER BY number DESC
LIMIT 25;
// Index the component id so that a single component can be looked up
// quickly.
CREATE INDEX ON :User(accessComponentId);

// Fetch every User of one component together with all of its relationships
// and neighbours.
// 778 is a literal component id; presumably it was taken from the author's
// own run of the component-size query, so replace it with an id from your
// own output.
MATCH (n:User {accessComponentId: 778})-[r]-(n2)
RETURN n, r, n2;
// Step 1: release the previous in-memory projection.
CALL gds.graph.drop('access')
YIELD graphName;

// Step 2: build the projection 'everything'.
//   nodes         - every User
//   relationships - two Users are connected when they share any
//                   intermediate node, through relationships of any type
//                   and direction
CALL gds.graph.create.cypher(
  'everything',
  'MATCH (n:User) RETURN id(n) AS id',
  'MATCH (p1:User)--()--(p2:User)
RETURN id(p1) as source, id(p2) as target'
);

// Step 3: run weakly connected components over that projection and store
// each User's component id in the everythingComponentId property.
CALL gds.wcc.write(
  'everything',
  { writeProperty: 'everythingComponentId' }
)
YIELD nodePropertiesWritten, componentCount;
// List the 25 largest combined components together with their User counts.
MATCH (n:User)
RETURN n.everythingComponentId AS component, count(*) AS number
ORDER BY number DESC
LIMIT 25;
// Index the component id so that a single component can be looked up
// quickly.
CREATE INDEX ON :User(everythingComponentId);

// Fetch every User of one component together with all of its relationships
// and neighbours.
// 105538 is a literal component id; presumably it was taken from the
// author's own run of the component-size query, so replace it with an id
// from your own output.
MATCH (n:User {everythingComponentId: 105538})-[r]-(n2)
RETURN n, r, n2;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment