-
-
Save tomasonjo/52d231a7e18c1a24aaa18e81764bda44 to your computer and use it in GitHub Desktop.
// Declare the schema the import statements rely on: an index on :Category(id)
// (categories are merged on `id`, so indexing any other property would leave
// that MERGE doing a label scan) and uniqueness constraints on :Business(id),
// :User(id) and :Review(id).
// NOTE: with its default dropExisting=true, apoc.schema.assert also drops any
// existing index or constraint that is not listed here.
CALL apoc.schema.assert(
  {Category: ['id']},
  {Business: ['id'], User: ['id'], Review: ['id']});
// Import businesses and their categories from business.json in batches of 10000.
// For each JSON record: upsert the :Business node by id, copy every remaining
// field onto it except the keys listed in apoc.map.clean (nested maps, the id,
// and fields handled or dropped on purpose), then upsert one :Category node per
// entry of `categories` and link the business to it with IN_CATEGORY.
// NOTE(review): UNWIND assumes `categories` is a JSON array (a null value simply
// yields no rows) -- confirm against the dataset version in use.
CALL apoc.periodic.iterate("
CALL apoc.load.json('file:///home/tomasi/Downloads/dataset/business.json') YIELD value RETURN value
","
MERGE (b:Business{id:value.business_id})
SET b += apoc.map.clean(value, ['attributes','hours','business_id','categories','address','postal_code'],[])
WITH b,value.categories as categories
UNWIND categories as category
MERGE (c:Category{id:category})
MERGE (b)-[:IN_CATEGORY]->(c)
",{batchSize: 10000, iterateList: true});
// Import tips from tip.json in batches of 20000.
// A tip is only stored when its business already exists (MATCH, not MERGE, so
// records for unknown businesses are skipped); the user is upserted by id.
// The TIP relationship is merged on both `date` and `likes`, so two tips by the
// same user for the same business are kept apart only if one of those differs.
CALL apoc.periodic.iterate("
CALL apoc.load.json('file:///ssd/yelp/dataset/tip.json') YIELD value RETURN value
","
MATCH (b:Business{id:value.business_id})
MERGE (u:User{id:value.user_id})
MERGE (u)-[:TIP{date:value.date,likes:value.likes}]->(b)
",{batchSize: 20000, iterateList: true});
// Import reviews from review.json in batches of 10000.
// Reviews for businesses that are not in the graph are skipped (MATCH). The user
// and the review are upserted by id and wired as (User)-[:WROTE]->(Review)
// -[:REVIEWS]->(Business). The remaining JSON fields are copied onto the review,
// minus the id fields and the review text, and minus any entry whose value is
// the string '0' (third argument of apoc.map.clean).
// The inner statement is a double-quoted string, so string literals inside it
// must use single quotes.
CALL apoc.periodic.iterate("
CALL apoc.load.json('file:///home/tomasi/Downloads/dataset/review.json') YIELD value RETURN value
","
MATCH (b:Business{id:value.business_id})
MERGE (u:User{id:value.user_id})
MERGE (r:Review{id:value.review_id})
MERGE (u)-[:WROTE]->(r)
MERGE (r)-[:REVIEWS]->(b)
SET r += apoc.map.clean(value, ['business_id','user_id','review_id','text'],['0'])
",{batchSize: 10000, iterateList: true});
// Import users and their friendships from user.json in batches of 100.
// Each user is upserted by id and receives every JSON field except `friends`
// and `user_id`. Every entry of `friends` is upserted as a :User as well and
// connected with FRIEND. The MERGE is deliberately undirected: it matches an
// existing FRIEND relationship in either direction, so a pair that is listed
// from both sides still ends up with a single relationship.
// NOTE(review): UNWIND assumes `friends` is a JSON array of user ids -- confirm
// against the dataset version in use.
CALL apoc.periodic.iterate("
CALL apoc.load.json('file:///ssd/yelp/dataset/user.json') YIELD value RETURN value
","
MERGE (u:User{id:value.user_id})
SET u += apoc.map.clean(value, ['friends','user_id'],[])
WITH u,value.friends as friends
UNWIND friends as friend
MERGE (u1:User{id:friend})
MERGE (u)-[:FRIEND]-(u1)
",{batchSize: 100, iterateList: true});
// Link users who reviewed at least one common business with a SIMILAR
// relationship weighted by the Euclidean similarity of their star ratings.
// The driving query emits each unordered user pair once (id(p1) < id(p2)) with
// the two lists of stars collected over the businesses both users reviewed; the
// action statement upserts the undirected SIMILAR relationship and stores the
// score in `weight`. Runs in batches of 10000 pairs, sequentially.
CALL apoc.periodic.iterate(
"MATCH (p1:User)-->(r1:Review)-->(:Business)<--(r2:Review)<--(p2:User)
where id(p1) < id(p2)
RETURN p1,p2,collect(r1.stars) as s1,collect(r2.stars) as s2",
"MERGE (p1)-[s:SIMILAR]-(p2) SET s.weight = apoc.algo.euclideanSimilarity(s1,s2)"
, {batchSize:10000, parallel:false,iterateList:true});
// Summary statistics of the number of FRIEND relationships per user.
// The degree is read once per node and then aggregated, instead of calling
// apoc.node.degree separately for each of the four aggregates.
MATCH (u:User)
WITH apoc.node.degree(u, 'FRIEND') AS friend_count
RETURN avg(friend_count) AS average_friends,
       stdev(friend_count) AS stdev_friends,
       max(friend_count) AS max_friends,
       min(friend_count) AS min_friends;
// Summary statistics of the number of REVIEWS relationships per business.
// The degree is read once per node and then aggregated, instead of calling
// apoc.node.degree separately for each of the four aggregates.
MATCH (b:Business)
WITH apoc.node.degree(b, 'REVIEWS') AS review_count
RETURN avg(review_count) AS average_reviews,
       stdev(review_count) AS stdev_reviews,
       max(review_count) AS max_reviews,
       min(review_count) AS min_reviews;
// Link users with a similar compliment profile.
// Only users that carry a `total` property take part. For every such user p1
// (one per transaction: batchSize 1) the action statement compares p1 with every
// other such user p2 exactly once (id(p1) < id(p2)), computes the cosine
// similarity of their eleven compliment_*_normalized properties and, when it
// exceeds 0.8, upserts an undirected COMPLIMENT_SIM relationship with the score
// in `weight`.
// Requires `total` and the compliment_*_normalized properties to have been
// written on the :User nodes beforehand.
// `IS NOT NULL` is used for the property-existence test because exists(n.prop)
// is deprecated in Neo4j 4.x and removed in 5.
// NOTE(review): pairs already connected by SIMILAR are excluded; confirm this is
// intended rather than a check for an existing COMPLIMENT_SIM relationship.
CALL apoc.periodic.iterate(
"MATCH (p1:User) WHERE p1.total IS NOT NULL RETURN p1",
"
MATCH (p2:User) WHERE p2.total IS NOT NULL
AND id(p1) < id(p2) AND NOT (p1)-[:SIMILAR]-(p2)
WITH p1,p2, apoc.algo.cosineSimilarity([p1.compliment_cool_normalized,
p1.compliment_cute_normalized,
p1.compliment_funny_normalized,
p1.compliment_hot_normalized,
p1.compliment_list_normalized,
p1.compliment_more_normalized,
p1.compliment_note_normalized,
p1.compliment_photos_normalized,
p1.compliment_plain_normalized,
p1.compliment_profile_normalized,
p1.compliment_writer_normalized]
,
[p2.compliment_cool_normalized,
p2.compliment_cute_normalized,
p2.compliment_funny_normalized,
p2.compliment_hot_normalized,
p2.compliment_list_normalized,
p2.compliment_more_normalized,
p2.compliment_note_normalized,
p2.compliment_photos_normalized,
p2.compliment_plain_normalized,
p2.compliment_profile_normalized,
p2.compliment_writer_normalized]
) as cosineSimilarity WHERE cosineSimilarity > 0.8
MERGE (p1)-[s:COMPLIMENT_SIM]-(p2) SET s.weight = cosineSimilarity"
, {batchSize:1, parallel:false,iterateList:true});
Just wanted to say thank you for this. This really clarified how to use apoc.periodic.iterate and UNWIND for me.
Hi,
Hello,
In my opinion there is a mistake in the original code.
On line 45, the direction is missing in the MERGE (u)-[:FRIEND]-(u1) relationship.
Because of this, loading takes a very long time in version 4.4.x.
The same happens on line 53:
MERGE (p1)-[s:SIMILAR]-(p2) SET s.weight = apoc.algo.euclideanSimilarity(s1,s2).
// Compute each user's compliment total and per-type compliment share.
// For every user, the properties whose key starts with "compliment" are summed
// into `total`; users with a total of 50 or less are left untouched. For the
// remaining users `total` is stored and, for each compliment property <key>, the
// ratio <key> / total is written to the dynamic property <key>_normalized via
// apoc.create.setProperty.
// Keys that already end in "_normalized" are skipped so the statement can be
// re-run safely: otherwise a second run would add the stored ratios to `total`
// and create <key>_normalized_normalized properties.
// Each user is only ever written by the row that carries it, which is what
// makes parallel:true usable here.
CALL apoc.periodic.iterate('MATCH (u:User) RETURN u'
,'
UNWIND keys(u) AS key
WITH u, key
WHERE key STARTS WITH "compliment" AND NOT key ENDS WITH "_normalized"
WITH u, collect(key) AS compliment_keys, sum(u[key]) AS total
WHERE total > 50
SET u.total = total
WITH u, total, compliment_keys
UNWIND compliment_keys AS key
WITH u, key + "_normalized" AS new_key, 1.0 * u[key] / total AS ratio
CALL apoc.create.setProperty(u, new_key, ratio)
YIELD node
RETURN COUNT(*)
',{batchSize:10000,iterateList:true,parallel:true});