tomasonjo/Yelp

## Yelp
// Declare the schema the import statements rely on:
//   - an index on :Category(id), the property the business import MERGEs
//     categories on (the index used to be declared on `name`, which no
//     visible statement ever sets, so that MERGE ran without index support);
//   - unique constraints on :Business(id), :User(id) and :Review(id).
// NOTE(review): per the APOC docs, apoc.schema.assert by default also drops
// existing indexes/constraints that are not listed here - confirm that is wanted.
CALL apoc.schema.assert(
{Category:['id']},
{Business:['id'],User:['id'],Review:['id']});


// Import businesses in batches of 10000 JSON records.
// For each record: MERGE a :Business keyed on business_id, copy the record's
// fields onto it except the keys listed in apoc.map.clean, then UNWIND
// `categories`, MERGE a :Category per entry and link it with IN_CATEGORY.
// Assumes `categories` is a list of strings - TODO confirm against the dataset version.
CALL apoc.periodic.iterate("
CALL apoc.load.json('file:///home/tomasi/Downloads/dataset/business.json') YIELD value RETURN value
","
MERGE (b:Business{id:value.business_id})
SET b += apoc.map.clean(value, ['attributes','hours','business_id','categories','address','postal_code'],[])
WITH b,value.categories as categories
UNWIND categories as category
MERGE (c:Category{id:category})
MERGE (b)-[:IN_CATEGORY]->(c)
",{batchSize: 10000, iterateList: true});

// Import tips in batches of 20000 JSON records.
// For each record: look up the :Business by business_id (MATCH, so a record
// whose business does not exist writes nothing), MERGE the :User by user_id,
// and MERGE a TIP relationship identified by its date and likes values.
CALL apoc.periodic.iterate("
CALL apoc.load.json('file:///ssd/yelp/dataset/tip.json') YIELD value RETURN value
","
MATCH (b:Business{id:value.business_id})
MERGE (u:User{id:value.user_id})
MERGE (u)-[:TIP{date:value.date,likes:value.likes}]->(b)
",{batchSize: 20000, iterateList: true});


// Import reviews in batches of 10000 JSON records.
// For each record: look up the :Business by business_id (MATCH, so a record
// whose business does not exist writes nothing), MERGE the :User and the
// :Review by their ids, connect them as (User)-[:WROTE]->(Review)-[:REVIEWS]->(Business),
// then copy the remaining fields onto the review.
// The value list handed to apoc.map.clean is written with single quotes:
// the whole statement is itself a double-quoted string, and the former
// double-quoted "0" ended that string early, making the call unparsable.
CALL apoc.periodic.iterate("
CALL apoc.load.json('file:///home/tomasi/Downloads/dataset/review.json') YIELD value RETURN value
","
MATCH (b:Business{id:value.business_id})
MERGE (u:User{id:value.user_id})
MERGE (r:Review{id:value.review_id})
MERGE (u)-[:WROTE]->(r)
MERGE (r)-[:REVIEWS]->(b)
SET r += apoc.map.clean(value, ['business_id','user_id','review_id','text'],['0'])
",{batchSize: 10000, iterateList: true});

// Import users in batches of 100 JSON records.
// For each record: MERGE the :User by user_id, copy the record's fields onto
// it except `friends` and `user_id`, then UNWIND `friends`, MERGE a :User per
// friend id and MERGE a FRIEND relationship. The relationship pattern has no
// direction, so an existing FRIEND link in either direction is reused.
// Assumes `friends` is a list of user ids - TODO confirm against the dataset version.
CALL apoc.periodic.iterate("
CALL apoc.load.json('file:///ssd/yelp/dataset/user.json') YIELD value RETURN value
","
MERGE (u:User{id:value.user_id})
SET u += apoc.map.clean(value, ['friends','user_id'],[])
WITH u,value.friends as friends
UNWIND friends as friend
MERGE (u1:User{id:friend})
MERGE (u)-[:FRIEND]-(u1)
",{batchSize: 100, iterateList: true});


// Link users who have reviewed the same business.
// First query: for every pair (p1, p2) connected through reviews of a common
// business, collect the star ratings of p1's and p2's reviews; id(p1) < id(p2)
// keeps each unordered pair only once.
// Second statement: MERGE a SIMILAR relationship between the pair and store
// apoc.algo.euclideanSimilarity of the two star lists as its `weight`.
// Processed sequentially (parallel:false) in batches of 10000 pairs.
CALL apoc.periodic.iterate(
"MATCH (p1:User)-->(r1:Review)-->(:Business)<--(r2:Review)<--(p2:User)
where id(p1) < id(p2)
RETURN p1,p2,collect(r1.stars) as s1,collect(r2.stars) as s2",
"MERGE (p1)-[s:SIMILAR]-(p2) SET s.weight = apoc.algo.euclideanSimilarity(s1,s2)"
, {batchSize:10000, parallel:false,iterateList:true});

// Summary statistics of the FRIEND degree over all :User nodes.
// The degree is computed once per user and then aggregated; the statement
// is terminated with `;` so it does not run into the next MATCH.
MATCH (u:User)
WITH apoc.node.degree(u,'FRIEND') AS friend_count
RETURN avg(friend_count) AS average_friends,
       stdev(friend_count) AS stdev_friends,
       max(friend_count) AS max_friends,
       min(friend_count) AS min_friends;


// Summary statistics of the REVIEWS degree over all :Business nodes.
// The degree is computed once per business and then aggregated; the statement
// is terminated with `;` so it does not run into the CALL that follows.
MATCH (b:Business)
WITH apoc.node.degree(b,'REVIEWS') AS review_count
RETURN avg(review_count) AS average_reviews,
       stdev(review_count) AS stdev_reviews,
       max(review_count) AS max_reviews,
       min(review_count) AS min_reviews;
	// Declare the schema the import statements rely on:
	//   - an index on :Category(id), the property the business import MERGEs
	//     categories on (the index used to be declared on `name`, which no
	//     visible statement ever sets, so that MERGE ran without index support);
	//   - unique constraints on :Business(id), :User(id) and :Review(id).
	// NOTE(review): per the APOC docs, apoc.schema.assert by default also drops
	// existing indexes/constraints that are not listed here - confirm that is wanted.
	CALL apoc.schema.assert(
	{Category:['id']},
	{Business:['id'],User:['id'],Review:['id']});


	// Import businesses in batches of 10000 JSON records.
	// For each record: MERGE a :Business keyed on business_id, copy the record's
	// fields onto it except the keys listed in apoc.map.clean, then UNWIND
	// `categories`, MERGE a :Category per entry and link it with IN_CATEGORY.
	// Assumes `categories` is a list of strings - TODO confirm against the dataset version.
	CALL apoc.periodic.iterate("
	CALL apoc.load.json('file:///home/tomasi/Downloads/dataset/business.json') YIELD value RETURN value
	","
	MERGE (b:Business{id:value.business_id})
	SET b += apoc.map.clean(value, ['attributes','hours','business_id','categories','address','postal_code'],[])
	WITH b,value.categories as categories
	UNWIND categories as category
	MERGE (c:Category{id:category})
	MERGE (b)-[:IN_CATEGORY]->(c)
	",{batchSize: 10000, iterateList: true});

	// Import tips in batches of 20000 JSON records.
	// For each record: look up the :Business by business_id (MATCH, so a record
	// whose business does not exist writes nothing), MERGE the :User by user_id,
	// and MERGE a TIP relationship identified by its date and likes values.
	CALL apoc.periodic.iterate("
	CALL apoc.load.json('file:///ssd/yelp/dataset/tip.json') YIELD value RETURN value
	","
	MATCH (b:Business{id:value.business_id})
	MERGE (u:User{id:value.user_id})
	MERGE (u)-[:TIP{date:value.date,likes:value.likes}]->(b)
	",{batchSize: 20000, iterateList: true});


	// Import reviews in batches of 10000 JSON records.
	// For each record: look up the :Business by business_id (MATCH, so a record
	// whose business does not exist writes nothing), MERGE the :User and the
	// :Review by their ids, connect them as (User)-[:WROTE]->(Review)-[:REVIEWS]->(Business),
	// then copy the remaining fields onto the review.
	// The value list handed to apoc.map.clean is written with single quotes:
	// the whole statement is itself a double-quoted string, and the former
	// double-quoted "0" ended that string early, making the call unparsable.
	CALL apoc.periodic.iterate("
	CALL apoc.load.json('file:///home/tomasi/Downloads/dataset/review.json') YIELD value RETURN value
	","
	MATCH (b:Business{id:value.business_id})
	MERGE (u:User{id:value.user_id})
	MERGE (r:Review{id:value.review_id})
	MERGE (u)-[:WROTE]->(r)
	MERGE (r)-[:REVIEWS]->(b)
	SET r += apoc.map.clean(value, ['business_id','user_id','review_id','text'],['0'])
	",{batchSize: 10000, iterateList: true});

	// Import users in batches of 100 JSON records.
	// For each record: MERGE the :User by user_id, copy the record's fields onto
	// it except `friends` and `user_id`, then UNWIND `friends`, MERGE a :User per
	// friend id and MERGE a FRIEND relationship. The relationship pattern has no
	// direction, so an existing FRIEND link in either direction is reused.
	// Assumes `friends` is a list of user ids - TODO confirm against the dataset version.
	CALL apoc.periodic.iterate("
	CALL apoc.load.json('file:///ssd/yelp/dataset/user.json') YIELD value RETURN value
	","
	MERGE (u:User{id:value.user_id})
	SET u += apoc.map.clean(value, ['friends','user_id'],[])
	WITH u,value.friends as friends
	UNWIND friends as friend
	MERGE (u1:User{id:friend})
	MERGE (u)-[:FRIEND]-(u1)
	",{batchSize: 100, iterateList: true});


	// Link users who have reviewed the same business.
	// First query: for every pair (p1, p2) connected through reviews of a common
	// business, collect the star ratings of p1's and p2's reviews; id(p1) < id(p2)
	// keeps each unordered pair only once.
	// Second statement: MERGE a SIMILAR relationship between the pair and store
	// apoc.algo.euclideanSimilarity of the two star lists as its `weight`.
	// Processed sequentially (parallel:false) in batches of 10000 pairs.
	CALL apoc.periodic.iterate(
	"MATCH (p1:User)-->(r1:Review)-->(:Business)<--(r2:Review)<--(p2:User)
	where id(p1) < id(p2)
	RETURN p1,p2,collect(r1.stars) as s1,collect(r2.stars) as s2",
	"MERGE (p1)-[s:SIMILAR]-(p2) SET s.weight = apoc.algo.euclideanSimilarity(s1,s2)"
	, {batchSize:10000, parallel:false,iterateList:true});

	// Summary statistics of the FRIEND degree over all :User nodes.
	// The degree is computed once per user and then aggregated; the statement
	// is terminated with `;` so it does not run into the next MATCH.
	MATCH (u:User)
	WITH apoc.node.degree(u,'FRIEND') AS friend_count
	RETURN avg(friend_count) AS average_friends,
	       stdev(friend_count) AS stdev_friends,
	       max(friend_count) AS max_friends,
	       min(friend_count) AS min_friends;


	// Summary statistics of the REVIEWS degree over all :Business nodes.
	// The degree is computed once per business and then aggregated; the statement
	// is terminated with `;` like the other statements in this script.
	MATCH (b:Business)
	WITH apoc.node.degree(b,'REVIEWS') AS review_count
	RETURN avg(review_count) AS average_reviews,
	       stdev(review_count) AS stdev_reviews,
	       max(review_count) AS max_reviews,
	       min(review_count) AS min_reviews;