# GitHub Gist hyonschu/9459517 by @hyonschu, created March 10, 2014 04:33.
# (Navigation text from the gist web page -- "Skip to content", star/fork/save
# prompts -- was captured with the code and is not part of the script.)
import json
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors.kde import KernelDensity
# Names of the input JSON files, one per dataset. They are bare file names,
# so they are resolved relative to the current working directory.
file_name_business = 'yelp_academic_dataset_business.json'
file_name_checkin = 'yelp_academic_dataset_checkin.json'
file_name_review = 'yelp_academic_dataset_review.json'
file_name_user = 'yelp_academic_dataset_user.json'
def read_data(file_name):
    """Load a newline-delimited JSON file into a list of decoded objects.

    Every non-blank line of the file is parsed on its own with
    ``json.loads``.

    Args:
        file_name: Path of the file to read.

    Returns:
        A list holding one decoded object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line is not valid JSON.
    """
    # Text mode keeps every line a ``str`` (splitting bytes on '\n' raises
    # TypeError on Python 3), and the context manager closes the handle.
    with open(file_name, encoding='utf-8') as handle:
        # Skipping blank lines replaces the old ``[:-1]`` slice, which dropped
        # the last record whenever the file lacked a trailing newline.
        return [json.loads(line) for line in handle if line.strip()]
# Load every dataset into memory as a list of decoded JSON records.
business = read_data(file_name_business)
# Read the check-in file here; this line used to re-read the business file,
# which made `checkin` a duplicate of `business`.
checkin = read_data(file_name_checkin)
review = read_data(file_name_review)
user = read_data(file_name_user)
# Notebook-style peek at the second raw review record.
review[1]

# Tabulate the raw review records, one row per record.
reviewpd = pd.DataFrame(review)

# Peek at the `votes` value attached to the second review.
reviewpd['votes'][1]

# Copy the `cool`, `funny` and `useful` entries of every row's `votes` value
# into columns of their own (added in that order).
reviewpd['cool'] = [tally['cool'] for tally in reviewpd['votes']]
reviewpd['funny'] = [tally['funny'] for tally in reviewpd['votes']]
reviewpd['useful'] = [tally['useful'] for tally in reviewpd['votes']]

# NOTE(review): this previews the raw list rather than the frame; possibly
# `reviewpd[:5]` was intended -- confirm.
review[:5]
# Tabulate the raw user records, one row per record.
userpd = pd.DataFrame(user)

# Notebook-style preview of the first five rows.
userpd[:5]

# Copy the `useful`, `funny` and `cool` entries of every row's `votes` value
# into columns of their own (added in that order).
userpd['useful'] = [tally['useful'] for tally in userpd['votes']]
userpd['funny'] = [tally['funny'] for tally in userpd['votes']]
userpd['cool'] = [tally['cool'] for tally in userpd['votes']]

# The nested column is dropped once its entries have been copied out.
del userpd['votes']

# Preview the reshaped frame.
userpd[:5]
# First 20,000 users (the slice is 20000 even though the name says "10k").
user10k = userpd[:20000]

# Friend count per user: iterate the `friends` values directly. Indexing the
# Series with each friends value, as before, performs a label lookup and
# raises KeyError on current pandas.
asdf = [len(friends) for friends in user10k['friends']]

# Scatter of friend count against average star rating. Everything goes
# through the `plt` module explicitly; the earlier `plt = figsize(...)` form
# rebound `plt` and relied on names that only exist under `%pylab`.
plt.figure(figsize=(18, 10))
plt.ylim(-5, 100)
plt.xlim(0.9, 5.1)
plt.scatter(user10k['average_stars'], asdf, s=20, alpha=0.05)
# Users whose average star rating is at least 4.0.
user4 = userpd[userpd['average_stars'] >= 4.0]

# Notebook-style check of how many users passed the filter.
len(user4)

# Friend count per selected user: iterate the `friends` values directly.
# Indexing the Series with each friends value, as before, performs a label
# lookup and raises KeyError on current pandas.
asdf4 = [len(friends) for friends in user4['friends']]

# Scatter of friend count against average star rating for the filtered
# users. Everything goes through the `plt` module explicitly; the earlier
# `plt = figsize(...)` form rebound `plt` and relied on `%pylab`-only names.
plt.figure(figsize=(18, 10))
plt.ylim(-5, 500)
plt.xlim(3.9, 5.1)
plt.scatter(user4['average_stars'], asdf4, alpha=0.05)
# Mean of the average star rating across all users. The Series method is
# used because a bare `mean` is only defined under `%pylab`.
userpd['average_stars'].mean()

# Density plot of the average star rating, with the x-axis limited to
# (-0.1, 5.1).
userpd['average_stars'].plot(kind="density", xlim=(-.1, 5.1))
# (Gist page footer: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment")