Created
March 10, 2014 04:33
-
-
Save hyonschu/9459517 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from sklearn.neighbors.kde import KernelDensity | |
# Paths of the Yelp academic dataset dumps; each file holds one JSON
# document per line and is expected in the current working directory.
file_name_business = 'yelp_academic_dataset_business.json'
file_name_checkin = 'yelp_academic_dataset_checkin.json'
file_name_review = 'yelp_academic_dataset_review.json'
file_name_user = 'yelp_academic_dataset_user.json'
def read_data(file_name):
    """Parse a JSON-lines file into a list of decoded objects.

    Args:
        file_name: Path of a file containing one JSON document per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line is not valid JSON.
    """
    # Binary mode keeps this working on both Python 2 and 3: json.loads
    # accepts the raw line either way and handles the decoding itself.
    with open(file_name, 'rb') as handle:
        # Iterating the handle streams line by line, so the final record is
        # kept even without a trailing newline; blank lines are skipped.
        return [json.loads(line) for line in handle if line.strip()]
# Load each dataset completely into memory as a list of decoded JSON records.
business = read_data(file_name_business)
# Read the check-in dump here; this previously re-read the business file.
checkin = read_data(file_name_checkin)
review = read_data(file_name_review)
user = read_data(file_name_user)
# Notebook-style peek at one raw review record (displays only interactively).
review[1]

reviewpd = pd.DataFrame(review)
# Peek at the `votes` entry of a single review (displays only interactively).
reviewpd.votes[1]

# Each `votes` entry is indexed by 'cool', 'funny' and 'useful'; promote each
# of those counts to its own column. The `votes` column itself is kept.
reviewpd['cool'] = [votes['cool'] for votes in reviewpd.votes]
reviewpd['funny'] = [votes['funny'] for votes in reviewpd.votes]
reviewpd['useful'] = [votes['useful'] for votes in reviewpd.votes]

# Peek at the first raw review records (displays only interactively).
review[:5]
userpd = pd.DataFrame(user)
# Peek at the first rows (displays only interactively).
userpd[:5]

# Each `votes` entry is indexed by 'useful', 'funny' and 'cool'; promote each
# of those counts to its own column, then drop the nested column.
userpd['useful'] = [votes['useful'] for votes in userpd.votes]
userpd['funny'] = [votes['funny'] for votes in userpd.votes]
userpd['cool'] = [votes['cool'] for votes in userpd.votes]
del userpd['votes']

# Peek at the flattened frame (displays only interactively).
userpd[:5]
# --- Friend count vs. average star rating, first 20000 users ---------------
# NOTE: despite its name, `user10k` holds the first 20000 rows.
user10k = userpd[:20000]
# One friend count per user: the size of that user's `friends` entry.
asdf = [len(friends) for friends in user10k['friends']]

# Call pyplot through `plt` so the module is never rebound and no pylab
# star-import is required.
plt.figure(figsize=(18, 10))
plt.scatter(user10k.average_stars, asdf, s=20, alpha=0.05)
# Fix the view after plotting so the limits are not re-autoscaled.
plt.ylim(-5, 100)
plt.xlim(0.9, 5.1)

# --- Same relationship, restricted to users averaging 4 stars or more ------
user4 = userpd[userpd.average_stars >= 4.0]
# Number of users in the subset (displays only interactively).
len(user4)
asdf4 = [len(friends) for friends in user4['friends']]

plt.figure(figsize=(18, 10))
plt.scatter(user4['average_stars'], asdf4, alpha=0.05)
plt.ylim(-5, 500)
plt.xlim(3.9, 5.1)

# --- Distribution of average star ratings over all users -------------------
# Mean rating (displays only interactively).
userpd.average_stars.mean()
# Start a fresh figure so the density curve is not drawn over the scatter.
plt.figure()
userpd['average_stars'].plot(kind="density", xlim=(-.1, 5.1))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment