Skip to content

Instantly share code, notes, and snippets.

@alanmbarr
Last active May 24, 2020 11:00
Show Gist options
  • Save alanmbarr/586dfda80f80a3e7fc0420a7fb99d8ba to your computer and use it in GitHub Desktop.
Save alanmbarr/586dfda80f80a3e7fc0420a7fb99d8ba to your computer and use it in GitHub Desktop.
Determine weighted average for hotel ratings
# Data source: TripAdvisor dataset from http://times.cs.uiuc.edu/~wang296/Data/
import json
import numpy as np
import os
# Directory that holds the hotel JSON files (one file per hotel).
path = '/place/i/have/lots/of/json'
# Silence NumPy divide/invalid floating-point warnings and print NaN as "0"
# whenever NumPy itself renders an array.
np.seterr(divide='ignore', invalid='ignore')
np.set_printoptions(nanstr="0")


def _collect_scores(reviews):
    """Group the ratings of all reviews by category name.

    Args:
        reviews: iterable of review dicts, each with a "Ratings" mapping of
            category name -> numeric string (e.g. "4.0").

    Returns:
        dict mapping each category name to the list of its scores, truncated
        to int, in review order.
    """
    scores = {}
    for review in reviews:
        for category, raw_value in review["Ratings"].items():
            scores.setdefault(category, []).append(int(float(raw_value)))
    return scores


def _weighted_average(values):
    """Return the absolute, position-weighted average of *values*.

    The i-th value (0-based) gets weight i / sum(0..n-1), so values later in
    the list count more and the first one has weight zero.  Requires at least
    two values, otherwise the weights would sum to zero.
    """
    count = len(values)
    weights = np.arange(count) / sum(range(count))
    # Online review averages are reported as non-negative, so the sign is
    # dropped.  NOTE(review): per the gist author's comment, -1 marks a rating
    # the user did not provide; such entries still take part in the average.
    return np.absolute(np.average(values, weights=weights))


def _format_hotel_line(json_data):
    """Build the report line (without trailing newline) for one hotel.

    Layout: "<HotelID>\\t<Name>\\t" followed by "<category> | <score> | " for
    every rating category.  Categories with several scores show the weighted
    average with two decimals; a category with a single score shows it as is.
    """
    hotel_info = json_data["HotelInfo"]
    scores = _collect_scores(json_data["Reviews"])

    parts = ["{}\t{}\t".format(hotel_info["HotelID"], hotel_info.get("Name", ""))]
    for category, values in scores.items():
        if len(values) > 1:
            parts.append(
                "{} | {:.2f} | ".format(category.strip(), _weighted_average(values))
            )
        else:
            parts.append("{} | {} | ".format(category.strip(), values[0]))
    return "".join(parts)


def main(directory=path):
    """Print one weighted-rating line for every hotel file in *directory*.

    Every directory entry is opened and parsed as JSON with the keys
    "HotelInfo" and "Reviews".
    """
    # Loop over all files in the directory.
    for filename in os.listdir(directory):
        # os.path.join inserts the separator that plain concatenation dropped.
        with open(os.path.join(directory, filename), encoding="utf-8") as json_file:
            json_data = json.load(json_file)
        print(_format_hotel_line(json_data))


if __name__ == "__main__":
    main()
@alanmbarr
Copy link
Author

alanmbarr commented Jun 4, 2016

Refactored to preserve -1 when user did not provide a score

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment