Last active
May 24, 2020 11:00
-
-
Save alanmbarr/586dfda80f80a3e7fc0420a7fb99d8ba to your computer and use it in GitHub Desktop.
Determine weighted average for hotel ratings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# http://times.cs.uiuc.edu/~wang296/Data/ tripadvisor dataset
#
# For every hotel JSON file in `path`, print one line holding the hotel id,
# the hotel name, and a position-weighted average score for each rating
# category found in the hotel's reviews.
import json
import os

import numpy as np

path = '/place/i/have/lots/of/json'
# Silence numpy's divide-by-zero / invalid-operation warnings.
np.seterr(divide='ignore', invalid='ignore')
# Render NaN as "0" whenever numpy arrays are printed.
np.set_printoptions(nanstr="0")


def collect_scores(reviews):
    """Group the ratings of all *reviews* by category name.

    Each review is a mapping with a "Ratings" entry that maps a category
    name to a numeric string.  Every value is converted with
    ``int(float(value))``, so fractional ratings are truncated.

    Returns a dict mapping each category name to the list of its integer
    scores, in the order the reviews were given.
    """
    scores = {}
    for review in reviews:
        for category, raw_value in review["Ratings"].items():
            scores.setdefault(category, []).append(int(float(raw_value)))
    return scores


def weighted_average(values):
    """Return the absolute value of the position-weighted average of *values*.

    The weight of each value is its index divided by the sum of all indices,
    so the first value contributes nothing and later values count more.
    Intended for two or more values; with fewer, the index sum is zero and
    the weights are undefined.
    """
    count = len(values)
    weights = np.arange(count) / sum(range(count))
    # These are online reviews, so a negative average is not wanted;
    # report its magnitude instead.
    return np.absolute(np.average(values, weights=weights))


def format_hotel(json_data):
    """Build the output line for one parsed hotel document.

    *json_data* must provide "HotelInfo" (with "HotelID" and an optional
    "Name") and "Reviews".  The line is the id and name, each followed by a
    tab, then ``"<category> | <score> | "`` for every rating category.
    Categories with a single score show that score unchanged (presumably
    keeping -1 for a rating the user did not provide -- confirm against the
    dataset); categories with several scores show the weighted average with
    two decimals.
    """
    hotel_info = json_data["HotelInfo"]
    scores = collect_scores(json_data["Reviews"])
    pieces = ["{}\t{}\t".format(hotel_info["HotelID"], hotel_info.get("Name", ""))]
    for category, values in scores.items():
        if len(values) > 1:
            pieces.append(
                "{} | {:.2f} | ".format(category.strip(), weighted_average(values))
            )
        else:
            pieces.append("{} | {} | ".format(category.strip(), str(values[0])))
    return "".join(pieces)


def main(directory=path):
    """Print one summary line for every file found in *directory*."""
    for filename in os.listdir(directory):
        # Join with the platform separator: `directory` has no trailing
        # slash, so plain concatenation would yield a non-existent path.
        with open(os.path.join(directory, filename)) as json_file:
            json_data = json.load(json_file)
        print(format_hotel(json_data))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Refactored to preserve -1 when user did not provide a score