Skip to content

Instantly share code, notes, and snippets.

@mzaksana
Created November 25, 2019 08:52
Show Gist options
  • Save mzaksana/64c8ff54a144b46a77ebb45b42788fd0 to your computer and use it in GitHub Desktop.
Save mzaksana/64c8ff54a144b46a77ebb45b42788fd0 to your computer and use it in GitHub Desktop.
Virtualization
import os
import pandas
import json
import re
from collections import OrderedDict
import plotly.graph_objects as go
import chart_studio.plotly as py
from bs4 import BeautifulSoup
import yaml
PRODUCT_DATA={}
SHOP_DATA={}
def orderDict(data):
    """Return a new OrderedDict of ``data`` sorted by average rating, best first.

    Every value in ``data`` must be a mapping with an ``'avg_rating'`` entry.
    ``data`` itself is left unmodified.
    """
    def rating_of(item):
        # ``item`` is a (key, value) pair; rank it by the value's rating.
        return item[1]['avg_rating']

    ranked = sorted(data.items(), key=rating_of, reverse=True)
    return OrderedDict(ranked)
def ascii_encode_dict(data):
    """Return a copy of ``data`` with text keys/values as native strings.

    On Python 2, ``unicode`` keys and values are encoded to ASCII byte
    strings (a non-ASCII character raises ``UnicodeEncodeError``, because the
    encode is strict). On Python 3 there is no ``unicode`` type and ``str`` is
    already the native text type, so everything is passed through unchanged
    instead of failing with ``NameError``.

    Only the top level of ``data`` is converted; nested containers are copied
    by reference.
    """
    try:
        text_type = unicode  # noqa: F821 -- exists on Python 2 only
    except NameError:
        text_type = None  # Python 3: nothing needs encoding

    def ascii_encode(x):
        if text_type is not None and isinstance(x, text_type):
            return x.encode('ascii')
        return x

    return {ascii_encode(key): ascii_encode(value) for key, value in data.items()}
def readData(text):
    """Parse the JSON document in ``text`` and return it via ascii_encode_dict."""
    parsed = json.loads(text)
    return ascii_encode_dict(parsed)
def makeData(jsonObj):
    """Record one review in PRODUCT_DATA, grouped by its ``'asin'``.

    The first review seen for a product creates its entry (review list,
    placeholder ``avg_rating`` of 1, and a zeroed star histogram). Every
    review, including that first one, is then appended to the entry's
    ``'data'`` list and counted in ``'stars'`` under its ``'overall'`` rating.

    Raises ``KeyError`` if ``jsonObj`` lacks ``'asin'`` or ``'overall'``, or if
    the rating is not one of 1.0, 2.0, 3.0, 4.0, 5.0.
    """
    asin = jsonObj['asin']
    entry = PRODUCT_DATA.get(asin)
    if entry is None:
        entry = PRODUCT_DATA[asin] = {
            'data': [],
            'avg_rating': 1,
            'stars': {1.0: 0, 2.0: 0, 3.0: 0, 4.0: 0, 5.0: 0},
        }
    entry['data'].append(jsonObj)
    # Count outside the create/update branch so the first review of a
    # product is not missing from its histogram.
    entry['stars'][jsonObj['overall']] += 1
def makeDataShop(jsonObj):
    """Append ``jsonObj`` to the SHOP_DATA list kept for its ``'brand'``.

    A new single-element list is started the first time a brand is seen.
    """
    brand = jsonObj['brand']
    if brand in SHOP_DATA:
        SHOP_DATA[brand].append(jsonObj)
    else:
        SHOP_DATA[brand] = [jsonObj]
def makeView(top):
    """Show a sunburst chart of star counts for the first ``top`` products.

    Products are taken in the iteration order of PRODUCT_DATA. The chart has
    a root node ``"review"``, one child per product (sized by that product's
    total number of counted reviews) and five grandchildren per product, one
    for each star level.

    Exactly ``top`` products are shown (fewer if PRODUCT_DATA is smaller);
    a ``top`` of zero or less shows only the empty root.
    """
    star_levels = ((1.0, "1.0"), (2.0, "2.0"), (3.0, "3.0"), (4.0, "4.0"), (5.0, "5.0"))
    labels_data = ["review"]
    parents_data = [""]
    values_data = []
    total_data = 0

    for position, key in enumerate(PRODUCT_DATA):
        # Stop only after ``top`` products have been emitted, so the
        # ``top``-th product is included rather than skipped.
        if position >= top:
            break
        stars = PRODUCT_DATA[key]['stars']
        counts = [stars[level] for level, _ in star_levels]
        total = sum(counts)
        total_data += total

        # Star labels carry a per-product numeric suffix (starting at 2),
        # presumably so that labels stay distinct across products.
        suffix = str(position + 2)
        labels_data.append(key)
        labels_data.extend(text + suffix for _, text in star_levels)
        parents_data.append("review")
        parents_data.extend([key] * len(star_levels))
        values_data.append(total)
        values_data.extend(counts)

    # The root's value is the grand total, as branchvalues="total" expects
    # each parent value to cover the sum of its children.
    values_data.insert(0, total_data)

    fig = go.Figure(go.Sunburst(
        labels=labels_data,
        parents=parents_data,
        values=values_data,
        branchvalues="total",
    ))
    fig.update_layout(margin=dict(t=0, l=0, r=0, b=0))
    fig.show()
def sumGroupCount(dictData):
    """Return the sum of all values stored in ``dictData`` (0 when empty)."""
    return sum(dictData.values())
def to_utf8(loader, node):
    """Build the scalar for ``node`` with ``loader`` and return it UTF-8 encoded."""
    scalar = loader.construct_scalar(node)
    return scalar.encode('utf-8')
def calculateAvgRating():
    """Set ``'avg_rating'`` on every PRODUCT_DATA entry to its mean rating.

    The mean is the sum of the ``'overall'`` values of the entry's reviews
    divided by the number of reviews. Entries with no reviews keep their
    current ``'avg_rating'`` instead of raising ``ZeroDivisionError``.
    """
    for entry in PRODUCT_DATA.values():
        reviews = entry['data']
        if not reviews:
            continue
        rating_sum = sum(review['overall'] for review in reviews)
        # float() forces true division even for integer ratings on Python 2.
        entry['avg_rating'] = float(rating_sum) / len(reviews)
# Load one JSON review per line, aggregate per product, then chart the
# ten best-rated products.
filepath = '../data/500.json'
cnt = 0  # number of review records loaded
with open(filepath) as fp:
    # Iterate the file directly so the first line is processed too; the
    # previous readline() loop discarded it before parsing.
    for line in fp:
        # Skip lines shorter than 5 characters (blank or stub lines).
        if len(line) < 5:
            continue
        makeData(readData(line))
        cnt += 1
calculateAvgRating()
# orderDict returns a new mapping; rebind the global so makeView iterates
# products from highest to lowest average rating.
PRODUCT_DATA = orderDict(PRODUCT_DATA)
makeView(10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment