Skip to content

Instantly share code, notes, and snippets.

@thekindlyone
Last active November 27, 2015 07:13
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save thekindlyone/d30f8634160c22434d4d to your computer and use it in GitHub Desktop.
Save thekindlyone/d30f8634160c22434d4d to your computer and use it in GitHub Desktop.
from __future__ import division
from bs4 import BeautifulSoup as bs
import requests
import re
import time
from pymongo import MongoClient
from time import mktime
from datetime import datetime
import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools
from itertools import product
def get_time(struct):
    """Convert a ``time.struct_time`` (or equivalent tuple) to a naive datetime.

    The datetime is built directly from the year/month/day/hour/minute/second
    fields instead of round-tripping through ``mktime``/``fromtimestamp``.
    That round trip depends on the local timezone and the platform's
    ``time_t`` range, so it can shift wall-clock times that fall in a DST gap
    and can fail for dates the platform cannot represent.

    Args:
        struct: A ``time.struct_time`` or any sequence whose first six items
            are year, month, day, hour, minute and second.

    Returns:
        A naive ``datetime`` carrying the same calendar fields.
    """
    year, month, day, hour, minute, second = struct[:6]
    # struct_time permits leap seconds (60/61) but datetime only accepts 0-59.
    return datetime(year, month, day, hour, minute, min(second, 59))
# Escaped characters that are decoded back to markup before parsing.
_HTML_ESCAPES = {'\\u0026': '&', '\\u003c': '<', '\\u003d': '=', '\\u003e': '>'}
# Compiled once at import time rather than once per fetched page.
_HTML_ESCAPE_RE = re.compile('|'.join(re.escape(seq) for seq in _HTML_ESCAPES))
_REVIEWS_URL = 'https://play.google.com/store/getreviews?authuser=0'


def _extract_html(payload):
    """Return the HTML fragment embedded in a raw ``getreviews`` response body."""
    # The fragment is taken to be everything between the first ',"' and the
    # last '"' of the body.
    html = payload[payload.find(',"') + 2:payload.rfind('"')]
    # Decode the known \uXXXX escapes while the backslash is still attached,
    # so review text that merely contains e.g. "u003c" is left untouched.
    html = _HTML_ESCAPE_RE.sub(lambda match: _HTML_ESCAPES[match.group()], html)
    # Whatever backslashes remain are dropped (e.g. \" becomes ").
    return html.replace('\\', '')


def _parse_review(div):
    """Build the review record for one ``single-review`` element."""
    title_span = div.find('span', 'review-title')
    stars_label = div.find('div', 'tiny-star').get('aria-label')
    return dict(
        date=get_time(time.strptime(div.find('span', 'review-date').text, "%d %B %Y")),
        # The first digit found in the aria-label is used as the rating.
        rating=int(re.search(r'\d', stars_label).group()),
        title=title_span.text,
        # The node that directly follows the title span is stored as the body.
        review=title_span.next_sibling,
    )


def get_reviews(app_id='com.snapdeal.main',
                token='YEB6Kq1c9CQVYEF3hs_yi9_r9Bc:1448513248663',
                max_pages=199):
    """Yield review records scraped page by page from the Play Store.

    Args:
        app_id: Value sent as the ``id`` form field.
        token: Value sent as the ``token`` form field.
        max_pages: Highest ``pageNum`` requested; pages 1..max_pages are
            fetched in order.

    Yields:
        dict with keys ``date`` (datetime), ``rating`` (int), ``title`` and
        ``review``.

    Raises:
        requests.HTTPError: If a page request returns an error status.
    """
    for pno in range(1, max_pages + 1):
        payload = {
            'reviewType': '0',
            'pageNum': str(pno),
            'id': app_id,
            'reviewSortOrder': '4',
            'xhr': '1',
            'token': token,
        }
        # TLS verification is left at the requests default (enabled), and a
        # timeout keeps one stalled request from hanging the whole scrape.
        r = requests.post(_REVIEWS_URL, data=payload, timeout=30)
        r.raise_for_status()
        soup = bs(_extract_html(r.content), 'lxml')
        review_divs = soup.findAll('div', 'single-review')
        if not review_divs:
            # Assumes a page without reviews means there is nothing further
            # to fetch, so the remaining page numbers are skipped.
            break
        for div in review_divs:
            yield _parse_review(div)
# Connect with MongoClient's defaults and select snapdeal.reviews.
client = MongoClient()
db = client['snapdeal']
collection = db['reviews']

# Echo each scraped review and store it as its own document.
for scraped in get_reviews():
    print(scraped)
    collection.insert_one(scraped)
# Per-date statistics: average rating, number of reviews, and how many of
# them are high (rating > 3) or low (rating < 2).
d = {}
for day in collection.find().distinct('date'):
    day_reviews = collection.find({'date': day})
    total = day_reviews.count()
    high = collection.find({'date': day, 'rating': {'$gt': 3}}).count()
    low = collection.find({'date': day, 'rating': {'$lt': 2}}).count()
    rating_sum = sum(int(doc['rating']) for doc in day_reviews)
    # True division comes from the file's `from __future__ import division`.
    d[day] = {
        'avg': rating_sum / total,
        'total': total,
        'high': high,
        'low': low,
    }
dates = sorted(d)
# One series per statistic, drawn in this order.
series_keys = 'avg total high low'.split()

legend = {
    'avg': 'Average Ratings Received',
    'total': 'Total Ratings Received',
    'high': 'Ratings >= 4',
    'low': 'Ratings <= 1',
}

# Axis titles for the four subplots: the same x title everywhere, and the
# series label as the y title.
axes = {
    'x': ['Date'] * 4,
    'y': [legend[name] for name in series_keys],
}

traces = []
for name in series_keys:
    traces.append(go.Scatter(
        x=dates,
        y=[d[day][name] for day in dates],
        name=legend[name],
    ))

# Not referenced by the statements below; kept as in the original script.
layout = {
    'title': 'Award Wapasi',
    'xaxis': {'title': 'Date'},
    'yaxis': {'title': 'Month'},
}

fig = tools.make_subplots(rows=2, cols=2, subplot_titles=axes.get('y'))

# Fill the 2x2 grid in row-major order: (1,1), (1,2), (2,1), (2,2).
grid_cells = product(range(1, 3), range(1, 3))
for (row, col), trace in zip(grid_cells, traces):
    fig.append_trace(trace, row, col)

# Title xaxis1..xaxis4 and then yaxis1..yaxis4 in the figure layout.
template = '{}axis{}'.format
for axis in ['x', 'y']:
    for num in range(1, 5):
        fig['layout'][template(axis, num)].update(title=axes.get(axis)[num - 1])

plot_url = py.plot(fig, filename='award wapasi')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment