Skip to content

Instantly share code, notes, and snippets.

@vajrasar
Forked from anilshanbhag/kolaveri.py
Created September 20, 2013 08:05
Show Gist options
  • Save vajrasar/6634576 to your computer and use it in GitHub Desktop.
Save vajrasar/6634576 to your computer and use it in GitHub Desktop.
######################
#Script1 : scraper.py
######################
import urllib2
import datetime
def main():
    """Fetch the video's gdata JSON entry and save it as a timestamped dump.

    The request goes through the HTTP proxy configured below.  The raw
    response body is written unchanged to ``dump/<timestamp>.yt``, where
    the timestamp is the local time of the fetch formatted as
    ``%Y-%m-%d-%H-%M-%S``.  The ``dump/`` directory must already exist.

    Raises:
        urllib2.URLError: if the request fails.
        IOError: if the dump file cannot be written.
    """
    ph = urllib2.ProxyHandler({'http': 'http://xxx:xxxx@netmon.iitb.ac.in:80'})
    opener = urllib2.build_opener(ph)
    response = opener.open("http://gdata.youtube.com/feeds/api/videos/YR12Z8f1Dh8?alt=json")
    # urllib2 responses are not context managers on Python 2, so close the
    # connection explicitly instead of leaving it to the garbage collector.
    try:
        data = response.read()
    finally:
        response.close()
    dump_path = "dump/" + datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + ".yt"
    # The with-block guarantees the dump is flushed and closed before exit.
    with open(dump_path, "w") as dump_file:
        dump_file.write(data)


if __name__ == "__main__":
    main()
######################
#Script2 : kolaveri.py
######################
import json
import os
import datetime
import matplotlib.pyplot as plt
# Get all the entries in dump
date_format = "%Y-%m-%d-%H-%M-%S"
dump_suffix = ".yt"


def dump_timestamp(filename):
    """Return the capture time encoded in a dump file name.

    Args:
        filename: bare file name (no directory), e.g.
            ``"2011-12-07-01-00-00.yt"``.

    Returns:
        The ``datetime.datetime`` parsed with ``date_format``, or ``None``
        when the name does not end in ``dump_suffix`` (i.e. it is not a
        dump file at all).

    Raises:
        ValueError: if the name ends in ``dump_suffix`` but the rest is not
            a valid ``date_format`` timestamp.
    """
    if not filename.endswith(dump_suffix):
        return None
    return datetime.datetime.strptime(filename[:-len(dump_suffix)], date_format)


store = []
# os.walk is kept (rather than os.listdir) because it silently yields nothing
# when dump/ does not exist, so an empty run still produces an empty list.
for _root, _subdirs, names in os.walk('dump/'):
    for name in names:
        stamp = dump_timestamp(name)
        # Stray files (editor backups, .DS_Store, ...) are skipped instead of
        # crashing strptime.
        if stamp is not None:
            store.append(stamp)
    # Only the top level matters: the dumps are later reopened as
    # "dump/<timestamp>.yt", so files in subdirectories could not be read.
    break
store.sort()
# Call form with a single argument prints the same text on Python 2 and 3.
print(store)
# Get the view count returned by api
# One entry per timestamp in `store`, in the same (chronological) order.
views_count = []
for stamp in store:
    # Rebuild the dump path from the timestamp; the with-block closes each
    # file as soon as it is parsed instead of leaking one handle per dump.
    with open("dump/" + stamp.strftime(date_format) + ".yt") as dump_file:
        js = json.load(dump_file)
    # The count is converted with int() so later arithmetic works on integers.
    views_count.append(int(js['entry']['yt$statistics']['viewCount']))
# Views gained between each pair of consecutive dumps (one dump per hour,
# per the original note), so this list is one element shorter than
# views_count.
hour_count = [
    later - earlier
    for earlier, later in zip(views_count, views_count[1:])
]
# Youtube updates less frequently -- so average it out
# A run of zero-growth intervals followed by a non-zero one is treated as a
# single delayed update: the non-zero gain is split evenly over the whole run
# (the zeros plus the non-zero interval itself).  Trailing zeros with no
# later update stay at 0.
average_count = [0] * len(hour_count)
run_start = 0  # index of the first interval not yet assigned a value
for idx, gained in enumerate(hour_count):
    if gained == 0:
        continue
    span = idx - run_start + 1
    # Explicit floor division: same result as Python 2's int "/" and keeps
    # the values integral on Python 3 as well.
    share = gained // span
    for slot in range(run_start, idx + 1):
        average_count[slot] = share
    run_start = idx + 1
# Call form with a single argument prints the same text on Python 2 and 3.
print(average_count)
# Draw one bar per interval, then label three hand-picked points in time.
for hour, views in enumerate(average_count):
    plt.bar(hour, views, 0.9)

# (label text, data point the arrow targets, label offset in points)
callouts = (
    ('Start Time\n01:00 Dec 7', (0, 31000), (40, 30)),
    ('Time\n00:00 Dec 8', (23, 39000), (40, 30)),
    ('End Time\n20:00 Dec 8', (41.8, 28500), (10, 40)),
)
for label, target, offset in callouts:
    # The style dicts are built fresh for every call, as in the original
    # three separate annotate() calls.
    plt.annotate(label, xy=target, xycoords='data',
                 xytext=offset, textcoords='offset points',
                 bbox=dict(boxstyle="round4,pad=.5", fc="0.8"),
                 arrowprops=dict(arrowstyle="->",
                                 connectionstyle="arc,angleA=0,armA=30,rad=-10"))

plt.xlabel("Time")
plt.ylabel("Hourly Views")
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment