Created

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist

Graphs downloads per day for PyPI packages using gnuplot, because vanity. Usage: ./pypistats.py packagename [packagename ...]

View pypistats.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
#!/usr/bin/env python
 
import bz2
import datetime
import os
import subprocess
import sys
import tempfile
import time
import urllib2
import warnings
 
# User-agent strings whose downloads are left out of the "actual" total: the
# script's second pass hands this tuple to statistics() as ignore_clients.
# NOTE(review): these look like download managers and mirroring tools rather
# than real installs -- confirm before extending the list.
badclients = (
    'FDM 3.x',
    'pep381client/1.5',
    'warehouse/0.1dev1',
    # Every z3c.pypimirror release has to be listed separately because
    # statistics() compares client strings for exact equality.
    'z3c.pypimirror/1.0.15.1',
    'z3c.pypimirror/1.0.16',
    'z3c.pypimirror/1.0.16.1',
    'z3c.pypimirror/1.0.16.2',
)
 
def filenames():
    """Generate 'YYYY-MM-DD' date strings, starting yesterday and going back.

    The generator never finishes on its own; the consumer decides when to
    stop pulling values. Today's date is never produced.
    """
    one_day = datetime.timedelta(days=1)
    day = datetime.date.today() - one_day
    while True:
        yield day.strftime('%Y-%m-%d')
        day -= one_day
 
def statsfiles(cachedir='.'):
    """Yield one open stats file per day, newest first, fetching on demand.

    For every date name produced by filenames(), the file of that name in
    *cachedir* is opened for reading. If it cannot be opened, the matching
    ``.bz2`` file is downloaded, decompressed, stored in *cachedir* under
    the date name and then opened.

    The yielded file objects are not closed here; the consumer must close
    each one. Like filenames(), this generator never ends by itself.

    Download errors raised by urllib2 and decompression errors raised by
    bz2 propagate to the caller.
    """
    baseurl = 'http://pypi.python.org/stats/days/%s.bz2'
    for basename in filenames():
        fname = os.path.join(cachedir, basename)

        # Keep the try body down to the open() call so that only a cache
        # miss (or unreadable cache file) triggers a download.
        try:
            cached = open(fname, 'r')
        except IOError:
            cached = None

        if cached is None:
            url = baseurl % basename
            sys.stderr.write('Fetching %s...' % url)
            sys.stderr.flush()

            response = urllib2.urlopen(url)
            try:
                payload = response.read()
            finally:
                # Release the connection even when the read fails.
                response.close()
            sys.stderr.write(' done.\n')

            content = bz2.decompress(payload)

            # Write to a scratch name and rename afterwards, so that an
            # interrupted write can never leave a truncated file that a
            # later run would silently treat as a complete day.
            partial = fname + '.part'
            try:
                # Binary mode: bz2.decompress() returns raw bytes.
                with open(partial, 'wb') as f:
                    f.write(content)
                os.rename(partial, fname)
            finally:
                # Only still present if the write or the rename failed.
                if os.path.exists(partial):
                    os.unlink(partial)

            cached = open(fname, 'r')

        yield cached
 
def statistics(package, cachedir='.', ignore_clients=(), mindays=0):
    """Collect per-day download totals for *package*.

    Days are examined newest first, using the files from statsfiles().
    A line counts towards a day's total when it starts with the package
    name followed by a comma and its client field is not in
    *ignore_clients*.

    Parameters:
        package: package name to match at the start of each line.
        cachedir: directory passed through to statsfiles().
        ignore_clients: container of client strings to leave out; tested
            with ``in`` against the client field of each line.
        mindays: number of days that must be examined before a day with
            no counted downloads is allowed to end the scan.

    Returns:
        A dict mapping each day's file basename (the date string) to its
        download total. Days with a total of zero are omitted. The scan
        stops at the first zero-total day once *mindays* days were seen.

    Raises:
        ValueError: if a matching line has fewer than four comma-separated
            fields or its last field is not an integer.
    """
    prefix = '%s,' % package
    stats = {}
    for f in statsfiles(cachedir=cachedir):
        n = 0
        try:
            for line in f:
                if not line.startswith(prefix):
                    continue
                # Take the count from the right and the first two fields
                # from the left, so a comma inside the client field no
                # longer breaks the unpacking. Lines with exactly four
                # fields parse exactly as before.
                # NOTE(review): assumes any extra commas belong to the
                # client field -- confirm against the stats file format.
                _, _, rest = line.split(',', 2)
                client, downloads = rest.rsplit(',', 1)
                if client not in ignore_clients:
                    n += int(downloads)
        finally:
            # Close the handle even when a malformed line raises.
            f.close()

        mindays -= 1
        if n > 0:
            stats[os.path.basename(f.name)] = n
        elif mindays < 1:
            return stats

    # Only reached if statsfiles() ever stops yielding; return what was
    # gathered rather than an implicit None.
    return stats
 
def gnuplot(package, stats=None, outfile=None, script="""\
set xdata time
set ydata
set timefmt "%%Y-%%m-%%d"
set format x "%%m/%%d"
set xlabel "date"
set ylabel "# of downloads"
set title "Popularity of PyPI package %(package)s"
set x2label "%(total)d downloads total"
set xrange ["%(xrange_min)s":"%(xrange_max)s"]
set yrange [0:*]
set terminal png
set output "%(outfile)s"
plot "%(datfile)s" using 1:2 with lines title ""
"""):
    """Plot the daily download counts of *package* by running gnuplot.

    Parameters:
        package: package name, used in the plot title and the default
            output file name.
        stats: dict mapping date strings to download counts; when None it
            is obtained from statistics(package).
        outfile: path named in the script's ``set output`` line; defaults
            to ``<package>.png``.
        script: gnuplot script template, filled in with the ``%``
            operator. Available keys are package, total, xrange_min,
            xrange_max, outfile and datfile; literal percent signs must
            be doubled.

    Returns:
        None. When *stats* is empty nothing is plotted and a note is
        written to stderr. A failure to launch gnuplot is also reported
        on stderr instead of being raised.
    """
    if outfile is None:
        outfile = package + '.png'

    if stats is None:
        stats = statistics(package)

    # min()/max() below raise ValueError on an empty mapping, so a package
    # with no recorded downloads is skipped instead of crashing.
    if not stats:
        sys.stderr.write('No download data for %s; nothing to plot.\n'
                         % package)
        return

    # mkstemp() creates each file atomically under an unpredictable name,
    # which avoids the race inherent in os.tmpnam().
    temp_paths = []
    try:
        fd, datfile = tempfile.mkstemp(prefix='pypistats-', suffix='.dat')
        temp_paths.append(datfile)
        with os.fdopen(fd, 'w') as f:
            for key in sorted(stats):
                f.write('%s %d\n' % (key, stats[key]))

        script %= {'package': package,
                   'xrange_min': min(stats), 'xrange_max': max(stats),
                   'outfile': outfile, 'datfile': datfile,
                   'total': sum(stats.values())}

        fd, scriptfile = tempfile.mkstemp(prefix='pypistats-',
                                          suffix='.gnuplot')
        temp_paths.append(scriptfile)
        with os.fdopen(fd, 'w') as f:
            f.write(script)

        # An argument list bypasses the shell, so the script path needs
        # no quoting.
        try:
            subprocess.call(['gnuplot', scriptfile])
        except OSError as exc:
            # os.system() never raised when gnuplot was missing; keep the
            # call non-fatal and report the problem instead.
            sys.stderr.write('Could not run gnuplot: %s\n' % exc)
    finally:
        # Remove only the files that were actually created.
        for path in temp_paths:
            try:
                os.unlink(path)
            except OSError:
                pass
 
if __name__ == '__main__':
    # Every command-line argument is a package name; each one gets two
    # charts and two totals.
    for package in sys.argv[1:]:
        print("Package %s" % package)

        # First pass: count downloads from every client.
        unfiltered = statistics(package)
        gnuplot(package, unfiltered)
        print("Total downloads: %s" % sum(unfiltered.values()))

        # Second pass: leave out the clients listed in badclients, and
        # examine at least as many days as the first pass recorded.
        filtered = statistics(package,
                              ignore_clients=badclients,
                              mindays=len(unfiltered))
        gnuplot(package, filtered, outfile='%s.filtered.png' % package)
        print("Total downloads (actual): %s" % sum(filtered.values()))

        # Blank line between packages.
        print("")

@Cairnarvon great idea! thanks a lot for sharing

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.