Created

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist

Graphs downloads per day for PyPI packages using gnuplot, because vanity. Usage: ./pypistats.py packagename

View pypistats.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
#!/usr/bin/env python
 
import bz2
import datetime
import os
import sys
import time
import urllib2
import warnings
 
# User-agent strings whose downloads are left out of the "actual" total
# computed by the script entry point (presumably mirrors and download
# managers rather than real installs).
badclients = (
    'FDM 3.x',
    'pep381client/1.5',
    'warehouse/0.1dev1',
    'z3c.pypimirror/1.0.15.1',
    'z3c.pypimirror/1.0.16',
    'z3c.pypimirror/1.0.16.1',
    'z3c.pypimirror/1.0.16.2',
)
 
def filenames():
    """Yield 'YYYY-MM-DD' date strings forever, newest first.

    The first value is yesterday's date; each following value is one day
    earlier than the one before it.
    """
    one_day = datetime.timedelta(1)
    current = datetime.date.today() - one_day
    while True:
        yield time.strftime('%Y-%m-%d', current.timetuple())
        current = current - one_day
 
def statsfiles(cachedir='.'):
    """Yield open per-day PyPI statistics files, newest day first.

    For every name produced by filenames() the decompressed statistics file
    is looked up in *cachedir*. If it cannot be opened there, the matching
    ``.bz2`` file is downloaded, decompressed, stored in *cachedir* and then
    opened. The generator never finishes on its own; the consumer decides
    when to stop.

    Each yielded file object is opened for reading and must be closed by
    the caller.

    Download errors raised by urllib2.urlopen() propagate to the caller.
    """
    baseurl = 'http://pypi.python.org/stats/days/%s.bz2'
    for basename in filenames():
        fname = os.path.join(cachedir, basename)
        try:
            f = open(fname, 'r')
        except IOError:
            url = baseurl % basename
            sys.stderr.write('Fetching %s...' % url)
            sys.stderr.flush()
            r = urllib2.urlopen(url)
            try:
                compressed = r.read()
            finally:
                # Always release the connection, even if the read fails.
                r.close()
            sys.stderr.write(' done.\n')
            content = bz2.decompress(compressed)
            # Write to a side file and rename it into place so that an
            # interrupted run never leaves a truncated file in the cache,
            # which later runs would silently trust.
            partial = fname + '.part'
            with open(partial, 'wb') as out:
                out.write(content)
            os.rename(partial, fname)
            f = open(fname, 'r')
        # Yield outside the try block so the IOError handler only ever
        # covers the attempt to open the cached file.
        yield f
 
def statistics(package, cachedir='.', ignore_clients=(), mindays=0):
    """Collect per-day download counts for *package*.

    Walks the daily statistics files from statsfiles() (newest first) and
    sums the download counts of every line that starts with
    ``"<package>,"``. Lines are expected to have the form
    ``package,filename,client,downloads``.

    Parameters:
        package: exact package name to look for.
        cachedir: directory passed through to statsfiles().
        ignore_clients: container of client strings whose downloads are
            not counted.
        mindays: minimum number of days to examine before a day without
            downloads ends the search.

    Returns:
        A dict mapping the day's file name ('YYYY-MM-DD') to the number of
        downloads counted for that day. Days with zero downloads are not
        included.

    Raises:
        ValueError: if a matching line does not have the expected fields
            or its download count is not an integer.
    """
    prefix = '%s,' % package
    stats = {}
    for f in statsfiles(cachedir=cachedir):
        n = 0
        try:
            for line in f:
                if not line.startswith(prefix):
                    continue
                # The client (user agent) field may itself contain commas,
                # e.g. "(KHTML, like Gecko)", so peel the two leading
                # fields off the left and the count off the right.
                _name, _filename, rest = line.split(',', 2)
                client, downloads = rest.rsplit(',', 1)
                if client not in ignore_clients:
                    n += int(downloads)
        finally:
            # Close the file even when a malformed line raises.
            f.close()
        mindays -= 1
        if n > 0:
            stats[os.path.basename(f.name)] = n
        elif mindays < 1:
            # First day without downloads after the minimum window: stop.
            break
    # Also reached if the file iterator ever ends, so callers always get a
    # dict rather than None.
    return stats
 
def gnuplot(package, stats=None, outfile=None, script="""\
set xdata time
set ydata
set timefmt "%%Y-%%m-%%d"
set format x "%%m/%%d"
set xlabel "date"
set ylabel "# of downloads"
set title "Popularity of PyPI package %(package)s"
set x2label "%(total)d downloads total"
set xrange ["%(xrange_min)s":"%(xrange_max)s"]
set yrange [0:*]
set terminal png
set output "%(outfile)s"
plot "%(datfile)s" using 1:2 with lines title ""
"""):
    """Plot downloads per day for *package* as a PNG using gnuplot.

    Parameters:
        package: package name, used in the plot title and the default
            output file name.
        stats: dict mapping 'YYYY-MM-DD' strings to download counts. When
            None, it is obtained from statistics(package).
        outfile: path of the image to create; defaults to
            ``<package>.png``.
        script: gnuplot script template, filled in with %-formatting using
            the keys package, total, xrange_min, xrange_max, outfile and
            datfile.

    Returns None. If *stats* is empty there is nothing to plot: a note is
    written to stderr and no files are created. If the gnuplot executable
    cannot be started, a message is written to stderr instead of raising.
    """
    import subprocess
    import tempfile

    if outfile is None:
        outfile = package + '.png'

    if stats is None:
        stats = statistics(package)

    if not stats:
        # min()/max() below would raise ValueError on an empty mapping.
        sys.stderr.write('No download data for %s; skipping plot.\n' % package)
        return

    tmpfiles = []
    try:
        # mkstemp creates the files atomically, avoiding the race that
        # os.tmpnam() warns about.
        datfd, datfile = tempfile.mkstemp(prefix='pypistats-', suffix='.dat')
        tmpfiles.append(datfile)
        with os.fdopen(datfd, 'w') as f:
            for key in sorted(stats):
                f.write('%s %d\n' % (key, stats[key]))

        rendered = script % {'package': package,
                             'xrange_min': min(stats),
                             'xrange_max': max(stats),
                             'outfile': outfile,
                             'datfile': datfile,
                             'total': sum(stats.values())}

        scriptfd, scriptfile = tempfile.mkstemp(prefix='pypistats-',
                                                suffix='.gnuplot')
        tmpfiles.append(scriptfile)
        with os.fdopen(scriptfd, 'w') as f:
            f.write(rendered)

        try:
            # Argument list instead of a shell string, so temp paths with
            # spaces or shell metacharacters are passed through intact.
            subprocess.call(['gnuplot', scriptfile])
        except OSError as e:
            # gnuplot is not installed or not executable; keep going like
            # the shell-based invocation did.
            sys.stderr.write('Could not run gnuplot: %s\n' % e)
    finally:
        # Only remove what was actually created.
        for path in tmpfiles:
            os.unlink(path)
 
# Script entry point: for every package named on the command line, plot and
# print the raw download totals, then repeat with the clients listed in
# badclients filtered out.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.stderr.write('Usage: %s packagename [packagename ...]\n'
                         % sys.argv[0])
        sys.exit(1)

    # Single-argument print(...) produces the same output as the print
    # statement under Python 2.
    for package in sys.argv[1:]:
        print("Package %s" % package)

        stats = statistics(package)
        if not stats:
            # Plotting an empty data set would fail on min()/max().
            print("No downloads found.")
            print("")
            continue
        gnuplot(package, stats)
        print("Total downloads: %s" % sum(stats.values()))

        # Look at least as many days back as the unfiltered run covered, so
        # a day with only ignored clients does not end the search early.
        filtered = statistics(package, ignore_clients=badclients,
                              mindays=len(stats))
        if filtered:
            gnuplot(package, filtered, outfile='%s.filtered.png' % package)
        print("Total downloads (actual): %s" % sum(filtered.values()))

        print("")

@Cairnarvon great idea! thanks a lot for sharing

Does this script still work? I was trying to get statistics for collective.oaiintercom, but I got a 404 with a few different combinations.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.