Skip to content

Instantly share code, notes, and snippets.

@akheron
Created May 18, 2012 07:42
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save akheron/2723809 to your computer and use it in GitHub Desktop.
Save akheron/2723809 to your computer and use it in GitHub Desktop.
Python tracker statistic plotter
# Search for statistic messages from the Python tracker robot in
# python-dev archives and plot the result.
#
# $ python-tracker-stats.py collect
#
# Collects statistics from the mailing list and saves to
# python-tracker-stats.json
#
# $ python-tracker-stats.py plot
#
# Plots the statistics and saves to python-tracker-stats.png
#
# Requires matplotlib.
#
import argparse
import json
import datetime
import time
import email.utils
import mailbox
import gzip
import re
import errno
import os
import urllib
import urllib2
import shutil
# Full month names as produced by strftime('%B'), indexed 0-11.  They are
# used to build the archive file names ('<year>-<month name>.txt.gz').
# NOTE(review): '%B' is locale-dependent -- presumably this expects the
# default (English) locale; confirm before calling locale.setlocale().
MONTH_NAMES = [datetime.date(2012, n, 1).strftime('%B') for n in xrange(1, 13)]
# URL template for one file of the python-dev pipermail archive; '%s' is
# replaced with the (URL-quoted) archive file name.
ARCHIVE_URL = 'http://mail.python.org/pipermail/python-dev/%s'
# First month to collect.  Months are zero-based indexes into MONTH_NAMES.
STARTYEAR = 2008
STARTMONTH = 0 # January
# Evaluated once, when the module is loaded.
NOW = datetime.date.today()
ENDYEAR = NOW.year
# Zero-based index of the current month.  collect_data() uses it as an
# exclusive upper bound, so the current month itself is not fetched.
ENDMONTH = NOW.month - 1
# Output of the 'collect' command and input of the 'plot' command.
STATISTICS_FILENAME = 'python-tracker-stats.json'
# Output of the 'plot' command.
PLOT_FILENAME = 'python-tracker-stats.png'
def download_archive(source, target):
    """Download one archive file from the mailing list archive.

    source -- archive file name; it is URL-quoted and substituted into
              ARCHIVE_URL
    target -- local path the downloaded bytes are written to

    Errors from urllib2.urlopen() or from writing the file propagate to the
    caller.  A failed download does not leave a file at ``target``.
    """
    from contextlib import closing

    # Open the connection *before* creating the target file: callers use the
    # existence of the target as their "already downloaded" check, so a
    # failed request must not leave an empty file behind.
    url = ARCHIVE_URL % urllib.quote(source)
    with closing(urllib2.urlopen(url)) as fdata:
        try:
            # The archive is gzip data, so it has to be written in binary
            # mode (text mode corrupts it on platforms that translate
            # newlines).
            with open(target, 'wb') as fout:
                shutil.copyfileobj(fdata, fout)
        except BaseException:
            # Remove a truncated file so that the next run downloads the
            # archive again instead of trusting the partial copy.
            if os.path.exists(target):
                os.remove(target)
            raise
def convert_archive(source, target):
    """Rewrite a gzipped archive as a plain file with real e-mail addresses.

    source -- path of the gzip-compressed archive
    target -- path of the uncompressed file to write

    Every line is copied unchanged, except that on lines starting with
    'From ' or 'From: ' the first ' at ' (or ' en ', presumably a localized
    form of the same obfuscation) is replaced with '@', turning
    'user at example.com' back into 'user@example.com'.
    """
    # The match is non-greedy so that the *first* ' at ' on the line (the one
    # inside the address) is rewritten; a greedy match would pick the last
    # one, which may belong to the real name that follows the address.
    # The data is handled as bytes throughout, so no decoding of the archive
    # contents is needed.
    obfuscated_from = re.compile(br'^(From:? .*?) (at|en) ')

    with gzip.open(source, 'rb') as fin:
        with open(target, 'wb') as fout:
            for line in fin:
                fout.write(obfuscated_from.sub(br'\1@', line))
def make_statistics(timestamp, body):
    """Extract the issue counts from the body of a tracker status mail.

    timestamp -- value stored unchanged under the 'timestamp' key
    body      -- message body as a string

    Two layouts are recognised: a single line with all three counts
    ('<n> open (...) / <n> closed (...) / <n> total (...)'), and a block
    introduced by an 'Issues stats:' or 'Issues counts and deltas:' line
    with one 'open', 'closed' and 'total' line each.

    Returns a dict with the keys 'timestamp', 'open', 'closed' and 'total'
    (the counts as ints).  If the body matches neither layout, it is printed
    and ValueError is raised.
    """
    m = re.search(r'^ (?P<open>\d+) open \( *[+-]\d+\) / +(?P<closed>\d+) closed \( *[+-]\d+\) / +(?P<total>\d+) total \( *[+-]\d+\)', body, re.MULTILINE)
    if m:
        return {
            'timestamp': timestamp,
            'open': int(m.group('open')),
            'closed': int(m.group('closed')),
            'total': int(m.group('total')),
        }

    needles = ['Issues stats:', 'Issues counts and deltas:']
    if any('\n%s\n' % needle in body for needle in needles):
        result = {'timestamp': timestamp}
        for field in ('open', 'closed', 'total'):
            m = re.search(r'^ %s +(\d+) ' % field, body, re.MULTILINE)
            if m is None:
                # Header found but a count line is missing: report it as a
                # parse failure below rather than failing on None.group().
                break
            result[field] = int(m.group(1))
        else:
            return result

    # Show the offending body to make the failure diagnosable.  (Written so
    # that it is valid both as a print statement and as a function call.)
    print(body)
    raise ValueError('Could not parse!')
def find_statistics(mbox_path):
    """Yield the statistics of every tracker status mail in an mbox file.

    mbox_path -- path of the mbox file to scan

    Only messages whose From address is 'status@bugs.python.org' are
    considered; each one is passed to make_statistics() together with its
    Date header converted to an integer POSIX timestamp.  Messages without a
    From header, or without a parseable Date header, are skipped.

    This is a generator: the mailbox stays locked until the generator is
    exhausted or closed.  Exceptions from make_statistics() propagate.
    """
    mbox = mailbox.mbox(mbox_path)
    mbox.lock()
    try:
        # itervalues() reads the messages one at a time instead of building
        # a list of the whole mailbox first.
        for message in mbox.itervalues():
            if 'From' not in message:
                continue
            addr = email.utils.parseaddr(message['From'])[1]
            if addr != 'status@bugs.python.org':
                continue

            # parsedate_tz() keeps the UTC offset of the Date header and
            # mktime_tz() applies it; time.mktime() on the plain parsedate()
            # result would interpret the sender's wall-clock time as local
            # time of the machine running this script.
            raw_date = message['Date']
            parsed = email.utils.parsedate_tz(raw_date) if raw_date else None
            if parsed is None:
                # No usable timestamp for this message.
                continue
            date = int(email.utils.mktime_tz(parsed))

            body = message.get_payload()
            yield make_statistics(date, body)
    finally:
        mbox.unlock()
        mbox.close()
def collect_data():
    """Collect the tracker statistics and save them as JSON.

    For every month from STARTYEAR/STARTMONTH up to, but not including,
    ENDYEAR/ENDMONTH the monthly archive is downloaded into the 'cache'
    directory and converted to an mbox file (both steps are skipped when the
    respective file already exists), and the statistics found in it are
    collected.  The resulting list is written to STATISTICS_FILENAME.
    """
    try:
        os.mkdir('cache')
    except OSError as exc:
        # An already existing cache directory is fine.
        if exc.errno != errno.EEXIST:
            raise

    statistics = []
    for year in range(STARTYEAR, ENDYEAR + 1):
        # Clamp both ends independently so that the range is also right when
        # the start year and the end year are the same.
        first_month = STARTMONTH if year == STARTYEAR else 0
        last_month = ENDMONTH if year == ENDYEAR else 12

        for month in range(first_month, last_month):
            prefix = '%04d-%s' % (year, MONTH_NAMES[month])

            archive = prefix + '.txt.gz'
            archive_path = os.path.join('cache', archive)
            if not os.path.exists(archive_path):
                print('Downloading %s' % archive)
                download_archive(archive, archive_path)

            mbox = prefix + '.mbox'
            mbox_path = os.path.join('cache', mbox)
            if not os.path.exists(mbox_path):
                print('Converting %s to mbox format' % archive)
                convert_archive(archive_path, mbox_path)

            print('Processing %s' % prefix)
            prev_timestamp = 0
            for statistic in find_statistics(mbox_path):
                timestamp = statistic['timestamp']
                if timestamp <= prev_timestamp:
                    print('Ignoring invalid timestamp (in the past)')
                    continue
                # Remember the accepted timestamp; without this the check
                # above could never reject an out-of-order message.
                prev_timestamp = timestamp
                statistics.append(statistic)

    with open(STATISTICS_FILENAME, 'w') as fobj:
        json.dump(statistics, fobj)
def plot_statistics():
    """Plot the collected statistics and save the figure.

    Reads the list of statistics from STATISTICS_FILENAME, draws the open,
    closed and total counts against the date of each entry, and writes the
    figure to PLOT_FILENAME.  Requires matplotlib.
    """
    from matplotlib.pyplot import figure, savefig
    from matplotlib.dates import MonthLocator, DateFormatter

    with open(STATISTICS_FILENAME) as fobj:
        entries = json.load(fobj)

    # One x value per entry, and one [open, closed, total] row per entry so
    # that each column is drawn as its own line.
    series = ('open', 'closed', 'total')
    dates = [datetime.date.fromtimestamp(entry['timestamp'])
             for entry in entries]
    counts = [[entry[name] for name in series] for entry in entries]

    tick_locator = MonthLocator(range(1, 13), bymonthday=1, interval=6)
    tick_formatter = DateFormatter("%b %Y")

    fig = figure()
    axes = fig.add_subplot(111)
    lines = axes.plot_date(dates, counts, '-')

    axes.xaxis.set_major_locator(tick_locator)
    axes.xaxis.set_major_formatter(tick_formatter)
    axes.autoscale_view()
    axes.grid(True)

    fig.legend(lines, ['Open issues', 'Closed issues', 'Total'])
    fig.autofmt_xdate()

    savefig(PLOT_FILENAME)
def main():
    """Parse the command line and run the requested command.

    The single positional argument selects the action: 'collect' runs
    collect_data(), 'plot' runs plot_statistics().
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('command', choices=['collect', 'plot'])
    command = parser.parse_args().command

    if command == 'collect':
        collect_data()
        return
    if command == 'plot':
        plot_statistics()
        return

    # Not reached: argparse only accepts the two choices handled above.
    assert 0


# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment