Created
May 18, 2012 07:42
-
-
Save akheron/2723809 to your computer and use it in GitHub Desktop.
Python tracker statistic plotter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Search for statistics messages from the Python tracker robot in the
# python-dev archives and plot the result.
# | |
# $ python-tracker-stats.py collect
# | |
# Collects statistics from the mailing list and saves to | |
# python-tracker-stats.json | |
# | |
# $ python-tracker-stats.py plot | |
# | |
# Plots the statistics and saves to python-tracker-stats.png | |
# | |
# Requires matplotlib. | |
# | |
import argparse | |
import json | |
import datetime | |
import time | |
import email.utils | |
import mailbox | |
import gzip | |
import re | |
import errno | |
import os | |
import urllib | |
import urllib2 | |
import shutil | |
# English month names, index 0 = January.  They are spelled out instead of
# being derived from strftime('%B') because they are interpolated into the
# archive file names and therefore must not vary with the process locale.
MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]

# Template for an archive URL; '%s' receives the (quoted) archive file name.
ARCHIVE_URL = 'http://mail.python.org/pipermail/python-dev/%s'

# First month to process.  Months are 0-based indexes into MONTH_NAMES.
STARTYEAR = 2008
STARTMONTH = 0  # January

# Last year to process and the 0-based index of the current month, both
# taken from the date on which the module is loaded.
NOW = datetime.date.today()
ENDYEAR = NOW.year
ENDMONTH = NOW.month - 1

# Output files, written to the current working directory.
STATISTICS_FILENAME = 'python-tracker-stats.json'
PLOT_FILENAME = 'python-tracker-stats.png'
def download_archive(source, target):
    """Download one archive file from the mailing list server.

    source -- archive file name; it is URL-quoted and substituted into
              ARCHIVE_URL.
    target -- local path the downloaded bytes are written to.

    Any error raised while opening the URL or copying the data propagates
    to the caller.  No file is left at ``target`` in that case, so a later
    "does the file exist?" check cannot mistake a failed download for a
    cached archive.
    """
    # Open the URL before touching the file system: an HTTP error must not
    # leave an empty file behind.
    response = urllib2.urlopen(ARCHIVE_URL % urllib.quote(source))
    try:
        try:
            # The payload is gzip data, so write it in binary mode.
            with open(target, 'wb') as fout:
                shutil.copyfileobj(response, fout)
        except BaseException:
            # Also covers Ctrl-C in the middle of a transfer: drop the
            # truncated file, then let the original error continue.
            if os.path.exists(target):
                os.remove(target)
            raise
    finally:
        response.close()
def convert_archive(source, target):
    """Decompress a gzipped archive into a text file, restoring addresses.

    source -- path of the gzip-compressed archive to read.
    target -- path of the text file to write.

    Every line is copied through unchanged except lines that start with
    "From " or "From: ": in those, the last " at " or " en " is replaced
    by "@" (the match is greedy, so it is the last occurrence on the line).
    """
    obfuscated_sender = re.compile(r'^(From:? .*) (at|en) ')
    with gzip.open(source) as compressed, open(target, 'w') as plain:
        plain.writelines(
            obfuscated_sender.sub(r'\1@', raw_line)
            for raw_line in compressed
        )
def make_statistics(timestamp, body):
    """Extract the issue counts from the body of a tracker status mail.

    timestamp -- value stored unchanged under the 'timestamp' key.
    body      -- message text to search.

    Two layouts are recognised:

    * a single summary line of the form
      " N open (+d) / N closed (+d) / N total (+d)";
    * a table introduced by a line reading "Issues stats:" or
      "Issues counts and deltas:", with one indented
      "open N ", "closed N " and "total N " line each.

    Returns a dict with the keys 'timestamp', 'open', 'closed' and 'total'
    (the counts as ints).  If neither layout can be parsed completely, the
    body is printed for diagnosis and ValueError is raised.
    """
    fields = ('open', 'closed', 'total')

    # Layout 1: everything on one line.  Leading indentation of any width
    # (one or more spaces) is accepted.
    summary = re.search(
        r'^ +(?P<open>\d+) open \( *[+-]\d+\) / +'
        r'(?P<closed>\d+) closed \( *[+-]\d+\) / +'
        r'(?P<total>\d+) total \( *[+-]\d+\)',
        body, re.MULTILINE)
    if summary:
        result = {'timestamp': timestamp}
        for field in fields:
            result[field] = int(summary.group(field))
        return result

    # Layout 2: a header line followed by one indented line per counter.
    headers = ('Issues stats:', 'Issues counts and deltas:')
    if any('\n%s\n' % header in body for header in headers):
        result = {'timestamp': timestamp}
        for field in fields:
            found = re.search(r'^ +%s +(\d+) ' % field, body, re.MULTILINE)
            if found is None:
                # Header present but a counter is missing: fall through to
                # the common error path instead of failing on None.group().
                break
            result[field] = int(found.group(1))
        else:
            return result

    print(body)
    raise ValueError('Could not parse!')
def find_statistics(mbox_path):
    """Yield the statistics of every tracker status mail in an mbox file.

    mbox_path -- path of the mbox file to scan.

    Only messages whose From address is exactly 'status@bugs.python.org'
    are considered.  For each of them, the result of
    make_statistics(timestamp, body) is yielded, where ``timestamp`` is the
    Date header converted to integer seconds since the epoch (honouring the
    header's UTC offset) and ``body`` is the message payload.

    Messages without a parseable Date header are skipped with a notice.
    Errors raised by make_statistics propagate to the caller.  The mailbox
    is locked while it is being read and is unlocked and closed afterwards.
    """
    mbox = mailbox.mbox(mbox_path)
    mbox.lock()
    try:
        # Iterating the mailbox yields messages one at a time instead of
        # building a list of all of them first.
        for message in mbox:
            if 'From' not in message:
                continue
            _realname, addr = email.utils.parseaddr(message['From'])
            if addr != 'status@bugs.python.org':
                continue

            date_header = message['Date']
            parsed_date = (email.utils.parsedate_tz(date_header)
                           if date_header else None)
            if parsed_date is None:
                print('Ignoring message with missing or unparseable Date header')
                continue
            # mktime_tz applies the UTC offset from the header, so the
            # result does not depend on the local time zone.
            date = int(email.utils.mktime_tz(parsed_date))

            body = message.get_payload()
            yield make_statistics(date, body)
    finally:
        mbox.unlock()
        mbox.close()
def _iter_archive_months():
    """Yield (year, month_index) for every archive month to process."""
    for year in range(STARTYEAR, ENDYEAR + 1):
        # Bounding both ends independently also handles STARTYEAR == ENDYEAR.
        first = STARTMONTH if year == STARTYEAR else 0
        # ENDMONTH is used as an exclusive bound, so the current month itself
        # is not processed (presumably because its archive is still growing).
        last = ENDMONTH if year == ENDYEAR else 12
        for month in range(first, last):
            yield year, month


def _ensure_mbox(prefix):
    """Return the path of cache/<prefix>.mbox, creating it if necessary."""
    archive = prefix + '.txt.gz'
    archive_path = os.path.join('cache', archive)
    if not os.path.exists(archive_path):
        print('Downloading %s' % archive)
        download_archive(archive, archive_path)

    mbox_path = os.path.join('cache', prefix + '.mbox')
    if not os.path.exists(mbox_path):
        print('Converting %s to mbox format' % archive)
        convert_archive(archive_path, mbox_path)
    return mbox_path


def collect_data():
    """Collect the tracker statistics of all months into a JSON file.

    Creates the 'cache' directory if needed, downloads and converts every
    monthly archive that is not cached yet, extracts the statistics from
    each month and writes the combined list to STATISTICS_FILENAME.

    Within one month, a statistic whose timestamp is not later than the
    previously accepted one is reported and dropped.
    """
    try:
        os.mkdir('cache')
    except OSError as exc:
        # An already existing cache directory is fine; anything else is not.
        if exc.errno != errno.EEXIST:
            raise

    statistics = []
    for year, month in _iter_archive_months():
        prefix = '%04d-%s' % (year, MONTH_NAMES[month])
        mbox_path = _ensure_mbox(prefix)

        print('Processing %s' % prefix)
        prev_timestamp = 0
        for statistic in find_statistics(mbox_path):
            timestamp = statistic['timestamp']
            if timestamp <= prev_timestamp:
                print('Ignoring invalid timestamp (in the past)')
                continue
            # Remember the accepted timestamp; without this the check above
            # would only ever compare against 0.
            prev_timestamp = timestamp
            statistics.append(statistic)

    with open(STATISTICS_FILENAME, 'w') as fobj:
        json.dump(statistics, fobj)
def plot_statistics():
    """Plot the collected statistics and save the figure.

    Reads the list of statistics from STATISTICS_FILENAME, draws the
    'open', 'closed' and 'total' counts against the date of each entry and
    writes the result to PLOT_FILENAME.  matplotlib is imported here, so it
    is only required for plotting.
    """
    from matplotlib.dates import DateFormatter, MonthLocator
    from matplotlib.pyplot import figure, savefig

    with open(STATISTICS_FILENAME) as stats_file:
        records = json.load(stats_file)

    series = ('open', 'closed', 'total')
    dates = [
        datetime.date.fromtimestamp(record['timestamp'])
        for record in records
    ]
    # One row per record, one column per plotted series.
    counts = [[record[name] for name in series] for record in records]

    fig = figure()
    axes = fig.add_subplot(111)
    lines = axes.plot_date(dates, counts, '-')

    # Major ticks on the first day of a month, every sixth month.
    axes.xaxis.set_major_locator(
        MonthLocator(range(1, 13), bymonthday=1, interval=6))
    axes.xaxis.set_major_formatter(DateFormatter("%b %Y"))
    axes.autoscale_view()
    axes.grid(True)

    fig.legend(lines, ['Open issues', 'Closed issues', 'Total'])
    fig.autofmt_xdate()
    savefig(PLOT_FILENAME)
def main():
    """Parse the command line and run the requested sub-command.

    The single positional argument must be 'collect' (gather the
    statistics) or 'plot' (draw them); argparse rejects anything else.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('command', choices=['collect', 'plot'])
    command = parser.parse_args().command

    if command == 'collect':
        collect_data()
        return
    if command == 'plot':
        plot_statistics()
        return

    # Not reached: argparse only lets the two choices above through.
    assert 0


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment