Skip to content

Instantly share code, notes, and snippets.

@jdm
Created September 6, 2013 15:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jdm/6465718 to your computer and use it in GitHub Desktop.
Save jdm/6465718 to your computer and use it in GitHub Desktop.
from mercurial import ui, hg, cmdutil, match
from collections import defaultdict
import json
import sys
# Command line: REPO FROM_REV TO_REV EMPLOYEES_JSON EMAILS_FILE.
# This script reads sys.argv[1] through sys.argv[5] by index, so stop with a
# usage line rather than an IndexError traceback when any of them is missing.
if len(sys.argv) < 6:
    sys.exit('usage: %s REPO FROM_REV TO_REV EMPLOYEES_JSON EMAILS_FILE'
             % sys.argv[0])

# Repository to analyse, opened from the path given as the first argument.
repo = hg.repository(ui.ui(), sys.argv[1])
# Endpoints of the revision range whose commits get counted.
from_rev = sys.argv[2]
to_rev = sys.argv[3]
# Placeholder; rebound when the JSON report (sys.argv[4]) is loaded.
employees = {}
def sanitize(s):
    """Return s with curly quotation marks replaced by their ASCII forms.

    Left and right double quotes (U+201C, U+201D) become '"'; left and right
    single quotes (U+2018, U+2019) become "'". Everything else is unchanged.
    """
    replacements = (
        (u"\u201c", '"'),
        (u"\u201d", '"'),
        (u"\u2018", "'"),
        (u"\u2019", "'"),
    )
    cleaned = s
    for curly, plain in replacements:
        cleaned = cleaned.replace(curly, plain)
    return cleaned
# sys.argv[4] is a JSON document whose u'Report_Entry' value is iterated as a
# sequence of employee records. Records without a u'primaryWorkEmail' key are
# dropped so that the email and name lists below stay aligned.
with open(sys.argv[4]) as f:
    employees = [entry for entry in json.load(f)[u'Report_Entry']
                 if u'primaryWorkEmail' in entry]

# Built as real lists (not lazy iterators) because both are scanned once per
# commit author later on, and `emails` is extended below.
emails = [entry[u'primaryWorkEmail'] for entry in employees]
names = [(sanitize(entry[u'Preferred_Name_-_First_Name']),
          sanitize(entry[u'Preferred_Name_-_Last_Name']))
         for entry in employees]

# sys.argv[5] is a text file; the first whitespace-separated token of each
# line is treated as an additional employee email address.
with open(sys.argv[5]) as f:
    lines = f.readlines()
for line in lines:
    fields = line.split()
    # Skip blank lines wherever they occur (including an empty file) instead
    # of failing with IndexError on the missing first token.
    if fields:
        emails.append(fields[0])
# Number of commits seen per author string (the changeset's user field).
authors = defaultdict(int)

# No file patterns: the matcher is built from an empty pattern tuple.
pats = ()
opts = {'rev': ['%s:%s' % (to_rev, from_rev)]}
matchfn = match.match(repo.root, repo.getcwd(), pats)


def prep(ctx, fns):
    """Callback passed to cmdutil.walkchangerevs for each changeset.

    Looks up the parent revisions of the changeset and returns None.

    NOTE(review): both paths through this function return None, so nothing
    is filtered here. If the parent-count check was meant to exclude merge
    changesets from the totals, it does not do so -- confirm the intent.
    """
    parents = repo.changelog.parentrevs(ctx.rev())
    if len(parents) == 2:
        return


# Tally one commit for the author of every changeset the walk yields.
for changectx in cmdutil.walkchangerevs(repo, matchfn, opts, prep):
    authors[str(changectx.user()).decode('utf-8')] += 1
# Every author string ends up in exactly one of the first two sets.
employee_authors = set()
volunteer_authors = set()
# Authors that contained some employee's last name without a matching first
# name and were not classified as employees by any other rule. Only useful
# for debugging the matching heuristics.
partials = set()

for author in authors:
    author_words = author.split()
    is_employee = False

    # 1. Name match: the last name must appear in the author string.
    for first, last in names:
        if last not in author:
            continue
        # Really dumb stemming - if the provided first name matches part of
        # a "word" in the full author's line, claim it's a match
        # (eg. Josh in Joshua). any() gives a real boolean regardless of
        # whether filter() would return a list or a lazy iterator.
        if first in author or any(word in first for word in author_words):
            is_employee = True
            break
        # Last name matched but the first name did not; keep looking.
        partials.add(author)

    # 2. Email match: any known employee address appears in the author string.
    if not is_employee:
        is_employee = any(email in author for email in emails)

    # 3. Last ditch. I feel bad.
    if not is_employee:
        is_employee = '@mozilla.org' in author or '@mozilla.com' in author

    if is_employee:
        employee_authors.add(author)
        # discard() is a no-op when the author was never a partial match, so
        # no exception needs to be caught (and none can be swallowed).
        partials.discard(author)
    else:
        volunteer_authors.add(author)
def _percent(part, whole):
    """Return part as a percentage of whole, or 0.0 when whole is zero."""
    if not whole:
        return 0.0
    return float(part) / whole * 100


print('Employees: %d' % len(employee_authors))
print('Volunteers: %d' % len(volunteer_authors))
# print('Partial matches: %d' % len(partials))
# print(partials)

# Total commits attributed to each group.
emp_contributions = sum(authors[name] for name in employee_authors)
vol_contributions = sum(authors[name] for name in volunteer_authors)
all_contributions = emp_contributions + vol_contributions
print('Employee contributions: %d' % emp_contributions)
print('Volunteer contributions: %d' % vol_contributions)

# Authors of each group ordered from most to fewest commits.
sorted_volunteers = sorted(volunteer_authors, key=lambda x: authors[x], reverse=True)
sorted_employees = sorted(employee_authors, key=lambda x: authors[x], reverse=True)

# Size of the "top contributors" slice reported below.
N = 10
# Materialised as lists so each can be summed more than once.
top_n_vol = [float(authors[name]) for name in sorted_volunteers[:N]]
top_n_emp = [float(authors[name]) for name in sorted_employees[:N]]
top_vol_total = sum(top_n_vol)
top_emp_total = sum(top_n_emp)

print('Contributions from top %d employees: %d' % (N, top_emp_total))
print('Contributions from top %d volunteers: %d' % (N, top_vol_total))
# _percent() keeps these lines from raising ZeroDivisionError when a group
# (or the whole revision range) has no commits.
print('Top %d volunteers responsible for %f%% of volunteer commits, %f%% overall' % (
    N,
    _percent(top_vol_total, vol_contributions),
    _percent(top_vol_total, all_contributions)))
print('Top %d employees responsible for %f%% of employee commits, %f%% overall' % (
    N,
    _percent(top_emp_total, emp_contributions),
    _percent(top_emp_total, all_contributions)))
print('Volunteer commit distribution:')
# Maps a commit count to the number of volunteers with exactly that count.
volunteer_buckets = defaultdict(int)
for author in volunteer_authors:
    volunteer_buckets[authors[author]] += 1
for key in sorted(volunteer_buckets):
    print('%s: %d' % (key, volunteer_buckets[key]))
# Sanity check: every volunteer landed in exactly one exact-count bucket.
assert sum(volunteer_buckets.values()) == len(volunteer_authors)

print('Bucketed volunteer commit distribution:')
# Half-open [lower, higher) ranges of commit counts.
buckets = [(1, 2), (2, 3), (3, 4), (4, 5), (5, 10), (10, 20), (20, 2000)]
# Number of volunteers per range, in the same order as `buckets`.
bucketed = []
for lower, higher in buckets:
    in_range = sum(count for commits, count in volunteer_buckets.items()
                   if lower <= commits < higher)
    bucketed.append(in_range)
    print("[%d, %d) - %d" % (lower, higher, in_range))

# The last range is bounded, so volunteers at or above its upper bound would
# otherwise vanish from this report. Print them only when there are any, so
# the output is unchanged in the usual case.
upper_bound = buckets[-1][1]
overflow = sum(count for commits, count in volunteer_buckets.items()
               if commits >= upper_bound)
if overflow:
    print("[%d, ...) - %d" % (upper_bound, overflow))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment