from mercurial import ui, hg, cmdutil, match | |
from collections import defaultdict | |
import json | |
import sys | |
# Command-line arguments:
#   1     path handed to hg.repository() to open the repository
#   2, 3  the two revisions that bound the range of changesets to walk
#   4     JSON employee report (opened further down)
#   5     text file of extra employee email addresses (opened further down)
# Check the count up front so a missing argument yields a usage message
# instead of an IndexError part-way through the run.
if len(sys.argv) < 6:
    sys.exit('usage: %s REPO FROM_REV TO_REV EMPLOYEES_JSON EXTRA_EMAILS'
             % sys.argv[0])

repo = hg.repository(ui.ui(), sys.argv[1])
from_rev = sys.argv[2]
to_rev = sys.argv[3]
# Placeholder only; rebound to the list loaded from the JSON report below.
employees = {}
def sanitize(s):
    """Return s with curly double/single quotes swapped for plain ASCII ones."""
    replacements = (
        (u"\u201c", '"'),   # left double quotation mark
        (u"\u201d", '"'),   # right double quotation mark
        (u"\u2018", "'"),   # left single quotation mark
        (u"\u2019", "'"),   # right single quotation mark
    )
    for fancy, plain in replacements:
        s = s.replace(fancy, plain)
    return s
# Load the employee report: a JSON document whose u'Report_Entry' value is
# a list with one record per employee.
with open(sys.argv[4]) as f:
    employees = json.load(f)[u'Report_Entry']

# Records without a work email are dropped entirely, so they contribute
# neither an address nor a name to the matching below.
employees = [entry for entry in employees if u'primaryWorkEmail' in entry]
emails = [entry[u'primaryWorkEmail'] for entry in employees]
# (first name, last name) pairs with curly quotes normalised to ASCII.
names = [(sanitize(entry[u'Preferred_Name_-_First_Name']),
          sanitize(entry[u'Preferred_Name_-_Last_Name']))
         for entry in employees]

# Extra known employee addresses: the first whitespace-separated token of
# each line.  Blank lines are skipped wherever they occur, so an empty file
# or a stray empty line no longer raises IndexError.
with open(sys.argv[5]) as f:
    lines = f.readlines()
emails += [line.split()[0] for line in lines if line.strip()]
# Number of walked changesets per author, keyed by the decoded user string.
authors = defaultdict(int)
# No file patterns are passed to the matcher.
pats = ()
# Revision range given to walkchangerevs, written as "<to_rev>:<from_rev>".
opts = {'rev': [to_rev + ':' + from_rev]}
matchfn = match.match(repo.root, repo.getcwd(), pats)


def prep(ctx, fns):
    """Callback handed to cmdutil.walkchangerevs; currently has no effect.

    ctx is the changeset context being prepared; fns is unused here.

    NOTE(review): both paths through this function return None, so it
    filters nothing and every changeset yielded by the walk is counted
    below.  The two-parent test suggests the intent may have been to leave
    merge changesets out of the counts -- confirm, and if so do the skip
    inside the counting loop rather than here.
    """
    rev = ctx.rev()
    if len(repo.changelog.parentrevs(rev)) == 2:
        return


for ctx in cmdutil.walkchangerevs(repo, matchfn, opts, prep):
    # Decode with 'replace' so a single user string that is not valid
    # UTF-8 cannot abort the whole run with UnicodeDecodeError.
    author = str(ctx.user()).decode('utf-8', 'replace')
    authors[author] += 1
# Every author line ends up in exactly one of these two sets.
employee_authors = set()
volunteer_authors = set()
# Debugging aid: author lines that contain some employee's last name but
# never produced a first-name match and were not otherwise classified as an
# employee.  Nothing below depends on it.
partials = set()

for author in authors:
    words = author.split()

    # 1. Name match: the employee's last name must appear in the author
    #    line, and so must the first name.
    name_match = False
    for first, last in names:
        if last not in author:
            continue
        # Really dumb stemming - accept the first name appearing anywhere in
        # the author line (eg. Josh in Joshua), or any "word" of the author
        # line appearing inside the first name.
        if first in author or any(word in first for word in words):
            name_match = True
            break
        partials.add(author)

    # 2. Otherwise a known employee email address inside the author line.
    # 3. Last ditch: any mozilla.org / mozilla.com address.
    if (name_match
            or any(email in author for email in emails)
            or '@mozilla.org' in author
            or '@mozilla.com' in author):
        employee_authors.add(author)
        # discard() is a no-op when the author was never a partial match,
        # so no try/except around the removal is needed.
        partials.discard(author)
    else:
        volunteer_authors.add(author)
def _percent(part, whole):
    """Return part as a percentage of whole, or 0.0 when whole is zero."""
    if not whole:
        return 0.0
    return float(part) / whole * 100


# Each print takes one parenthesised argument, which prints the same text
# whether `print` is a statement or a function.
print('Employees: %d' % len(employee_authors))
print('Volunteers: %d' % len(volunteer_authors))
#print 'Partial matches: %d' % len(partials)
#print partials

# Total commits per group; both sets only ever contain keys of `authors`.
emp_contributions = sum(authors[a] for a in employee_authors)
vol_contributions = sum(authors[a] for a in volunteer_authors)
print('Employee contributions: %d' % emp_contributions)
print('Volunteer contributions: %d' % vol_contributions)

# Most prolific contributors first.
sorted_volunteers = sorted(volunteer_authors, key=lambda x: authors[x], reverse=True)
sorted_employees = sorted(employee_authors, key=lambda x: authors[x], reverse=True)

N = 10
top_n_vol = [float(authors[a]) for a in sorted_volunteers[:N]]
top_n_emp = [float(authors[a]) for a in sorted_employees[:N]]
print('Contributions from top %d employees: %d' % (N, sum(top_n_emp)))
print('Contributions from top %d volunteers: %d' % (N, sum(top_n_vol)))

# _percent() guards the divisions: a range with no volunteer (or no
# employee) commits reports 0% instead of raising ZeroDivisionError.
all_contributions = emp_contributions + vol_contributions
print('Top %d volunteers responsible for %f%% of volunteer commits, %f%% overall' % (
    N,
    _percent(sum(top_n_vol), vol_contributions),
    _percent(sum(top_n_vol), all_contributions)))
print('Top %d employees responsible for %f%% of employee commits, %f%% overall' % (
    N,
    _percent(sum(top_n_emp), emp_contributions),
    _percent(sum(top_n_emp), all_contributions)))
print('Volunteer commit distribution:')
# Histogram: commit count -> number of volunteers with exactly that count.
volunteer_buckets = defaultdict(int)
for volunteer in volunteer_authors:
    commit_count = authors[volunteer]
    volunteer_buckets[commit_count] += 1
for commit_count in sorted(volunteer_buckets):
    print('%s: %d' % (commit_count, volunteer_buckets[commit_count]))
# Sanity check: every volunteer landed in exactly one histogram slot.
assert sum(volunteer_buckets.values()) == len(volunteer_authors)

print('Bucketed volunteer commit distribution:')
# Half-open [lower, higher) ranges of commit counts.  Counts of 2000 or
# more fall outside every range and are not reported here.
buckets = [(1, 2), (2, 3), (3, 4), (4, 5), (5, 10), (10, 20), (20, 2000)]
bucketed = []
for lower, higher in buckets:
    in_range = sum(how_many
                   for commits, how_many in volunteer_buckets.items()
                   if lower <= commits < higher)
    bucketed.append(in_range)
    print("[%d, %d) - %d" % (lower, higher, in_range))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment