Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
from mercurial import ui, hg, cmdutil, match
from collections import defaultdict
import json
import sys
# Command-line arguments, as consumed throughout this script:
#   argv[1]  path handed to hg.repository()
#   argv[2]  from_rev  } combined below into the revision range
#   argv[3]  to_rev    } "<to_rev>:<from_rev>"
#   argv[4]  JSON file containing a 'Report_Entry' list of employee records
#   argv[5]  text file whose first token on each line is an email address
#
# Check the argument count up front so a missing argument produces a usage
# message instead of an IndexError part-way through the run.
if len(sys.argv) < 6:
    sys.exit('usage: %s REPO FROM_REV TO_REV EMPLOYEES_JSON EXTRA_EMAILS'
             % sys.argv[0])

repo = hg.repository(ui.ui(), sys.argv[1])
from_rev = sys.argv[2]
to_rev = sys.argv[3]
def sanitize(s):
    """Return *s* with curly quotation marks replaced by plain ASCII quotes.

    Left/right double quotes (U+201C, U+201D) become '"' and left/right
    single quotes (U+2018, U+2019) become "'".  All other characters are
    left untouched.
    """
    replacements = (
        (u"\u201c", '"'),
        (u"\u201d", '"'),
        (u"\u2018", "'"),
        (u"\u2019", "'"),
    )
    cleaned = s
    for curly, straight in replacements:
        cleaned = cleaned.replace(curly, straight)
    return cleaned
# Load the employee roster: a JSON document whose 'Report_Entry' value is a
# list of per-person dicts.  Entries without a 'primaryWorkEmail' key are
# dropped, so `emails` and `names` below only describe people with an email.
# Real lists (not lazy filter/map results) are built because `employees` is
# traversed twice here and `emails`/`names` are re-scanned for every author.
with open(sys.argv[4]) as f:
    employees = [entry for entry in json.load(f)[u'Report_Entry']
                 if u'primaryWorkEmail' in entry]
emails = [entry[u'primaryWorkEmail'] for entry in employees]
names = [(sanitize(entry[u'Preferred_Name_-_First_Name']),
          sanitize(entry[u'Preferred_Name_-_Last_Name']))
         for entry in employees]

# Extra addresses to treat as employees: the first whitespace-separated token
# of each line.  Blank lines are skipped wherever they occur, so an empty
# file or a stray blank line no longer raises IndexError.
with open(sys.argv[5]) as f:
    for line in f:
        fields = line.split()
        if fields:
            emails.append(fields[0])
# Tally the number of changesets per author line over the revision range
# "<to_rev>:<from_rev>".
authors = defaultdict(int)
pats = ()
opts = {'rev': [to_rev + ':' + from_rev]}
matchfn = match.match(repo.root, repo.getcwd(), pats)


def prep(ctx, fns):
    """Callback passed to cmdutil.walkchangerevs below.

    NOTE(review): both paths through this function return None and nothing
    is recorded, so as written it has no visible effect on the tally.  If
    the intent was to leave merge changesets out of the counts, that is not
    happening here -- confirm against the walkchangerevs contract.
    """
    parent_revs = repo.changelog.parentrevs(ctx.rev())
    if len(parent_revs) == 2:
        return


for changectx in cmdutil.walkchangerevs(repo, matchfn, opts, prep):
    # Python 2: the user string is decoded from UTF-8 bytes into unicode so
    # it can be compared with the unicode names loaded from JSON.
    committer = str(changectx.user()).decode('utf-8')
    authors[committer] = authors[committer] + 1
# Split every commit author into exactly one of two groups:
#   employee_authors  - matched by name, by known email, or by mozilla domain
#   volunteer_authors - everyone else
# `partials` records authors whose line contained an employee's last name but
# not the first name and who were not later matched as an employee; it exists
# only as a debugging aid.
employee_authors = set()
volunteer_authors = set()
partials = set()
for author in authors:
    author_words = author.split()
    for (first, last) in names:
        if last not in author:
            continue
        # Really dumb stemming - if the provided first name matches part of
        # a "word" in the full author's line, claim it's a match
        # (eg. Josh in Joshua)
        if first in author or any(word in first for word in author_words):
            employee_authors.add(author)
            partials.discard(author)
            break
        # Last name matched but first name did not: remember as a partial
        # match and keep looking through the remaining names.
        partials.add(author)
    else:
        # No name matched (the loop above never hit `break`); fall back to
        # the email-based checks.
        if any(email in author for email in emails):
            employee_authors.add(author)
            partials.discard(author)
        elif '@mozilla.org' in author or '@mozilla.com' in author:
            # Last ditch. I feel bad.
            partials.discard(author)
            employee_authors.add(author)
        else:
            volunteer_authors.add(author)
# Headline numbers: how many distinct authors fall in each group, how many
# commits each group made, and how concentrated the commits are among the
# top N authors of each group.
def _percent(part, whole):
    """Return *part* as a percentage of *whole*; 0.0 when *whole* is zero."""
    if not whole:
        return 0.0
    return float(part) / whole * 100


print('Employees: %d' % len(employee_authors))
print('Volunteers: %d' % len(volunteer_authors))
# `partials` is a debugging aid only and is intentionally not reported.

emp_contributions = sum(count for name, count in authors.items()
                        if name in employee_authors)
vol_contributions = sum(count for name, count in authors.items()
                        if name in volunteer_authors)
print('Employee contributions: %d' % emp_contributions)
print('Volunteer contributions: %d' % vol_contributions)

# Most prolific authors first.
sorted_volunteers = sorted(volunteer_authors, key=lambda x: authors[x], reverse=True)
sorted_employees = sorted(employee_authors, key=lambda x: authors[x], reverse=True)

N = 10
# Real lists, because each one is summed more than once below.
top_n_vol = [float(authors[name]) for name in sorted_volunteers[:N]]
top_n_emp = [float(authors[name]) for name in sorted_employees[:N]]
top_vol_total = sum(top_n_vol)
top_emp_total = sum(top_n_emp)
all_contributions = emp_contributions + vol_contributions

print('Contributions from top %d employees: %d' % (N, top_emp_total))
print('Contributions from top %d volunteers: %d' % (N, top_vol_total))
# _percent guards the divisions: a revision range with no volunteer (or no
# employee) commits used to abort here with ZeroDivisionError.
print('Top %d volunteers responsible for %f%% of volunteer commits, %f%% overall' % (
    N,
    _percent(top_vol_total, vol_contributions),
    _percent(top_vol_total, all_contributions)))
print('Top %d employees responsible for %f%% of employee commits, %f%% overall' % (
    N,
    _percent(top_emp_total, emp_contributions),
    _percent(top_emp_total, all_contributions)))
# Histogram of volunteers by commit count: volunteer_buckets[k] is the number
# of volunteers who made exactly k commits.  Printed in ascending order of k.
print('Volunteer commit distribution:')
volunteer_buckets = defaultdict(int)
for volunteer in volunteer_authors:
    commit_count = authors[volunteer]
    volunteer_buckets[commit_count] = volunteer_buckets[commit_count] + 1
for commit_count in sorted(volunteer_buckets):
    print('%s: %d' % (commit_count, volunteer_buckets[commit_count]))
# Sanity check: every volunteer landed in exactly one bucket.
assert sum(volunteer_buckets.values()) == len(volunteer_authors)
# Coarser view of the same histogram: volunteers grouped into half-open
# commit-count ranges [lower, higher).
# NOTE(review): a volunteer with 2000 or more commits matches none of these
# ranges and is left out of the bucketed output.
print('Bucketed volunteer commit distribution:')
buckets = [(1, 2), (2, 3), (3, 4), (4, 5), (5, 10), (10, 20), (20, 2000)]
bucketed = []
for lower, higher in buckets:
    volunteers_in_range = sum(volunteer_buckets[count]
                              for count in volunteer_buckets.keys()
                              if lower <= count < higher)
    bucketed.append(volunteers_in_range)
    print("[%d, %d) - %d" % (lower, higher, volunteers_in_range))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.