Skip to content

Instantly share code, notes, and snippets.

@harlowja
Created November 17, 2015 21:28
Show Gist options
  • Save harlowja/5da0ae3d95f544a88891 to your computer and use it in GitHub Desktop.
Save harlowja/5da0ae3d95f544a88891 to your computer and use it in GitHub Desktop.
import collections
import contextlib
import datetime
import os
import sys
import tabulate
from gitinspector.changes import Changes
from gitinspector.metrics import MetricsLogic
# Record describing one repository to analyze: a short name and the
# location of its checkout on disk.
Repository = collections.namedtuple('Repository', ['name', 'location'])

# Author names that are always left out of the report (presumably people
# who are already core reviewers -- confirm with the list owner).
CORE_SKIPS = frozenset([
    u'Julien Danjou',
    u'Davanum Srinivas',
    u'Ben Nemec',
    u'Joshua Harlow',
    u'Brant Knudson',
    u'Doug Hellmann',
    u'Victor Stinner',
    u'Michael Still',
    u'Flavio Percoco',
    u'Mehdi Abaakouk',
    u'Robert Collins',
])

# Author email addresses that are always left out of the report.
EMAIL_SKIPS = frozenset([
    'openstack-infra@lists.openstack.org',
    'flaper87@gmail.com',
    'fpercoco@redhat.com',
])

# Changes made in a year before this one are not considered.
OLDEST_COMMIT_YEAR = 2014
@contextlib.contextmanager
def auto_cwd(target_dir):
    """Run the ``with`` body with ``target_dir`` as the working directory.

    The working directory that was current on entry is restored when the
    body finishes, including when it raises. When ``os.getcwd()`` already
    equals ``target_dir`` no directory change is performed at all.

    :param target_dir: directory to switch into for the duration of the
                       ``with`` block.
    """
    old_dir = os.getcwd()
    if old_dir == target_dir:
        # Already there: nothing to switch and nothing to restore.
        yield
        return
    os.chdir(target_dir)
    try:
        yield
    finally:
        # Always go back, even if the body raised.
        os.chdir(old_dir)
def new_core_compare(c1, c2):
    """Old-style ``cmp`` comparator ordering rows by insertions, deletions.

    Both arguments are indexable rows whose element 3 is the insertion
    count and whose element 4 is the deletion count.

    :returns: ``0`` when both rows have the same (insertions, deletions)
              pair, ``-1`` when ``c1`` sorts before ``c2`` and ``1``
              otherwise.
    """
    # Sort by insertions, deletions...
    c1_key = (c1[3], c1[4])
    c2_key = (c2[3], c2[4])
    if c1_key == c2_key:
        return 0
    return -1 if c1_key < c2_key else 1
def should_discard(change_date, author_name, author_email, author_info):
    """Tell whether a change should be left out of the report.

    A change is discarded when its author name is in ``CORE_SKIPS``, its
    author email is in ``EMAIL_SKIPS``, or it was made in a year before
    ``OLDEST_COMMIT_YEAR``.

    :param change_date: object with a ``year`` attribute for the change.
    :param author_name: author name checked against ``CORE_SKIPS``.
    :param author_email: author email checked against ``EMAIL_SKIPS``.
    :param author_info: not consulted; kept so existing callers keep
                        working.
    :returns: ``True`` to discard the change, ``False`` to keep it.
    """
    # Checks are evaluated in this order and stop at the first match.
    return (author_name in CORE_SKIPS
            or author_email in EMAIL_SKIPS
            or change_date.year < OLDEST_COMMIT_YEAR)
def main(repos):
    """Print a table of potential new cores for each given repository.

    Each path in ``repos`` is analyzed with gitinspector's ``Changes``.
    Commits are keyed per (date, author), entries rejected by
    ``should_discard`` are dropped, and one row per remaining author is
    printed as a grid table: name, email, date of the latest kept change,
    insertions and deletions. Rows are ordered with ``new_core_compare``,
    largest first.

    :param repos: iterable of repository paths (relative or absolute).
    """
    # Imported here so the function does not rely on a module-level import;
    # cmp_to_key keeps the sort working where sorted() has no ``cmp``.
    import functools

    # Resolve every path before any directory change happens below.
    parsed_repos = [
        Repository(os.path.basename(location), location)
        for location in [os.path.abspath(p) for p in repos]
    ]
    for repo in parsed_repos:
        with auto_cwd(repo.location):
            print("Analyzing repo %s (%s):" % (repo.name, repo.location))
            print("Please wait...")
            changes = Changes(repo)
            # This is needed to flush out changes progress message...
            sys.stdout.write("\n")
            # Force population of this info (the returned lists themselves
            # are not used here)...
            changes.get_authordateinfo_list()
            changes.get_authorinfo_list()

            # Map (change date, author name) -> that author's info.
            better_changes_per_author = {}
            for c in changes.get_commits():
                change_date = datetime.datetime.fromtimestamp(
                    int(c.timestamp))
                author_name = c.author
                try:
                    author_info = changes.authors[author_name]
                except KeyError:
                    # No info recorded for this author; skip the commit.
                    continue
                better_changes_per_author[(change_date, author_name)] = (
                    author_info)

            # Map author name -> table row, tracking the latest kept date.
            maybe_new_cores = {}
            for key, author_info in better_changes_per_author.items():
                change_date, author_name = key
                author_email = changes.get_latest_email_by_author(
                    author_name)
                if should_discard(change_date, author_name,
                                  author_email, author_info):
                    continue
                existing_info = maybe_new_cores.get(author_name)
                if existing_info is None:
                    maybe_new_cores[author_name] = [
                        # NOTE(review): on Python 3 this yields bytes
                        # rather than text -- confirm that is what the
                        # table should show.
                        author_name.encode("latin1", errors='replace'),
                        author_email,
                        change_date,
                        author_info.insertions,
                        author_info.deletions,
                    ]
                elif existing_info[2] < change_date:
                    existing_info[2] = change_date

            if maybe_new_cores:
                print("%s potential new cores found!!" % len(maybe_new_cores))
                rows = sorted(maybe_new_cores.values(),
                              key=functools.cmp_to_key(new_core_compare),
                              reverse=True)
                headers = ['Name', 'Email', 'Last change made',
                           'Insertions', 'Deletions']
                print(tabulate.tabulate(rows, headers=headers,
                                        tablefmt="grid"))
            else:
                print("No new cores found!!")
if __name__ == '__main__':
    # Every command-line argument is passed to main() as a repository path.
    main(sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment