Skip to content

Instantly share code, notes, and snippets.

@akiavara
Last active January 1, 2021 21:25
Show Gist options
  • Save akiavara/51d0d9d302013521bb10 to your computer and use it in GitHub Desktop.
Save akiavara/51d0d9d302013521bb10 to your computer and use it in GitHub Desktop.
A python script that generates a report about contributions to a Git repository.
#!/usr/bin/env python
"""Git contributions
Reference: https://gist.github.com/ajardin/b9e0d33ec8c3a3b2874d
The script below gives a quick overview of the contributions to a Git repository.
It is especially useful when graphs like those available on GitHub are not accessible.
This script handles three non-mandatory parameters:
- START is used to keep commits only since the given date (default = 01 Jan 2000).
- END is used to keep commits only until the given date (default = 01 Jan 2050).
- MINIMUM is used to define the minimum number of commits in order to appear in the report (default = 10).
"""
import optparse
import sys
import os
import re
import operator
import unicodedata
from subprocess import Popen, PIPE, check_output
# Script release number; interpolated into the ``--version`` banner below.
__version__ = "0.1"

# optparse expands the ``%prog`` placeholder to the program name at runtime.
USAGE = "%prog [options]"
VERSION = "%prog v" + __version__

# Directory that main() switches into before invoking git.
PATH_TO_GIT = "." # CHANGE IT FOR YOUR GIT PROJECT LOCATION
def strip_accents(text):
    """
    Strip accents from input String.

    The text is decomposed with Unicode NFD normalization and then encoded
    to ASCII with errors ignored, so combining accents (and any other
    character that has no ASCII representation) are dropped.

    :param text: The input string. UTF-8 encoded bytes are accepted too.
    :type text: String.
    :returns: The processed String.
    :rtype: String.
    :raises UnicodeDecodeError: If ``text`` is bytes that are not valid UTF-8.
    """
    # Decode only when we actually hold bytes.  On Python 2 ``bytes`` is
    # ``str``, so plain strings are decoded and ``unicode`` objects pass
    # through untouched; on Python 3 this covers raw subprocess output.
    if isinstance(text, bytes):
        text = text.decode('utf-8')
    decomposed = unicodedata.normalize('NFD', text)
    # After the ASCII encode only ASCII bytes remain, so decoding is lossless.
    return str(decomposed.encode('ascii', 'ignore').decode('ascii'))
def remove_accents(input_str):
    """Return ``input_str`` with its combining marks removed.

    The text is decomposed with NFKD normalization, after which every
    character for which ``unicodedata.combining`` returns a non-zero class
    is left out of the result.

    :param input_str: The unicode text to process.
    :returns: The text without combining characters.
    """
    decomposed = unicodedata.normalize('NFKD', input_str)
    base_chars = (char for char in decomposed if not unicodedata.combining(char))
    return u"".join(base_chars)
def extract_stats(stats, minimum):
    """Aggregate ``git shortlog -sne`` lines into per-author commit totals.

    Each line is expected to look like ``"  42  Author Name <mail>"``; lines
    that do not match are ignored.  Commit counts of entries sharing the same
    e-mail address are summed, and the (accent-stripped) name of the first
    entry seen for that address is kept.

    :param stats: Iterable of shortlog output lines.
    :param minimum: Minimum total number of commits an author needs in order
        to be included in the result.
    :returns: List of ``(email, name, commits)`` tuples, ordered by commit
        count (then name) in descending order.
    """
    # A plain raw string: the ``ur''`` prefix is a syntax error on Python 3.
    pattern = re.compile(r'^\s*(\d+)\s+(.+)\s+<(.+)>$', re.MULTILINE)
    totals = {}
    for line in stats:
        match = pattern.search(line)
        if not match:
            continue
        mail = match.group(3)
        if mail not in totals:
            totals[mail] = {'name': strip_accents(match.group(2)), 'commits': 0}
        totals[mail]['commits'] += int(match.group(1))

    # Sort on an explicit key.  Sorting on the dict values themselves raises
    # TypeError on Python 3 and only worked on Python 2 through its implicit
    # dict ordering (which compared 'commits' first, then 'name').
    ranked = sorted(
        totals.items(),
        key=lambda item: (item[1]['commits'], item[1]['name']),
        reverse=True,
    )
    return [
        (mail, info['name'], info['commits'])
        for mail, info in ranked
        if info['commits'] >= minimum
    ]
# Counters as printed by ``git log --shortstat``.  The insertion and deletion
# parts are each optional, so they are matched by label rather than position.
_FILES_CHANGED_RE = re.compile(r'(\d+) files? changed')
_INSERTIONS_RE = re.compile(r'(\d+) insertions?\(\+\)')
_DELETIONS_RE = re.compile(r'(\d+) deletions?\(-\)')


def _run_git(args):
    """Run ``git`` with ``args`` (no shell involved) and return its stdout as text."""
    raw = check_output(['git'] + list(args))
    return raw.decode('utf-8', 'replace')


def _sum_shortstat(text):
    """Return total ``(files, insertions, deletions)`` found in shortstat output."""
    def total(pattern):
        return sum(int(count) for count in pattern.findall(text))

    return total(_FILES_CHANGED_RE), total(_INSERTIONS_RE), total(_DELETIONS_RE)


def format_report(stats, since, before):
    """Print a per-author contribution table for the current Git repository.

    For every author the function queries ``git log`` (merges excluded,
    limited to the given date range) for the number of files changed, lines
    inserted and deleted, and the relative dates of the first and last commit.

    :param stats: Sequence of ``(email, name, commits)`` tuples, already in
        the order in which they should be ranked.
    :param since: Lower date bound, passed to ``git log --since``.
    :param before: Upper date bound, passed to ``git log --before``.
    :returns: None; the report is written to standard output.
    :raises subprocess.CalledProcessError: If a ``git`` invocation fails.
    """
    header = "\n%-5s\t%-20s\t%-33s\t%-8s\t%-10s\t%-10s\t" % ("Rank", "Name", "Email", "Commits", "Files Changed", "Insertions (+)")
    header += "%-10s\t%-18s\t%-24s\t%-24s" % ("Deletions (-)", "Diff/Commits (~)", "First Commit", "Last Commit")
    print(header)

    for rank, (author_mail, author_name, author_commits) in enumerate(stats, 1):
        # Arguments are passed as a list, never through a shell, so author
        # and date values cannot inject commands.  The e-mail is matched as
        # the fixed string "<mail>" so that regex metacharacters in it are
        # literal and an address cannot match as a substring of another one.
        selection = [
            '--fixed-strings',
            '--author=<%s>' % author_mail,
            '--since=%s' % since,
            '--before=%s' % before,
            '--no-merges',
        ]

        # An empty pretty format leaves only the shortstat lines in the
        # output, so commit messages can never be mistaken for counters.
        shortstat = _run_git(['log', '--shortstat', '--pretty=tformat:'] + selection)
        files, insertions, deletions = _sum_shortstat(shortstat)

        # Floor division keeps the column an integer on Python 2 and 3 alike.
        if author_commits:
            avg_commits = (insertions + deletions) // author_commits
        else:
            avg_commits = 0

        # git log lists newest commits first: the first line is the author's
        # last commit and the last line is the first commit.
        dates = _run_git(['log', '--format=%ar'] + selection).splitlines()
        if dates:
            first_commit_date, last_commit_date = dates[-1], dates[0]
        else:
            first_commit_date = last_commit_date = "n/a"

        row = "%-5s\t%-20s\t%-33s\t%-8s\t%-10s\t%-10s\t%-10s" % (rank, author_name, author_mail, author_commits, files, insertions, deletions)
        row += "\t%-18s\t%-24s\t%-24s" % (avg_commits, first_commit_date, last_commit_date)
        print(row)
    print("")
def parse_options():
    """parse_options() -> opts, args

    Build the command-line parser (start date, end date, minimum number of
    commits, verbose flag), run it on the process arguments and return the
    parsed options together with the remaining positional arguments.
    """
    cli = optparse.OptionParser(usage=USAGE, version=VERSION)

    # Date range handed over to git.
    cli.add_option(
        "-s", "--start",
        dest="start", action="store", type="string", default="01 Jan 2000",
        help="Keep commits only since the given date (default = 01 Jan 2000)",
    )
    cli.add_option(
        "-e", "--end",
        dest="end", action="store", type="string", default="01 Jan 2050",
        help="Keep commits only until the given date (default = 01 Jan 2050)",
    )

    # Threshold below which an author is left out of the report.
    cli.add_option(
        "-m", "--minimum",
        dest="minimum", action="store", type="int", default=10,
        help="Define the minimum number of commits in order to appear in the report",
    )

    cli.add_option(
        "-v", "--verbose",
        dest="verbose", action="store_true", default=False,
        help="Launch in verbose mode",
    )

    return cli.parse_args()
def main():
    """Collect ``git shortlog`` statistics and print the contribution report.

    Changes into ``PATH_TO_GIT``, runs ``git shortlog -sne`` restricted to the
    date range given on the command line, and hands the parsed result to
    ``format_report``.  If git fails, its error output is forwarded to stderr
    and the process exits with git's status code.
    """
    def as_text(data):
        # Python 3 pipes yield bytes; Python 2 pipes already yield ``str``.
        return data if isinstance(data, str) else data.decode('utf-8', 'replace')

    opts, args = parse_options()
    os.chdir(PATH_TO_GIT)

    command = [
        'git', 'shortlog', '-sne',
        # No shell is involved, so the values must not be wrapped in quotes:
        # git would receive the quote characters as part of the date.
        '--since=%s' % opts.start,
        '--before=%s' % opts.end,
        '--no-merges',
        # Without an explicit revision git shortlog reads the log from stdin
        # whenever stdin is not a terminal (cron, pipes, ...).
        'HEAD',
    ]
    process = Popen(command, stdout=PIPE, stderr=PIPE)
    stdout, stderr = process.communicate()

    if process.returncode != 0:
        sys.stderr.write(as_text(stderr))
        sys.exit(process.returncode)

    output = as_text(stdout)
    if output:
        stats = extract_stats(output.split('\n'), opts.minimum)
        format_report(stats, opts.start, opts.end)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment