Skip to content

Instantly share code, notes, and snippets.

@nhoffman
Created May 31, 2021 03:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nhoffman/e641df575d49e99ee21746da28362488 to your computer and use it in GitHub Desktop.
Save nhoffman/e641df575d49e99ee21746da28362488 to your computer and use it in GitHub Desktop.
visualize author contributions to one or more git repos
#!/usr/bin/env python3
"""Describe author contributions for one or more git repositories by date
Output is a csv with columns (repo, author, timestamp, churn) where
'churn' is the sum of lines added and lines removed.
Requires pandas and plotnine
"""
import sys
import argparse
import subprocess
from pathlib import Path
import re
import csv
import pandas as pd
from plotnine import (ggplot, geom_bar, aes, facet_wrap, theme,
element_text, scale_y_log10)
import plotnine as p9
def parse_log(text, exclude=None):
    """Parse ``git log --numstat`` output into per-commit churn records.

    Each commit is expected to start with a header line of the form
    ``#<repo>|<author>|<timestamp>`` followed by zero or more numstat
    lines ``<added> <removed> <filename>``. Blank lines are ignored.

    Args:
        text: The complete log output as a single string.
        exclude: Optional regular expression; files whose name matches
            it (using ``search``) do not contribute to the churn.

    Yields:
        One ``(repo, author, timestamp, churn)`` tuple per commit header,
        where ``churn`` is the sum of lines added and removed. A file
        whose counts are not integers counts as 1. Nothing is yielded
        when ``text`` contains no commit header.
    """
    pattern = re.compile(exclude) if exclude else None

    repo, author, ts, churn = None, None, None, 0
    # Track "a header has been seen" explicitly rather than testing the
    # truthiness of ``repo``: an empty repo label is still a valid record.
    in_commit = False

    for line in text.splitlines():
        if line.startswith('#'):
            if in_commit:
                yield (repo, author, ts, churn)
            # Split from the right so that a '|' inside the repo name does
            # not break parsing; the author and timestamp fields come last.
            repo, author, ts = line[1:].rsplit('|', 2)
            churn = 0
            in_commit = True
        elif line.strip():
            gain, loss, filename = line.split(None, 2)
            if pattern and pattern.search(filename):
                continue
            try:
                churn += int(gain) + int(loss)
            except ValueError:
                # eg, binary files report line changes using '-'
                churn += 1

    # Flush the last commit, but never emit a placeholder for empty input.
    if in_commit:
        yield (repo, author, ts, churn)
def main(arguments):
    """Collect per-author churn from git repositories and report it.

    Runs ``git log --numstat`` on the requested branch of every
    repository, parses the output with ``parse_log``, optionally writes
    the raw per-commit records to a CSV file and optionally saves a
    stacked bar plot of monthly churn per author (one facet per repo).

    Args:
        arguments: Command-line arguments, excluding the program name.

    Returns:
        None, so that ``sys.exit(main(...))`` reports success.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('repos', help="Path to one or more repositories", nargs='+')
    parser.add_argument('-x', '--exclude', help='regular expression excluding filenames')
    parser.add_argument('-a', '--author-map', type=argparse.FileType(),
                        help="""headerless csv file with columns
                        (label, author-email) providing instructions
                        for consolidating or (if the first column is
                        left empty) excluding authors. """)
    parser.add_argument('-b', '--branch', default='master',
                        help='branch or revision to read history from [%(default)s]')
    parser.add_argument('-o', '--outfile', help="CSV output file")
    parser.add_argument('-p', '--plotfile', help="plot output file")
    args = parser.parse_args(arguments)

    # Always bind author_map so the check further down cannot raise
    # NameError when no --author-map was given.
    author_map = {}
    if args.author_map:
        with args.author_map as handle:
            for row in csv.reader(handle):
                if not row:
                    # tolerate blank lines in the mapping file
                    continue
                label, email = row
                if label:
                    author_map[email] = label

    data = []
    for path in args.repos:
        pth = Path(path)
        # Path('.').name is empty; fall back to the resolved directory name
        # so that every record carries a usable repo label.
        repo = pth.name or pth.resolve().name
        cmd = ['git', '-C', path, '--no-pager', 'log',
               '--numstat', args.branch, f'--format=format:#{repo}|%ce|%ci']
        job = subprocess.run(cmd, capture_output=True, text=True)
        if job.returncode != 0:
            # Report the failure instead of silently producing no data,
            # but keep processing the remaining repositories.
            print(f'warning: git log failed for {path}: {job.stderr.strip()}',
                  file=sys.stderr)
            continue
        data.extend(parse_log(job.stdout, exclude=args.exclude))

    df = pd.DataFrame(data, columns=['repo', 'author', 'timestamp', 'churn'])

    # The CSV holds the raw records, before any author consolidation.
    if args.outfile:
        df.to_csv(args.outfile, index=False)

    if args.plotfile:
        if author_map:
            # Keep only mapped authors and replace each email by its label.
            # Copy so the assignment does not target a view of ``df``.
            df = df[df['author'].isin(author_map)].copy()
            df['author'] = df['author'].map(author_map)

        # Aggregate churn by calendar month.
        df['date'] = pd.to_datetime(df['timestamp']).apply(lambda ts: ts.strftime('%Y-%m'))
        df = df.drop(['timestamp'], axis=1)
        tab = df.groupby(['repo', 'author', 'date']).sum().reset_index()

        plt = (ggplot(tab, aes('date', 'churn', fill='author')) +
               geom_bar(position='stack', stat='identity') +
               facet_wrap('~repo', ncol=1) +
               scale_y_log10() +
               p9.theme_538() +
               theme(
                   axis_text_x=element_text(rotation=90),
                   axis_text_y=element_text(size=0),
                   axis_title_y=element_text(size=0),
                   legend_position='top'))
        plt.save(args.plotfile)
# Run as a script: forward the command-line arguments (without the program
# name) to main() and use its return value as the process exit status.
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    raise SystemExit(main(cli_args))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment