Skip to content

Instantly share code, notes, and snippets.

@bendichter
Last active July 7, 2022 20:55
Show Gist options
  • Save bendichter/96953f94dc1cdde79f381013d17ddd25 to your computer and use it in GitHub Desktop.
Save bendichter/96953f94dc1cdde79f381013d17ddd25 to your computer and use it in GitHub Desktop.
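# First, generate the log file by running the command below inside the
# repository (the leading "!" is Jupyter/IPython shell syntax; drop it in a
# regular shell), then point `fpath` further down at the resulting git.log:
#
#!git log --all --numstat --pretty=format:'--%h--%ad--%aN' --no-renames > git.log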
import datetime
import matplotlib.pyplot as plt
import pandas as pd
from pprint import pprint
import matplotlib.ticker
import numpy as np
# Aliases -> display name. Every git author name a contributor has committed
# under is mapped to one display name so their commits are merged; names not
# listed here are used as-is.
author_map = {
    "alfred_username1": "Alfred",
    "alfred_username2": "Alfred",
    "bobby_username1": "Bobby",
}

# Raw git author names that are left out of the list of authors.
blacklisted = [
    # NOTE(review): this entry looks like a notebook shell command that was
    # pasted in as a name -- confirm whether it is intended.
    "!git for-each-ref --format='%(refname:short)' `git symbolic-ref HEAD`",
    "root",
    "add-bots-here",
    "dependabot[bot]",
]

# Path of the log file produced by the `git log ... > git.log` command.
fpath = "path/to/git.log"
# Parse the git log into a pandas DataFrame with one row per changed file.
#
# The log interleaves two kinds of lines:
#   --<sha>--<date>--<author>                commit header
#   <insertions>\t<deletions>\t<filename>    one line per file in that commit
#
# The whole file is first loaded as a single text column named 'raw'; the
# control character "\u0012" is passed as the separator (presumably because it
# never occurs in a log line, so no line is split -- verify for your log).
commits = pd.read_csv(fpath, sep="\u0012", header=None, names=["raw"])

# Commit header lines start with "--"; pull sha, date and author out of them.
commit_marker = commits[commits["raw"].str.startswith("--", na=False)]
commit_info = commit_marker["raw"].str.extract(
    r"^--(?P<sha>.*?)--(?P<date>.*?)--(?P<author>.*?)$", expand=True
)
# utc=True: commits made under different UTC offsets are normalised to one
# timezone-aware dtype. Without it, mixed offsets produce an object column (or
# an error in newer pandas) and the later min/max/comparisons misbehave.
commit_info["date"] = pd.to_datetime(commit_info["date"], utc=True)

# Every remaining line is a per-file stat line: insertions, deletions, filename.
file_stats_marker = commits[~commits.index.isin(commit_info.index)]
file_stats = file_stats_marker["raw"].str.split("\t", expand=True)
file_stats = file_stats.rename(columns={0: "insertions", 1: "deletions", 2: "filename"})
# Non-numeric counts become NaN instead of raising.
file_stats["insertions"] = pd.to_numeric(file_stats["insertions"], errors="coerce")
file_stats["deletions"] = pd.to_numeric(file_stats["deletions"], errors="coerce")

# Spread each commit's sha/date/author down over the stat lines that follow
# its header. `.ffill()` replaces the deprecated `fillna(method="ffill")`.
commit_data = commit_info.reindex(commits.index).ffill()
# Drop the header rows themselves, keeping only per-file rows. Note that a
# commit with no stat lines therefore contributes no rows to `commit_data`.
commit_data = commit_data[~commit_data.index.isin(commit_info.index)]
commit_data = commit_data.join(file_stats)
# Build the timesheet: a (n_authors x n_weeks) array of 0/1 where 1 means the
# author has at least one row in `commit_data` dated within that week.

# Display names of all non-blacklisted authors, sorted and de-duplicated.
all_authors = commit_data["author"].unique()
all_authors = list(np.unique([author_map.get(x, x) for x in all_authors if x not in blacklisted]))

dates = commit_data["date"]
start = dates.min()
stop = dates.max()
# Week k covers [start + 7k days, start + 7(k+1) days). The "+ 1" makes the
# last (possibly partial) week exist so that `stop` itself falls in a column.
n_weeks = (stop - start).days // 7 + 1
timesheet = np.zeros((len(all_authors), n_weeks))

# Display name -> row index, built once instead of scanning per week.
author_row = {author: i for i, author in enumerate(all_authors)}

# Iterate over weeks and mark which authors were active in each one.
for week_n in range(n_weeks):
    # The window starts AT week_n (not week_n - 1, which made week 0 an empty
    # window before the first commit and dropped the final week entirely).
    week_start = start + datetime.timedelta(days=7 * week_n)
    week_stop = start + datetime.timedelta(days=7 * (week_n + 1))
    # Half-open interval: a commit exactly on a boundary (including the very
    # first commit at `start`) is counted once, in the week it begins.
    in_week = (week_start <= dates) & (dates < week_stop)
    for raw_author in commit_data.loc[in_week, "author"].unique():
        if raw_author in blacklisted:
            # Same exclusion rule that is applied when building all_authors.
            continue
        # Handle different usernames of the same person.
        row = author_row.get(author_map.get(raw_author, raw_author))
        if row is not None:
            timesheet[row, week_n] = 1
# Show the timesheet as an image: one row per author, one column per week,
# rendered with the "Greys" colormap.
fig, ax = plt.subplots(figsize=(15, 10))
ax.imshow(timesheet, cmap="Greys")

# One labelled major tick per author row.
ax.set_yticks(range(len(all_authors)))
_ = ax.set_yticklabels(all_authors)
ax.set_xlabel("weeks")

# Put a minor tick at every integer position on both axes, then draw thin
# black grid lines on major and minor ticks alike.
ax.minorticks_on()
ax.xaxis.set_minor_locator(matplotlib.ticker.MultipleLocator(1))
ax.yaxis.set_minor_locator(matplotlib.ticker.MultipleLocator(1))
ax.grid(which="both", linewidth=0.25, color="k")

plt.show()
# Per-author total: the sum of each timesheet row, i.e. the number of weeks
# in which that author is marked active.
total_weeks = dict(zip(all_authors, timesheet.sum(axis=1)))

print("total weeks:")
print("============")
# Use a distinct loop variable so the `total_weeks` mapping is not overwritten
# by a single author's count while printing.
for author, n_active_weeks in total_weeks.items():
    print(f"{author}: {n_active_weeks}")
@bendichter
Copy link
Author

image

total weeks:

Alejandro Sánchez Yalí: 32.0
Amelia Ardath: 2.0
Angel Rey: 1.0
Anthony Anabila Abeo: 9.0
Ben Dichter: 19.0
Cesar Uribe: 1.0
Cristian: 7.0
César Alfredo Uribe León: 8.0
Daniel Lopez: 8.0
Joyce Obi: 5.0
Juan David Arias: 35.0
Karim Marzouq: 1.0
Kevin c: 7.0
Max: 6.0
Nick Sweeting: 5.0
Rey Messon: 3.0
Sergey Mankovsky: 3.0
Thomas Lisankie: 1.0
ana: 1.0
apkallum: 1.0
dnl-molina: 1.0
jdcaballerov: 17.0

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment