Skip to content

Instantly share code, notes, and snippets.

Forked from wesm/
Created May 29, 2012 22:50
Show Gist options
  • Save mahdibh/2831247 to your computer and use it in GitHub Desktop.
Save mahdibh/2831247 to your computer and use it in GitHub Desktop.
A small script that builds a time series of code churn (insertions and deletions) for a git repository.
from dateutil import parser
import subprocess
import os
import re
import sys
import tempfile
import numpy as np
from pandas import *
# Location of the repository under analysis: the directory the script is
# started from.
repo_path = os.getcwd()
git_path = os.path.join(repo_path, '.git')

# check that we are inside a git repository
if not os.path.exists(git_path):
    sys.exit("current directory '" + repo_path + "' doesn't contain a git repository")

# Scratch file that receives the redirected `git log` output.  mkstemp()
# creates the file atomically; the deprecated mktemp() only returns a name,
# leaving a window in which another process can claim the same path.
_githist_fd, githist_file = tempfile.mkstemp()
os.close(_githist_fd)  # only the path is needed; the shell reopens the file

# Shell command that writes one line per commit, starting with
# "<abbreviated sha> <short date>", into githist_file.  Both paths are wrapped
# in double quotes so directories containing spaces survive word-splitting.
githist = ('git log --pretty=format:\"%h %ad | %s%d [%an]\" --date=short '
           '"' + repo_path + '" > "' + githist_file + '"')
def rungithist():
    """Run the module-level ``githist`` shell command.

    The command redirects the output of ``git log`` into ``githist_file``.

    Raises:
        RuntimeError: if the command exits with a non-zero status.
    """
    # shell=True is required: ``githist`` is a single command string that
    # relies on the shell for output redirection.
    status = subprocess.call(githist, shell=True)
    if status != 0:
        raise RuntimeError("'%s' exited with status %d" % (githist, status))
def get_commit_history():
    """Return the repository's commits as a Series of dates indexed by sha.

    Calls ``rungithist()`` to write the log dump to ``githist_file``, reads
    it back, and takes the first two whitespace-separated fields of every
    line as ``(sha, date)``.

    Returns:
        Series whose index holds the commit shas and whose values are the
        parsed commit dates, in the order the log lists them.  An empty
        Series is returned when the log contains no commits.
    """
    rungithist()
    try:
        with open(githist_file) as log:
            lines = log.read().split('\n')
    finally:
        # The dump is a one-shot scratch file; rungithist() recreates it
        # through the shell redirect on the next call.
        if os.path.exists(githist_file):
            os.remove(githist_file)

    sha_date = []
    for line in lines:
        fields = line.split()
        # Blank or malformed lines carry no (sha, date) pair; skip them
        # instead of letting them break the unzip below.
        if len(fields) >= 2:
            sha_date.append(fields[:2])

    if not sha_date:
        # zip(*[]) cannot be unpacked into two names, so handle it here.
        return Series([], index=[], dtype=object)

    shas, dates = zip(*sha_date)
    dates = [parser.parse(d) for d in dates]
    return Series(dates, shas)
def get_commit_churn(sha, prev_sha):
    """Return the line churn reported by ``git diff <sha> <prev_sha> --stat``.

    Args:
        sha: first revision handed to ``git diff``.
        prev_sha: second revision handed to ``git diff``.

    Returns:
        Tuple ``(insertions, deletions)`` of ints read from the last line of
        the stat output.  A count that the summary omits is reported as 0,
        and ``(0, 0)`` is returned when git prints nothing at all.

    Raises:
        subprocess.CalledProcessError: if ``git diff`` exits non-zero.
    """
    cmd = ['git', 'diff', sha, prev_sha, '--stat']
    # universal_newlines=True yields text rather than bytes on Python 3.
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            universal_newlines=True)
    # communicate() drains the pipe and waits for the child, so neither the
    # pipe nor the process is left dangling.
    output, _ = proc.communicate()
    if proc.returncode != 0:
        raise subprocess.CalledProcessError(proc.returncode, cmd)

    lines = output.splitlines()
    if not lines:
        # Identical revisions produce no stat output.
        return 0, 0
    statline = lines[-1]

    # Search for each count on its own: git prints the singular form for a
    # count of one and leaves out a clause whose count is zero, so a single
    # pattern that requires both plural words would fail to match.
    added = re.search(r'(\d+)\s+insertions?', statline)
    removed = re.search(r'(\d+)\s+deletions?', statline)

    insertions = int(added.group(1)) if added else 0
    deletions = int(removed.group(1)) if removed else 0
    return insertions, deletions
def get_code_churn(commits):
    """Build a per-commit table of inserted and deleted line counts.

    Args:
        commits: Series whose index holds the commit shas, in the order
            they should be compared pairwise.

    Returns:
        DataFrame indexed by the same shas with ``insertions`` and
        ``deletions`` columns.  Each row holds the churn between that commit
        and the one before it in the index; the first row is NaN because it
        has no predecessor to diff against.  Empty input gives an empty
        frame.
    """
    shas = commits.index
    if len(shas) == 0:
        return DataFrame({'insertions': [], 'deletions': []}, index=shas)

    # Placeholder row for the first sha, which has nothing to be diffed with.
    insertions = [np.nan]
    deletions = [np.nan]

    for prev, cur in zip(shas[:-1], shas[1:]):
        i, d = get_commit_churn(cur, prev)
        # Record each result so both columns end up as long as the index.
        insertions.append(i)
        deletions.append(d)

    return DataFrame({'insertions': insertions,
                      'deletions': deletions}, index=shas)
if __name__ == '__main__':
    # Script entry point: print total insertions/deletions per commit date.
    commits = get_commit_history()
    churn = get_code_churn(commits)
    # ``commits`` maps sha -> date, so grouping the sha-indexed churn table
    # by it sums the counts of all commits that share a date.
    by_date = churn.groupby(commits).sum()
    # Parenthesised single-argument print behaves identically on Python 2
    # and Python 3; the bare print statement is a syntax error on Python 3.
    print(by_date)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment