Created
September 8, 2011 03:02
-
-
Save wesm/1202507 to your computer and use it in GitHub Desktop.
A small script that builds a time series of code churn (insertions and deletions) for a git repository.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from dateutil import parser | |
import subprocess | |
import os | |
import re | |
import sys | |
import numpy as np | |
from pandas import * | |
# Path handed to ``git log`` as its trailing argument.
repo_path = '/home/wesm/code/pandas'

# Shell command that writes one line per commit, in the form
# "<abbrev sha> <YYYY-MM-DD> | <subject><refs> [<author>]", to githist.txt
# in the current working directory.
githist = ('git log --pretty=format:"%h %ad | %s%d [%an]" --date=short ' +
           repo_path + ' > githist.txt')
def rungithist():
    """Run the module-level ``githist`` shell command.

    The command redirects ``git log`` output into ``githist.txt`` in the
    current working directory.

    Raises:
        RuntimeError: if the shell command exits with a non-zero status.
    """
    status = os.system(githist)
    # Fail loudly here; otherwise a broken git invocation only surfaces
    # later as an obscure parsing error on an empty log file.
    if status != 0:
        raise RuntimeError('command failed with status %r: %s'
                           % (status, githist))
def get_commit_history():
    """Return the commit dates of the repository, indexed by abbreviated SHA.

    Calls ``rungithist()`` to write the log to ``githist.txt``, reads the file
    back, deletes it, and takes the first two whitespace-separated fields of
    every line as ``(sha, date)``.

    Returns:
        Series: dates parsed with ``dateutil.parser.parse``, indexed by SHA,
        in the order the log lists them.

    Raises:
        ValueError: if the log contains no parseable commit lines.
    """
    rungithist()
    try:
        with open('githist.txt') as log_file:
            log_text = log_file.read()
    finally:
        # The log file is only a scratch file; clean it up even if the read
        # fails.
        if os.path.exists('githist.txt'):
            os.remove('githist.txt')

    sha_date = []
    for line in log_text.split('\n'):
        fields = line.split()[:2]
        # Blank lines (e.g. a trailing newline) carry no sha/date pair and
        # would otherwise break the zip(*...) transpose below.
        if len(fields) == 2:
            sha_date.append(fields)

    if not sha_date:
        raise ValueError('no commits found in git log output')

    shas, dates = zip(*sha_date)
    dates = [parser.parse(d) for d in dates]
    return Series(dates, shas)
def _parse_stat_summary(statline):
    """Extract ``(insertions, deletions)`` from a diffstat summary line.

    Accepts both singular and plural wording and treats a missing
    insertion or deletion count as zero.
    """
    added = re.search(r'(\d+) insertion', statline)
    removed = re.search(r'(\d+) deletion', statline)
    return (int(added.group(1)) if added else 0,
            int(removed.group(1)) if removed else 0)


def get_commit_churn(sha, prev_sha):
    """Return ``(insertions, deletions)`` reported by ``git diff --stat``.

    Runs ``git diff <sha> <prev_sha> --stat`` in the current working
    directory and parses the summary (last non-empty) line of its output.

    Args:
        sha: first revision passed to ``git diff``.
        prev_sha: second revision passed to ``git diff``.

    Returns:
        tuple: ``(insertions, deletions)`` as ints; ``(0, 0)`` when the diff
        produces no output.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    cmd = ['git', 'diff', sha, prev_sha, '--stat']
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    # communicate() drains the pipe and waits for the child, so no process
    # or file descriptor is left dangling.
    output, _ = proc.communicate()
    if proc.returncode != 0:
        raise subprocess.CalledProcessError(proc.returncode, cmd)

    # The pipe yields bytes; decode once before any text processing.
    text = output.decode('utf-8', 'replace')
    lines = [ln for ln in text.split('\n') if ln.strip()]
    if not lines:
        # Identical trees: git prints nothing at all.
        return 0, 0
    return _parse_stat_summary(lines[-1])
def get_code_churn(commits):
    """Compute insertions and deletions for each commit.

    Args:
        commits: Series whose index holds the commit SHAs. Assumed to be in
            ``git log`` order (newest first), which is what
            ``get_commit_history`` produces.

    Returns:
        DataFrame: columns ``'insertions'`` and ``'deletions'``, indexed by
        the same SHAs. Each row holds the diff between that commit and the
        next (older) entry in the index; the last entry has nothing to diff
        against and is NaN. An empty input yields an empty frame.
    """
    shas = commits.index
    insertions = []
    deletions = []
    # `git diff older newer` describes what `newer` introduced, so the
    # result is recorded in the row of the newer commit.
    for newer, older in zip(shas[:-1], shas[1:]):
        i, d = get_commit_churn(older, newer)
        insertions.append(i)
        deletions.append(d)

    if len(shas):
        # The oldest listed commit has no predecessor in the index.
        insertions.append(np.nan)
        deletions.append(np.nan)

    return DataFrame({'insertions': insertions,
                      'deletions': deletions}, index=shas)
if __name__ == '__main__':
    # Commit dates indexed by SHA, then per-commit churn on the same index.
    commits = get_commit_history()
    churn = get_code_churn(commits)

    # `commits` maps each SHA to its date, so grouping by it sums the
    # insertions and deletions per date.
    by_date = churn.groupby(commits).sum()

    # Clean out days where I touched Cython: drop dates whose combined
    # insertions + deletions reach 5000 lines or more.
    by_date = by_date[by_date.sum(axis=1) < 5000]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment