Skip to content

Instantly share code, notes, and snippets.

Created Jun 12, 2014
What would you like to do?
Script to count the number of lines written by an author in a Git repository.
import argparse
import json
import os
import subprocess
class Chdir:
    """
    Context manager that runs its ``with`` body inside another directory.

    On entry the current working directory is remembered and the process
    changes into ``newpath``; on exit the remembered directory is restored,
    whether or not the body raised.

    (The original note credits an external recipe for this
    initializer/destructor pattern, but the link is missing from this copy.)
    """

    def __init__(self, newpath):
        # Directory to switch into when the ``with`` block is entered.
        self.newpath = newpath
        # Filled in by __enter__ so __exit__ knows where to return to.
        self.oldpath = None

    def __enter__(self):
        self.oldpath = os.getcwd()
        os.chdir(self.newpath)
        # Returning self makes ``with Chdir(p) as cd`` bind something useful.
        return self

    def __exit__(self, *err):
        # Always go back; the implicit None return lets exceptions propagate.
        os.chdir(self.oldpath)
def command(s):
    """
    Runs the shell command and returns the stdout as a list
    of strings.

    Each returned string is one line of output with leading and trailing
    whitespace (including the newline) stripped.  Anything the command
    writes to stderr is discarded.

    NOTE: ``s`` is handed to the shell verbatim, so callers must not build
    it from untrusted text.
    """
    # Open the null device in a ``with`` block so the handle is closed even
    # if spawning or reading fails.
    with open(os.devnull, "w") as devnull:
        proc = subprocess.Popen(s,
                                shell=True,
                                stdout=subprocess.PIPE,
                                stderr=devnull,
                                # text-mode pipe: str lines on Python 2 and 3
                                universal_newlines=True)
        try:
            # strip trailing newline characters
            lines = [line.strip() for line in proc.stdout]
        finally:
            proc.stdout.close()
            # Reap the child so no zombie process is left behind.
            proc.wait()
    return lines
def file_type(fname):
    """
    Given a file name or path, returns its file-type label.

    Only the final path component is examined, so directory names never
    influence the result.  The label is:

      * "<TEMP>"   for temp files (name ends with "~" or starts with "#"),
      * "Makefile" if the name contains "makefile" (case-insensitive),
      * "README"   if the name contains "readme" (case-insensitive),
      * otherwise the text after the last ".", or the empty string if the
        name contains no "." (this includes the empty name).
    """
    name = os.path.basename(fname)

    # handle special cases of tempfiles; startswith/endswith are safe on ""
    if name.endswith("~") or name.startswith("#"):
        return "<TEMP>"

    # handle special cases of Makefile and READMEs
    lowered = name.lower()
    if "makefile" in lowered:
        return "Makefile"
    if "readme" in lowered:
        return "README"

    # if there is no "." at all, there was no extension
    _, dot, extension = name.rpartition(".")
    return extension if dot else ""
def commits_by_author(author):
    """
    Returns the list of commit hashes by the given author.

    The hashes are the first space-separated token of each line printed by
    ``git log --oneline`` for that author.
    """
    log_cmd = "git log --author=\"%s\" --oneline" % author
    hashes = []
    for entry in command(log_cmd):
        # each entry is "<hash> <subject>"; keep only the hash
        hashes.append(entry.split(" ")[0])
    return hashes
def commit_stats(commit):
    """
    Returns a mapping of filename -> (additions, deletions)
    for the given commit, parsed from ``git show --oneline --numstat``.

    A "-" count (which git prints instead of a number for binary files)
    is treated as 0.
    """
    lines = command("git show %s --oneline --numstat" % commit)
    assert len(lines) > 0, "git show produced no output for %s" % commit

    stats = {}
    # the first line is just a repeat of the hash, so skip it
    for line in lines[1:]:
        line = line.strip()
        if not line:
            continue
        # stats are of the form:
        # additions deletions filename
        # Split at most twice so a filename containing spaces (or a rename
        # shown as "old => new") stays in one piece.
        fields = line.split(None, 2)
        if len(fields) != 3:
            # not a numstat record; nothing to count
            continue
        addition, deletion, fname = fields
        additions = 0 if addition == "-" else int(addition)
        deletions = 0 if deletion == "-" else int(deletion)
        stats[fname] = (additions, deletions)
    return stats
def author_stats(author):
    """
    Returns a mapping of filename -> (total +, total -)
    accumulated over every commit by the given author.
    """
    totals = {}
    for sha in commits_by_author(author):
        for path, (added, removed) in commit_stats(sha).items():
            # start from (0, 0) the first time a file is seen
            prev_added, prev_removed = totals.get(path, (0, 0))
            totals[path] = (prev_added + added, prev_removed + removed)
    return totals
def author_stats_filetype(author, exclude=()):
    """
    Returns a mapping of filetype -> (total +, total -)
    for the given author.

    Per-file totals from ``author_stats`` are grouped by the label that
    ``file_type`` assigns to each file name.  Files whose label appears in
    ``exclude`` (any container supporting ``in``) are left out entirely.
    """
    tstats = {}
    for fname, (adds, dels) in author_stats(author).items():
        ftype = file_type(fname)
        if ftype in exclude:
            # excluded types contribute nothing to the result
            continue
        totadds, totdels = tstats.get(ftype, (0, 0))
        tstats[ftype] = (totadds + adds, totdels + dels)
    return tstats
def total_lines(stats):
    """
    Counts the total number of +'s and -'s.

    ``stats`` maps keys to (additions, deletions) pairs; the result is the
    pair of column sums.
    """
    pairs = list(stats.values())
    added = sum(plus for plus, _ in pairs)
    removed = sum(minus for _, minus in pairs)
    return added, removed
# File-type labels (as produced by file_type) that are excluded by default.
# NOTE(review): this copy of the list had no closing bracket, so entries
# after "orig" may have been lost -- confirm against the upstream script.
EXCLUDE = ["jpg", "pdf", "dat", "data", "csv", "xlsx", "tgz", "so", "png", "swp", "a",
           "expected", "out", "cproject", "project", "asm", "fish", "scish", "cish",
           "mlish", "<TEMP>", "gexf", "gephi", "ipynb", "cls", "JPG", "PNG", "nb", "xls",
           "classpath", "DS_Store", "class", "script", "names", "jar",
           "mat", "ppt", "pptx", "tif", "zip", "bmp", "eps", "crt", "csr", "key", "orig",
           ]

# Command-line interface.  The first positional argument of ArgumentParser
# is ``prog``, so "line count" is the program name shown in usage output.
parser = argparse.ArgumentParser("line count")
parser.add_argument("path", type=str, help="path to git repo")
parser.add_argument("author", type=str, help="author to count lines")
parser.add_argument("--out", type=str, help="outfile for results",
                    default=None, dest="out")
# --exclude takes a comma-separated list; when omitted, EXCLUDE is used as-is.
parser.add_argument("--exclude", type=lambda s: s.split(","), help="exclude file types",
                    default=EXCLUDE, dest="exclude")
# Script entry point: gather the author's per-filetype line counts inside
# the repository, then emit them (with the run parameters) as JSON.
if __name__ == "__main__":
    args = vars(parser.parse_args())
    path = args["path"]
    author = args["author"]
    exclude = args["exclude"]
    out = args["out"]

    # Record the inputs alongside the results.
    meta = {}
    meta["path"] = path
    meta["author"] = author
    meta["exclude"] = exclude

    # The git commands operate on the current directory, so run them from
    # inside the repository.
    with Chdir(path):
        stats = author_stats_filetype(author, exclude=exclude)

    adds, dels = total_lines(stats)
    meta["types"] = stats
    meta["total"] = (adds, dels)

    s = json.dumps(meta, indent=2, sort_keys=True)
    if out is not None:
        # json.dumps returns text, so open in text mode ("wb" rejects str
        # on Python 3) and actually write the report.
        with open(out, "w") as outfile:
            outfile.write(s)
    # NOTE(review): presumably the report is echoed to stdout regardless of
    # --out; the original nesting of this statement is not recoverable from
    # this copy -- confirm.  print(s) is valid on both Python 2 and 3.
    print(s)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment