# Script to count the number of lines written by an author in a git repository.
import argparse | |
import json | |
import os | |
import subprocess | |
class Chdir:
    """
    Context manager that temporarily changes the working directory.

    On entry the current directory is remembered and the process
    switches to ``newpath``; on exit the remembered directory is
    restored, whether or not the body raised.

    Initializer/destructor pattern for cd taken from here:
    http://stackoverflow.com/questions/431684/how-do-i-cd-in-python
    """

    def __init__(self, newpath):
        self.newpath = newpath
        # filled in by __enter__
        self.oldpath = None

    def __enter__(self):
        self.oldpath = os.getcwd()
        os.chdir(self.newpath)
        # hand back the manager so that ``with Chdir(p) as cd`` binds
        # an object instead of None
        return self

    def __exit__(self, *err):
        # returns None, so an exception raised in the body propagates
        os.chdir(self.oldpath)
def command(s):
    """
    Runs the shell command and returns the stdout as a list
    of strings.

    ``s`` is handed to the shell as-is, so the caller is responsible
    for quoting. Anything the command writes to stderr is discarded and
    the exit status is ignored; a failing command simply yields
    whatever it printed to stdout (possibly nothing). Each returned
    line has its surrounding whitespace, including the trailing
    newline, stripped.
    """
    # the with-block makes sure the devnull handle is closed again
    with open(os.devnull, "w") as devnull:
        proc = subprocess.Popen(s,
                                stdout=subprocess.PIPE,
                                stderr=devnull,
                                shell=True,
                                # text output on Python 3 as well as Python 2
                                universal_newlines=True)
        try:
            lines = proc.stdout.readlines()
        finally:
            # close the pipe and reap the child so neither leaks
            proc.stdout.close()
            proc.wait()
    return [line.strip() for line in lines]
def file_type(fname):
    """
    Given a file name, returns the file extension, or empty
    string if there is none.

    Only the last path component is examined, so directory names never
    influence the result. Special cases:

    * names ending in "~" or starting with "#" give "<TEMP>"
    * names containing "makefile" (any case) give "Makefile"
    * names containing "readme" (any case) give "README"

    A leading dot counts as an extension separator, so ".DS_Store"
    gives "DS_Store". An empty name gives "".
    """
    name = os.path.basename(fname)
    if not name:
        # empty string, or a path that ends in a separator
        return ""

    # handle special cases of tempfiles
    if name.endswith("~") or name.startswith("#"):
        return "<TEMP>"

    # handle special cases of Makefile and READMEs
    lowered = name.lower()
    if "makefile" in lowered:
        return "Makefile"
    if "readme" in lowered:
        return "README"

    # everything after the last dot; no dot at all means no extension
    _, dot, ext = name.rpartition(".")
    return ext if dot else ""
def commits_by_author(author):
    """
    Returns the list of commit hashes by the given author

    The hashes are the first field of each ``git log --oneline`` line.
    The author string is shell-quoted before it is placed on the
    command line, so names containing quotes, ``$`` or backticks are
    passed to git literally.
    """
    # imported here so the function stands on its own; the helper is
    # shlex.quote on Python 3 and pipes.quote on Python 2
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    lines = command("git log --author=%s --oneline" % quote(author))
    # each line is "<hash> <subject>"; skip anything empty
    return [l.split(" ", 1)[0] for l in lines if l]
def commit_stats(commit):
    """
    Returns a mapping of filename -> (additions, deletions)

    Parses the output of ``git show <commit> --oneline --numstat``.
    A count reported as "-" is treated as 0. File names that contain
    spaces are kept intact. Raises AssertionError if git produced no
    output at all for the commit.
    """
    lines = command("git show %s --oneline --numstat" % commit)
    assert lines, "no output from git show for commit %s" % commit

    # stats are of the form:
    # additions deletions filename
    stats = {}
    # the first line is just a repeat of the hash, so skip it
    for l in lines[1:]:
        # split at most twice so a file name containing whitespace
        # stays in one piece instead of breaking the unpacking
        fields = l.strip().split(None, 2)
        if len(fields) != 3:
            # blank line, or not an "additions deletions filename" row
            continue
        addition, deletion, fname = fields
        if addition == "-":
            addition = "0"
        if deletion == "-":
            deletion = "0"
        stats[fname] = (int(addition), int(deletion))
    return stats
def author_stats(author):
    """
    Returns a mapping of filename -> (total +, total -)

    The totals are accumulated over every commit returned by
    commits_by_author for the given author.
    """
    totals = {}
    for sha in commits_by_author(author):
        per_commit = commit_stats(sha)
        for path in per_commit:
            plus, minus = per_commit[path]
            # a file seen for the first time starts from (0, 0)
            seen_plus, seen_minus = totals.get(path, (0, 0))
            totals[path] = (seen_plus + plus, seen_minus + minus)
    return totals
def author_stats_filetype(author, exclude=frozenset()):
    """
    Returns a mapping of filetype -> (total +, total -)

    ``author`` is passed on to author_stats; each file it reports is
    classified with file_type and its counts are added to that type's
    running total. ``exclude`` is an iterable of file types to leave
    out of the result (None is treated as "exclude nothing").
    """
    # build the lookup set once; the caller may pass a list
    excluded = frozenset(exclude or ())

    tstats = {}
    astats = author_stats(author)
    for fname in astats:
        ftype = file_type(fname)
        if ftype in excluded:
            continue
        adds, dels = astats[fname]
        totadds, totdels = tstats.get(ftype, (0, 0))
        tstats[ftype] = (totadds + adds, totdels + dels)
    return tstats
def total_lines(stats):
    """
    Sums a mapping of key -> (adds, dels) into one (adds, dels) pair.

    An empty mapping gives (0, 0).
    """
    pairs = [stats[key] for key in stats]
    added = sum(plus for plus, _ in pairs)
    removed = sum(minus for _, minus in pairs)
    return added, removed
# File types (in the form produced by file_type) that are excluded
# by default when --exclude is not given on the command line.
EXCLUDE = [
    "jpg", "pdf", "dat", "data", "csv", "xlsx",
    "tgz", "so", "png", "swp", "a", "expected",
    "out", "cproject", "project", "asm", "fish", "scish",
    "cish", "mlish", "<TEMP>", "gexf", "gephi", "ipynb",
    "cls", "JPG", "PNG", "nb", "xls", "classpath",
    "DS_Store", "class", "script", "names", "jar", "mat",
    "ppt", "pptx", "tif", "zip", "bmp", "eps",
    "crt", "csr", "key", "orig",
]

# Command line: two positionals (repository path and author) plus an
# optional output file and a comma-separated list of types to exclude.
parser = argparse.ArgumentParser(prog="line count")
parser.add_argument("path", type=str, help="path to git repo")
parser.add_argument("author", type=str, help="author to count lines")
parser.add_argument(
    "--out",
    dest="out",
    type=str,
    default=None,
    help="outfile for results",
)
parser.add_argument(
    "--exclude",
    dest="exclude",
    type=lambda text: text.split(","),
    default=EXCLUDE,
    help="exclude file types",
)
if __name__ == "__main__":
    # Script entry point: gather per-filetype line counts for one author
    # in one repository, print them as JSON and optionally save them.
    args = vars(parser.parse_args())
    path = args["path"]
    author = args["author"]
    exclude = args["exclude"]
    out = args["out"]

    # the inputs are stored alongside the results in the report
    meta = {}
    meta["path"] = path
    meta["author"] = author
    meta["exclude"] = exclude

    # the git commands run in the current directory, so step into the
    # repository only while the stats are collected
    with Chdir(path):
        stats = author_stats_filetype(author, exclude=exclude)

    adds, dels = total_lines(stats)
    meta["types"] = stats
    meta["total"] = (adds, dels)

    s = json.dumps(meta, indent=2, sort_keys=True)
    if out is not None:
        # text mode: json.dumps returns str, which a binary-mode file
        # rejects on Python 3
        with open(out, "w") as outfile:
            outfile.write(s)
    # parenthesised so the line is valid on both Python 2 and Python 3
    print(s)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment