Skip to content

Instantly share code, notes, and snippets.

@kennyyu
Created June 12, 2014 20:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kennyyu/5e59519b3972823e20bf to your computer and use it in GitHub Desktop.
Save kennyyu/5e59519b3972823e20bf to your computer and use it in GitHub Desktop.
Script to count the number of lines written by an author in a Git repository.
import argparse
import json
import os
import subprocess
class Chdir:
    """
    Context manager that temporarily changes the working directory.

    On entry the current working directory is remembered and the process
    switches to ``newpath``; on exit the remembered directory is restored,
    whether the body finished normally or raised.

    Initializer/destructor pattern for cd taken from here:
    http://stackoverflow.com/questions/431684/how-do-i-cd-in-python
    """

    def __init__(self, newpath):
        """Stores the directory to switch to; nothing changes until entry."""
        self.newpath = newpath
        self.oldpath = None

    def __enter__(self):
        """Saves the current directory, switches to ``newpath``, returns self."""
        self.oldpath = os.getcwd()
        os.chdir(self.newpath)
        # Return the manager so `with Chdir(p) as cd:` binds something
        # useful instead of None.
        return self

    def __exit__(self, *err):
        """Switches back to the saved directory; exceptions are not suppressed."""
        os.chdir(self.oldpath)
def command(s):
    """
    Runs the shell command and returns the stdout as a list
    of strings.

    ``s`` is passed to the shell as-is (``shell=True``), so callers are
    responsible for quoting anything they interpolate into it. The
    command's stderr is discarded and its exit status is ignored.

    Each returned element is one output line with surrounding whitespace
    stripped. Elements are always text (``str``): when the interpreter
    hands back bytes they are decoded as UTF-8, with undecodable bytes
    replaced.
    """
    # The context manager closes the devnull handle once the child is done.
    with open(os.devnull, "w") as devnull:
        proc = subprocess.Popen(s,
                                stdout=subprocess.PIPE,
                                stderr=devnull,
                                shell=True)
        # communicate() drains stdout AND waits for the child, so neither
        # the pipe nor a zombie process is left behind.
        output = proc.communicate()[0]

    # The pipe yields bytes on Python 3; callers format the results back
    # into command strings, which needs text.
    if not isinstance(output, str):
        output = output.decode("utf-8", "replace")

    lines = output.split("\n")
    # A trailing newline (or no output at all) leaves one empty tail
    # element that does not correspond to a real output line.
    if lines[-1] == "":
        lines.pop()
    # strip trailing newline characters / surrounding whitespace
    return [line.strip() for line in lines]
def file_type(fname):
    """
    Given a file name (optionally a "/"-separated path), returns the
    file extension, or empty string if there is none.

    Only the last path component is inspected, so dots or keywords in
    directory names do not influence the result. Special cases:

    * names ending in "~" or starting with "#" -> "<TEMP>"
    * names containing "makefile" (any case)   -> "Makefile"
    * names containing "readme" (any case)     -> "README"

    A leading-dot name such as ".DS_Store" yields the text after the
    dot ("DS_Store"). An empty name, or a path ending in "/", yields "".
    """
    base = fname.rsplit("/", 1)[-1]
    if not base:
        return ""

    # handle special cases of tempfiles
    if base.endswith("~") or base.startswith("#"):
        return "<TEMP>"

    # handle special cases of Makefile and READMEs
    lowered = base.lower()
    if "makefile" in lowered:
        return "Makefile"
    if "readme" in lowered:
        return "README"

    # without a dot there is no extension; otherwise take the text after
    # the last dot
    _, dot, extension = base.rpartition(".")
    return extension if dot else ""
def commits_by_author(author):
    """
    Lists the commits that ``git log`` attributes to ``author``.

    Runs ``git log --oneline`` with an ``--author`` filter in the
    current working directory and returns the first space-delimited
    token of every output line, i.e. the commit hash as git printed it.
    """
    # NOTE(review): author is interpolated into a shell command string
    # without escaping; names containing quotes or shell metacharacters
    # will not survive intact.
    log_lines = command("git log --author=\"%s\" --oneline" % author)
    hashes = []
    for entry in log_lines:
        hashes.append(entry.split(" ")[0])
    return hashes
def _numstat_path(path):
    """Returns the destination path for git rename notation, else ``path`` unchanged."""
    arrow = " => "
    if arrow not in path:
        return path
    start = path.find("{")
    end = path.find("}", start + 1)
    if start != -1 and end != -1 and arrow in path[start:end]:
        # "dir/{old => new}/file": splice the new middle part between the
        # shared prefix and suffix. An empty side ("{sub => }") would
        # leave a doubled slash, so collapse it.
        new_middle = path[start + 1:end].split(arrow, 1)[1]
        return (path[:start] + new_middle + path[end + 1:]).replace("//", "/")
    # plain "old => new"
    return path.split(arrow, 1)[1]


def commit_stats(commit):
    """
    Returns a mapping of filename -> (additions, deletions)

    The numbers come from ``git show <commit> --oneline --numstat`` run
    in the current working directory. File names may contain spaces.
    Renames reported by git as ``old => new`` (optionally with a
    ``{old => new}`` segment) are recorded under the new path. Entries
    whose counts git prints as "-" are recorded as 0.

    Raises AssertionError when git produced no output at all, and
    ValueError when a stat line does not have the expected three fields
    or a count is not an integer.
    """
    lines = command("git show %s --oneline --numstat" % commit)
    assert len(lines) > 0, "git show produced no output for %r" % (commit,)

    # the first line is just a repeat of the hash, so skip it
    # stats are of the form:
    # additions deletions filename
    stats = {}
    for line in lines[1:]:
        line = line.strip()
        if line == "":
            continue
        # maxsplit=2 keeps any whitespace inside the file name intact
        addition, deletion, fname = line.split(None, 2)
        # git prints "-" instead of a number when it has no line counts
        # (it does this for binary files); count those as zero
        if addition == "-":
            addition = "0"
        if deletion == "-":
            deletion = "0"
        stats[_numstat_path(fname)] = (int(addition), int(deletion))
    return stats
def author_stats(author):
    """
    Sums the per-file line counts over every commit by ``author``.

    Returns a mapping of filename -> (total +, total -), where the
    totals accumulate the (additions, deletions) pairs that
    commit_stats() reports for each commit from commits_by_author().
    """
    totals = {}
    for sha in commits_by_author(author):
        for path, (plus, minus) in commit_stats(sha).items():
            # files not seen before start from a zero tally
            seen_plus, seen_minus = totals.get(path, (0, 0))
            totals[path] = (seen_plus + plus, seen_minus + minus)
    return totals
def author_stats_filetype(author, exclude=set()):
    """
    Groups an author's line counts by file type.

    Every file from author_stats() is classified with file_type();
    files whose type is contained in ``exclude`` are dropped, and the
    rest are summed per type.

    Returns a mapping of filetype -> (total +, total -)
    """
    # the shared default set is only ever read here, never modified
    per_file = author_stats(author)
    by_type = {}
    for path, counts in per_file.items():
        kind = file_type(path)
        if kind in exclude:
            continue
        plus, minus = counts
        seen_plus, seen_minus = by_type.get(kind, (0, 0))
        by_type[kind] = (seen_plus + plus, seen_minus + minus)
    return by_type
def total_lines(stats):
    """
    Adds up all the (+, -) pairs stored as values of ``stats``.

    Returns a tuple (total +, total -); an empty mapping gives (0, 0).
    """
    pairs = [(plus, minus) for plus, minus in stats.values()]
    all_plus = sum(plus for plus, _ in pairs)
    all_minus = sum(minus for _, minus in pairs)
    return all_plus, all_minus
# File types skipped by default. Entries are compared for exact equality
# with the type strings produced by file_type(), which is why both "jpg"
# and "JPG" are listed.
EXCLUDE = [
    "jpg", "pdf", "dat", "data", "csv", "xlsx", "tgz", "so", "png", "swp",
    "a", "expected", "out", "cproject", "project", "asm", "fish", "scish",
    "cish", "mlish", "<TEMP>", "gexf", "gephi", "ipynb", "cls", "JPG", "PNG",
    "nb", "xls", "classpath", "DS_Store", "class", "script", "names", "jar",
    "mat", "ppt", "pptx", "tif", "zip", "bmp", "eps", "crt", "csr", "key",
    "orig",
]

# Command line: two positionals (repo path, author) plus optional
# --out and --exclude; --exclude takes a comma-separated list that
# replaces the default EXCLUDE list.
parser = argparse.ArgumentParser("line count")
parser.add_argument("path", help="path to git repo", type=str)
parser.add_argument("author", help="author to count lines", type=str)
parser.add_argument(
    "--out",
    dest="out",
    default=None,
    type=str,
    help="outfile for results",
)
parser.add_argument(
    "--exclude",
    dest="exclude",
    default=EXCLUDE,
    type=lambda s: s.split(","),
    help="exclude file types",
)
# Script entry point: count the author's lines per file type inside the
# given repository, print the result as JSON and optionally save it.
if __name__ == "__main__":
    args = vars(parser.parse_args())
    path = args["path"]
    author = args["author"]
    exclude = args["exclude"]
    out = args["out"]

    meta = {}
    meta["path"] = path
    meta["author"] = author
    meta["exclude"] = exclude

    # the git commands run in the current working directory, so step
    # into the repository for the duration of the scan
    with Chdir(path):
        stats = author_stats_filetype(author, exclude=exclude)
        adds, dels = total_lines(stats)
    meta["types"] = stats
    meta["total"] = (adds, dels)

    s = json.dumps(meta, indent=2, sort_keys=True)
    if out is not None:
        # The file is opened in binary mode, so write bytes explicitly;
        # json.dumps escapes non-ASCII by default, which makes the
        # encoding lossless.
        with open(out, "wb") as outfile:
            outfile.write(s.encode("utf-8"))
    # parenthesised form prints the same text on Python 2 and Python 3
    print(s)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment