Skip to content

Instantly share code, notes, and snippets.

@earonesty
Last active December 21, 2020 13:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save earonesty/f76dec337ee64c5ae23c2be1557535a4 to your computer and use it in GitHub Desktop.
Save earonesty/f76dec337ee64c5ae23c2be1557535a4 to your computer and use it in GitHub Desktop.
diffcount.py
#!/usr/bin/env python
import sys
import re
import configparser
from fnmatch import fnmatch
from unidiff import PatchSet
# File extensions (without the leading dot) that a patched path must end in
# for its hunks to be considered at all.
EXTS = ["py"]
class Opts:  # pylint: disable=too-few-public-methods
    """Process-wide settings, kept as plain class attributes."""

    # fnmatch-style patterns; a patch whose path matches any of them is skipped
    exclude: list = []
    # when true, diagnostic lines are printed while the diff is processed
    debug: bool = False
def filtered_hunks(fil):
    """Yield the hunks of every patch in *fil* that survives the path filters.

    A patch is kept when its path ends in one of the ``EXTS`` extensions and
    matches none of the fnmatch patterns in ``Opts.exclude``.
    """
    wanted_path = ".*[.](%s)$" % "|".join(EXTS)
    for file_patch in PatchSet(fil):
        target = file_patch.path
        if re.match(wanted_path, target) is None:
            continue
        if Opts.exclude:
            if Opts.debug:
                print(">", target, "=~", Opts.exclude)
            if any(fnmatch(target, pattern) for pattern in Opts.exclude):
                continue
        # Iterating a patch produces its hunks.
        yield from file_patch
class Typ:  # pylint: disable=too-few-public-methods
    """One-character tags for the kinds of line the classifier tells apart."""

    WHITE = "w"  # empty or spaces-only text
    DOCSTRING = "d"  # line belonging to a triple-quoted docstring
    COMMENT = "#"  # line holding nothing but a comment
    LINE = "."  # everything else
def classify_lines(fil):
    """Yield the classification of every line, hunk by hunk, of the diff *fil*.

    Only hunks that pass ``filtered_hunks`` are visited; each one is handed to
    ``classify_hunk`` and its results are passed through unchanged.
    """
    for hunk in filtered_hunks(fil):
        for classified in classify_hunk(hunk):
            yield classified
def classify_line(lval):
    """Classify a single Python source line, looked at in isolation.

    Best effort only: the line is inspected on its own, so a line in the
    middle of a docstring is not recognised here (the caller tracks that).

    Leading indentation may be spaces or tabs.

    Returns a ``(type, slug, rest)`` tuple:

    * ``(Typ.WHITE, None, "")`` -- empty or whitespace-only line.
    * ``(Typ.COMMENT, None, "")`` -- line holding only a ``#`` comment.
    * ``(Typ.DOCSTRING, slug, rest)`` -- line starting with a triple quote;
      *slug* is the delimiter found and *rest* the text following it.
    * ``(Typ.LINE, None, line)`` -- anything else; *line* is the input with
      trailing newline characters removed.
    """
    lval = lval.rstrip("\n\r")
    # [ \t] instead of a bare space: tab-indented sources must classify the
    # same way as space-indented ones.
    if re.match(r"^[ \t]*$", lval):
        return Typ.WHITE, None, ""
    if re.match(r"^[ \t]*#", lval):
        return Typ.COMMENT, None, ""
    opening = re.match(r"^[ \t]*(\"\"\"|''')(.*)", lval)
    if opening:
        return Typ.DOCSTRING, opening[1], opening[2]
    return Typ.LINE, None, lval
def classify_hunk(hunk):
    """Classify the changed lines of one Python diff hunk.

    Every line of *hunk* is examined so that comment/docstring state carries
    across context lines, but only non-context lines are yielded, each as a
    ``(type, text)`` pair where *text* has its trailing newline removed.

    Notes:

    * A line following one that ends in a backslash inherits that line's type.
    * A docstring is considered closed when its delimiter ends a line, which
      covers one-line docstrings (open and close on the same line) as well as
      closing delimiters that follow text.
    * Blank lines outside a docstring are reported as ``Typ.LINE``.
    * Docstring detection is not guaranteed: a change in the middle of a
      large docstring has no visible start in the hunk.  Using ``ast`` would
      fix that, but seems like overkill and cannot be done on a bare diff.
    """
    prev_text = ""
    # Only ever compared against Typ.DOCSTRING before the first assignment.
    prev_typ = Typ.LINE
    pslug = None
    for line in hunk:
        lval = line.value.rstrip("\n\r")
        naive_typ, slug, remaining_lval = classify_line(lval)
        if prev_text.endswith("\\"):
            # Backslash continuation: same kind as the line being continued.
            typ = prev_typ
        elif naive_typ == Typ.COMMENT and prev_typ != Typ.DOCSTRING:
            typ = Typ.COMMENT
        elif naive_typ == Typ.DOCSTRING:
            if prev_typ == Typ.DOCSTRING and pslug == slug:
                # Closing delimiter of the open docstring; whatever follows
                # it on the same line decides the type.
                typ, _, _ = classify_line(remaining_lval)
            else:
                typ = Typ.DOCSTRING
                pslug = slug
        elif prev_typ == Typ.DOCSTRING:
            # Still inside a docstring opened earlier in this hunk.
            typ = Typ.DOCSTRING
        else:
            typ = Typ.LINE
        prev_text = lval
        prev_typ = typ
        # re.search, not re.match: the delimiter has to be found at the END
        # of the remaining text, otherwise '"""summary"""' never closes and
        # the code after it is swallowed as docstring.
        if typ == Typ.DOCSTRING and re.search(r"(%s) *$" % pslug, remaining_lval):
            prev_typ = Typ.LINE
        if not line.is_context:
            yield typ, lval
def count_lines(fil):
    """Return how many changed lines in the diff *fil* are tagged ``Typ.LINE``.

    Additions and deletions weigh the same.  Comments and docstrings are
    left out on a best-effort basis, so treat the result as an estimate.
    When ``Opts.debug`` is set, every classified line is printed with its tag.
    """
    total = 0
    for kind, text in classify_lines(fil):
        if Opts.debug:
            print(kind, text)
        if kind != Typ.LINE:
            continue
        total += 1
    return total
def main():
    """Command-line entry point: read a diff from stdin and print its count.

    Arguments are looked up anywhere in ``sys.argv``:

    * ``--debug`` -- print diagnostics while processing.
    * ``--covrc`` -- seed the exclude list from the ``omit`` option of the
      ``[report]`` section of ``.coveragerc`` in the current directory.  A
      missing file, section or option simply yields no patterns.
    * ``--exclude PATTERN`` -- add an fnmatch pattern; may be repeated.

    Raises:
        SystemExit: if ``--exclude`` is the last argument and so has no
            pattern following it.
    """
    argv = sys.argv
    Opts.debug = "--debug" in argv
    Opts.exclude = []
    if "--covrc" in argv:
        config = configparser.ConfigParser()
        config.read(".coveragerc")
        # fallback="" keeps the value a string when the file, the section or
        # the option is absent, so the split below cannot fail.
        omit = config.get("report", "omit", fallback="")
        Opts.exclude = [pat.strip() for pat in omit.split("\n") if pat.strip()]
    for i, arg in enumerate(argv):
        if arg != "--exclude":
            continue
        if i + 1 >= len(argv):
            sys.exit("--exclude requires a pattern argument")
        Opts.exclude.append(argv[i + 1])
    if Opts.debug and Opts.exclude:
        print("--exclude", Opts.exclude)
    print(count_lines(sys.stdin))
# Sample git diff that the tests below feed to the counter in place of real
# stdin input.
example = '''
diff --git a/cryptvfs.py b/cryptvfs.py
index c68429cf6..ee90ecea8 100755
--- a/cryptvfs.py
+++ b/cryptvfs.py
@@ -2,5 +2,17 @@
from src.main import proc_entry
-if __name__ == "__main__":
- proc_entry()
+
+
+class Foo:
+ """some docstring
+ """
+ # some comment
+ pass
+
+class Bar:
+ """some docstring
+ """
+ # some comment
+ def method():
+ line1 + 1
'''
def strio(s):
    """Wrap the string *s* in an in-memory text stream and return it."""
    from io import StringIO

    return StringIO(s)
def test_basic():
    """Counting the sample diff directly gives ten lines."""
    diff_stream = strio(example)
    assert count_lines(diff_stream) == 10
def test_main(capsys):
    """With no arguments, main() prints the count for the diff on stdin."""
    sys.stdin = strio(example)
    sys.argv = []
    main()
    captured = capsys.readouterr()
    # Echo the captured text so it shows up in the report of a failing run.
    print(captured.out)
    assert captured.out == "10\n"
def test_debug(capsys):
    """--debug output includes the docstring line prefixed with its tag."""
    sys.stdin = strio(example)
    sys.argv = ["--debug"]
    main()
    captured = capsys.readouterr()
    # Echo the captured text so it shows up in the report of a failing run.
    print(captured.out)
    expected_fragment = Typ.DOCSTRING + ' """some docstring'
    assert expected_fragment in captured.out
def test_exclude(capsys):
    """Excluding the only file in the sample diff leaves nothing to count."""
    sys.stdin = strio(example)
    sys.argv = ["--exclude", "cryptvfs.py"]
    main()
    captured = capsys.readouterr()
    # Echo the captured text so it shows up in the report of a failing run.
    print(captured.out)
    assert captured.out == "0\n"
def test_covrc(capsys):
    """With --covrc, the sample diff is still expected to count ten lines."""
    sys.stdin = strio(example)
    sys.argv = ["--covrc"]
    main()
    captured = capsys.readouterr()
    # Echo the captured text so it shows up in the report of a failing run.
    print(captured.out)
    assert captured.out == "10\n"
# Run the counter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment