diffcount.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import re | |
import configparser | |
from fnmatch import fnmatch | |
from unidiff import PatchSet | |
# File extensions (without the leading dot) of the patched files whose hunks
# are counted; filtered_hunks() joins these into a path-matching regex.
EXTS = ["py"]
class Opts:  # pylint: disable=too-few-public-methods
    """Process-wide options, read and overwritten directly on the class."""

    # Glob patterns; a patched file whose path matches any of them is skipped.
    exclude = []
    # When true, diagnostic lines are printed to stdout.
    debug = False
def filtered_hunks(fil):
    """Yield the hunks of every patched file in ``fil`` that should be counted.

    A patched file is kept only when its path ends in one of the ``EXTS``
    extensions and matches none of the ``Opts.exclude`` glob patterns.
    ``fil`` is handed unchanged to ``PatchSet``.
    """
    wanted = re.compile(".*[.](%s)$" % "|".join(EXTS))
    for patched_file in PatchSet(fil):
        path = patched_file.path
        if wanted.match(path) is None:
            continue
        if Opts.exclude and Opts.debug:
            print(">", path, "=~", Opts.exclude)
        if any(fnmatch(path, pattern) for pattern in Opts.exclude):
            continue
        yield from patched_file
class Typ:  # pylint: disable=too-few-public-methods
    """One-character tags naming the kind of a classified diff line."""

    WHITE = "w"  # blank line
    COMMENT = "#"  # line that holds only a comment
    DOCSTRING = "d"  # line treated as part of a triple-quoted string
    LINE = "."  # anything else; the only kind count_lines() tallies
def classify_lines(fil):
    """Yield ``(type, text)`` pairs for the changed lines of every selected hunk in ``fil``."""
    for selected in filtered_hunks(fil):
        for classified in classify_hunk(selected):
            yield classified
def classify_line(lval):
    """Classify one Python source line on its own, without any hunk context.

    Detection is best effort: it looks only at how the line starts.

    Args:
        lval: The line text; a trailing newline / carriage return is ignored.

    Returns:
        A ``(typ, slug, remaining)`` tuple:

        * ``(Typ.WHITE, None, "")`` for an empty or blank line.
        * ``(Typ.COMMENT, None, "")`` when the first non-blank character
          is ``#``.
        * ``(Typ.DOCSTRING, quote, rest)`` when the line starts with a
          triple quote; ``quote`` is the delimiter found and ``rest`` is the
          text following it.
        * ``(Typ.LINE, None, lval)`` for anything else.
    """
    lval = lval.rstrip("\n\r")
    # Indentation may be made of tabs as well as spaces, so every pattern
    # accepts both; matching spaces only would report tab-indented comments
    # and docstrings as code.
    if re.match(r"^[ \t]*$", lval):
        return Typ.WHITE, None, ""
    if re.match(r"^[ \t]*#", lval):
        return Typ.COMMENT, None, ""
    opener = re.match(r"^[ \t]*(\"\"\"|''')(.*)", lval)
    if opener:
        return Typ.DOCSTRING, opener[1], opener[2]
    return Typ.LINE, None, lval
def classify_hunk(hunk):
    """Classify the lines of a Python diff hunk, noting comments and docstrings.

    Every line of the hunk (context lines included) feeds the docstring /
    continuation tracking, but only non-context lines are yielded.

    Docstring detection is not guaranteed: a change in the middle of a large
    docstring has no opening quotes inside the hunk. Using ``ast`` would fix
    that, but cannot be done on a diff alone.

    Args:
        hunk: Iterable of line objects exposing ``.value`` (the line text)
            and ``.is_context``.

    Yields:
        ``(typ, text)`` for each non-context line, where ``typ`` is a ``Typ``
        tag and ``text`` is the line without its trailing newline.
    """
    prev_text = ""
    prev_typ = None
    pslug = None
    for line in hunk:
        lval = line.value.rstrip("\n\r")
        typ = Typ.LINE
        naive_typ, slug, remaining_lval = classify_line(lval)
        if prev_text.endswith("\\"):
            # Backslash continuation: this line belongs to the previous one.
            typ = prev_typ
        elif prev_typ != Typ.DOCSTRING and naive_typ == Typ.COMMENT:
            typ = naive_typ
        elif naive_typ == Typ.DOCSTRING:
            if prev_typ == Typ.DOCSTRING and pslug == slug:
                # Closing quotes of the open docstring; the remainder of the
                # line could have stuff on it, so classify that part.
                typ, _, _ = classify_line(remaining_lval)
            else:
                typ = Typ.DOCSTRING
                pslug = slug
        elif prev_typ == Typ.DOCSTRING:
            # Continue the docstring opened earlier in this hunk.
            typ = Typ.DOCSTRING
        prev_text = lval
        prev_typ = typ
        # A docstring line that ends with its own delimiter closes the
        # docstring. Search (not match) so the quotes are found after any
        # text, e.g. a one-line docstring or a final line ending in the
        # closing quotes.
        if (
            typ == Typ.DOCSTRING
            and pslug is not None
            and re.search(r"%s *$" % re.escape(pslug), remaining_lval)
        ):
            prev_typ = Typ.LINE
        if line.is_context:
            continue
        yield typ, lval
def count_lines(fil):
    """Return how many changed lines in the diff ``fil`` are classified as code.

    Only lines tagged ``Typ.LINE`` are tallied; added and removed lines weigh
    the same. Classification is best effort, so treat the total as an
    estimate. With ``Opts.debug`` set, every classified line is printed.
    """
    total = 0
    for kind, text in classify_lines(fil):
        if Opts.debug:
            print(kind, text)
        if kind != Typ.LINE:
            continue
        total += 1
    return total
def main():
    """Command-line entry point: read a diff on stdin and print its line count.

    Flags recognised anywhere in ``sys.argv``:

    * ``--debug``: print each classified line and the active exclude list.
    * ``--covrc``: seed the exclude list from the ``omit`` option of the
      ``[report]`` section of ``.coveragerc`` in the current directory.
    * ``--exclude PATTERN``: add a glob pattern to exclude; may be repeated.

    Exits with an error message when ``--exclude`` is the last argument and
    therefore has no pattern.
    """
    Opts.debug = "--debug" in sys.argv
    Opts.exclude = []

    if "--covrc" in sys.argv:
        config = configparser.ConfigParser()
        config.read(".coveragerc")
        # The file, the section or the option may be absent; fall back to an
        # empty string so there is simply nothing to exclude.
        omit = config.get("report", "omit", fallback="")
        Opts.exclude = [pat.strip() for pat in omit.split("\n") if pat.strip()]

    if sys.argv and sys.argv[-1] == "--exclude":
        sys.exit("--exclude requires a pattern argument")
    # Pair each argument with its successor to pick up every "--exclude X".
    for flag, value in zip(sys.argv, sys.argv[1:]):
        if flag == "--exclude":
            Opts.exclude.append(value)

    if Opts.debug and Opts.exclude:
        print("--exclude", Opts.exclude)
    print(count_lines(sys.stdin))
example = ''' | |
diff --git a/cryptvfs.py b/cryptvfs.py | |
index c68429cf6..ee90ecea8 100755 | |
--- a/cryptvfs.py | |
+++ b/cryptvfs.py | |
@@ -2,5 +2,17 @@ | |
from src.main import proc_entry | |
-if __name__ == "__main__": | |
- proc_entry() | |
+ | |
+ | |
+class Foo: | |
+ """some docstring | |
+ """ | |
+ # some comment | |
+ pass | |
+ | |
+class Bar: | |
+ """some docstring | |
+ """ | |
+ # some comment | |
+ def method(): | |
+ line1 + 1 | |
''' | |
def strio(s):
    """Wrap the string ``s`` in an in-memory text stream."""
    from io import StringIO

    return StringIO(s)
def test_basic():
    """count_lines() reports ten code lines for the sample diff."""
    sample = strio(example)
    total = count_lines(sample)
    assert total == 10
def test_main(capsys):
    """With no flags, main() prints the count for the sample diff."""
    sys.stdin = strio(example)
    sys.argv = []
    main()
    out = capsys.readouterr().out
    print(out)
    assert out == "10\n"
def test_debug(capsys):
    """With --debug, the output includes a line tagged as docstring."""
    sys.stdin = strio(example)
    sys.argv = ["--debug"]
    main()
    out = capsys.readouterr().out
    print(out)
    expected = Typ.DOCSTRING + ' """some docstring'
    assert expected in out
def test_exclude(capsys):
    """Excluding the only file in the sample diff leaves nothing to count."""
    sys.stdin = strio(example)
    sys.argv = ["--exclude", "cryptvfs.py"]
    main()
    out = capsys.readouterr().out
    print(out)
    assert out == "0\n"
def test_covrc(capsys):
    """With --covrc, the sample diff is still counted in full."""
    sys.stdin = strio(example)
    sys.argv = ["--covrc"]
    main()
    out = capsys.readouterr().out
    print(out)
    assert out == "10\n"
# Run the command-line entry point only when executed as a script, not when
# the module is imported (for example by a test runner).
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment