Skip to content

Instantly share code, notes, and snippets.

@rhowardiv
Last active October 28, 2016 18:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rhowardiv/82ad4b9e788de5ca89922a277fec734a to your computer and use it in GitHub Desktop.
Save rhowardiv/82ad4b9e788de5ca89922a277fec734a to your computer and use it in GitHub Desktop.
Count lines in python functions
#!/usr/bin/env python
"""
Count lines in python functions
"""
import re
def funcs_lengths(lines):
    r"""
    Loop through lines and yield (line length, function name) pairs.

    ``lines`` is any iterable of source lines. If it also offers an
    ``isfirstline()`` method (as ``fileinput.input()`` does), each new
    file terminates whatever function was still open in the previous one.

    Blank lines, ``#`` comment lines, lines that are part of a
    triple-double-quoted string and lines holding only a single closing
    bracket or quote character are not counted. The ``def`` line itself
    is counted.

    Nested funcs are ignored.
    Does not count decorator lines.

    Because I try to ignore lines comprising only triple-quoted strings,
    and I'm not actually parsing them, I can be fooled by unexpected
    usage of triple quotes. Typical usage should be fine.

    Example invocation:

        find . -type f -name '*.py' \
            | egrep -v 'virtualenv|node_modules|/tests/' \
            | xargs pyfunclen \
            | egrep -v '\b(routes|upgrade|downgrade|api_routes_v2)\b' \
            | sort -n | vim -

    Example of finding average function length:

        find . -type f -name '*.py' \
            | egrep -v 'virtualenv|node_modules|/tests/' \
            | xargs pyfunclen \
            | egrep -v '\b(routes|upgrade|downgrade|api_routes_v2)\b' \
            | awk '{n++;sum+=$1} END {print n ? sum/n : 0}'
    """
    start_re = re.compile(
        r'(?P<leadingws>\s*)(async)?\s*def\s+(?P<funcname>[^\s(]+)')
    doc_1_line_re = re.compile(r'^""".*"""$')
    close_bracket_re = re.compile(r"""^[)\]}"']$""")

    # Only fileinput-like objects can report file boundaries; plain
    # iterables (lists, open files) are treated as a single file.
    isfirstline = getattr(lines, 'isfirstline', None)

    length = 0
    current_func = None
    current_indent = 0
    in_comment = False

    for line in lines:
        # Handle the file boundary before any line is skipped, so a new
        # file starting with a comment or docstring still closes the
        # function left open at the end of the previous file.
        if isfirstline is not None and isfirstline():
            if current_func:
                yield length, current_func
                current_func = None
            in_comment = False

        stripped = line.strip()
        if stripped == '' or stripped.startswith('#'):
            continue
        if doc_1_line_re.match(stripped):
            continue
        # An odd number of triple quotes opens or closes a multi-line
        # string; an even number leaves the state unchanged, so a line
        # like `x = """a""" + b` is counted as ordinary code.
        if stripped.count('"""') % 2:
            in_comment = not in_comment
            continue
        if in_comment:
            continue
        if close_bracket_re.match(stripped):
            continue

        if current_func:
            # The function ends at the first counted line indented no
            # deeper than its `def` (same level OR any dedent).
            expanded = line.expandtabs()
            indent = len(expanded) - len(expanded.lstrip())
            if indent <= current_indent:
                yield length, current_func
                current_func = None
            else:
                length += 1

        # Not an `else`: the line that ended one function may itself be
        # the `def` line of the next one.
        if not current_func:
            m = start_re.match(line)
            if m:
                current_func = m.group('funcname')
                current_indent = len(m.group('leadingws').expandtabs())
                length = 1

    # The input may end while a function is still open.
    if current_func:
        yield length, current_func
if __name__ == '__main__':
    def main():
        """
        Print "<length> <function name>" for every function found in the
        files named on the command line (or stdin when none are given).
        """
        import fileinput
        for length, funcname in funcs_lengths(fileinput.input()):
            # Single formatted argument: same output under the Python 2
            # print statement and the Python 3 print function.
            print('{} {}'.format(length, funcname))
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment