Last active
December 18, 2015 08:49
-
-
Save jorendorff/5757221 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Python program to examine Python codebases and count various kinds of
# objects-being-iterated-over.
#
# Usage: cd into a directory containing some .py files, then
#     python path/to/for-loop.py
from __future__ import division | |
import sys, os, ast, collections | |
# Directory names that target_files() prunes from its walk.
BLACKLIST = [
    'test',
    'tests',
    '_tests',
    '.git',
    '.hg',
    '_virtualenv',
]
def target_files(dir):
    """Yield the paths of Python source files found beneath ``dir``.

    Directories whose names appear in BLACKLIST are pruned from the walk,
    and files whose names end in 'Tests.py' are left out.
    """
    for root, subdirs, names in os.walk(dir):
        # Assign through the slice so os.walk sees the pruned list and never
        # descends into a blacklisted directory.
        subdirs[:] = [sub for sub in subdirs if sub not in BLACKLIST]
        for name in names:
            if not name.endswith('.py') or name.endswith('Tests.py'):
                continue
            yield os.path.join(root, name)
def find_loop_iterables(tree):
    """Within an AST, find expressions iterated over in for-loops.

    Yields the ``iter`` expression of every ``for`` statement and of every
    ``for`` clause of a list, set or dict comprehension or generator
    expression reachable from ``tree``, in ``ast.walk`` order.
    """
    # Every comprehension flavour stores its ``for`` clauses in ``generators``;
    # set comprehensions and generator expressions iterate just like the
    # list/dict forms, so they are included as well.
    comprehension_types = (
        ast.ListComp,
        ast.SetComp,
        ast.DictComp,
        ast.GeneratorExp,
    )
    for node in ast.walk(tree):
        if isinstance(node, ast.For):
            yield node.iter
        elif isinstance(node, comprehension_types):
            for clause in node.generators:
                yield clause.iter
def _is_string_literal(node):
    """Return True if ``node`` is a string-literal expression."""
    if sys.version_info >= (3, 8):
        # From 3.8 on string literals parse as ast.Constant; ast.Str is
        # deprecated there and no longer exists in Python 3.14.
        return isinstance(node, ast.Constant) and isinstance(node.value, str)
    return isinstance(node, ast.Str)


def _classify_iterable(it):
    """Return the one-letter category for a loop iterable, or None.

    Codes: R ``range(...)``, E ``enumerate(...)``, K ``.keys()/.iterkeys()``,
    V ``.values()/.itervalues()``, I ``.items()/.iteritems()``, N bare name,
    A attribute access, L list/tuple/set/string literal, O anything else.
    """
    if isinstance(it, ast.Call):
        func = it.func
        if isinstance(func, ast.Name):
            if func.id == "range":
                return "R"
            if func.id == "enumerate":
                return "E"
        elif isinstance(func, ast.Attribute):
            if func.attr in ("keys", "iterkeys"):
                return "K"
            if func.attr in ("values", "itervalues"):
                return "V"
            if func.attr in ("items", "iteritems"):
                return "I"
        # NOTE(review): any other call is left uncounted, which mirrors the
        # original control flow -- confirm that this is intended rather than
        # a missing "O" fallback.
        return None
    if isinstance(it, ast.Name):
        return "N"
    if isinstance(it, ast.Attribute):
        return "A"
    if isinstance(it, (ast.List, ast.Tuple, ast.Set)) or _is_string_literal(it):
        return "L"
    return "O"


def main():
    """Classify every loop iterable under the current directory and report.

    For each file produced by ``target_files('.')`` the source is parsed and
    each iterable from ``find_loop_iterables`` is classified.  One line is
    printed per counted iterable (code, file, line number, source line),
    followed by per-category totals with percentages and the number of
    counted loops per 1000 lines of analysed code.  Files that cannot be
    read or parsed are reported and skipped.
    """
    counts = collections.defaultdict(int)
    loc = 0
    for filename in target_files('.'):
        try:
            with open(filename) as f:
                src = f.read()
            lines = src.splitlines()
            # A Python 2 interpreter cannot parse files that declare
            # themselves python3 on their first line, so skip them quietly.
            if sys.version_info < (3, 0) and lines and 'python3' in lines[0]:
                continue
            parse_tree = ast.parse(src, filename)
        except Exception:
            # Not a bare except: Ctrl-C and SystemExit must still propagate.
            print("*** error parsing " + filename)
            continue

        # Only lines of files that were actually analysed belong in the
        # denominator of the loops-per-1000-lines figure.
        loc += len(lines)

        for it in find_loop_iterables(parse_tree):
            code = _classify_iterable(it)
            if code is None:
                continue
            counts[code] += 1
            print('{0} {1}:{2}:{3}'.format(code, filename, it.lineno, lines[it.lineno - 1]))

    total = sum(counts.values())
    # print("") yields a blank line on Python 2 as well; a bare print() there
    # (without print_function) would output "()".
    print("")
    for t, n in sorted(counts.items()):
        print("Type {0}: {1} ({2:.1%})".format(t, n, n / total))
    print("total: {0}".format(total))
    if loc:
        print("for loops per 1000 lines of code: {0}".format(1000 * total / loc))
    else:
        # Nothing was analysed; avoid dividing by zero.
        print("for loops per 1000 lines of code: n/a (no lines analysed)")
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment