# Source: GitHub gist (last active Dec 18, 2015).
# Python program to examine Python codebases and count various kinds of
# objects-being-iterated-over.
# Usage: cd into a directory containing some .py files, then
# python path/to/<this_script>.py
from __future__ import division
import sys, os, ast, collections
# Directory names that the file search refuses to descend into: any
# directory whose name is listed here is pruned from the os.walk() traversal.
BLACKLIST = ['test', 'tests', '_tests', '.git', '.hg', '_virtualenv']
def target_files(dir):
    """Yield the paths of the Python source files found under *dir*.

    The tree rooted at *dir* is traversed with os.walk().  Directories whose
    name appears in the module-level BLACKLIST are pruned from the traversal,
    and the joined path of every file whose name ends in '.py' is yielded.
    """
    # NOTE(review): the previous condition also contained
    # `and not f.endswith('')`.  Every string ends with the empty string, so
    # that clause was always False and the generator never yielded anything.
    # The suffix it was meant to exclude cannot be recovered from this file
    # (presumably this script's own file name -- confirm), so the dead clause
    # has been dropped.
    for dirpath, dirnames, filenames in os.walk(dir):
        # Slice assignment mutates the list in place; os.walk() only honours
        # pruning performed on the very list object it handed out.
        dirnames[:] = [d for d in dirnames if d not in BLACKLIST]
        for f in filenames:
            if f.endswith('.py'):
                yield os.path.join(dirpath, f)
def find_loop_iterables(tree):
    """Yield every expression that is iterated over somewhere in *tree*.

    *tree* is an AST node (typically the result of ast.parse()).  All of its
    descendants are visited with ast.walk(), and the generator yields:

    * the ``iter`` expression of each ``for`` statement, and
    * the ``iter`` expression of each ``for`` clause of a list, set or dict
      comprehension or of a generator expression (one per clause, so a
      comprehension with two ``for`` clauses contributes two expressions).

    The order of the results follows ast.walk(), which makes no particular
    ordering guarantee.
    """
    # Set comprehensions and generator expressions carry the same
    # `generators` list as list/dict comprehensions; they were previously
    # left out, so their iterables were never reported.
    comprehension_types = (
        ast.ListComp,
        ast.SetComp,
        ast.DictComp,
        ast.GeneratorExp,
    )
    for node in ast.walk(tree):
        if isinstance(node, ast.For):
            yield node.iter
        elif isinstance(node, comprehension_types):
            for g in node.generators:
                yield g.iter
def _is_string_literal(node):
    """Tell whether AST *node* is a string literal on this Python version."""
    if sys.version_info >= (3, 8):
        # Since 3.8 the parser emits ast.Constant for literals; ast.Str is
        # deprecated and has been removed from recent releases.
        return isinstance(node, ast.Constant) and isinstance(node.value, str)
    return isinstance(node, ast.Str)


def _classify_iterable(it):
    """Return the one-letter category code for the iterable AST node *it*.

    Codes:
        R  call to range(...)
        E  call to enumerate(...)
        K  call to a .keys() / .iterkeys() method
        V  call to a .values() / .itervalues() method
        I  call to a .items() / .iteritems() method
        N  bare name
        A  attribute access
        L  list, tuple, set or string literal
        O  anything else

    NOTE(review): the branch structure leading to "O" was lost from the
    original text; it is treated here as the catch-all for every expression
    not matched above (including calls to other functions) -- confirm.
    """
    if isinstance(it, ast.Call):
        func = it.func
        if isinstance(func, ast.Name):
            if func.id == "range":
                return "R"
            if func.id == "enumerate":
                return "E"
        elif isinstance(func, ast.Attribute):
            if func.attr in ("keys", "iterkeys"):
                return "K"
            if func.attr in ("values", "itervalues"):
                return "V"
            if func.attr in ("items", "iteritems"):
                return "I"
    elif isinstance(it, ast.Name):
        return "N"
    elif isinstance(it, ast.Attribute):
        return "A"
    elif isinstance(it, (ast.List, ast.Tuple, ast.Set)) or _is_string_literal(it):
        return "L"
    return "O"


def main():
    """Scan the .py files under the current directory and report loop stats.

    For every file produced by target_files('.') the source is parsed, each
    iterable found by find_loop_iterables() is classified with a one-letter
    code and printed together with its file name, line number and source
    line.  After all files are processed, the per-code counts and
    percentages, the overall total, and the number of loops per 1000 lines
    of code are printed.
    """
    counts = collections.defaultdict(int)
    loc = 0
    for filename in target_files('.'):
        with open(filename) as f:
            src = f.read()
        lines = src.splitlines()
        loc += len(lines)
        # NOTE(review): the body of this condition was lost; skipping files
        # whose first line mentions python3 when running under Python 2 is
        # the presumed intent -- confirm.
        if sys.version_info < (3, 0) and lines and 'python3' in lines[0]:
            continue
        try:
            parse_tree = ast.parse(src, filename)
        except SyntaxError:
            # Best effort: report the unparsable file and keep going.
            print("*** error parsing " + filename)
            continue

        for it in find_loop_iterables(parse_tree):
            code = _classify_iterable(it)
            counts[code] += 1
            print('{0} {1}:{2}:{3}'.format(code, filename, it.lineno, lines[it.lineno-1]))

    total = sum(counts.values())
    # When nothing was counted this loop body never runs, so `n / total`
    # cannot divide by zero.
    for t, n in sorted(counts.items()):
        print("Type {0}: {1} ({2:.1%})".format(t, n, n / total))
    print("total: {0}".format(total))
    # Guard against an empty scan (no files, or only empty files).
    density = 1000 * total / loc if loc else 0.0
    print("for loops per 1000 lines of code: {0}".format(density))


# The usage notes at the top of the file say to run this file as a script;
# without this guard nothing ever invoked main().
if __name__ == "__main__":
    main()
# (End of gist; GitHub sign-up / comment footer omitted.)