Skip to content

Instantly share code, notes, and snippets.

@evmar
Created August 6, 2010 20:52
Show Gist options
  • Save evmar/511981 to your computer and use it in GitHub Desktop.
Save evmar/511981 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import fileinput
import re
import sys
from pprint import pprint
def parse(input):
    """Parse a log of gcc -H runs.

    The log is expected to interleave compiler command lines (which must
    contain the -H flag and end with the source file name) with the
    include-trace lines printed because of -H: a run of periods giving the
    nesting depth, one space, then the path of the included file.  Lines
    matching neither pattern are ignored.

    Arguments:
      input: an iterable of log lines (for example fileinput.input()).

    Yields:
      Nodes of the form [filename, [included child nodes]], one per
      compilation line.  A node is yielded once the next compilation
      line (or the end of the input) is reached.

    Raises:
      ValueError: if an include line appears before any compilation
        line, or if an include line is nested more than one level deeper
        than the include line before it.
    """
    # Regex that matches a compilation line: expects the -H flag and
    # that the last argument is the source file.
    compile_re = re.compile(r'^.* -H .*?(\S+)$')

    # Regex that matches a gcc -H debug line: a line starts with a
    # series of periods indicating the nesting depth, then the
    # included file.
    include_re = re.compile(r'^(\.+) (\S+)$')

    # stack[d] is the most recent node seen at nesting depth d; stack[0]
    # is the source file itself.  None until the first compilation line.
    stack = None

    for line in input:
        match = compile_re.match(line)
        if match:
            if stack is not None:
                yield stack[0]
            # Reset the stack for the new file.
            stack = [[match.group(1), []]]

        match = include_re.match(line)
        if not match:
            continue

        if stack is None:
            raise ValueError(
                "Include line before any compilation line: "
                + line.rstrip('\n'))

        # The regex requires at least one period, so level >= 1.
        level = len(match.group(1))
        if level > len(stack):
            # A string cannot be raised; use a real exception type.
            raise ValueError(
                "Unexpected nesting level on " + line.rstrip('\n'))

        new_node = [match.group(2), []]
        # Drop this depth and everything deeper so that stale nodes from
        # an earlier, deeper branch can never be picked up as a parent.
        del stack[level:]
        stack[level - 1][1].append(new_node)
        stack.append(new_node)

    # Output last entry, if any.
    if stack is not None:
        yield stack[0]
def accumulate(file, tree, log):
    """Count the includes reachable from one file and record them.

    Arguments:
      file: name of the file whose includes are being counted.
      tree: list of [filename, children] nodes included by file, in the
        shape produced by parse().
      log: list that receives a (filename, total) pair for file and for
        every visited descendant whose total is greater than zero.
        Descendants are appended before their parent.

    Returns:
      The number of files included, directly or indirectly, from file.
      Always 0 when file lives under /usr.
    """
    # System headers are treated as leaves: nothing below them is
    # counted or logged.
    if file.startswith('/usr'):
        return 0

    # Each direct include counts once, plus whatever it pulls in itself.
    count = sum(
        accumulate(name, subtree, log) + 1 for name, subtree in tree)

    if count > 0:
        log.append((file, count))
    return count
class FileStats:
    """Pair of running counters kept for one file.

    Both counters start at zero.  Instances accept further attributes
    assigned after construction.
    """

    def __init__(self):
        # includes: running sum of include totals.
        # instances: number of times a total was added.
        self.includes = self.instances = 0
# Main script: read the gcc -H log named on the command line (or stdin),
# print the include total of every source file, then print a per-file
# summary aggregated over all source files.

# Maps a file name to the FileStats accumulated for it across the log.
allstats = {}
for file, tree in parse(fileinput.input()):
    if 'third_party' in file:
        continue
    log = []
    total = accumulate(file, tree, log)
    # Single pre-formatted argument: prints "<total> <file>" under both
    # the Python 2 print statement and the Python 3 print function.
    print("%s %s" % (total, file))
    # Use a distinct name so the outer loop's `file` is not rebound.
    for name, includes in log:
        if name not in allstats:
            allstats[name] = FileStats()
        stats = allstats[name]
        stats.includes += includes
        stats.instances += 1

# Average number of includes per source file in which each file appeared.
for stats in allstats.values():
    stats.fraction = stats.includes / float(stats.instances)

# Ascending by total includes.  A key function gives the same (stable)
# order as the old cmp-based sort and is accepted by Python 3.
results = sorted(allstats.items(), key=lambda item: item[1].includes)

for file, stats in results:
    if file.startswith('/usr'):
        continue
    # Only report files that were seen in more than one compilation.
    if stats.instances > 1:
        print("%s: %d total in %d source files (%.1f/file)" %
              (file, stats.includes, stats.instances, stats.fraction))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment