froydnj/includebloat.py

## includebloat.py
#!/usr/bin/env python

import collections
import os
import re
import subprocess
import sys

# Matches a continuation line of a make-style dependency rule after trailing
# whitespace has been stripped: a leading space, anything, a final backslash.
_CONTINUATION_LINE = re.compile(r'^ .*\\$')
# Matches that final backslash together with the spaces in front of it.
_TRAILING_BACKSLASH = re.compile(r' *\\$')


def find_pp_files(directory):
    """Return the paths of every ``*.pp`` entry that `find` reports.

    Raises subprocess.CalledProcessError if `find` exits with an error
    (for example when *directory* does not exist).
    """
    # universal_newlines=True yields text on both Python 2 and Python 3, so
    # the output can be split on '\n' without a bytes/str mismatch.
    find_output = subprocess.check_output(
        ['find', directory, '-name', '*.pp'], universal_newlines=True)
    return [path for path in find_output.split('\n') if path]


def count_includes(directory):
    """Count how often each header is listed in the .pp files of *directory*.

    Returns a mapping from canonical (realpath) header path to the number of
    times it appears.  Only lines that start with a space and end with a
    backslash are considered, so the first line of a rule is ignored.
    """
    # NOTE(review): the final dependency line of a rule has no trailing
    # backslash and is therefore not counted either -- confirm whether that
    # is intended before changing the pattern.
    includes = collections.defaultdict(int)
    for pp_file in find_pp_files(directory):
        # One dirname gets us the .deps/ directory.
        # Two dirnames gets us the actual directory for the object file.
        pp_file_dir = os.path.dirname(os.path.dirname(pp_file))
        with open(pp_file, 'r') as f:
            for dep_line in f:
                dep_line = dep_line.rstrip()
                if not _CONTINUATION_LINE.search(dep_line):
                    continue
                dep_line = _TRAILING_BACKSLASH.sub('', dep_line.lstrip())
                for dep in dep_line.split():
                    # Paths are resolved relative to the object directory.
                    canon = os.path.realpath(os.path.join(pp_file_dir, dep))
                    includes[canon] += 1
    return includes


def measure_includes(includes):
    """Return ``(sizes, totals, totalsize)`` for a header -> count mapping.

    sizes maps each header to its size in bytes, totals maps it to
    size * count, and totalsize is the sum of all totals.  Headers that
    cannot be stat'ed are reported on stderr and left out, so that a single
    stale dependency entry does not abort the whole report.
    """
    sizes = {}
    totals = {}
    totalsize = 0
    for include, count in includes.items():
        try:
            size = os.stat(include).st_size
        except OSError as err:
            sys.stderr.write("warning: skipping %s: %s\n" % (include, err))
            continue
        sizes[include] = size
        totals[include] = size * count
        totalsize += totals[include]
    return sizes, totals, totalsize


def main(argv=None):
    """Print the include-bloat report for the directory named in argv[1].

    Returns 0 on success and 2 when the directory argument is missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        prog = argv[0] if argv else 'includebloat.py'
        sys.stderr.write("usage: %s DIRECTORY\n" % prog)
        return 2

    includes = count_includes(argv[1])
    sizes, totals, totalsize = measure_includes(includes)

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("sum total bytes included: %s" % totalsize)
    for include, size in totals.items():
        print("%s %s %s %s"
              % (size, sizes[include], includes[include], include))
    return 0


if __name__ == "__main__":
    sys.exit(main())
	#!/usr/bin/env python

	import collections
	import os
	import re
	import subprocess
	import sys

	# Report how many bytes of headers are pulled in according to the *.pp
	# dependency files found below the directory given as the first argument.
	if len(sys.argv) < 2:
	    sys.exit("usage: %s DIRECTORY" % sys.argv[0])
	directory = sys.argv[1]

	# universal_newlines=True yields text on both Python 2 and Python 3, so
	# the output can be split on '\n' without a bytes/str mismatch.
	find_output = subprocess.check_output(['find', directory, '-name', '*.pp'],
	                                      universal_newlines=True)

	# Canonical header path -> number of times it is listed.
	includes = collections.defaultdict(int)

	for pp_file in find_output.split('\n'):
	    if not pp_file:
	        continue
	    # One dirname gets us the .deps/ directory.
	    # Two dirnames gets us the actual directory for the object file.
	    pp_file_dir = os.path.dirname(pp_file)
	    pp_file_dir = os.path.dirname(pp_file_dir)
	    with open(pp_file, 'r') as f:
	        for dep_line in f:
	            dep_line = dep_line.rstrip()
	            # Only continuation lines (leading space, trailing backslash)
	            # are counted.
	            if re.search('^ .*\\\\$', dep_line):
	                dep_line = dep_line.lstrip()
	                dep_line = re.sub(' *\\\\$', "", dep_line)
	                for dep in dep_line.split():
	                    # Resolve relative to the object file's directory.
	                    canon = os.path.realpath(os.path.join(pp_file_dir, dep))
	                    includes[canon] += 1

	# Header -> size in bytes, and header -> size * occurrence count.
	sizes = collections.defaultdict(int)
	totals = collections.defaultdict(int)

	totalsize = 0

	for include, count in includes.items():
	    try:
	        size = os.stat(include).st_size
	    except OSError as err:
	        # A stale dependency entry should not abort the whole report.
	        sys.stderr.write("warning: skipping %s: %s\n" % (include, err))
	        continue
	    sizes[include] = size
	    totals[include] += size * count
	    totalsize += totals[include]

	# Single-argument print(...) produces the same output on Python 2 and 3.
	print("sum total bytes included: %s" % totalsize)
	for include, size in totals.items():
	    print("%s %s %s %s" % (size, sizes[include], includes[include], include))