Use `import_logger.py` to "decorate" CPython's `import` statement and log the actual memory growth caused by each import to a file.
if True:
    # Installs a wrapper around the builtin `__import__` hook that records the
    # process memory before and after every import and appends one line per
    # import to a per-run log directory.  Written for Python 2 (`__builtin__`).
    print("===> installing import_logger_orverride")
    import os
    import psutil
    import pdb
    import pprint
    import __builtin__
    import logging
    import sys

    # setup the memory vars
    _this_process = psutil.Process(os.getpid())
    # The accessor is named differently across psutil versions: prefer
    # `memory_info` when it exists and fall back to the older
    # `get_memory_info` otherwise.
    _f_get_memory_info = getattr(_this_process, 'memory_info', None)
    if _f_get_memory_info is None:
        _f_get_memory_info = _this_process.get_memory_info
    # only the first field of the returned tuple is logged
    GET_MEMORY = lambda: _f_get_memory_info()[0]

    # set up the dirs
    # we'll log to `{CWD}/imports_parser/runs/{VERSION}` in which `VERSION` is 000, 001, 002, etc
    REPORTS_DIR_BASE = os.path.join("imports_parser", "runs")
    if not os.path.exists(REPORTS_DIR_BASE):
        os.makedirs(REPORTS_DIR_BASE)
    dirs = [i for i in os.listdir(REPORTS_DIR_BASE)
            if os.path.isdir(os.path.join(REPORTS_DIR_BASE, i))
            ]
    max_dirs = len(dirs)
    REPORTS_DIR_RUN = os.path.join(REPORTS_DIR_BASE, "%03d" % max_dirs)
    # If earlier runs were deleted or renamed, the count can point at a
    # directory that already exists; advance until the name is free instead of
    # letting `os.makedirs` fail.
    while os.path.exists(REPORTS_DIR_RUN):
        max_dirs += 1
        REPORTS_DIR_RUN = os.path.join(REPORTS_DIR_BASE, "%03d" % max_dirs)
    print("===- Logging to %s" % REPORTS_DIR_RUN)
    os.makedirs(REPORTS_DIR_RUN)

    # These stay open for the lifetime of the process: the hook writes to them
    # on every import.
    writer_success = open(os.path.join(REPORTS_DIR_RUN, 'imports.txt'), 'a')
    writer_error = open(os.path.join(REPORTS_DIR_RUN, 'errors.txt'), 'a')

    # we need this still
    realimport = __builtin__.__import__

    def _format_import_line(package_name, caller_file, mem_start, mem_finish):
        """Build one log line: `import|{name},{caller},{growth},{pre},{post}`."""
        return "import|%s,%s,%s,%s,%s\n" % (
            package_name, caller_file, mem_finish - mem_start,
            mem_start, mem_finish)

    # our override
    def import_logger_orverride(name, *args, **kwargs):
        """Replacement for `__import__` that logs memory growth per import.

        Delegates to the real `__import__` with the arguments unchanged and
        returns whatever it returns.  Successful imports are appended to
        `imports.txt`; `ImportError`s whose message starts with
        "No module named" are appended to `errors.txt`.  Every exception from
        the real import is re-raised untouched.
        """
        _mem_start = GET_MEMORY()
        _package_name = name
        if len(args) == 4:
            # Per the documented `__import__(name, globals, locals, fromlist,
            # level)` signature, args[2] is the fromlist when all arguments are
            # passed positionally.  Commas are replaced so the log line stays
            # comma-separated.
            _package_name = "%s.%s" % (name,
                                       str(args[2]).replace(',', '|'))
        _frame = sys._getframe(1)
        try:
            # `__file__` lives in the caller's module globals.  Looking it up
            # in `f_locals` only works for imports at module level; imports
            # made inside a function or class body would be logged as "<>".
            _caller_file = _frame.f_globals.get('__file__', "<>")
            try:
                _imported = realimport(name, *args, **kwargs)
            except ImportError as e:
                if e.message.startswith("No module named"):
                    writer_error.write(_format_import_line(
                        _package_name, _caller_file, _mem_start, GET_MEMORY()))
                raise
            else:
                writer_success.write(_format_import_line(
                    _package_name, _caller_file, _mem_start, GET_MEMORY()))
                return _imported
        finally:
            # drop the frame reference promptly so it cannot keep the caller's
            # frame alive
            del _frame

    # install the override
    __builtin__.__import__ = import_logger_orverride
    print("<=== import_logger_orverride installed")
import os
import pprint
import sys

# manually change this, or pass the run directory as the first CLI argument.
versions_dir = "runs/001/"

# written by `import_logger.py`
fname_imports = "imports.txt"
fname_errors = "errors.txt"

CSV_HEADER = ['idx', 'imported', 'caller', 'growth', 'pre', 'post', 'pct-growth', 'pct-overall']


def parse_import_rows(raw_lines):
    """Turn raw log lines into lists of string fields.

    Input lines look like `import|{imported},{caller},{growth},{pre},{post}`;
    lines without the `import|` prefix are skipped.  Each returned row is
    `[idx, imported, caller, growth, pre, post]`, where `idx` is the position
    of the line in `raw_lines` (so skipped lines leave gaps in the numbering).
    """
    rows = []
    for idx, line in enumerate(raw_lines):
        if not line.startswith('import|'):
            continue
        fields = [c.strip() for c in line[7:].split(',')]
        fields.insert(0, str(idx))
        rows.append(fields)
    return rows


def append_percentages(rows):
    """Append `pct_growth` and `pct_overall` string columns to each row in place.

    Both percentages are relative to column 4 (`pre`) of the last row.  An
    empty `rows` list is left untouched instead of raising `IndexError`.
    """
    if not rows:
        return
    # NOTE(review): index 4 is the `pre` column of the final row.  The original
    # comment calls this "the max"; the index may be a leftover from before the
    # `idx` column was inserted (when 4 was `post`).  Kept as-is so the
    # generated numbers do not change -- confirm the intent.
    maxxed = float(rows[-1][4])
    for row in rows:
        growth = float(row[3]) if row[3] else 0
        row.append(str((growth / maxxed) * 100) if growth else '0')
        row.append(str((float(row[5]) / maxxed) * 100))


def build_indexes(rows):
    """Group rows by caller, imported name, pre-memory and post-memory.

    Returns `(callers, bys, bypre, bypost)`:
    `callers` maps caller file -> set of imported names,
    `bys` maps imported name -> set of caller files,
    `bypre` / `bypost` map a memory value -> list of `(idx, pre, post)` tuples.
    """
    callers = {}
    bys = {}
    bypre = {}
    bypost = {}
    for row in rows:
        row_id = int(row[0])
        row_name = row[1]
        row_caller = row[2]
        row_pre = int(row[4])
        row_post = int(row[5])
        # plain dicts (not defaultdict) so `pprint.pformat` output is unchanged
        callers.setdefault(row_caller, set()).add(row_name)
        bys.setdefault(row_name, set()).add(row_caller)
        entry = (row_id, row_pre, row_post)
        bypre.setdefault(row_pre, []).append(entry)
        bypost.setdefault(row_post, []).append(entry)
    return callers, bys, bypre, bypost


def main(run_dir=versions_dir):
    """Read `imports.txt` from `run_dir` and write the derived report files.

    Writes `callers.txt`, `bys.txt` and `imports-processed.csv` into `run_dir`.
    """
    with open(os.path.join(run_dir, fname_imports)) as reader:
        raw_data = reader.readlines()

    # first pass, to python
    data_formatted = parse_import_rows(raw_data)
    # second pass, add the percentage columns
    append_percentages(data_formatted)
    # third pass, who imports what / what is imported by whom
    callers, bys, _bypre, _bypost = build_indexes(data_formatted)

    with open(os.path.join(run_dir, 'callers.txt'), 'w') as writer:
        writer.write(pprint.pformat(callers))
    with open(os.path.join(run_dir, 'bys.txt'), 'w') as writer:
        writer.write(pprint.pformat(bys))

    csv_lines = [','.join(row) for row in [CSV_HEADER] + data_formatted]
    with open(os.path.join(run_dir, 'imports-processed.csv'), 'w') as writer:
        writer.write('\n'.join(csv_lines))


if __name__ == "__main__":
    main(sys.argv[1] if len(sys.argv) > 1 else versions_dir)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment