Created
October 24, 2016 16:20
-
-
Save jvanasco/5738702704cc556c47c9478691c93828 to your computer and use it in GitHub Desktop.
Use `import_logger.py` to "decorate" CPython's `import` statement and log the actual memory growth of each import to a file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
if True:
    print("===> installing import_logger_orverride")
    import os
    import psutil
    import pdb
    import pprint
    import __builtin__
    import logging
    import sys

    # setup the memory vars
    _this_process = psutil.Process(os.getpid())
    # psutil renamed `Process.get_memory_info()` to `Process.memory_info()`;
    # use whichever one the installed release provides.
    _f_get_memory_info = getattr(_this_process, 'memory_info', None)
    if _f_get_memory_info is None:
        _f_get_memory_info = _this_process.get_memory_info
    # element 0 of the returned tuple (the resident set size, per the psutil docs)
    GET_MEMORY = lambda: _f_get_memory_info()[0]

    # set up the dirs
    # we'll log to `{CWD}/imports_parser/runs/{VERSION}` in which `VERSION`
    # is 000, 001, etc
    REPORTS_DIR_BASE = os.path.join("imports_parser", "runs")
    if not os.path.exists(REPORTS_DIR_BASE):
        os.makedirs(REPORTS_DIR_BASE)
    dirs = [i for i in os.listdir(REPORTS_DIR_BASE)
            if os.path.isdir(os.path.join(REPORTS_DIR_BASE, i))
            ]
    # start from the number of existing run directories ...
    max_dirs = len(dirs)
    # ... but skip forward if that name is already taken (e.g. an earlier run
    # was deleted or a stray directory exists), otherwise `os.makedirs` below
    # would fail on the existing path.
    while os.path.exists(os.path.join(REPORTS_DIR_BASE, "%03d" % max_dirs)):
        max_dirs += 1
    REPORTS_DIR_RUN = os.path.join(REPORTS_DIR_BASE, "%03d" % max_dirs)
    print("===- Logging to %s" % REPORTS_DIR_RUN)
    os.makedirs(REPORTS_DIR_RUN)

    # these handles stay open on purpose: the import override writes to them
    # for as long as the process runs
    writer_success = open(os.path.join(REPORTS_DIR_RUN, 'imports.txt'), 'a')
    writer_error = open(os.path.join(REPORTS_DIR_RUN, 'errors.txt'), 'a')

    # we need this still: the override delegates the real work to it
    realimport = __builtin__.__import__
# our override
def import_logger_orverride(name, *args, **kwargs):
    """Replacement for `__import__` that logs memory use around each import.

    The process memory (via `GET_MEMORY`) is sampled before and after the
    real import, and one line is written in the form
    `import|{package},{caller},{growth},{pre},{post}`:

    * to `writer_success` when the import succeeds;
    * to `writer_error` when it fails with an `ImportError` whose message
      starts with "No module named".

    :param name: module name, as passed to `__import__`.
    :param args: remaining positional arguments, forwarded untouched.
    :param kwargs: keyword arguments, forwarded untouched.
    :returns: whatever the real `__import__` returns.
    :raises: any exception raised by the real import is re-raised unchanged.
    """
    _mem_start = GET_MEMORY()
    _package_name = name
    # `__import__(name, globals, locals, fromlist, level)`: when all four
    # optional arguments are passed positionally, args[2] is `fromlist`.
    # Commas are swapped for pipes so the value does not break the CSV line.
    if len(args) == 4:
        _package_name = "%s.%s" % (name,
                                   str(args[2]).replace(',', '|'))
    _frame = sys._getframe(1)
    try:
        # `__file__` is a module global, so read it from the caller's globals;
        # looking in `f_locals` only works for module-level imports and would
        # report "<>" for every import executed inside a function body.
        _caller_file = _frame.f_globals.get('__file__', "<>")
    finally:
        # drop the frame reference promptly so it cannot keep the caller's
        # frame (and everything it references) alive
        del _frame
    _template = "import|%s,%s,%s,%s,%s\n"
    try:
        _imported = realimport(name, *args, **kwargs)
    except ImportError as e:
        # only "module not found" failures are logged; everything is re-raised
        if str(e).startswith("No module named"):
            _mem_finish = GET_MEMORY()
            _mem_growth = _mem_finish - _mem_start
            writer_error.write(_template % (_package_name, _caller_file,
                                            _mem_growth, _mem_start,
                                            _mem_finish))
        raise
    _mem_finish = GET_MEMORY()
    _mem_growth = _mem_finish - _mem_start
    writer_success.write(_template % (_package_name, _caller_file,
                                      _mem_growth, _mem_start, _mem_finish))
    return _imported
# install the override
# from here on, every `import` in this process is routed through
# `import_logger_orverride`, which delegates to the saved original
setattr(__builtin__, '__import__', import_logger_orverride)
print("<=== import_logger_orverride installed")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pprint | |
# manually change this, because I'm lazy.
versions_dir = "runs/001/"

# written by `import_logger.py`
fname_imports = "imports.txt"
fname_errors = "errors.txt"

# read the whole log up front and close the handle right away
with open(os.path.join(versions_dir, fname_imports)) as _fh:
    raw_data = _fh.readlines()

# first pass, to python
data_formatted = []
for (idx, row) in enumerate(raw_data):
    # input is """import|{imported},{caller},{growth},{pre},{post}"""
    if not row.startswith('import|'):
        continue
    # drop the 7-character "import|" prefix, then split the CSV fields
    vs = [c.strip() for c in row[7:].split(',')]
    # output is """{idx},{imported},{caller},{growth},{pre},{post}"""
    # (`idx` is the line's position in the raw file, skipped lines included)
    vs.insert(0, str(idx))
    data_formatted.append(vs)
# second pass, calculate the max
# After `idx` was inserted as column 0, `post` lives in column 5 (column 4 is
# `pre`). The denominator is the largest `post` value seen across all rows;
# an empty log yields 0.0 instead of raising IndexError.
maxxed = max([float(_r[5]) for _r in data_formatted] or [0.0])
for row in data_formatted:
    # input is """{idx},{imported},{caller},{growth},{pre},{post}"""
    # output is """{idx},{imported},{caller},{growth},{pre},{post},{pct_growth},{pct_overall}"""
    as_percent_growth = '0'
    _growth = float(row[3]) if row[3] else 0
    # guard the division: a zero denominator reports '0' rather than crashing
    if _growth and maxxed:
        as_percent_growth = str((_growth / maxxed) * 100)
    row.append(as_percent_growth)
    if maxxed:
        as_percent_overall = str((float(row[5]) / maxxed) * 100)
    else:
        as_percent_overall = '0'
    row.append(as_percent_overall)
# okay now let's try and figure out the level
# (the first three names are initialised here but not used further in this
# part of the script)
current_max_mem = 0
current_recursion = 0
seen = {}

# lookup tables filled by the loop below:
#   bypre / bypost : memory value -> list of (idx, pre, post) tuples
#   callers        : caller file  -> set of names it imported
#   bys            : imported name -> set of caller files
bypre = {}
bypost = {}
callers = {}
bys = {}
for row in data_formatted:
    row_id = int(row[0])
    row_name, row_caller = row[1], row[2]
    row_growth = int(row[3]) if row[3] else 0
    row_pre = int(row[4])
    row_post = int(row[5])

    # who imported what, in both directions
    callers.setdefault(row_caller, set()).add(row_name)
    bys.setdefault(row_name, set()).add(row_caller)

    # index the row by its memory reading before and after the import
    bypre.setdefault(row_pre, []).append((row_id, row_pre, row_post))
    bypost.setdefault(row_post, []).append((row_id, row_pre, row_post))
# dump the caller -> imported-names mapping; `with` makes sure the data is
# flushed and the handle closed
with open(os.path.join(versions_dir, 'callers.txt'), 'w') as _out:
    _out.write(pprint.pformat(callers))

# dump the imported-name -> callers mapping
with open(os.path.join(versions_dir, 'bys.txt'), 'w') as _out:
    _out.write(pprint.pformat(bys))

# prepend the header row, flatten every row to a CSV line and write the report
data_formatted.insert(0, ['idx', 'imported', 'caller', 'growth', 'pre', 'post', 'pct-growth', 'pct-overall'])
data_formatted = [','.join(row) for row in data_formatted]
with open(os.path.join(versions_dir, 'imports-processed.csv'), 'w') as _out:
    _out.write('\n'.join(data_formatted))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment