Skip to content

Instantly share code, notes, and snippets.

@jvanasco
Created October 24, 2016 16:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jvanasco/5738702704cc556c47c9478691c93828 to your computer and use it in GitHub Desktop.
Save jvanasco/5738702704cc556c47c9478691c93828 to your computer and use it in GitHub Desktop.
Use `import_logger.py` to "decorate" CPython's `import` statement and log the actual memory growth of each import to a file.
if True:
    print("===> installing import_logger_orverride")
    import os
    import psutil
    import pdb
    import pprint
    import __builtin__
    import logging
    import sys

    # setup the memory vars
    _this_process = psutil.Process(os.getpid())
    # psutil renamed `get_memory_info()` to `memory_info()`; newer releases
    # only ship the new name, older ones only the old one.  Prefer the new
    # name and fall back to the legacy one so either version works.
    _f_get_memory_info = (getattr(_this_process, 'memory_info', None)
                          or _this_process.get_memory_info)

    def GET_MEMORY():
        """Return the first field of this process' psutil memory info."""
        return _f_get_memory_info()[0]

    # set up the dirs
    # we'll log to `{CWD}/imports_parser/runs/{VERSION}` in which `VERSION` is 001, 002, etc
    REPORTS_DIR_BASE = os.path.join("imports_parser", "runs")
    if not os.path.exists(REPORTS_DIR_BASE):
        os.makedirs(REPORTS_DIR_BASE)
    dirs = [i for i in os.listdir(REPORTS_DIR_BASE)
            if os.path.isdir(os.path.join(REPORTS_DIR_BASE, i))
            ]
    # Start numbering at the count of existing run directories, but skip
    # forward past any number that is already taken (e.g. after an older run
    # was deleted) so `os.makedirs` below cannot fail on an existing path.
    max_dirs = len(dirs)
    while os.path.exists(os.path.join(REPORTS_DIR_BASE, "%03d" % max_dirs)):
        max_dirs += 1
    REPORTS_DIR_RUN = os.path.join(REPORTS_DIR_BASE, "%03d" % max_dirs)
    print("===- Logging to %s" % REPORTS_DIR_RUN)
    os.makedirs(REPORTS_DIR_RUN)

    # These writers stay open for the life of the process: the import hook
    # appends one line to them for every import it sees.
    writer_success = open(os.path.join(REPORTS_DIR_RUN, 'imports.txt'), 'a')
    writer_error = open(os.path.join(REPORTS_DIR_RUN, 'errors.txt'), 'a')

    # we need this still
    # (the hook delegates to the original `__import__` saved here)
    realimport = __builtin__.__import__
# our override
def _write_import_line(writer, package_name, caller_file, mem_start):
    """Sample memory now and append one ``import|...`` record to *writer*."""
    mem_finish = GET_MEMORY()
    writer.write("import|%s,%s,%s,%s,%s\n" % (
        package_name, caller_file, mem_finish - mem_start, mem_start, mem_finish))


def import_logger_orverride(name, *args, **kwargs):
    """Stand-in for ``__import__`` that records memory growth per import.

    The call is delegated unchanged to the saved ``realimport``.  One record
    is written per call, in the form::

        import|{imported},{caller},{growth},{pre},{post}

    Successful imports are written to ``writer_success``.  A failed import is
    written to ``writer_error`` only when it is an ``ImportError`` whose
    message starts with "No module named".  Every exception is re-raised
    untouched, so callers see exactly what the real import raised.

    :param name: module name, as passed to ``__import__``.
    :param args: remaining positional ``__import__`` arguments.
    :param kwargs: keyword ``__import__`` arguments, passed through.
    :returns: whatever ``realimport`` returns.
    :raises: anything ``realimport`` raises.
    """
    _mem_start = GET_MEMORY()

    _package_name = name
    if len(args) == 4:
        # With the `__import__(name, globals, locals, fromlist, level)`
        # signature, args[2] is the fromlist.  Commas in its repr are swapped
        # for '|' so the record stays comma-separated.
        # NOTE(review): calls passing only three extra positional arguments
        # (no level) are logged without the fromlist -- confirm intended.
        _package_name = "%s.%s" % (name,
                                   str(args[2]).replace(',', '|'))

    # `__file__` is a module global, so read it from the calling frame's
    # globals: looking in f_locals only works when the import statement runs
    # at module level and reports "<>" for imports inside functions.
    # No reference to the frame is kept, so there is nothing to clean up.
    _caller_file = sys._getframe(1).f_globals.get('__file__', "<>")

    try:
        _imported = realimport(name, *args, **kwargs)
    except ImportError as e:
        # `e.message` is Python-2-only and deprecated; args[0] carries the
        # same text without coercing it to `str`.
        _message = e.args[0] if e.args else ""
        if hasattr(_message, "startswith") and _message.startswith("No module named"):
            _write_import_line(writer_error, _package_name, _caller_file, _mem_start)
        raise

    _write_import_line(writer_success, _package_name, _caller_file, _mem_start)
    return _imported
# Swap the interpreter-wide `__import__` for the logging wrapper, so every
# import statement executed from here on goes through it.
setattr(__builtin__, '__import__', import_logger_orverride)
print("<=== import_logger_orverride installed")
import os
import pprint
# manually change this, because I'm lazy.
# (directory of the run to analyse; the report files are written back into it)
versions_dir = "runs/001/"

# written by `import_logger.py`
fname_imports = "imports.txt"
fname_errors = "errors.txt"

# Load the whole import log into memory; the `with` block closes the handle
# as soon as the lines have been read instead of leaving it to the GC.
with open(os.path.join(versions_dir, fname_imports)) as _imports_file:
    raw_data = _imports_file.readlines()
# First pass: turn each logged line into a list of string fields.
#   input  is """import|{imported},{caller},{growth},{pre},{post}"""
#   output is [{idx}, {imported}, {caller}, {growth}, {pre}, {post}]
# `idx` is the line's position in the raw file; lines that do not start with
# the 'import|' marker are dropped.
data_formatted = [
    [str(idx)] + [c.strip() for c in row[7:].split(',')]
    for (idx, row) in enumerate(raw_data)
    if row.startswith('import|')
]
# second pass, calculate the max
# The baseline is the `pre` column of the last logged row.  An empty log used
# to raise IndexError here and a zero baseline raised ZeroDivisionError in
# the loop; both now fall back to '0' percentages.
# NOTE(review): this is the last row's `pre`, not a true maximum over all
# rows -- confirm that is the intended denominator.
maxxed = float(data_formatted[-1][4]) if data_formatted else 0.0
for row in data_formatted:
    # input is """{idx},{imported},{caller},{growth},{pre},{post}"""
    # output is """{idx},{imported},{caller},{growth},{pre},{post},{pct_growth},{pct_overall}"""
    _growth = float(row[3]) if row[3] else 0
    as_percent_growth = '0'
    if _growth and maxxed:
        as_percent_growth = str((_growth / maxxed) * 100)
    row.append(as_percent_growth)
    as_percent_overall = '0'
    if maxxed:
        as_percent_overall = str((float(row[5]) / maxxed) * 100)
    row.append(as_percent_overall)
# Third pass: build lookup tables over the parsed rows.
#   callers : caller file   -> set of names it imported
#   bys     : imported name -> set of caller files that imported it
#   bypre   : `pre` value   -> list of (idx, pre, post) rows starting there
#   bypost  : `post` value  -> list of (idx, pre, post) rows ending there
current_max_mem = 0
current_recursion = 0
seen = {}
bypre = {}
bypost = {}
callers = {}
bys = {}
for row in data_formatted:
    row_id = int(row[0])
    row_name, row_caller = row[1], row[2]
    # an empty growth field counts as no growth
    row_growth = int(row[3]) if row[3] else 0
    row_pre = int(row[4])
    row_post = int(row[5])

    callers.setdefault(row_caller, set()).add(row_name)
    bys.setdefault(row_name, set()).add(row_caller)

    _span = (row_id, row_pre, row_post)
    bypre.setdefault(row_pre, []).append(_span)
    bypost.setdefault(row_post, []).append(_span)
# Write the reports next to the input log.  Each file is opened in a `with`
# block so it is flushed and closed deterministically, and paths are built
# with os.path.join so a trailing slash on `versions_dir` does not matter.
with open(os.path.join(versions_dir, 'callers.txt'), 'w') as _out:
    _out.write(pprint.pformat(callers))
with open(os.path.join(versions_dir, 'bys.txt'), 'w') as _out:
    _out.write(pprint.pformat(bys))

# Prepend the header row, then flatten every row to one comma-separated line.
data_formatted.insert(0, ['idx', 'imported', 'caller', 'growth', 'pre', 'post', 'pct-growth', 'pct-overall'])
data_formatted = [','.join(row) for row in data_formatted]
with open(os.path.join(versions_dir, 'imports-processed.csv'), 'w') as _out:
    _out.write('\n'.join(data_formatted))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment