Skip to content

Instantly share code, notes, and snippets.

@ericjang
Created July 27, 2020 16:25
Show Gist options
  • Save ericjang/8f8f9aaec79e24c3915d98249ba6ae3b to your computer and use it in GitHub Desktop.
Save ericjang/8f8f9aaec79e24c3915d98249ba6ae3b to your computer and use it in GitHub Desktop.
"""
A lightweight experiment logbook for Jupyter/Colab-style ad hoc experiments.
Let's say you generate a plot with Matplotlib and want to re-run your notebook with a
different set of configurations and then compare the resulting plot to the one you saved (to see
if the new configuration is better).
# Saving experiments (select a project first; savefig needs a project name).
elog.set_project('myproject')
f = plt.gcf()
elog.savefig(f,
             exp='mymodel_accuracy_moredata',
             train_set='1997-2015',
             eval_set='2016-2017',
             outcome='unknown',
             description='Trained on 1997-2015, eval on 2016-2017.')
# retrieving experiments
f = elog.getfig('mymodel_accuracy_moredata')
# Rendering inline table (with inline figure images) in Jupyter/Colab.
elog.html(elog._df)
"""
import os
import pickle
import datetime
import pandas as pd
import glob
from typing import Text
from IPython.core.display import HTML
# Every project lives in its own sub-directory of the root directory. The
# root directory has to be on the same path as the Jupyter server, otherwise
# the inline HTML images will not render correctly.
_root_dir = 'experiment_logs'
_default_values = {'root_dir': _root_dir}
# Restore the experiment table persisted under the root directory when one
# exists; otherwise start with an empty table.
_df_path = os.path.join(_default_values['root_dir'], 'experiment_df')
_df = pd.read_pickle(_df_path) if os.path.exists(_df_path) else pd.DataFrame()
def set_project(project_name: Text):
    """Select the project that subsequent calls operate on.

    Args:
      project_name: Stored under the 'project_name' key of the module-level
        defaults, from which savefig and getfig build their directory paths.
    """
    _default_values.update(project_name=project_name)
def savefig(fig, exp: Text, **kwargs):
    """Save a figure, a PNG thumbnail and one metadata row for an experiment.

    The figure is pickled to
    ``<root_dir>/<project_name>/<YYYYMMDD>/<exp>_<unix time>.pkl`` and
    rendered next to it as a ``.png``. One row holding the defaults, the
    keyword metadata, a UTC ``timestamp`` and the ``image`` path is appended
    to the experiment table, which is re-read from and written back to
    ``<root_dir>/experiment_df``.

    Args:
      fig: Figure-like object. It must be picklable and provide
        ``savefig(path, bbox_inches=...)``.
      exp: Experiment name. A Unix-time suffix is always appended, so saving
        the same name again does not overwrite earlier files (unless both
        saves happen within the same second).
      **kwargs: Metadata columns for this row. ``root_dir`` and
        ``project_name`` override the defaults and stay in effect for later
        calls; every other key applies to this row only.

    Raises:
      KeyError: If no project name is available (neither ``set_project`` was
        called nor ``project_name=`` was passed).
    """
    global _df
    # Only the location settings are remembered between calls. Everything
    # else goes into a per-call copy so that one experiment's metadata
    # (description, outcome, timestamp, image, ...) cannot leak into the
    # rows of later experiments.
    for key in ('root_dir', 'project_name'):
        if key in kwargs:
            _default_values[key] = kwargs[key]
    d = dict(_default_values)
    d.update(kwargs)
    if 'project_name' not in d:
        raise KeyError(
            'project_name is not set; call set_project() or pass project_name=')

    now = datetime.datetime.now()
    dirname = os.path.join(d['root_dir'], d['project_name'],
                           now.strftime('%Y%m%d'))
    os.makedirs(dirname, exist_ok=True)
    # Always timestamp the file name so repeated runs get distinct files.
    exp += '_%d' % int(now.timestamp())

    # Save the figure object itself (closing the file deterministically).
    path = os.path.join(dirname, exp + '.pkl')
    with open(path, 'wb') as fh:
        pickle.dump(fig, fh)

    # Save a rasterized image for convenient inline display.
    image_path = os.path.join(dirname, exp + '.png')
    fig.savefig(image_path, bbox_inches='tight')

    # Naive UTC timestamp, equivalent to the deprecated datetime.utcnow(), so
    # new rows stay comparable with rows written by earlier versions.
    d['timestamp'] = datetime.datetime.now(
        datetime.timezone.utc).replace(tzinfo=None)
    d['image'] = image_path
    # A list holding one dict always yields exactly one row, even when a
    # metadata value is itself a list.
    row = pd.DataFrame([d])

    # Reload the table from disk so rows written by other sessions survive.
    df_path = os.path.join(d['root_dir'], 'experiment_df')
    if os.path.exists(df_path):
        _df = pd.read_pickle(df_path)
    # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
    _df = row if len(_df) == 0 else pd.concat([_df, row])
    _df.to_pickle(df_path)
def undo(n=1):
    """Drop the last *n* experiments from the log and persist the result.

    Only rows of the in-memory experiment table are removed (and the table
    is written back to ``<root_dir>/experiment_df``); the saved ``.pkl`` and
    ``.png`` files are left on disk.

    Args:
      n: Number of most recent rows to remove. ``0`` is a no-op; values
        larger than the table simply empty it.

    Raises:
      ValueError: If ``n`` is negative.
    """
    global _df
    if n < 0:
        raise ValueError('n must be non-negative, got %r' % (n,))
    # head(-0) is head(0), which would wipe the whole log, so n == 0 must be
    # handled explicitly. An empty log has nothing to undo either.
    if n == 0 or len(_df) == 0:
        return
    _df = _df.iloc[:max(len(_df) - n, 0)]
    df_path = os.path.join(_default_values['root_dir'], 'experiment_df')
    _df.to_pickle(df_path)
def path_to_image_html(path):
    """Return an ``<img>`` tag (200 wide) whose source is *path*.

    The path is made relative to the current working directory. A warning
    is printed when the image lies outside of it, i.e. the relative path
    starts by climbing to a parent directory.

    Args:
      path: Path of the image file.

    Returns:
      The HTML markup as a string.
    """
    # Imported locally: at module level the name `html` refers to this
    # module's own html() function.
    from html import escape

    relpath = os.path.relpath(path)
    # Compare the first path component as a whole so that file names which
    # merely begin with two dots (e.g. '..cache.png') do not trigger the
    # warning.
    if relpath.split(os.sep)[0] == os.pardir:
        print('Warning: Jupyter server needs to be started in parent to the elog root dir.')
    # Escape the path so quotes or ampersands cannot break the attribute.
    return '<img src="' + escape(relpath, quote=True) + '" width="200" >'
def html(df):
    """Build an inline HTML view of an experiment table.

    Args:
      df: DataFrame of experiments; values of its 'image' column are turned
        into ``<img>`` tags by path_to_image_html.

    Returns:
      An IPython ``HTML`` object wrapping the rendered table.
    """
    image_formatter = {'image': path_to_image_html}
    # escape=False keeps the generated <img> tags as markup instead of text.
    table_markup = df.to_html(escape=False, formatters=image_formatter)
    return HTML(table_markup)
def getfig(f, exp: Text = None, **kwargs):
    """Load a previously saved figure from the experiment log.

    Both ``getfig('name')`` and the older two-argument ``getfig(f, 'name')``
    form are accepted; in the two-argument form ``f`` is ignored.

    ``exp`` matches files named exactly ``<exp>.pkl`` as well as
    ``<exp>_<digits>.pkl``, the timestamped form written by savefig, in any
    date directory of the project.

    Args:
      f: Ignored, unless ``exp`` is omitted, in which case it is taken as the
        experiment name.
      exp: Experiment name (glob wildcards are passed through for the exact
        ``<exp>.pkl`` form).
      **kwargs: Lookup options for this call:
        ``latest`` - if truthy, pick the last match (sorted by path) when
          several exist;
        ``date`` - restrict the search to one ``YYYYMMDD`` directory.
        ``root_dir`` and ``project_name`` override the defaults and stay in
        effect for later calls.

    Returns:
      The unpickled figure, or None (after printing a message) when the
      project or experiment cannot be found or the match is ambiguous.

    Raises:
      TypeError: If no experiment name is given.
    """
    if exp is None:
        f, exp = None, f
    if exp is None:
        raise TypeError('getfig() requires an experiment name')

    # Only the location settings are remembered between calls; options such
    # as latest= or date= must not stick to later lookups.
    for key in ('root_dir', 'project_name'):
        if key in kwargs:
            _default_values[key] = kwargs[key]
    d = dict(_default_values)
    d.update(kwargs)

    project_dir = os.path.join(d['root_dir'], d['project_name'])
    if not os.path.exists(project_dir):
        print('Project %s does not exist' % d['project_name'])
        return None

    # Scan one date directory if requested, otherwise all of them.
    date_dir = d.get('date', '*')
    found = set(glob.glob(os.path.join(project_dir, date_dir, exp + '.pkl')))
    # Also accept '<exp>_<digits>.pkl'. The suffix must be purely numeric so
    # that a different experiment sharing the prefix is not picked up.
    prefix = exp + '_'
    for candidate in glob.glob(
            os.path.join(project_dir, date_dir, prefix + '*.pkl')):
        stem = os.path.basename(candidate)[:-len('.pkl')]
        if stem.startswith(prefix) and stem[len(prefix):].isdigit():
            found.add(candidate)
    matches = sorted(found)

    if not matches:
        print('Could not find project.')
        return None
    if len(matches) > 1 and not d.get('latest'):
        print('Multiple matching experiments. Specify date= attribute.')
        for p in matches:
            print(p)
        return None

    path = matches[-1]
    print(path)
    # NOTE: unpickling executes code embedded in the file; only load
    # experiment logs that you (or someone you trust) wrote.
    with open(path, 'rb') as fh:
        return pickle.load(fh)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment