@julian-carpenter, forked from wchargin/extract_scalars.py (last active February 6, 2018)
Extract scalars to a pandas CSV using the TensorBoard event multiplexer API
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import glob
import re
import pandas as pd
import tensorflow as tf
from tensorboard.backend.event_processing import plugin_event_multiplexer as event_multiplexer # noqa
# Control downsampling: how many scalar data do we keep for each run/tag
# combination?
SIZE_GUIDANCE = {'scalars': 2000}
def extract_scalars(multiplexer, run, tag):
    '''
    Extract tabular data from the scalars at a given run and tag.
    The result is a list of 2-tuples (step, value).
    '''
    tensor_events = multiplexer.Tensors(run, tag)
    return [(event.step, tf.make_ndarray(event.tensor_proto).item())
            for event in tensor_events]
def create_multiplexer(logdir):
    multiplexer = event_multiplexer.EventMultiplexer(
        tensor_size_guidance=SIZE_GUIDANCE)
    multiplexer.AddRunsFromDirectory(logdir)
    multiplexer.Reload()
    return multiplexer
NON_ALPHABETIC = re.compile('[^A-Za-z0-9_]')


def munge_filename(name):
    '''Remove characters that might not be safe in a filename.
    (Not used by main() below, but handy for per-tag output files.)'''
    return NON_ALPHABETIC.sub('_', name)
def main():
    tag_names = ['accuracy', 'precision']
    logdir = 'logs'
    output_dir = 'logs/results'
    run_names = glob.glob(os.path.join(logdir, '*', '*eval*'), recursive=True)
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)
    print('Loading data...')
    multiplexer = create_multiplexer(logdir)
    data_frame = pd.DataFrame(columns=tag_names + ['run'])
    for run_name in run_names:
        # Reset per run so that a missing tag does not reuse data from the
        # previous run and so `indexes` is always defined.
        dict_data = {}
        indexes = []
        for tag_name in tag_names:
            try:
                events = extract_scalars(
                    multiplexer, run_name[len(logdir) + 1:], tag_name)
                indexes = [step for step, _ in events]
                dict_data[tag_name] = [value for _, value in events]
                # Strip the logdir prefix and the last five characters of
                # the run path to get the run label stored in the CSV.
                dict_data['run'] = run_name[len(logdir) + 1:-5]
            except KeyError:
                # This tag is not present in this run; skip it.
                pass
        tmp_frame = pd.DataFrame(dict_data, index=indexes)
        data_frame = pd.concat((data_frame, tmp_frame), axis=0, join='outer')
    data_frame.to_csv(os.path.join(output_dir, 'scalars.csv'))
    print('Done.')


if __name__ == '__main__':
    tf.logging.set_verbosity(tf.logging.ERROR)
    main()
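
For reference, a minimal sketch of using the same helpers interactively; the run name 'my_run/eval' and the tag 'accuracy' are placeholders for illustration, not values taken from this gist:

# Hypothetical run/tag names, assuming event files live under ./logs.
multiplexer = create_multiplexer('logs')
print(multiplexer.Runs())  # maps each run name to the data available for it
for step, value in extract_scalars(multiplexer, 'my_run/eval', 'accuracy'):
    print(step, value)

# The CSV written by main() can be read back with the step as the index:
df = pd.read_csv('logs/results/scalars.csv', index_col=0)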