Skip to content

Instantly share code, notes, and snippets.

@lucastheis
Last active December 17, 2015 07:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lucastheis/5571182 to your computer and use it in GitHub Desktop.
Save lucastheis/5571182 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""
Manage and display experimental results.
"""
__license__ = 'MIT License <http://www.opensource.org/licenses/mit-license.php>'
__author__ = 'Lucas Theis <lucas@theis.io>'
__docformat__ = 'epytext'
__version__ = '0.4.3'
import sys
import os
import numpy
import scipy
import socket
sys.path.append('./code')
from argparse import ArgumentParser
from pickle import Unpickler, dump
from subprocess import Popen, PIPE
from os import path
from warnings import warn
from time import time, strftime, localtime
from numpy import random, ceil, argsort
from numpy.random import rand, randint
from distutils.version import StrictVersion
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
from httplib import HTTPConnection
from getopt import getopt
from StringIO import StringIO
class Experiment:
    """
    Records the context and the results of one experimental run.

    Created without a filename, a new experiment is started: everything
    printed to the console is captured and information about the script, the
    machine and the git repository in the current directory is collected.
    Created with a filename, a previously saved experiment is loaded instead.
    Results are read and written with C{experiment[key]}.

    @type  time: float
    @ivar  time: time at initialization of experiment

    @type  duration: float
    @ivar  duration: time in seconds between initialization and saving

    @type  script: string
    @ivar  script: stores the content of the main Python script

    @type  stdout: StringIO
    @ivar  stdout: keeps a copy of everything printed to stdout during experiment

    @type  stderr: StringIO
    @ivar  stderr: keeps a copy of everything printed to stderr during experiment

    @type  platform: string
    @ivar  platform: information about operating system

    @type  processors: string
    @ivar  processors: some information about the processors

    @type  environ: dictionary
    @ivar  environ: copy of the environment variables at point of initialization

    @type  hostname: string
    @ivar  hostname: hostname of server running the experiment

    @type  cwd: string
    @ivar  cwd: working directory at execution time

    @type  comment: string
    @ivar  comment: a comment describing the experiment

    @type  results: dictionary
    @ivar  results: container to store experimental results

    @type  commit: string
    @ivar  commit: git commit hash

    @type  modified: boolean
    @ivar  modified: indicates uncommited changes

    @type  filename: string
    @ivar  filename: path to stored results

    @type  seed: int
    @ivar  seed: random seed used through the experiment

    @type  versions: dictionary
    @ivar  versions: versions of Python, numpy and scipy
    """

    def __str__(self):
        """
        Summarize information about the experiment.

        @rtype: string
        @return: summary of the experiment
        """

        strl = []

        # date and duration of experiment
        strl.append(strftime('date \t\t %a, %d %b %Y %H:%M:%S', localtime(self.time)))
        strl.append('duration \t ' + str(int(self.duration)) + 's')

        # the hostname is None for experiments stored by versions before 0.4.0
        if self.hostname:
            strl.append('hostname \t ' + self.hostname)

        # commit hash
        if self.commit:
            if self.modified:
                strl.append('commit \t\t ' + self.commit + ' (modified)')
            else:
                strl.append('commit \t\t ' + self.commit)

        # results
        strl.append('results \t {' + ', '.join(map(str, self.results.keys())) + '}')

        # comment
        if self.comment:
            strl.append('\n' + self.comment)

        return '\n'.join(strl)

    def __del__(self):
        """
        Tells the server (if any) that this experiment no longer runs.
        """

        try:
            self.status(None)
        except Exception:
            # best effort only; the instance may be half-initialized or the
            # interpreter may already be shutting down
            pass

    def __init__(self, filename='', comment='', seed=None, server=None, port=8000):
        """
        If the filename is given and points to an existing experiment, load it.
        Otherwise store the current timestamp and try to get commit information
        from the repository in the current directory.

        A comment given on the command line via C{--comment} takes precedence
        over the C{comment} argument; the option is removed from C{sys.argv}.

        @type  filename: string
        @param filename: path to where the experiment will be stored

        @type  comment: string
        @param comment: a comment describing the experiment

        @type  seed: integer
        @param seed: random seed used in the experiment

        @type  server: string
        @param server: host which is sent status updates via HTTP (optional)

        @type  port: integer
        @param port: port used to contact the server
        """

        self.id = 0
        self.time = time()
        self.comment = comment
        self.filename = filename
        self.results = {}
        self.seed = seed
        self.script = ''
        self.script_path = ''
        self.argv = []
        self.cwd = ''
        self.hostname = ''
        self.platform = ''
        self.processors = ''
        self.environ = {}
        self.duration = 0
        self.versions = {}
        self.commit = None
        self.modified = False
        self.server = ''
        self.port = port
        self.stdout = StringIO()
        self.stderr = StringIO()

        if self.seed is None:
            # numpy only accepts seeds which fit into 32 bits
            self.seed = int((time() + 1e6 * rand()) * 1e3) % 2 ** 32

        # set random seed ('random' in this module is numpy.random as well)
        numpy.random.seed(self.seed)

        if self.filename:
            # load given experiment
            self.load()
            return

        # makes sure everything printed to console will be stored
        sys.stderr = SplitStream(os.fdopen(os.dup(2), 'w'), self.stderr)
        sys.stdout = SplitStream(os.fdopen(os.dup(1), 'w'), self.stdout)

        # identifies the experiment (plain int keeps repr() and pickles simple)
        self.id = int(randint(int(1E8)))

        # check if a comment was passed via the command line
        parser = ArgumentParser(add_help=False)
        parser.add_argument('--comment')
        optlist, argv = parser.parse_known_args(sys.argv[1:])

        # remove comment command line argument from argument list
        sys.argv[1:] = argv

        # argparse yields None if the option is absent; keep the constructor
        # argument in that case instead of overwriting it
        if optlist.comment is not None:
            self.comment = optlist.comment

        # get OS information
        self.platform = sys.platform

        # arguments to the program
        self.argv = sys.argv
        self.script_path = sys.argv[0]
        self.script = self._read_script(self.script_path)

        # snapshot of the environment variables
        self.environ = dict(os.environ)
        self.cwd = os.getcwd()
        self.hostname = socket.gethostname()

        # store some information about the processor(s)
        self.processors = self._processor_info()

        # version information
        self.versions['python'] = sys.version
        self.versions['numpy'] = numpy.__version__
        self.versions['scipy'] = scipy.__version__

        # store information about git repository
        if path.isdir('.git'):
            self.commit, self.modified = self._git_state()

            if self.modified:
                warn('Uncommitted changes.')

        # server managing experiments
        self.server = server
        self.port = port
        self.status('running')

    def _read_script(self, script_path):
        """
        Returns the content of the given script or an empty string on failure.
        """

        try:
            with open(script_path) as handle:
                return handle.read()
        except (IOError, OSError):
            warn('Unable to read Python script.')
            return ''

    def _processor_info(self):
        """
        Asks the operating system for a description of the processor(s).
        Returns an empty string on platforms other than Linux and Mac OS X.
        """

        if self.platform.startswith('linux'):
            cmd = 'egrep "processor|model name|cpu MHz|cache size" /proc/cpuinfo'
        elif self.platform == 'darwin':
            cmd = 'system_profiler SPHardwareDataType | egrep "Processor|Cores|L2|Bus"'
        else:
            return ''

        with os.popen(cmd) as handle:
            return handle.read()

    def _git_state(self):
        """
        Returns the hash of the latest commit and whether tracked files in the
        working tree were modified, or C{(None, False)} if git is unavailable.
        """

        try:
            log = Popen(['git', 'log', '-1'], stdout=PIPE).communicate()[0]
            status = Popen(['git', 'status', '--porcelain'], stdout=PIPE).communicate()[0]
        except OSError:
            warn('Unable to run git.')
            return None, False

        # the first line of the log reads 'commit <hash>'
        try:
            commit = log.splitlines()[0].split()[1]
        except IndexError:
            commit = None

        # the second column of the porcelain format describes the working tree
        modified = any(line[1:2] == b'M' for line in status.splitlines())

        return commit, modified

    def status(self, status, **kwargs):
        """
        Sends the state of the experiment to the server, if one was given.
        Failures only cause a warning.

        @type  status: string
        @param status: C{'running'}, C{'PROGRESS'}, C{'SAVE'} or C{None}

        @param kwargs: additional entries added to the message
        """

        if not self.server:
            return

        message = str(dict({
            'id': self.id,
            'version': __version__,
            'status': status,
            'hostname': self.hostname,
            'cwd': self.cwd,
            'script_path': self.script_path,
            'script': self.script,
            'comment': self.comment,
            'time': self.time,
        }, **kwargs))

        try:
            # make sure an experiment server is listening
            conn = HTTPConnection(self.server, self.port)
            try:
                conn.request('GET', '/version/')
                resp = conn.getresponse()

                if not resp.read().startswith('Experiment'):
                    raise RuntimeError()
            finally:
                conn.close()

            # the answer to the update is deliberately not read
            conn = HTTPConnection(self.server, self.port)
            try:
                conn.request('POST', '', message)
            finally:
                conn.close()

        except Exception:
            warn('Unable to connect to \'{0}:{1}\'.'.format(self.server, self.port))

    def progress(self, progress):
        """
        Reports the progress of the experiment to the server.

        @param progress: progress in percent, as displayed by the server
        """

        self.status('PROGRESS', progress=progress)

    def save(self, filename=None, overwrite=False):
        """
        Store results. If a filename is given, the default is overwritten.

        @type  filename: string
        @param filename: path to where the experiment will be stored

        @type  overwrite: boolean
        @param overwrite: overwrite existing files
        """

        self.duration = time() - self.time

        if filename is None:
            filename = self.filename

        # replace {0} and {1} by date and time
        now = localtime(time())
        filename = filename.format(strftime('%d%m%Y', now), strftime('%H%M%S', now))

        # make sure directory exists
        try:
            os.makedirs(path.dirname(filename))
        except OSError:
            # the directory exists already or the filename has no directory
            # part; real problems surface when the file is opened below
            pass

        # make sure filename is unique
        counter = 0
        pieces = path.splitext(filename)

        if not overwrite:
            while path.exists(filename):
                counter += 1
                filename = pieces[0] + '.' + str(counter) + pieces[1]

            if counter:
                warn(''.join(pieces) + ' already exists. Saving to ' + filename + '.')

        # store experiment
        with open(filename, 'wb') as handle:
            dump({
                'version': __version__,
                'id': self.id,
                'time': self.time,
                'seed': self.seed,
                'duration': self.duration,
                'environ': self.environ,
                'hostname': self.hostname,
                'cwd': self.cwd,
                'argv': self.argv,
                'script': self.script,
                'script_path': self.script_path,
                'processors': self.processors,
                'platform': self.platform,
                'comment': self.comment,
                'commit': self.commit,
                'modified': self.modified,
                'versions': self.versions,
                'results': self.results,
                'stdout': self.stdout,
                'stderr': self.stderr}, handle, 2)

        self.status('SAVE', filename=filename, duration=self.duration)

    def load(self, filename=None):
        """
        Loads experimental results from the specified file. Entries which were
        not yet stored by the version that wrote the file are set to C{None}.

        The file is unpickled, so only files from trusted sources should be
        loaded.

        @type  filename: string
        @param filename: path to where the experiment is stored
        """

        if filename:
            self.filename = filename

        with open(self.filename, 'rb') as handle:
            res = load(handle)

        version = StrictVersion(res['version'])

        def since(minimum, key):
            # entry is only available in files written by recent versions
            return res[key] if version >= StrictVersion(minimum) else None

        self.time = res['time']
        self.seed = res['seed']
        self.duration = res['duration']
        self.processors = res['processors']
        self.environ = res['environ']
        self.platform = res['platform']
        self.comment = res['comment']
        self.commit = res['commit']
        self.modified = res['modified']
        self.versions = res['versions']
        self.results = res['results']
        self.argv = since('0.3.1', 'argv')
        self.script = since('0.4.0', 'script')
        self.script_path = since('0.4.0', 'script_path')
        self.cwd = since('0.4.0', 'cwd')
        self.hostname = since('0.4.0', 'hostname')
        self.id = since('0.4.0', 'id')
        self.stdout = since('0.4.3', 'stdout')
        self.stderr = since('0.4.3', 'stderr')

    def __getitem__(self, key):
        """
        Returns the result stored under the given key.
        """

        return self.results[key]

    def __setitem__(self, key, value):
        """
        Stores a result under the given key.
        """

        self.results[key] = value
class ExperimentRequestHandler(BaseHTTPRequestHandler):
    """
    Renders HTML showing running and finished experiments.

    Experiments report their state via POST requests (see L{do_POST}). The
    state is kept in the class-level dictionaries C{running} and C{finished},
    both of which map experiment ids to the most recently received message.
    """

    xpck_path = ''
    running = {}
    finished = {}

    # number of bars representing progress
    max_bars = 20

    # format of all displayed points in time
    time_format = '%a, %d %b %Y %H:%M:%S'

    def do_GET(self):
        """
        Renders HTML displaying running and saved experiments.

        C{/version/} identifies the server, C{/running/<id>/} and
        C{/finished/<id>/} show a single experiment and every other path
        shows the overview.
        """

        if self.path == '/version/':
            self.send_response(200)
            self.send_header('Content-type', 'text/plain')
            self.end_headers()
            self.wfile.write('Experiment {0}'.format(__version__))

        elif self.path.startswith('/running/'):
            self._show_running(self._requested_id())

        elif self.path.startswith('/finished/'):
            self._show_finished(self._requested_id())

        else:
            self._show_overview()

    def do_POST(self):
        """
        Processes a status update sent by an experiment.

        The body is expected to be the string representation of a dictionary
        with at least the entries C{id} and C{status}. No answer is written
        for accepted updates, since L{Experiment.status} closes the connection
        without reading one; malformed updates are answered with status 400.
        """

        # the body comes from the network and must never be passed to eval()
        from ast import literal_eval

        try:
            length = int(self.headers['Content-Length'])
            instance = literal_eval(self.rfile.read(length))
            status = instance['status']
            ident = instance['id']
        except (KeyError, TypeError, ValueError, SyntaxError):
            self.send_error(400, 'Malformed status update.')
            return

        instances = ExperimentRequestHandler.running

        # compare by value; the strings are parsed and thus never identical
        # to the literals used here
        if status == 'PROGRESS':
            if ident not in instances:
                instance['status'] = 'running'
                instances[ident] = instance
            instances[ident]['progress'] = instance.get('progress', 0)

        elif status == 'SAVE':
            instance['status'] = 'saved'
            ExperimentRequestHandler.finished[ident] = instance

        elif status is None:
            # experiment was deleted and no longer runs
            instances.pop(ident, None)

        else:
            # keep progress reported earlier
            if ident in instances:
                instance['progress'] = instances[ident]['progress']
            else:
                instance['progress'] = 0
            instances[ident] = instance

    @staticmethod
    def _escape(value):
        """
        Converts a value into text which can safely be embedded into HTML.
        """

        text = '{0}'.format(value)
        text = text.replace('&', '&amp;')
        text = text.replace('<', '&lt;')
        text = text.replace('>', '&gt;')
        return text.replace('"', '&quot;')

    def _requested_id(self):
        """
        Returns the experiment id given by the last part of the requested
        path, or None if that part is not a number.
        """

        pieces = [piece for piece in self.path.split('/') if piece != '']

        try:
            return int(pieces[-1])
        except (IndexError, ValueError):
            return None

    def _timestamp(self, seconds):
        """
        Formats a point in time given in seconds since the epoch.
        """

        return strftime(self.time_format, localtime(seconds))

    def _bars(self, progress):
        """
        Returns the HTML of a progress bar for a progress given in percent.
        """

        try:
            num_bars = int(progress) * self.max_bars // 100
        except (TypeError, ValueError):
            num_bars = 0

        num_bars = max(0, min(self.max_bars, num_bars))

        return '<span class="bars">{0}</span>{1}'.format(
            '|' * num_bars, '|' * (self.max_bars - num_bars))

    def _progress_of(self, ident):
        """
        Returns the progress of a saved experiment, which is 100 unless the
        experiment is still listed as running.
        """

        if ident in ExperimentRequestHandler.running:
            return ExperimentRequestHandler.running[ident]['progress']
        return 100

    def _start_page(self, code=200):
        """
        Sends the headers and the beginning of an HTML page.
        """

        self.send_response(code)
        self.send_header('Content-type', 'text/html')
        self.end_headers()
        self.wfile.write(HTML_HEADER)

    def _write_row(self, label, value, css_class=None, raw=False):
        """
        Writes one row of a table describing a single experiment. The value
        is escaped unless C{raw} indicates that it already is HTML.
        """

        cell = value if raw else self._escape(value)
        attr = ' class="{0}"'.format(css_class) if css_class else ''
        self.wfile.write('<tr><th>{0}:</th><td{1}>{2}</td></tr>'.format(label, attr, cell))

    def _write_cell(self, value, css_class=None, raw=False):
        """
        Writes one cell of a row of the tables shown in the overview.
        """

        cell = value if raw else self._escape(value)
        attr = ' class="{0}"'.format(css_class) if css_class else ''
        self.wfile.write('<td{0}>{1}</td>'.format(attr, cell))

    def _write_headings(self, *headings):
        """
        Writes the first row of a table.
        """

        self.wfile.write('<tr>')
        for heading in headings:
            self.wfile.write('<th>{0}</th>'.format(heading))
        self.wfile.write('</tr>')

    def _write_script(self, instance):
        """
        Writes the section showing the script of an experiment.
        """

        self.wfile.write('<h2>Script</h2>')
        self.wfile.write('<pre>{0}</pre>'.format(self._escape(instance['script'])))

    def _write_not_found(self):
        """
        Answers a request for an unknown experiment.
        """

        self._start_page(404)
        self.wfile.write('<h2>404</h2>')
        self.wfile.write('Requested experiment not found.')
        self.wfile.write(HTML_FOOTER)

    def _show_running(self, ident):
        """
        Displays a running experiment. Redirects to the page of the saved
        experiment if the experiment is no longer running.
        """

        if ident in ExperimentRequestHandler.running:
            instance = ExperimentRequestHandler.running[ident]

            self._start_page()
            self.wfile.write('<h2>Experiment</h2>')
            self.wfile.write('<table>')
            self._write_row('Experiment',
                os.path.join(instance['cwd'], instance['script_path']))
            self._write_row('Hostname', instance['hostname'])
            self._write_row('Status', instance['status'], 'running')
            self._write_row('Progress', self._bars(instance['progress']), 'progress', raw=True)
            self._write_row('Start', self._timestamp(instance['time']))
            self._write_row('Comment', instance['comment'] if instance['comment'] else '-')
            self.wfile.write('</table>')
            self._write_script(instance)
            self.wfile.write(HTML_FOOTER)

        elif ident in ExperimentRequestHandler.finished:
            self.send_response(302)
            self.send_header('Location', '/finished/{0}/'.format(ident))
            self.end_headers()

        else:
            self._write_not_found()

    def _show_finished(self, ident):
        """
        Displays a saved experiment together with its stored results.
        """

        if ident not in ExperimentRequestHandler.finished:
            self._write_not_found()
            return

        instance = ExperimentRequestHandler.finished[ident]
        filename = os.path.join(instance['cwd'], instance['filename'])

        self._start_page()
        self.wfile.write('<h2>Experiment</h2>')
        self.wfile.write('<table>')
        self._write_row('Experiment',
            os.path.join(instance['cwd'], instance['script_path']))
        self._write_row('Results', filename)
        self._write_row('Status', instance['status'], 'finished')
        self._write_row('Progress', self._bars(self._progress_of(ident)), 'progress', raw=True)
        self._write_row('Start', self._timestamp(instance['time']))
        # the experiment ended 'duration' seconds after it started
        self._write_row('End', self._timestamp(instance['time'] + instance['duration']))
        self._write_row('Comment', instance['comment'] if instance['comment'] else '-')
        self.wfile.write('</table>')

        self.wfile.write('<h2>Results</h2>')

        try:
            experiment = Experiment(filename)
        except Exception:
            self.wfile.write('Could not open file.')
        else:
            self.wfile.write('<table>')
            for key, value in experiment.results.items():
                self.wfile.write('<tr><th>{0}</th><td>{1}</td></tr>'.format(
                    self._escape(key), self._escape(value)))
            self.wfile.write('</table>')

        self._write_script(instance)
        self.wfile.write(HTML_FOOTER)

    def _show_overview(self):
        """
        Displays tables of all running and all saved experiments.
        """

        running = ExperimentRequestHandler.running
        finished = ExperimentRequestHandler.finished

        self._start_page()
        self.wfile.write('<h2>Running</h2>')

        # display running experiments
        if running:
            self.wfile.write('<table>')
            self._write_headings('Experiment', 'Hostname', 'Status', 'Progress', 'Start', 'Comment')

            # most recently started experiments first
            instances = sorted(running.values(),
                key=lambda instance: instance['time'], reverse=True)

            for instance in instances:
                self.wfile.write('<tr>')
                self._write_cell('<a href="/running/{1}/">{0}</a>'.format(
                    self._escape(instance['script_path']),
                    self._escape(instance['id'])), 'filepath', raw=True)
                self._write_cell(instance['hostname'])
                self._write_cell(instance['status'], 'running')
                self._write_cell(self._bars(instance['progress']), 'progress', raw=True)
                self._write_cell(self._timestamp(instance['time']))
                self._write_cell(instance['comment'] if instance['comment'] else '-', 'comment')
                self.wfile.write('</tr>')

            self.wfile.write('</table>')
        else:
            self.wfile.write('No running experiments.')

        self.wfile.write('<h2>Saved</h2>')

        # display saved experiments
        if finished:
            self.wfile.write('<table>')
            self._write_headings('Results', 'Status', 'Progress', 'Start', 'End', 'Comment')

            # most recently saved experiments first
            items = sorted(finished.items(),
                key=lambda item: item[1]['time'] + item[1]['duration'], reverse=True)

            for ident, instance in items:
                self.wfile.write('<tr>')
                self._write_cell('<a href="/finished/{1}/">{0}</a>'.format(
                    self._escape(instance['filename']),
                    self._escape(instance['id'])), 'filepath', raw=True)
                self._write_cell('saved', 'finished')
                self._write_cell(self._bars(self._progress_of(ident)), 'progress', raw=True)
                self._write_cell(self._timestamp(instance['time']))
                self._write_cell(self._timestamp(instance['time'] + instance['duration']))
                self._write_cell(instance['comment'] if instance['comment'] else '-', 'comment')
                self.wfile.write('</tr>')

            self.wfile.write('</table>')
        else:
            self.wfile.write('No saved experiments.')

        self.wfile.write(HTML_FOOTER)
class SplitStream:
    """
    Duplicates everything written to it to several underlying streams, so
    that output can be shown on the console and stored at the same time.
    """

    def __init__(self, *args):
        """
        @param args: objects with a C{write} method which receive the output
        """

        self.streams = args

    def write(self, text):
        """
        Writes the given text to each of the underlying streams.
        """

        for stream in self.streams:
            stream.write(text)

    def flush(self):
        """
        Flushes each underlying stream which supports flushing. Needed so
        that C{sys.stdout.flush()} keeps working once an instance of this
        class has replaced C{sys.stdout}.
        """

        for stream in self.streams:
            flush = getattr(stream, 'flush', None)
            if flush is not None:
                flush()
class XUnpickler(Unpickler):
    """
    Unpickler which works around a backwards compatibility issue of Numpy:
    references to a module which was renamed are redirected to its new name.
    """

    def find_class(self, module, name):
        """
        Resolves a class reference found in a pickle, using
        C{numpy.matrixlib.defmatrix} in place of C{numpy.core.defmatrix}
        if the installed Numpy is version 1.5.0 or later.
        """

        if module == 'numpy.core.defmatrix':
            try:
                renamed = StrictVersion(numpy.__version__) >= '1.5.0'
            except ValueError:
                # version string of Numpy could not be parsed
                renamed = False

            if renamed:
                module = 'numpy.matrixlib.defmatrix'

        return Unpickler.find_class(self, module, name)
def load(file):
    """
    Reads one pickled object from an open binary file, resolving classes
    with L{XUnpickler}.

    @param file: file-like object opened for reading

    @return: the unpickled object
    """

    unpickler = XUnpickler(file)
    return unpickler.load()
def main(argv):
    """
    Load and display experiment information.

    With C{--server}, an HTTP server displaying experiments is started and
    runs until interrupted. Otherwise the first positional argument is the
    file of a saved experiment; any further arguments are keys of results to
    print. Without further arguments a summary of the experiment is printed.

    @type  argv: list
    @param argv: command line arguments, including the name of the program

    @rtype: int
    @return: exit code, 0 on success and 2 for invalid arguments
    """

    from getopt import GetoptError

    usage = 'Usage: ' + argv[0] + ' [--server] [--port=<port>] [--path=<path>] [filename]'

    if len(argv) < 2:
        print(usage)
        return 0

    try:
        optlist, argv = getopt(argv[1:], '', ['server', 'port=', 'path='])
    except GetoptError:
        print(usage)
        return 2

    optlist = dict(optlist)

    if '--server' in optlist:
        # getopt returns all values as strings
        try:
            port = int(optlist.get('--port', 8000))
        except ValueError:
            print(usage)
            return 2

        ExperimentRequestHandler.xpck_path = optlist.get('--path', '')

        # start server
        server = HTTPServer(('', port), ExperimentRequestHandler)

        try:
            server.serve_forever()
        except KeyboardInterrupt:
            pass
        finally:
            server.socket.close()

        return 0

    if not argv:
        print(usage)
        return 2

    # load experiment; argv now only contains the positional arguments
    experiment = Experiment(argv[0])

    if len(argv) > 1:
        # print requested results
        for arg in argv[1:]:
            try:
                print(experiment[arg])
            except KeyError:
                # the result might be stored under an integer key
                print(experiment[int(arg)])
        return 0

    # print summary of experiment
    print(experiment)

    return 0
# Beginning of every HTML page written by ExperimentRequestHandler: opens the
# document, defines the style sheet and opens the body.
HTML_HEADER = '''<html>
<head>
<title>Experiments</title>
<style type="text/css">
body {
font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
font-size: 11pt;
color: black;
background: white;
padding: 0pt 20pt;
}
h2 {
margin-top: 20pt;
font-size: 16pt;
}
table {
border-collapse: collapse;
}
tr:nth-child(even) {
background: #f4f4f4;
}
th {
font-size: 12pt;
text-align: left;
padding: 2pt 10pt 3pt 0pt;
}
td {
font-size: 10pt;
padding: 3pt 10pt 2pt 0pt;
}
pre {
font-size: 10pt;
background: #f4f4f4;
padding: 5pt;
}
a {
text-decoration: none;
color: #04a;
}
.running {
color: #08b;
}
.finished {
color: #390;
}
.comment {
min-width: 200pt;
font-style: italic;
}
.progress {
color: #ccc;
}
.progress .bars {
color: black;
}
</style>
</head>
<body>'''
# End of every HTML page: closes the body and the document opened by
# HTML_HEADER.
HTML_FOOTER = '''
</body>
</html>'''
if __name__ == '__main__':
    # run the command line interface and pass its result on as exit code
    exit_code = main(sys.argv)
    sys.exit(exit_code)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment