Skip to content

Instantly share code, notes, and snippets.

@ant1441
Forked from jonathan-taylor/execute_and_save.py
Last active January 3, 2016 06:19
Show Gist options
  • Save ant1441/8421487 to your computer and use it in GitHub Desktop.
Save ant1441/8421487 to your computer and use it in GitHub Desktop.
Python script to load, execute and save an IPython notebook. Handles slow-running cells with a defined timeout. Developed under IPython version 1.1.0.
"""
Simple example script for running notebooks and saving the resulting notebooks.
Usage: `execute_and_save.py foo.ipynb [bar.ipynb [...]]`
Each code cell is submitted to the kernel, and its outputs are overwritten and
stored in new notebooks (foo_executed.ipynb, etc.).
See the IPython messaging protocol documentation:
http://ipython.org/ipython-doc/stable/development/messaging.html
"""
from __future__ import print_function
import logging
import sys
from Queue import Empty
try:
from IPython.kernel import KernelManager
except ImportError:
logging.exception("Could not import KernelManager")
from IPython.zmq.blockingkernelmanager import (
BlockingKernelManager as KernelManager)
from IPython.nbformat.current import NotebookNode
from IPython.nbformat import current as nbformat
# Timeout handed to every iopub ``get_msg`` call while a cell is running
# (presumably seconds -- confirm against the channel API). When it elapses
# without a message, run_cell gives up and exits the process.
EXECUTION_TIMEOUT = 30
def run_cell(kc, cell, timeout=None):
    """Run one code cell on the kernel and return the outputs it produced.

    The cell source (``cell.input``) is submitted on the shell channel, then
    the iopub channel is read until the kernel reports ``idle`` for this
    request. Messages belonging to other requests are ignored, so a late
    ``idle`` from an earlier execution cannot end this cell prematurely.

    Args:
        kc: object exposing ``shell_channel`` and ``iopub_channel``.
        cell: notebook cell; only its ``input`` attribute is read.
        timeout: value passed as ``timeout`` to each ``iopub.get_msg`` call.
            Defaults to the module-level ``EXECUTION_TIMEOUT``.

    Returns:
        A list of ``NotebookNode`` outputs (stream, display_data, pyout,
        pyerr) in arrival order. A ``clear_output`` message discards
        everything collected before it.

    Exits:
        ``sys.exit(5)`` if no iopub message arrives within ``timeout``;
        ``sys.exit(3)`` if the kernel echoes (``pyin``) code that differs
        from ``cell.input``.
    """
    if timeout is None:
        timeout = EXECUTION_TIMEOUT

    shell = kc.shell_channel
    iopub = kc.iopub_channel

    # execute the cell, remembering the request id so that iopub traffic can
    # be matched against it
    msg_id = shell.execute(cell.input)
    logging.debug("Submited cell for execution")

    outs = []
    while True:
        try:
            # Get output messages
            msg = iopub.get_msg(timeout=timeout)
        except Empty:
            print("Execution Timeout", file=sys.stderr)
            sys.exit(5)

        # Skip anything that was not triggered by our request. If the shell
        # channel did not hand back an id, no filtering is possible and every
        # message is processed.
        parent_id = (msg.get('parent_header') or {}).get('msg_id')
        if msg_id is not None and parent_id != msg_id:
            logging.debug("Ignoring %s message from another request",
                          msg['msg_type'])
            continue

        msg_type = msg['msg_type']
        content = msg['content']

        if msg_type == 'status':
            # Status messages mark the start and stop of a cells execution
            execution_state = content['execution_state']
            logging.debug("status message received - %s", execution_state)
            if execution_state == 'idle':
                # This cell has finished executing
                break
            if execution_state != 'busy':
                print("Unknown status: %s" % (execution_state,))
            # A status message is never an output of the cell.
            continue

        if msg_type == 'clear_output':
            logging.debug('Clearing output')
            outs = []
            continue

        if msg_type == 'pyin':
            logging.debug('pyin mesage received')
            if cell.input != content['code']:
                print("Received message for another cell!", file=sys.stderr)
                sys.exit(3)
            continue

        if msg_type not in ('stream', 'display_data', 'pyout', 'pyerr'):
            # Report it, but do not store a meaningless output node.
            print("unhandled iopub msg: " + msg_type)
            continue

        out = NotebookNode(output_type=msg_type)
        if msg_type == 'stream':
            logging.debug("Stream message received")
            out.stream = content['name']
            out.text = content['data']
        elif msg_type in ('display_data', 'pyout'):
            logging.debug("Display message received")
            for mime, data in content['data'].items():
                attr = mime.split('/')[-1].lower()
                # this gets most right, but fix svg+html, plain
                attr = attr.replace('+xml', '').replace('plain', 'text')
                setattr(out, attr, data)
            out.metadata = content['metadata']
            if msg_type == 'pyout':
                out.prompt_number = content['execution_count']
        else:  # pyerr
            out.ename = content['ename']
            out.evalue = content['evalue']
            out.traceback = content['traceback']
        outs.append(out)

    logging.info("Completed executing cell")
    return outs
def execute_notebook(nb, name=None):
    """Execute every code cell of a notebook on a fresh kernel.

    A kernel is started with ``--pylab=inline``, checked with a trivial
    ``pass`` request, and then each code cell in each worksheet is run via
    ``run_cell``. The resulting outputs replace ``cell.outputs`` in place.
    The kernel is shut down on the way out, including when an error or
    ``sys.exit`` interrupts execution.

    Args:
        nb: notebook object exposing ``worksheets``, each with ``cells``.
        name: optional label used only in the "Executed notebook" messages.

    Returns:
        The same ``nb`` object, with outputs and prompt numbers updated.

    Exits:
        ``sys.exit(2)`` if the kernel does not answer the test request with
        status ``ok``.
    """
    if name is None:
        # NOTE(review): assumes the notebook may carry a ``metadata.name``
        # entry -- confirm for the nbformat version in use.
        name = getattr(getattr(nb, 'metadata', None), 'name', None)
        if not name:
            name = '<unnamed>'

    km = KernelManager()
    # Creating the kernel with pylab for graphs
    km.start_kernel(extra_arguments=['--pylab=inline'])
    logging.info("Created IPython kernel")

    try:
        try:
            kc = km.client()
        except AttributeError:
            # 0.13
            kc = km
        kc.start_channels()

        # Shell handles requests for code execution
        # iopub is the 'broadcast channel' stdout, stderr, etc.
        shell = kc.shell_channel
        iopub = kc.iopub_channel

        # Send a test command to check the communications
        shell.execute("pass")
        shell_m = shell.get_msg(timeout=0.5)
        if shell_m['content']['status'] != u'ok':
            logging.error("Error when testing IPython kernel communication")
            sys.exit(2)

        while iopub.msg_ready():
            # Catch any IPython startup output
            iom = iopub.get_msg(timeout=0.5)
            if iom['msg_type'] == 'stream':
                print(iom['content']['data'])
        logging.info("Sucessfully pinged IPython kernel")

        errors = 0
        prompt_number = 1
        for ws in nb.worksheets:
            logging.info("Parsing %s cells", len(ws.cells))
            for cell in ws.cells:
                if cell.cell_type != 'code':
                    continue
                cell.prompt_number = prompt_number
                try:
                    outs = run_cell(kc, cell)
                except Exception:
                    logging.exception("Failed to run cell with input %s",
                                      cell.input)
                    errors += 1
                    # The prompt number is only advanced for cells that ran.
                    continue
                cell.outputs = outs
                prompt_number += 1

        if errors:
            logging.warning("%s cell(s) failed to execute", errors)
        logging.info("Executed notebook %s", name)
        print("Executed notebook %s" % (name,))
    finally:
        # Shutdown the kernel
        km.shutdown_kernel()
        logging.info("Shutdown IPython kernel")
    return nb
def execute_and_save(ipynb, save_file):
    """Read a notebook, execute it, and write the executed copy.

    Args:
        ipynb: open, readable file object holding the notebook JSON.
        save_file: open, writable file object that receives the executed
            notebook JSON.
    """
    nb = nbformat.read(ipynb, 'json')
    # File-like objects without a ``name`` are shown by their repr instead.
    logging.info("Reading file %s", getattr(ipynb, 'name', ipynb))
    execd_nb = execute_notebook(nb)
    nbformat.write(execd_nb, save_file, 'json')
    print("Written exectued notebook to %s"
          % (getattr(save_file, 'name', save_file),))
if __name__ == '__main__':
    import os.path

    # Each notebook given on the command line is executed and written next to
    # the original as <name>_executed<ext>; the input file is left untouched.
    for ipynb in sys.argv[1:]:
        print("Executing %s" % ipynb)
        root, ext = os.path.splitext(ipynb)
        executed_path = root + '_executed' + (ext or '.ipynb')
        with open(ipynb, 'r') as f:
            with open(executed_path, 'w') as wf:
                execute_and_save(f, wf)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment