public
Last active

A script to load an .ipynb file, execute all cells in order and output the resulting notebook. Depends on https://github.com/jonathan-taylor/nbconvert/tree/json2json

  • Download Gist
execute_and_save.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
"""
Simple example script for running notebooks and saving the resulting notebooks.
 
Usage: `execute_and_save.py foo.ipynb [bar.ipynb [...]]`
 
Each cell is submitted to the kernel, and the outputs are overwritten and
stored in new notebooks foo_executed.ipynb, etc.
"""
 
import os,sys,time
import base64
import re
 
from collections import defaultdict
from Queue import Empty
 
from IPython.zmq.blockingkernelmanager import BlockingKernelManager
from IPython.nbformat.current import reads, NotebookNode
 
from nbconvert import ConverterNotebook
 
def compare_png(a64, b64):
    """Compare two base64-encoded PNGs (incomplete).

    This is still a stub: both payloads are decoded, but their contents are
    not compared, so the function reports a match for any pair of inputs
    that decode successfully.

    Parameters
    ----------
    a64, b64 :
        Base64-encoded PNG data.

    Returns
    -------
    bool
        Always ``True``.

    Raises
    ------
    Whatever ``base64.b64decode`` raises for input it cannot decode
    (for example incorrectly padded data).
    """
    # ``base64.decodestring`` is deprecated and no longer exists on recent
    # Python 3 releases; ``b64decode`` is available on Python 2 and 3 alike.
    # The decoded bytes are not used yet -- decoding only checks that the
    # payloads are well-formed.
    # TODO: actually compare the decoded images; that will need an imaging
    # library, which is why the original draft tried to import one.
    base64.b64decode(a64)
    base64.b64decode(b64)
    return True
 
# Values that are expected to differ between two runs of the same code.
# Both patterns are case-insensitive so that uppercase hex digits (as in
# ``0x000001F4A2C0``) and uppercase UUIDs are normalized as well.
_HEX_ADDRESS_RE = re.compile(r'0x[a-f0-9]+', re.IGNORECASE)
_UUID_RE = re.compile(
    r'[a-f0-9]{8}(\-[a-f0-9]{4}){3}\-[a-f0-9]{12}', re.IGNORECASE)


def sanitize(s):
    """Sanitize a string for comparison.

    The following normalizations are applied, in order:

    1. ``\\r\\n`` line endings become ``\\n``.
    2. Trailing newlines are removed (other trailing whitespace is kept).
    3. Every ``0x...`` hexadecimal literal, e.g. a memory address in a
       ``repr``, becomes ``0xFFFFFFFF``.
    4. Every UUID-shaped token becomes ``U-U-I-D``.

    Parameters
    ----------
    s : str
        The text to normalize.

    Returns
    -------
    str
        The normalized text.
    """
    # normalize newlines
    s = s.replace('\r\n', '\n')
    # ignore trailing newlines (but not spaces)
    s = s.rstrip('\n')
    # normalize hex addresses
    s = _HEX_ADDRESS_RE.sub('0xFFFFFFFF', s)
    # normalize UUIDs
    s = _UUID_RE.sub('U-U-I-D', s)
    return s
 
 
def compare_outputs(test, ref, skip_compare=('png', 'traceback', 'latex', 'prompt_number')):
for key in ref:
if key not in test:
print "missing key: %s != %s" % (test.keys(), ref.keys())
return False
elif key not in skip_compare and sanitize(test[key]) != sanitize(ref[key]):
print "mismatch %s:" % key
print test[key]
print ' != '
print ref[key]
return False
return True
 
 
def run_cell(km, cell):
shell = km.shell_channel
iopub = km.sub_channel
# print "\n\ntesting:"
# print cell.input
shell.execute(cell.input)
# wait for finish, maximum 20s
shell.get_msg(timeout=20)
outs = []
 
while True:
try:
msg = iopub.get_msg(timeout=0.2)
except Empty:
break
msg_type = msg['msg_type']
if msg_type in ('status', 'pyin'):
continue
elif msg_type == 'clear_output':
outs = []
continue
content = msg['content']
# print msg_type, content
out = NotebookNode(output_type=msg_type)
if msg_type == 'stream':
out.stream = content['name']
out.text = content['data']
elif msg_type in ('display_data', 'pyout'):
for mime, data in content['data'].iteritems():
attr = mime.split('/')[-1].lower()
# this gets most right, but fix svg+html, plain
attr = attr.replace('+xml', '').replace('plain', 'text')
setattr(out, attr, data)
if msg_type == 'pyout':
out.prompt_number = content['execution_count']
elif msg_type == 'pyerr':
out.ename = content['ename']
out.evalue = content['evalue']
out.traceback = content['traceback']
else:
print "unhandled iopub msg:", msg_type
outs.append(out)
return outs
 
def execute_notebook(nb):
km = BlockingKernelManager()
km.start_kernel(extra_arguments=['--pylab=inline'], stderr=open(os.devnull, 'w'))
km.start_channels()
# run %pylab inline, because some notebooks assume this
# even though they shouldn't
km.shell_channel.execute("pass")
km.shell_channel.get_msg()
while True:
try:
km.sub_channel.get_msg(timeout=1)
except Empty:
break
successes = 0
failures = 0
errors = 0
prompt_number = 1
for ws in nb.worksheets:
for cell in ws.cells:
cell.prompt_number = prompt_number
if cell.cell_type != 'code':
continue
try:
outs = run_cell(km, cell)
except Exception as e:
print "failed to run cell:", repr(e)
print cell.input
errors += 1
continue
cell.outputs = outs
prompt_number += 1
km.shutdown_kernel()
del km
 
def execute_and_save(ipynb):
    """Execute the notebook at path ``ipynb`` and render the result.

    A ``ConverterNotebook`` is created with the input path and an output
    name formed by dropping the input's extension and appending
    ``_executed``. The notebook is read, its cells are executed with
    ``execute_notebook``, and the converter's ``render`` method is called
    (presumably writing the executed notebook -- confirm against nbconvert).

    Parameters
    ----------
    ipynb : str
        Path of the notebook file to execute.
    """
    base_name, _extension = os.path.splitext(ipynb)
    output_name = base_name + '_executed'

    converter = ConverterNotebook(ipynb, output_name)
    converter.read()
    execute_notebook(converter.nb)
    converter.render()
 
 
if __name__ == '__main__':
for ipynb in sys.argv[1:]:
print "executing %s" % ipynb
execute_and_save(ipynb)

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.