public
Created

First draft of an ipython notebook html exporter for blogger

  • Download Gist
nb2html.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
#!/usr/bin/env python
"""A really simple notebook to rst/html exporter.
 
Usage
 
./nb2html.py file.ipynb
 
Produces 'file.rst' and 'file.html', along with auto-generated figure files
called nb_figure_NN.png.
 
NOTE: this *needs* docutils 0.9 for the code directive, which you'll
need to download and install yourself. docutils 0.8.x (the most recent one as of Jan/2012) is not sufficient.
"""
 
import base64
import os
import subprocess
import sys

from IPython.nbformat import current as nbformat
from IPython.utils.text import wrap_paragraphs, indent
 
 
# Cell converters
 
def markdown_cell(cell):
    """Convert a markdown cell to rst.

    The markdown source is passed through unchanged; no markdown-to-rst
    translation is performed here.

    Returns a one-element list holding ``cell.source``.
    """
    return [cell.source]
 
 
def rst_directive(directive, text):
    """Build the rst lines for a directive followed by an indented body.

    Returns a list: the directive line, a blank line, ``text`` passed
    through ``indent``, and a trailing blank line.
    """
    return [directive, '', indent(text), '']
 
def code_cell(cell):
    """Convert a code cell to rst.

    Emits an ``In[N]:`` prompt, the cell input as a ``.. code:: python``
    directive, and then the rst produced for each output, in order.

    An output whose ``output_type`` has no entry in ``converters`` is
    recorded as an rst comment rather than aborting the whole conversion
    with a ``KeyError``.

    Returns list (empty when the cell has no input).
    """
    if not cell.input:
        return []

    lines = ['In[%s]:' % cell.prompt_number, '']
    lines.extend(rst_directive('.. code:: python', cell.input))
    for output in cell.outputs:
        conv = converters.get(output.output_type)
        if conv is None:
            # An explicit-markup line that is not a directive is an rst
            # comment: it marks where the output was dropped without
            # affecting the rendered document.
            lines.append('.. unsupported output type: %s'
                         % output.output_type)
            lines.append('')
            continue
        lines.extend(conv(output))

    return lines
 
# Converters for parts of a cell.
figures_counter = 1


def out_display(output):
    """Convert display data from the output of a code cell to rst.

    When ``output`` has a ``png`` entry, its base64 payload is decoded and
    written to ``nb_figure_N.png`` in the current directory, where N is the
    module-level ``figures_counter``; the counter is then incremented.

    Returns list: an ``.. image::`` directive plus a blank line when a
    figure was written, otherwise an empty list.
    """
    global figures_counter

    lines = []

    if 'png' in output:
        fname = 'nb_figure_%s.png' % figures_counter
        # PNG data is binary, so the file must be opened in binary mode;
        # base64.b64decode works where str.decode('base64') does not exist.
        with open(fname, 'wb') as f:
            f.write(base64.b64decode(output.png))

        figures_counter += 1
        lines.append('.. image:: %s' % fname)
        lines.append('')
    return lines
 
def out_pyout(output):
    """Convert the pyout part of a code cell to rst.

    Emits an ``Out[N]:`` prompt followed by a ``.. math::`` directive when
    the output has a ``latex`` entry and a ``.. parsed-literal::`` directive
    when it has a ``text`` entry (both are emitted if both are present).

    Returns list.
    """
    lines = ['Out[%s]:' % output.prompt_number, '']
    if 'latex' in output:
        lines.extend(rst_directive('.. math::', output.latex))

    if 'text' in output:
        lines.extend(rst_directive('.. parsed-literal::', output.text))

    return lines
 
 
# Dispatch table mapping a cell type or output type name to the function
# that renders it as a list of rst lines.
converters = {
    'code': code_cell,
    'markdown': markdown_cell,
    'pyout': out_pyout,
    'display_data': out_display,
}
 
def convert_notebook(nb):
    """Convert a notebook to a single rst string.

    Every cell of every worksheet is rendered through the function that
    ``converters`` maps its ``cell_type`` to, with a blank line after each
    cell.  A cell type with no converter is recorded as an rst comment
    instead of aborting with a ``KeyError``.

    Returns the rst lines joined with newlines (an empty string for a
    notebook without worksheets or cells).
    """
    lines = []
    # Walk all worksheets rather than only the first, so a notebook with
    # no worksheets yields empty output and extra worksheets are kept.
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            conv = converters.get(cell.cell_type)
            if conv is None:
                lines.append('.. unsupported cell type: %s' % cell.cell_type)
            else:
                lines.extend(conv(cell))
            lines.append('')
    return '\n'.join(lines)
 
 
def nb2rst(fname):
    """Convert notebook to rst.

    Reads the JSON notebook at ``fname``, converts it with
    ``convert_notebook`` and writes the result, UTF-8 encoded, to a file
    with the same base name and a ``.rst`` extension.

    Returns the name of the rst file written.
    """
    with open(fname) as f:
        nb = nbformat.read(f, 'json')

    rst = convert_notebook(nb)

    newfname = os.path.splitext(fname)[0] + '.rst'
    # The text is explicitly encoded to bytes, so the handle has to be
    # binary; a text-mode handle rejects bytes on Python 3.
    with open(newfname, 'wb') as f:
        f.write(rst.encode('utf8'))

    return newfname
 
 
def rst2simplehtml(fname):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    The html is written to a file with the same base name as ``fname`` and
    a ``.html`` extension.

    Returns the name of the html file written.

    Raises IOError if rst2html cannot be started, writes anything to
    stderr, or exits with a non-zero status.
    """

    # These are the rst2html options that produce the cleanest, simplest
    # html I could find.  This should help in making it easier to paste
    # into the blogspot html window, though I'm still having problems with
    # linebreaks there...
    #
    # The command is an argument list run without a shell, so a file name
    # containing spaces or shell metacharacters is passed through intact.
    cmd = ['rst2html',
           '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments',
           fname]
    try:
        # universal_newlines=True makes the pipes return text, which the
        # str-based line matching below needs on Python 3.
        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
    except OSError as err:
        # Without a shell a missing executable surfaces here; keep
        # reporting it as IOError like every other rst2html failure.
        raise IOError('could not run rst2html: %s' % err)
    html, stderr = proc.communicate()
    if stderr or proc.returncode:
        raise IOError(stderr or
                      'rst2html exited with status %s' % proc.returncode)

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.
    walker = iter(html.splitlines())

    # Find start of main text, break out to then print until we find end /div.
    # This may only work if there's a real title defined so we get a 'div class'
    # tag, I haven't really tried.
    for line in walker:
        if line.startswith('<div class'):
            break

    newfname = os.path.splitext(fname)[0] + '.html'
    with open(newfname, 'w') as f:
        # The walker resumes right after the opening div found above.
        for line in walker:
            if line.startswith('</div>'):
                break
            f.write(line)
            f.write('\n')
    return newfname
 
 
def main(fname):
    """Convert a notebook to html in one step.

    Runs ``nb2rst`` on ``fname`` and feeds the resulting rst file to
    ``rst2simplehtml``; both output files share the notebook's base name.
    """
    rst2simplehtml(nb2rst(fname))
 
 
if __name__ == '__main__':
    # Exactly one argument (the notebook file) is expected; exit with a
    # usage message instead of an IndexError traceback when it is missing.
    if len(sys.argv) != 2:
        sys.exit('Usage: %s file.ipynb' % sys.argv[0])
    main(sys.argv[1])

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.