Created September 11, 2014 05:58
Convert IPython notebooks to Jekyll-compatible Markdown.
import json #for reading .ipynb
import sys #python version for proper unicode support
import os #directory and path operations
import errno #safe directory creation
import argparse #command line args
import re #latex to liquid
#command line interface: one positional notebook path plus an overwrite switch
parser = argparse.ArgumentParser(description='Convert IPython notebooks to Jekyll Markdown with Liquid Tags.')
#nargs=1 makes inputs.filename a one-element list, hence the [0] indexing wherever it is used
parser.add_argument('filename', metavar='file', type=str, nargs=1, help='full path to .ipynb file')
parser.add_argument('--f', action='store_true', help='overwrite existing files without warning.')
inputs = parser.parse_args()

imgBaseDir = 'images' #directory containing images (relative to base Jekyll directory)
autogenPostDir = '_autogen' #directory containing autogenerated posts (within _posts)

#figure out directory names and create autogen directories if necessary
fDir, fName = os.path.split(inputs.filename[0])
#strip only a trailing '.ipynb'; str.replace would also delete the substring from the middle of a name
if fName.endswith('.ipynb'):
    fName = fName[:-len('.ipynb')]
#the Jekyll base directory is taken to be the parent of the directory holding this script
fullPath = os.path.dirname(os.path.realpath(__file__))
jekyllDir = os.path.split(fullPath)[0]
#images go to <jekyllDir>/<imgBaseDir>/<notebook name>/, the post to <jekyllDir>/_posts/<autogenPostDir>/<notebook name>.md
imgFullDir = os.path.join(jekyllDir, imgBaseDir, fName)
mdFName = os.path.join(jekyllDir, '_posts', autogenPostDir, fName + '.md')
def makedirSafe(dirPath):
    """Create dirPath, including missing parents, tolerating a path that already exists.

    dirPath -- directory path to create.

    Returns None. Re-raises any OSError whose errno is not EEXIST.
    """
    try:
        os.makedirs(dirPath)
    except OSError as exception:
        #an already-existing path is the one failure we accept; anything else is a real error
        if exception.errno != errno.EEXIST:
            raise
makedirSafe(os.path.join(jekyllDir, '_posts', autogenPostDir)) #create autogen directory if needed
#output file already exists: refuse to overwrite it unless --f was given
if os.path.exists(mdFName) and not inputs.f:
    raise IOError('The markdown file to be generated already exists.\n Run with --f to overwrite.')
#for Unicode support across python versions
#Python 2's builtin open() takes no encoding argument; io.open does, and is what Python 3 exposes as open()
if sys.version_info[0] < 3:
    import io
    _open_func_bak = open # Make a back up, just in case
    open = io.open
#expressions to convert to Liquid math tags
#each pattern captures three groups: text before the math, the math body, text after it
#raw strings keep '\$' intact for the regex engine without relying on an invalid string escape
displayRe = re.compile(r'([^\$]*)\$\$([^\$]+)\$\$([^\$]*)') #$$ ... $$
inlineRe = re.compile(r'([^\$]*)\$([^\$]+)\$([^\$]*)') #$ ... $
#Code cells have inputs and outputs
def processCodeCell(codeCell):
    """Render one notebook code cell as Liquid-tagged text plus a list of its images.

    codeCell -- mapping indexed here by 'input', 'language' and 'outputs'; each
                output is indexed by 'output_type' and, depending on that type,
                by 'latex', 'text' or 'svg'.

    Returns a dict with two keys:
      'text'   -- the source wrapped in a highlight tag, followed by the text
                  of every 'pyout' output
      'images' -- one {'format': 'svg', 'imgData': ...} dict per 'display_data' output

    Raises KeyError when a key read above is absent (for example a
    'display_data' output that carries no 'svg' entry).
    """
    pieces = []
    images = []
    if codeCell['input'] != []:
        #doubled braces are literal braces for str.format, giving '{% highlight <language> %}'
        pieces.append('{{% highlight {language} %}}\n'.format(language=codeCell['language']))
        pieces.extend(codeCell['input'])
        pieces.append('\n{% endhighlight %}')
        #NOTE(review): assumed to belong to the highlighted section; confirm it should not also run for empty input
        pieces.append('\n')
    for out in codeCell['outputs']:
        if out['output_type'] == 'pyout':
            #'in' works on Python 2 and 3; dict.has_key() exists only on Python 2
            if isinstance(out, dict) and 'latex' in out:
                for line in out['latex']:
                    #display math first, so '$$' pairs are consumed before the single-'$' pattern runs
                    line = displayRe.sub('\\1{% math %}\\2{% endmath %} \\3', line)
                    line = inlineRe.sub('\\1 {% m %}\\2{% em %} \\3', line)
                    pieces.append(line)
            #NOTE(review): the plain text is appended even when latex was emitted; confirm this is not meant to be an alternative
            pieces.extend(out['text'])
        elif out['output_type'] == 'display_data':
            images.append({'format': 'svg', 'imgData': out['svg']})
    return {'text': ''.join(pieces), 'images': images}
#Markdown cells only have md text
def processMarkdownCell(mdCell):
    """Return the markdown cell's text with dollar-delimited math rewritten as Liquid tags.

    mdCell -- mapping whose 'source' entry is iterated line by line.

    '$$...$$' becomes '{% math %}...{% endmath %}' and '$...$' becomes
    '{% m %}...{% em %}'. The converted lines are concatenated without any
    added separator.
    """
    converted = []
    for line in mdCell['source']:
        #display math first, so '$$' pairs are consumed before the single-'$' pattern runs
        line = displayRe.sub('\\1{% math %}\\2{% endmath %} \\3', line)
        line = inlineRe.sub('\\1 {% m %}\\2{% em %} \\3', line)
        converted.append(line)
    return ''.join(converted)
#read the notebook explicitly as UTF-8 rather than with the platform default encoding
with open(inputs.filename[0], 'r', encoding='utf-8') as f:
    rd = json.load(f)
#only need to do one worksheet for now.
wb = rd['worksheets'][0]
out = [] #converted text of each cell, in notebook order
nImages = 0 #running image count; also the numeric suffix of the next image file name
#URL prefix for image links; joined with '/' because this is a URL, not a filesystem path
imgDirString = '/'.join(['{{site.url}}', imgBaseDir, fName, ''])
for cell in wb['cells']:
    if cell['cell_type'] == 'code':
        codeCellOut = processCodeCell(cell)
        cellText = codeCellOut['text']
        if codeCellOut['images'] != []:
            #at least one image found
            if nImages == 0:
                #NOTE(review): body restored; the image directory has to exist before the first write
                makedirSafe(imgFullDir)
            for image in codeCellOut['images']:
                imgName = fName + str(nImages) + '.' + image['format']
                imgFullPath = os.path.join(imgFullDir, imgName)
                if os.path.exists(imgFullPath) and not inputs.f:
                    raise IOError('Image file: ' + imgName + ' already exists.\n Run with --f to overwrite')
                #svg files are just xml so we can generate the output
                if image['format'] == 'svg':
                    with open(imgFullPath, 'w+', encoding='utf-8') as imgFile:
                        #NOTE(review): write restored; the loop over imgData had no body
                        imgFile.writelines(image['imgData'])
                else:
                    raise TypeError('You encountered a ' + image['format'] + " file. I don't know how to deal with that.")
                #generate image tag
                cellText += u'\n![' + imgName + '](' + imgDirString + imgName + ')'
                nImages += 1
    elif cell['cell_type'] == 'markdown':
        #NOTE(review): branch restored; without it markdown cells reuse a stale or undefined cellText
        cellText = processMarkdownCell(cell)
    else:
        #NOTE(review): other cell types are skipped here; confirm the intended handling
        continue
    out.append(cellText + u'\n')
#write markdown file
with open(mdFName, 'w', encoding='utf-8') as f:
    for cell in out:
        #NOTE(review): write restored; the loop had no body in the reviewed text
        f.write(cell)