# satyamsatyarthi/ipynb2jekyll.py

## ipynb2jekyll.py
import json #for reading .ipynb
import sys #python version for proper unicode support
import os #directory and path operations
import errno #safe directory creation
import argparse #command line args
import re #latex to liquid

#command line: one positional notebook path plus an overwrite switch
parser = argparse.ArgumentParser(description='Convert IPython notebooks to Jekyll Markdown with Liquid Tags.')
parser.add_argument('filename', metavar='file', type=str, nargs=1, help='full path to .ipynb file')
parser.add_argument('--f', action='store_true', help='overwrite existing files without warning.')
inputs = parser.parse_args() #nargs=1 makes inputs.filename a one-element list


imgBaseDir = 'images' #directory containing images (relative to  base Jekyll directory)
autogenPostDir = '_autogen' #directory containing autogenerated posts (within _posts)

#figure out the names of the generated post and of its image directory
fDir, fName = os.path.split(inputs.filename[0])
#strip only a trailing '.ipynb'; str.replace would also cut it out of the middle of a name
if fName.endswith('.ipynb'):
    fName = fName[:-len('.ipynb')]
fullPath = os.path.dirname(os.path.realpath(__file__)) #directory holding this script
jekyllDir = os.path.split(fullPath)[0] #its parent is used as the Jekyll base directory
imgFullDir = os.path.join(jekyllDir, imgBaseDir, fName)
mdFName = os.path.join(jekyllDir, '_posts', autogenPostDir, fName + '.md')

def makedirSafe(dirPath):
    """Create dirPath (and any missing parents) unless it already is a directory.

    Raises OSError for every failure other than the directory already
    existing, including the case where dirPath exists but is not a directory.
    """
    try:
        os.makedirs(dirPath)
    except OSError as exception:
        #EEXIST is only harmless when the existing entry really is a directory;
        #a regular file in the way would make every later write fail
        if exception.errno != errno.EEXIST or not os.path.isdir(dirPath):
            raise

#make sure the autogenerated-posts directory exists before anything is written
makedirSafe(os.path.join(jekyllDir, '_posts', autogenPostDir))

#refuse to clobber an existing post unless --f was given
if os.path.exists(mdFName) and not inputs.f:
    raise IOError('The markdown file to be generated already exists.\n Run with --f to overwrite.')

#for Unicode support across Python versions: on Python 2 the name open is
#rebound to io.open, because the open() calls further down pass encoding=
if sys.version_info < (3,):
    import io
    _open_func_bak = open #keep a handle on the original builtin, just in case
    open = io.open

#expressions to convert to Liquid math tags
#group 1 = text before the math, group 2 = the math itself, group 3 = text after
#raw strings keep the regex escape \$ out of Python's own string-escape handling
displayRe = re.compile(r'([^\$]*)\$\$([^\$]+)\$\$([^\$]*)')
inlineRe = re.compile(r'([^\$]*)\$([^\$]+)\$([^\$]*)')

#Code cells have inputs and outputs
def processCodeCell(codeCell):
    """Render one code cell as Liquid-highlighted source followed by its outputs.

    codeCell is read through the keys 'input', 'language' and 'outputs'.
    Each output is dispatched on its 'output_type':
      * 'pyout' with a 'latex' entry: every line goes through displayRe and
        inlineRe so $$..$$ / $..$ become Liquid math tags;
      * any other 'pyout': the lines of its 'text' entry are copied verbatim;
      * 'display_data' with an 'svg' entry: the SVG is collected as an image.
    All other outputs are ignored.

    Returns a dict with 'text' (the generated markdown) and 'images' (a list
    of {'format': 'svg', 'imgData': ...} dicts). A cell whose 'input' is an
    empty list yields empty text and no images; its outputs are not examined.
    """
    result = {'text': '', 'images': []}
    if codeCell['input'] == []:
        return result

    #collect the pieces and join once instead of growing a string with +=
    pieces = ['{{% highlight {language} %}}\n'.format(language=codeCell['language'])]
    pieces.extend(codeCell['input'])
    pieces.append('\n{% endhighlight %}')
    pieces.append('\n')

    for out in codeCell['outputs']:
        outputType = out['output_type']
        if outputType == 'pyout':
            #"in" works on Python 2 and 3; dict.has_key exists on Python 2 only
            if 'latex' in out:
                for line in out['latex']:
                    line = displayRe.sub('\\1{% math %}\\2{% endmath %} \\3', line)
                    line = inlineRe.sub('\\1 {% m %}\\2{% em %} \\3', line)
                    pieces.append(line)
            else:
                pieces.extend(out['text'])
        elif outputType == 'display_data' and 'svg' in out:
            result['images'].append({'format': 'svg', 'imgData': out['svg']})

    result['text'] = ''.join(pieces)
    return result


#Markdown cells only have md text
def processMarkdownCell(mdCell):
    """Return the cell's markdown with LaTeX delimiters turned into Liquid tags.

    Every item of mdCell['source'] is passed first through displayRe
    ($$..$$ -> {% math %}..{% endmath %}) and then through inlineRe
    ($..$ -> {% m %}..{% em %}); the converted items are concatenated.
    """
    converted = []
    for rawLine in mdCell['source']:
        withDisplayMath = displayRe.sub('\\1{% math %}\\2{% endmath %} \\3', rawLine)
        converted.append(inlineRe.sub('\\1 {% m %}\\2{% em %} \\3', withDisplayMath))
    return ''.join(converted)

#%%
#name the encoding explicitly instead of relying on the locale default, which
#is not UTF-8 everywhere; on Python 2 open is io.open here, so encoding= works
with open(inputs.filename[0], 'r', encoding='utf-8') as f:
    rd = json.load(f)


#only need to do one worksheet for now.
wb = rd['worksheets'][0]

#%%
out = [] #converted text of every cell, in notebook order
nImages = 0 #running count, also used to number the image files
#this prefix ends up inside a markdown image URL, so it must use '/' on every
#platform; os.path.join would produce backslashes on Windows
imgDirString = '/'.join(['{{site.url}}', imgBaseDir, fName, ''])

for cell in wb['cells']:
    if cell['cell_type'] != 'code':
        #every non-code cell is rendered as markdown
        out.append(processMarkdownCell(cell) + '\n')
        continue

    codeCellOut = processCodeCell(cell)
    cellText = codeCellOut['text']
    for image in codeCellOut['images']:
        #the image directory is only created once the first image shows up
        if nImages == 0:
            makedirSafe(imgFullDir)

        imgName = fName + str(nImages) + '.' + image['format']
        imgFullPath = os.path.join(imgFullDir, imgName)

        if os.path.exists(imgFullPath) and not inputs.f:
            raise IOError('Image file: ' + imgName + ' already exists.\n Run with --f to overwrite')

        #svg files are just xml so we can generate the output
        if image['format'] != 'svg':
            raise TypeError('You encountered a ' + image['format'] + ' file. I don\'t know how to deal with that.')
        with open(imgFullPath, 'w', encoding='utf-8') as f:
            f.writelines(image['imgData'])

        #generate image tag
        cellText += u'\n![' + imgName + '](' + imgDirString + imgName + ')'
        nImages += 1
    out.append(cellText + u'\n')


#write the converted cells to the markdown file, each followed by a newline
with open(mdFName, 'w', encoding='utf-8') as mdFile:
    mdFile.writelines(chunk for cellText in out for chunk in (cellText, u'\n'))
	import json #for reading .ipynb
	import sys #python version for proper unicode support
	import os #directory and path operations
	import errno #safe directory creation
	import argparse #command line args
	import re #latex to liquid

	#command line: one positional notebook path plus an overwrite switch
	parser = argparse.ArgumentParser(description='Convert IPython notebooks to Jekyll Markdown with Liquid Tags.')
	parser.add_argument('filename', metavar='file', type=str, nargs=1, help='full path to .ipynb file')
	parser.add_argument('--f', action='store_true', help='overwrite existing files without warning.')
	inputs = parser.parse_args() #nargs=1 makes inputs.filename a one-element list


	imgBaseDir = 'images' #directory containing images (relative to base Jekyll directory)
	autogenPostDir = '_autogen' #directory containing autogenerated posts (within _posts)

	#figure out the names of the generated post and of its image directory
	fDir, fName = os.path.split(inputs.filename[0])
	#strip only a trailing '.ipynb'; str.replace would also cut it out of the middle of a name
	if fName.endswith('.ipynb'):
		fName = fName[:-len('.ipynb')]
	fullPath = os.path.dirname(os.path.realpath(__file__)) #directory holding this script
	jekyllDir = os.path.split(fullPath)[0] #its parent is used as the Jekyll base directory
	imgFullDir = os.path.join(jekyllDir, imgBaseDir, fName)
	mdFName = os.path.join(jekyllDir, '_posts', autogenPostDir, fName + '.md')

	def makedirSafe(dirPath):
		"""Create dirPath (and any missing parents) unless it already is a directory.

		Raises OSError for every failure other than the directory already
		existing, including the case where dirPath exists but is not a directory.
		"""
		try:
			os.makedirs(dirPath)
		except OSError as exception:
			#EEXIST is only harmless when the existing entry really is a directory;
			#a regular file in the way would make every later write fail
			if exception.errno != errno.EEXIST or not os.path.isdir(dirPath):
				raise

	#make sure the autogenerated-posts directory exists before anything is written
	makedirSafe(os.path.join(jekyllDir, '_posts', autogenPostDir))

	#refuse to clobber an existing post unless --f was given
	if os.path.exists(mdFName) and not inputs.f:
		raise IOError('The markdown file to be generated already exists.\n Run with --f to overwrite.')

	#for Unicode support across Python versions: on Python 2 the name open is
	#rebound to io.open, because the open() calls further down pass encoding=
	if sys.version_info[0] < 3:
		import io
		_open_func_bak = open #keep a handle on the original builtin, just in case
		open = io.open

	#expressions to convert to Liquid math tags
	#group 1 = text before the math, group 2 = the math itself, group 3 = text after
	#groups 1 and 3 need the * quantifier: without it a formula at the very
	#start or end of a line (no neighbouring character) is never matched
	#raw strings keep the regex escape \$ out of Python's own string-escape handling
	displayRe = re.compile(r'([^\$]*)\$\$([^\$]+)\$\$([^\$]*)')
	inlineRe = re.compile(r'([^\$]*)\$([^\$]+)\$([^\$]*)')

	#Code cells have inputs and outputs
	def processCodeCell(codeCell):
		"""Render one code cell as Liquid-highlighted source followed by its outputs.

		codeCell is read through the keys 'input', 'language' and 'outputs'.
		Each output is dispatched on its 'output_type':
		  * 'pyout' with a 'latex' entry: every line goes through displayRe and
		    inlineRe so $$..$$ / $..$ become Liquid math tags;
		  * any other 'pyout': the lines of its 'text' entry are copied verbatim;
		  * 'display_data' with an 'svg' entry: the SVG is collected as an image.
		All other outputs are ignored.

		Returns a dict with 'text' (the generated markdown) and 'images' (a list
		of {'format': 'svg', 'imgData': ...} dicts). A cell whose 'input' is an
		empty list yields empty text and no images; its outputs are not examined.
		"""
		result = {'text': '', 'images': []}
		if codeCell['input'] == []:
			return result

		#collect the pieces and join once instead of growing a string with +=
		pieces = ['{{% highlight {language} %}}\n'.format(language=codeCell['language'])]
		pieces.extend(codeCell['input'])
		pieces.append('\n{% endhighlight %}')
		pieces.append('\n')

		for out in codeCell['outputs']:
			outputType = out['output_type']
			if outputType == 'pyout':
				#"in" works on Python 2 and 3; dict.has_key exists on Python 2 only
				if 'latex' in out:
					for line in out['latex']:
						line = displayRe.sub('\\1{% math %}\\2{% endmath %} \\3', line)
						line = inlineRe.sub('\\1 {% m %}\\2{% em %} \\3', line)
						pieces.append(line)
				else:
					pieces.extend(out['text'])
			elif outputType == 'display_data' and 'svg' in out:
				result['images'].append({'format': 'svg', 'imgData': out['svg']})

		result['text'] = ''.join(pieces)
		return result


	#Markdown cells only have md text
	def processMarkdownCell(mdCell):
		"""Return the cell's markdown with LaTeX delimiters turned into Liquid tags.

		Every item of mdCell['source'] is passed first through displayRe
		($$..$$ -> {% math %}..{% endmath %}) and then through inlineRe
		($..$ -> {% m %}..{% em %}); the converted items are concatenated.
		"""
		result = ''
		for line in mdCell['source']:
			line = displayRe.sub('\\1{% math %}\\2{% endmath %} \\3', line)
			line = inlineRe.sub('\\1 {% m %}\\2{% em %} \\3', line)
			result += line

		return result

	#%%
	#name the encoding explicitly instead of relying on the locale default, which
	#is not UTF-8 everywhere; on Python 2 open is io.open here, so encoding= works
	with open(inputs.filename[0], 'r', encoding='utf-8') as f:
		rd = json.load(f)


	#only need to do one worksheet for now.
	wb = rd['worksheets'][0]

	#%%
	out = [] #converted text of every cell, in notebook order
	nImages = 0 #running count, also used to number the image files
	#this prefix ends up inside a markdown image URL, so it must use '/' on every
	#platform; os.path.join would produce backslashes on Windows
	imgDirString = '/'.join(['{{site.url}}', imgBaseDir, fName, ''])

	for cell in wb['cells']:
		if cell['cell_type'] == 'code':
			codeCellOut = processCodeCell(cell)
			cellText = codeCellOut['text']
			if codeCellOut['images'] != []:
				#at least one image found; the directory is created only once
				if nImages == 0:
					makedirSafe(imgFullDir)

				for image in codeCellOut['images']:
					imgName = fName + str(nImages) + '.' + image['format']
					imgFullPath = os.path.join(imgFullDir, imgName)

					if os.path.exists(imgFullPath) and not inputs.f:
						raise IOError('Image file: ' + imgName + ' already exists.\n Run with --f to overwrite')

					#svg files are just xml so we can generate the output
					if image['format'] == 'svg':
						with open(imgFullPath, 'w+', encoding='utf-8') as f:
							for line in image['imgData']:
								f.write(line)
					else:
						raise TypeError('You encountered a ' + image['format'] + ' file. I don\'t know how to deal with that.')

					#generate image tag
					cellText += u'\n![' + imgName + '](' + imgDirString + imgName + ')'
					nImages += 1
			out.append(cellText + u'\n')

		else:
			#every non-code cell is rendered as markdown
			out.append(processMarkdownCell(cell)+'\n')



	#write markdown file
	with open(mdFName, 'w', encoding='utf-8') as f:
		for cell in out:
			f.write(cell)
			f.write(u'\n')