Instantly share code, notes, and snippets.

Embed
What would you like to do?
Merge several Jupyter notebooks and then render them as pdf or docx
#https://stackoverflow.com/a/3207973/454773
from nbformat.v4 import new_notebook, new_markdown_cell
import nbformat
import io
import os
import subprocess
import random
import string
#from PyPDF2 import PdfFileMerger, PdfFileReader
def merged_notebooks_in_dir(dirpath, filenames):
    """Merge all notebooks in a directory into a single notebook.

    The merged notebook starts with a markdown heading cell naming
    ``dirpath``. Each source notebook then contributes a markdown heading
    cell containing its path, followed by all of its own cells.

    Args:
        dirpath: Directory containing the files. It is used both to build
            the notebook paths and as the text of the first heading cell.
        filenames: Names of files inside ``dirpath``. Only names ending in
            ``.ipynb`` are merged, in the order given.

    Returns:
        The merged notebook serialised to a string by ``nbformat.writes``,
        or ``None`` when there is nothing to merge (no ``.ipynb`` names, or
        ``dirpath`` contains ``.ipynb_checkpoints``).
    """
    # The checkpoint test depends only on the directory, so do it once up
    # front instead of once per file name.
    if '.ipynb_checkpoints' in dirpath:
        return None

    fns = ['{}/{}'.format(dirpath, fn) for fn in filenames if fn.endswith('.ipynb')]
    if not fns:
        return None

    merged = new_notebook()
    # Identify directory containing merged notebooks
    merged.cells.append(new_markdown_cell('\n\n---\n\n# {}\n\n---\n\n'.format(dirpath)))

    for fn in fns:
        # Keep the file open only for as long as it takes to parse it.
        with io.open(fn, 'r', encoding='utf-8') as f:
            nb = nbformat.read(f, as_version=4)
        # Identify filename of notebook
        merged.cells.append(new_markdown_cell('\n\n---\n\n# {}\n\n---\n\n'.format(fn)))
        merged.cells.extend(nb.cells)

    # Tag the notebook metadata so the result is recognisable as a merge.
    if not hasattr(merged.metadata, 'name'):
        merged.metadata.name = ''
    merged.metadata.name += "_merged"

    return nbformat.writes(merged)
def merged_notebooks_down_path(path, typ='docx', execute=False):
    """Walk a path, creating one merged output file per directory of notebooks.

    For every directory under ``path`` that contains ``.ipynb`` files, the
    notebooks are merged (in sorted file-name order) into a temporary
    ``.ipynbx`` file in that directory, converted with ``jupyter nbconvert``
    (and ``pandoc`` for docx), and written to ``_merged_notebooks.<typ>`` in
    the same directory. Temporary files are removed even if a conversion
    step fails.

    Args:
        path: Root directory to walk.
        typ: Output type, either ``'pdf'`` or ``'docx'``.
        execute: When true, nbconvert executes the merged notebook before
            converting it (600 second timeout, errors allowed).

    Raises:
        ValueError: If ``typ`` is not ``'pdf'`` or ``'docx'``.
        subprocess.CalledProcessError: If ``jupyter nbconvert`` or
            ``pandoc`` exits with a non-zero status.
    """
    # Reject unsupported types before doing any work; previously they merged
    # every notebook, wrote and deleted a temp file, and produced no output.
    if typ not in ('pdf', 'docx'):
        raise ValueError(
            "Unsupported output type {!r}; expected 'pdf' or 'docx'".format(typ))

    # Build the optional execution flags once, without rebinding the
    # ``execute`` parameter itself.
    if execute:
        exec_flags = ['--ExecutePreprocessor.timeout=600',
                      '--ExecutePreprocessor.allow_errors=True',
                      '--execute']
    else:
        exec_flags = []

    for (dirpath, _dirnames, filenames) in os.walk(path):
        if '.ipynb_checkpoints' in dirpath:
            continue

        # Should we run the execute processor here on each notebook separately,
        # ensuring that images are embedded, and then merge the executed notebook files?
        # os.walk returns names in arbitrary order; sort so that the merged
        # document has a stable, predictable notebook order.
        merged_nb = merged_notebooks_in_dir(dirpath, sorted(filenames))
        if not merged_nb:
            continue

        # Random base name so the temp notebook cannot clash with a real one.
        fn = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10))
        nb_name = fn + '.ipynbx'
        html_name = fn + '.html'
        nb_path = os.path.join(dirpath, nb_name)
        html_path = os.path.join(dirpath, html_name)

        try:
            # The serialised notebook may contain non-ASCII text, so write it
            # as UTF-8 rather than with the locale's default encoding.
            with io.open(nb_path, 'w', encoding='utf-8') as f:
                f.write(merged_nb)

            # Execute the merged notebook in its directory so that images are correctly handled
            # Using html_embed seems to cause pandoc to fall over?
            # The pdf conversion requires installation of texlive-xetex and inkscape
            # This adds significant weight to the VM: maybe we need an MT/production VM and a student build?
            # Inline code execution generated using python-markdown extension seems to break PDF generation
            # at the first instance of inline code? Need to add a preprocessor?
            # We could maybe process the notebook inline rather than via the commandline
            # In such a case, the following may be a useful reference:
            # https://github.com/ipython-contrib/jupyter_contrib_nbextensions/blob/master/docs/source/exporting.rst
            if typ == 'pdf':
                # Name the PDF like the docx output instead of leaving a
                # randomly named file behind on every run.
                cmd = (['jupyter', 'nbconvert', '--to', 'pdf'] + exec_flags
                       + ['--output', '_merged_notebooks', nb_name])
                subprocess.check_call(cmd, cwd=dirpath)
            else:
                # docx: go via HTML and let pandoc produce the final file.
                cmd = ['jupyter', 'nbconvert', '--to', 'html'] + exec_flags + [nb_name]
                subprocess.check_call(cmd, cwd=dirpath)
                cmd = ['pandoc', '-s', html_name, '-o', '_merged_notebooks.{}'.format(typ)]
                subprocess.check_call(cmd, cwd=dirpath)
        finally:
            # Always clean up the intermediates, even when a conversion fails.
            for leftover in (html_path, nb_path):
                if os.path.exists(leftover):
                    os.remove(leftover)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment