Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Merge several Jupyter notebooks and then render them as pdf or docx
from nbformat.v4 import new_notebook, new_markdown_cell
import nbformat
import io
import os
import subprocess
import random
import string
#from PyPDF2 import PdfFileMerger, PdfFileReader
def merged_notebooks_in_dir(dirpath, filenames):
    """Merge all notebooks in a directory into a single notebook.

    A markdown heading cell naming the directory is placed first; each
    notebook's cells are then appended, preceded by a markdown heading cell
    that names the notebook file.

    Args:
        dirpath: Directory that contains the notebooks.
        filenames: Names of files inside ``dirpath``. Only names ending in
            ``.ipynb`` are merged, in the order given.

    Returns:
        The merged notebook serialised to a string by ``nbformat.writes``,
        or ``None`` when ``dirpath`` is a checkpoint directory or no
        notebook file names were supplied.
    """
    # Jupyter's autosave copies live in .ipynb_checkpoints; never merge those.
    if '.ipynb_checkpoints' in dirpath:
        return None

    notebook_paths = ['{}/{}'.format(dirpath, fn)
                      for fn in filenames if fn.endswith('.ipynb')]
    if not notebook_paths:
        return None

    heading = '\n\n---\n\n# {}\n\n---\n\n'

    merged = new_notebook()
    # Identify the directory containing the merged notebooks.
    merged.cells.append(new_markdown_cell(heading.format(dirpath)))

    for nb_path in notebook_paths:
        with open(nb_path, 'r', encoding='utf-8') as f:
            nb = nbformat.read(f, as_version=4)
        # Identify the file each run of cells came from.
        merged.cells.append(new_markdown_cell(heading.format(nb_path)))
        merged.cells.extend(nb.cells)

    # Tag the notebook name so the result is recognisable as a merge.
    merged.metadata['name'] = merged.metadata.get('name', '') + '_merged'
    return nbformat.writes(merged)
def merged_notebooks_down_path(path, typ='docx', execute=False):
    """Walk a path, creating a merged output file in each notebook directory.

    For every directory under ``path`` that contains ``.ipynb`` files, the
    notebooks are merged (via ``merged_notebooks_in_dir``), written to a
    temporary, randomly named ``.ipynbx`` file in that directory, and
    converted with ``jupyter nbconvert`` (plus ``pandoc`` for docx). The
    result is written as ``_merged_notebooks.<typ>`` in the same directory
    and the temporary files are removed afterwards.

    Args:
        path: Root directory to walk.
        typ: Output type. ``'pdf'`` and ``'docx'`` are handled; any other
            value produces no output file.
        execute: When true, nbconvert executes the merged notebook before
            converting it (600 s timeout, cell errors allowed).

    Raises:
        subprocess.CalledProcessError: If ``jupyter nbconvert`` or
            ``pandoc`` exits with a non-zero status.
    """
    # Build the nbconvert execution flags once; they do not vary per directory.
    if execute:
        exec_args = ['--ExecutePreprocessor.timeout=600',
                     '--ExecutePreprocessor.allow_errors=True',
                     '--execute']
    else:
        exec_args = []

    for dirpath, _dirnames, filenames in os.walk(path):
        if '.ipynb_checkpoints' in dirpath:
            continue

        # Should we run the execute processor here on each notebook
        # separately, ensuring that images are embedded, and then merge the
        # executed notebook files?
        merged_nb = merged_notebooks_in_dir(dirpath, filenames)
        if not merged_nb:
            continue

        # A random stem avoids clashing with files already in the directory.
        stem = ''.join(random.choice(string.ascii_uppercase + string.digits)
                       for _ in range(10))
        tmp_notebook = os.path.join(dirpath, stem + '.ipynbx')
        tmp_html = os.path.join(dirpath, stem + '.html')

        try:
            with open(tmp_notebook, 'w', encoding='utf-8') as f:
                f.write(merged_nb)

            # Execute/convert the merged notebook in its own directory
            # (cwd=dirpath) so that images are correctly handled.
            # Using html_embed seems to cause pandoc to fall over?
            # The pdf conversion requires installation of texlive-xetex and
            # inkscape. This adds significant weight to the VM: maybe we
            # need an MT/production VM and a student build?
            # Inline code execution generated using the python-markdown
            # extension seems to break PDF generation at the first instance
            # of inline code? Need to add a preprocessor?
            # We could maybe process the notebook inline rather than via
            # the command line.
            # TODO: the reference originally cited for that is missing here.
            if typ == 'pdf':
                # --output gives the PDF the same predictable stem as docx.
                subprocess.check_call(
                    ['jupyter', 'nbconvert', '--to', 'pdf']
                    + exec_args
                    + ['--output', '_merged_notebooks', stem + '.ipynbx'],
                    cwd=dirpath)
            elif typ in ['docx']:
                subprocess.check_call(
                    ['jupyter', 'nbconvert', '--to', 'html']
                    + exec_args
                    + [stem + '.ipynbx'],
                    cwd=dirpath)
                subprocess.check_call(
                    ['pandoc', '-s', stem + '.html',
                     '-o', '_merged_notebooks.{}'.format(typ)],
                    cwd=dirpath)
        finally:
            # Do not leave randomly named intermediates behind in every
            # directory, whether or not the conversion succeeded.
            for leftover in (tmp_notebook, tmp_html):
                if os.path.exists(leftover):
                    os.remove(leftover)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment