Merge several Jupyter notebooks and then render them as pdf or docx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#https://stackoverflow.com/a/3207973/454773 | |
from nbformat.v4 import new_notebook, new_markdown_cell | |
import nbformat | |
import io | |
import os | |
import subprocess | |
import random | |
import string | |
#from PyPDF2 import PdfFileMerger, PdfFileReader | |
def merged_notebooks_in_dir(dirpath, filenames):
    """Merge all notebooks in a directory into a single notebook.

    The merged notebook starts with a markdown heading cell naming the
    directory; each source notebook's cells are preceded by a markdown
    heading cell giving that notebook's path.

    Args:
        dirpath: Directory holding the notebooks. Used to build each
            notebook's path and as the text of the leading heading cell.
        filenames: Names of files inside ``dirpath`` (for example the
            ``filenames`` list yielded by ``os.walk``). Only names ending in
            ``.ipynb`` are merged, in the order given.

    Returns:
        The merged notebook serialised to a string by ``nbformat.writes``,
        or ``None`` when ``dirpath`` contains ``.ipynb_checkpoints`` or no
        notebook files are listed.
    """
    # The checkpoint test depends only on dirpath, so evaluate it once
    # rather than once per filename.
    if '.ipynb_checkpoints' in dirpath:
        return None

    notebook_paths = [os.path.join(dirpath, fn)
                      for fn in filenames if fn.endswith('.ipynb')]
    if not notebook_paths:
        return None

    heading = '\n\n---\n\n# {}\n\n---\n\n'
    merged = new_notebook()
    # Identify directory containing merged notebooks
    merged.cells.append(new_markdown_cell(heading.format(dirpath)))

    for nb_path in notebook_paths:
        with io.open(nb_path, 'r', encoding='utf-8') as f:
            nb = nbformat.read(f, as_version=4)
        # Identify the notebook the following cells were taken from
        merged.cells.append(new_markdown_cell(heading.format(nb_path)))
        merged.cells.extend(nb.cells)

    # Tag the notebook name so the result is recognisable as a merge
    merged.metadata.name = merged.metadata.get('name', '') + '_merged'
    return nbformat.writes(merged)
def merged_notebooks_down_path(path, typ='docx', execute=False):
    """Walk a path, creating an output file in each directory that merges all notebooks in the directory.

    For every directory under ``path`` that contains notebooks, the merged
    notebook is written to a temporary, randomly named ``.ipynbx`` file in
    that directory, converted with ``jupyter nbconvert`` (and ``pandoc`` for
    docx), and the temporary files are removed again. The result is written
    to ``_merged_notebooks.<typ>`` in the same directory.

    Args:
        path: Root directory to walk.
        typ: Output format, either ``'pdf'`` or ``'docx'``.
        execute: When true, nbconvert executes the merged notebook before
            converting it (600 second timeout, errors allowed).

    Raises:
        ValueError: If ``typ`` is not a supported output format.
        subprocess.CalledProcessError: If nbconvert or pandoc exits with a
            non-zero status.
    """
    supported = ('pdf', 'docx')
    if typ not in supported:
        # Fail loudly rather than writing and deleting a temporary notebook
        # without producing any output.
        raise ValueError('Unsupported output type {!r}; expected one of: {}'.format(
            typ, ', '.join(supported)))

    # Build the optional nbconvert arguments once, under a separate name, so
    # the ``execute`` flag is not rebound on every directory.
    exec_args = []
    if execute:
        exec_args = ['--ExecutePreprocessor.timeout=600',
                     '--ExecutePreprocessor.allow_errors=True',
                     '--execute']

    output_stem = '_merged_notebooks'
    alphabet = string.ascii_uppercase + string.digits

    for dirpath, dirnames, filenames in os.walk(path):
        # os.walk gives no ordering guarantee; sort in place so directories
        # are visited, and notebooks merged, in a reproducible order.
        dirnames.sort()
        if '.ipynb_checkpoints' in dirpath:
            continue

        # Should we run the execute processor here on each notebook separately,
        # ensuring that images are embedded, and then merge the executed notebook files?
        merged_nb = merged_notebooks_in_dir(dirpath, sorted(filenames))
        if not merged_nb:
            continue

        stem = ''.join(random.choice(alphabet) for _ in range(10))
        nb_name = stem + '.ipynbx'
        html_name = stem + '.html'
        nb_path = os.path.join(dirpath, nb_name)
        html_path = os.path.join(dirpath, html_name)

        try:
            # Write as UTF-8 explicitly: the source notebooks are read as
            # UTF-8 and the locale default may not be able to encode them.
            with io.open(nb_path, 'w', encoding='utf-8') as f:
                f.write(merged_nb)

            # Execute the merged notebook in its directory so that images are correctly handled
            # Using html_embed seems to cause pandoc to fall over?
            # The pdf conversion requires installation of texlive-xetex and inkscape
            # This adds significant weight to the VM: maybe we need an MT/production VM and a student build?
            # Inline code execution generated using python-markdown extension seems to break PDF generation
            # at the first instance of inline code? Need to add a preprocessor?
            # We could maybe process the notebook inline rather than via the commandline
            # In such a case, the following may be a useful reference:
            # https://github.com/ipython-contrib/jupyter_contrib_nbextensions/blob/master/docs/source/exporting.rst
            if typ == 'pdf':
                # Name the PDF like the docx output instead of leaving it
                # under the random temporary name.
                cmd = (['jupyter', 'nbconvert', '--to', 'pdf'] + exec_args
                       + ['--output', output_stem, nb_name])
                subprocess.check_call(cmd, cwd=dirpath)
            else:
                cmd = ['jupyter', 'nbconvert', '--to', 'html'] + exec_args + [nb_name]
                subprocess.check_call(cmd, cwd=dirpath)
                cmd = ['pandoc', '-s', html_name,
                       '-o', '{}.{}'.format(output_stem, typ)]
                subprocess.check_call(cmd, cwd=dirpath)
        finally:
            # Remove the temporary files even when a conversion step fails.
            for leftover in (nb_path, html_path):
                if os.path.exists(leftover):
                    os.remove(leftover)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment