Skip to content

Instantly share code, notes, and snippets.

Created March 29, 2017 17:41
Show Gist options
  • Save brazilbean/3ebb31324f6dad212817b3663c7a0219 to your computer and use it in GitHub Desktop.
Save brazilbean/3ebb31324f6dad212817b3663c7a0219 to your computer and use it in GitHub Desktop.
Custom Jupyter Notebook Pre-processors
'''Specialized Preprocessors'''
import nbconvert, nbformat, re, sys
from nbconvert.preprocessors import ExecutePreprocessor, Preprocessor
from traitlets import Dict, Unicode
from textwrap import dedent
from warnings import warn
def warn_deprecated(msg):
    '''Emit a DeprecationWarning carrying ``msg``.

    ``stacklevel=2`` attributes the warning to the code that called this
    helper rather than to the helper itself.
    '''
    warn(msg, DeprecationWarning, stacklevel=2)
class RemoveCodePreprocessor(Preprocessor):
    '''Strip all code cells and specially marked markdown cells.

    A markdown cell is considered "special" when its first
    whitespace-separated token consists only of ``#`` characters (a header
    marker) and its second token is ``!``.
    '''

    def _should_remove(self, cell):
        '''Return True when ``cell`` must be dropped from the notebook.'''
        if cell.cell_type == 'code':
            return True
        if cell.cell_type == 'markdown':
            # For cells starting with headers the first token is a run of
            # '#' characters; a '!' as the second token flags the cell.
            tokens = cell.source.split()
            if len(tokens) < 2:
                # Empty or single-token cells cannot carry the marker
                # (indexing them used to raise IndexError).
                return False
            return all(t == '#' for t in tokens[0]) and tokens[1] == '!'
        return False

    def preprocess(self, notebook, resources):
        '''Skip code cells and special markdown cells'''
        notebook.cells = [cell for cell in notebook.cells if not self._should_remove(cell)]
        return notebook, resources
class FilterCodePreprocessor(Preprocessor):
    '''Remove code cells that contain a ``##hidecell`` marker.'''

    def _should_filter(self, cell):
        '''Determine whether a cell should be filtered'''
        if cell.cell_type != 'code':
            return False
        # Check for a ##Hidecell comment anywhere in the source
        # (case-insensitive).
        # NOTE(review): the call name was lost in the source listing;
        # re.search is assumed here - confirm against the original gist.
        return re.search('##hidecell', cell.source, re.I) is not None

    def preprocess(self, notebook, resources):
        '''Filter code cells'''
        notebook.cells = [cell for cell in notebook.cells if not self._should_filter(cell)]
        return notebook, resources
class ClearEmptyRawCellsPreprocessor(Preprocessor):
    '''Remove empty raw cells from the notebook'''

    def preprocess(self, notebook, resources):
        '''Drop every raw cell whose source is exactly the empty string.'''
        notebook.cells = [
            cell for cell in notebook.cells
            if not (cell.cell_type == 'raw' and cell.source == '')
        ]
        return notebook, resources
class ArgumentSubstitutionPreprocessor(Preprocessor):
    '''Substitute ``<<name>>`` placeholders in code cells with values from ``args``.'''

    # Mapping of placeholder name -> replacement text.
    # NOTE(review): the tail of this declaration was truncated in the source
    # listing; a configurable trait is assumed - confirm against the original.
    args = Dict(
        Unicode(),
        help="Values substituted for <<name>> placeholders in code cells",
    ).tag(config=True)

    def replace_variables(self, source, variables):
        '''Replace <<variablename>> with stored value

        Parameters
        ----------
        source : str
            Cell source to scan for placeholders.
        variables : mapping
            Placeholder name -> replacement text.

        Returns
        -------
        str
            The substituted source, or ``source`` unchanged when the
            substitution raises TypeError.
        '''
        try:
            # NOTE(review): the lambda's arguments were lost in the source
            # listing; unknown names are assumed to be left untouched.
            replaced = re.sub(
                r"<<(.*?)>>",
                lambda m: variables.get(m.group(1), m.group(0)),
                source,
            )
        except TypeError:
            # Diagnostics belong on stderr; previously sys.stderr was passed
            # as a positional value and its repr was printed to stdout.
            print("WARNING: unable to perform replacement in cell: {}".format(source), file=sys.stderr)
            replaced = source
        return replaced

    def preprocess_cell(self, cell, resources, index):
        '''Preprocess cell

        Parameters
        ----------
        cell : NotebookNode cell
            Notebook cell being processed
        resources : dictionary
            Additional resources used in the conversion process.  Allows
            preprocessors to pass variables into the Jinja engine.
        index : int
            Index of the cell being processed

        Returns
        -------
        tuple
            ``(cell, resources)``; only code cells are modified, and only
            when ``args`` is non-empty.
        '''
        if cell.cell_type == "code" and len(self.args) > 0:
            cell.source = self.replace_variables(cell.source, self.args)
        return cell, resources
class ExecuteCodeMarkdownPreprocessor(ExecutePreprocessor):
    '''Execute code cells and inline ``{{expr}}`` snippets in markdown cells.

    Every cell is first handed to a MetaCodePreprocessor; cells it rejects
    are replaced by an empty raw cell.  Results of markdown snippets are
    stored in ``cell['metadata']['variables']`` keyed by the snippet source.
    '''

    def __init__(self, **kw):
        self.sections = {'default': True}  # maps section ID to true or false
        # Placeholder returned for every cell the metacode processor drops.
        self.EmptyCell = nbformat.v4.nbbase.new_raw_cell("")
        self.MetaCodeProcessor = MetaCodePreprocessor(self)
        super().__init__(**kw)

    def preprocess_cell(self, cell, resources, cell_index):
        '''Process one cell: apply metacode tags, then execute it.

        Returns ``(cell, resources)``.  Cells removed by the metacode
        processor are replaced by ``self.EmptyCell``; cells that are neither
        code nor markdown are returned untouched.

        Raises
        ------
        TimeoutError
            Re-raised after the offending cell source is logged to stderr.
        '''
        cell, resources = self.MetaCodeProcessor.process_cell(cell, resources)
        if cell is None:
            return self.EmptyCell, resources
        if cell.cell_type not in ['code', 'markdown']:
            return cell, resources

        try:
            if cell.cell_type == 'code':
                return self.preprocess_code_cell(cell, resources, cell_index)
            return self.preprocess_markdown_cell(cell, resources, cell_index)
        except TimeoutError:
            print("Timeout on execution of cell: {}".format(cell.source), file=sys.stderr, flush=True)
            # Propagate: falling through would return None and break the
            # caller's ``cell, resources = ...`` unpacking.
            raise

    def preprocess_code_cell(self, cell, resources, cell_index):
        '''Run a code cell and attach its outputs.

        Raises ``CellExecutionError`` on the first error output unless
        ``self.allow_errors`` is set.
        '''
        outputs = self.run_cell(cell)
        cell.outputs = outputs
        if not self.allow_errors:
            for out in outputs:
                if out.output_type == 'error':
                    # NOTE(review): the separator and ``{cell.source}`` lines
                    # of this template were lost in the source listing and
                    # are reconstructed - confirm against the original.
                    pattern = u"""\
                        An error occurred while executing the following cell:
                        ------------------
                        {cell.source}
                        ------------------

                        {out.ename}: {out.evalue}
                        """
                    msg = dedent(pattern).format(out=out, cell=cell)
                    raise nbconvert.preprocessors.execute.CellExecutionError(msg)
        return cell, resources

    def preprocess_markdown_cell(self, cell, resources, cell_index):
        '''Execute every ``{{...}}`` snippet found in a markdown cell.

        Each snippet runs as a temporary code cell; its HTML-convertible
        output is stored in ``cell['metadata']['variables']`` under the
        snippet source.
        '''
        cell['metadata']['variables'] = {}
        for m in re.finditer(r"{{(.*?)}}", cell.source):
            # Execute the snippet as a throw-away code cell.
            # NOTE(review): the argument was lost in the source listing; the
            # captured snippet text is assumed.
            fakecell = nbformat.v4.nbbase.new_code_cell(m.group(1))
            fakecell, resources = self.preprocess_code_cell(fakecell, resources, cell_index)
            # Outputs are in fakecell.outputs; publish them via metadata.
            for output in fakecell.outputs:
                html = self.convert_output_to_html(output)
                if html is not None:
                    cell['metadata']['variables'][fakecell.source] = html
        return cell, resources

    def convert_output_to_html(self, output):
        '''Convert IOpub output to HTML

        Returns a string for error outputs and for ``execute_result`` /
        ``display_data`` outputs with a supported MIME type, otherwise None.
        '''
        kind = output['output_type']
        if kind == 'error':
            return '**' + output.ename + '**: ' + output.evalue
        if kind not in ('execute_result', 'display_data'):
            return None

        data = output.data
        if 'text/latex' in data:
            return data['text/latex']
        if 'image/svg+xml' in data:
            # Not supported: the original notes say an SVG embedded in an
            # <img> tag still gets eaten by the sanitizer.
            return None
        if 'image/jpeg' in data:
            return '<img src="data:image/jpeg;base64,' + data['image/jpeg'] + '"/>'
        if 'image/png' in data:
            return '<img src="data:image/png;base64,' + data['image/png'] + '"/>'
        if 'text/markdown' in data:
            return data['text/markdown']
        if 'text/html' in data:
            return data['text/html']
        if 'text/plain' in data:
            text = data['text/plain']
            # Strip <p> and </p> tags, then strip single quotes.
            text = re.sub(r'<p>([\s\S]*?)<\/p>', r'\1', text)
            text = re.sub(r"'([\s\S]*?)'", r'\1', text)
            return text
        # Some MIME type we don't support
        return None
class MetaCodePreprocessor:
    '''Interpret "#$" metacode tags that conditionally exclude cells.

    Supported tags: ``if``, ``else``, ``end if`` and the deprecated
    ``endif`` / ``section start`` / ``section end``.  While an excluded
    block is active, every ordinary cell is dropped.
    '''

    # The argument group is optional so that bare tags such as "#$ else"
    # are recognised; whenever arguments are present the match is identical
    # to the former pattern that required them.
    _TAG_PATTERN = re.compile(r'\#\$\s+(\w+)(?:\s+(.+))?')

    def __init__(self, cell_runner):
        '''Store ``cell_runner``, whose ``run_cell(cell)`` returns the cell's outputs.'''
        self._in_exclude_mode = False
        self.cell_runner = cell_runner

    def _output_to_bool(self, output):
        '''Convert cell execution output to a boolean'''
        import ast

        if not output:
            # Empty string is false
            return False
        try:
            # Handles literal reprs like "True", "False", "0", "1", etc.
            # literal_eval replaces eval(): output text is never executed.
            return bool(ast.literal_eval(output))
        except (ValueError, TypeError, SyntaxError):
            # The output is not a Python literal (e.g. "<Foo object ...>"),
            # so a value was produced: treat it as True.
            return True

    def _evaluate_cell(self, cell):
        '''Evaluate the cell and return True or False'''
        # Execute the cell source to see whether the section should be kept.
        outputs = self.cell_runner.run_cell(cell)
        for output in outputs:
            if "data" in output and "text/plain" in output["data"]:
                return self._output_to_bool(output["data"]["text/plain"])
        # Default to True when no plain-text result was produced.
        return True

    def process_cell(self, cell, resources):
        '''Identify and process metacode tags

        Metacode tags are found on the first line of the cell and start
        with "#$".

        Returns ``(None, resources)`` for tag cells and for cells inside an
        excluded block, otherwise ``(cell, resources)``.
        '''
        m = self._TAG_PATTERN.match(cell.source)
        if m is None:
            if self._in_exclude_mode:
                return None, resources
            return cell, resources

        command = m.group(1)
        args = m.group(2) or ''

        if command == 'if':
            # Start if block
            self._in_exclude_mode = not self._evaluate_cell(cell)
        elif command == 'else':
            # Start else block
            self._in_exclude_mode = not self._in_exclude_mode
        elif command == 'end':
            # End block; slicing keeps empty arguments from raising.
            if args.split()[:1] == ['if']:
                # End if/else block
                self._in_exclude_mode = False
            else:
                print("Unrecognized metacode end tag: " + args)
        elif command == 'endif':
            # Deprecated: end if/else block
            warn_deprecated("'endif' is deprecated. Use 'end if'")
            self._in_exclude_mode = False
        elif command == 'section':
            # Deprecated functionality kept for backwards compatibility
            warn_deprecated("'section' tag is deprecated. Use 'if' and 'end if'")
            lowered = args.lower()
            if 'start' in lowered:
                # Start section block
                self._in_exclude_mode = not self._evaluate_cell(cell)
            elif 'end' in lowered:
                # End section block
                self._in_exclude_mode = False
            else:
                # Unrecognized section tag
                print("Unrecognized metacode tag: " + args)

        # All cells with metacode tags are removed
        return None, resources
Copy link

Hi, I found your repo via stackoverflow, when I was looking for a way to prevent some code and markdown cells from being exported upon exporting a notebook to HTML. Is that something I can achieve with any of these preprocessors? If not, do you have an idea on how to modify these to achieve it anyway? Thank you in advance.

Copy link

Here's what I wrote recently for that purpose:

from nbconvert.preprocessors import Preprocessor

class RemoveSkip(Preprocessor):
    """Drop cells whose slideshow metadata marks them as ``skip``."""

    def preprocess(self, notebook, resources):
        """Return the notebook without cells having ``slideshow.slide_type == 'skip'``."""
        notebook.cells = [
            cell for cell in notebook.cells
            if not ((md := cell.metadata.get('slideshow')) and (st := md.get('slide_type')) and st == 'skip')
        ]
        return notebook, resources

This little preprocessor is available on pypi as jupybeans. For example:

pip install jupybeans
jupyter nbconvert "notebook.ipynb" --Exporter.preprocessors jupybeans.RemoveSkip --to html_embed 

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment