# Skip to content
#
# Instantly share code, notes, and snippets.
#
# @chrisjsewell
# Last active November 12, 2019 01:56
# Show Gist options
#   • Save chrisjsewell/73f7d49423b0fd4e40efd5f51b651d73 to your computer and use it in GitHub Desktop.
# Save chrisjsewell/73f7d49423b0fd4e40efd5f51b651d73 to your computer and use it in GitHub Desktop.
from textwrap import dedent
import re
from nbconvert.filters import (
indent,
strip_ansi,
escape_latex,
ansi2latex,
citation2latex,
strip_files_prefix,
convert_pandoc,
posix_path,
resolve_references,
)
def chain_funcs(arg, *funcs):
    """Pipe ``arg`` through ``funcs`` from left to right.

    Each function receives the result of the previous one, so
    ``chain_funcs(x, f, g)`` is ``g(f(x))``.

    :param arg: the initial value.
    :param funcs: single-argument callables, applied in the order given.
    :return: the final result, or ``arg`` itself when no functions are given.
    """
    for func in funcs:
        arg = func(arg)
    return arg
class OutputHandlerBase:
    """Turn one code-cell output into text by dispatching to hook methods.

    An instance is called as ``handler(output, output_num)``. Depending on
    ``output.output_type`` it calls the matching hooks, drops the ones that
    returned nothing and joins the rest with newlines. Every hook defined
    here returns ``None``; subclasses override the ones they support and read
    the output being processed from :attr:`output`.
    """

    def __init__(self, doc):
        """Store the owning document object.

        :param doc: the object that owns this handler (exposed as ``doc``).
        """
        self._doc = doc
        self._output = None

    @property
    def doc(self):
        """The document object passed to the constructor."""
        return self._doc

    @property
    def output(self):
        """The output currently being processed, or ``None`` outside a call."""
        return self._output

    def __call__(self, output, output_num):
        """Render a single output.

        :param output: object exposing ``output_type`` (plus ``name`` for
            streams and ``data`` for display data).
        :param output_num: index of the output within its cell; accepted for
            interface compatibility and not used here.
        :return: the non-empty hook results joined by newlines (``""`` when
            no hook produced anything or the type is not recognised).
        """
        self._output = output
        try:
            content = []
            if output.output_type == "execute_result":
                content.append(self.execute_result())
            elif output.output_type == "stream":
                content.append(self.stream_enter())
                # Route each stream to the hook of the same name.
                if output.name == "stdout":
                    content.append(self.stream_stdout())
                elif output.name == "stderr":
                    content.append(self.stream_stderr())
                content.append(self.stream_exit())
            elif output.output_type == "display_data":
                content.append(self.display_data())
            elif output.output_type == "error":
                content.append(self.error_enter())
                content.append(self.error())
                content.append(self.error_exit())
        finally:
            # Never leave a stale output behind, even when a hook raises.
            self._output = None
        return "\n".join(c for c in content if c)

    def execute_result(self):
        """Hook: render an ``execute_result`` output."""

    def stream_enter(self):
        """Hook: called before the stdout/stderr hook of a stream output."""

    def stream_exit(self):
        """Hook: called after the stdout/stderr hook of a stream output."""

    def stream_stdout(self):
        """Hook: render a stream output whose name is ``stdout``."""

    def stream_stderr(self):
        """Hook: render a stream output whose name is ``stderr``."""

    def get_data_priority(self):
        """Return ``(mime type, hook)`` pairs, most preferred first."""
        return (
            ("application/pdf", self.data_pdf),
            ("image/svg+xml", self.data_svg),
            ("image/png", self.data_png),
            ("text/html", self.data_html),
            ("text/markdown", self.data_markdown),
            ("image/jpeg", self.data_jpg),
            ("text/plain", self.data_text),
            ("text/latex", self.data_latex),
            ("application/javascript", self.data_javascript),
            ("application/vnd.jupyter.widget-state+json", self.data_widget_state),
            ("application/vnd.jupyter.widget-view+json", self.data_widget_view),
        )

    @property
    def data_priority(self):
        """The pairs from :meth:`get_data_priority`, as read by ``display_data``."""
        return self.get_data_priority()

    def display_data(self):
        """Render the current output with the first matching mime-type hook.

        The priority list is scanned in order and the first mime type present
        in ``self.output.data`` wins; when none matches, ``data_other`` is
        used.
        """
        try:
            data_func = next(
                dfunc
                for dtype, dfunc in self.data_priority
                if dtype in self.output.data
            )
        except StopIteration:
            data_func = self.data_other
        return data_func()

    def error_enter(self):
        """Hook: called before ``error``."""

    def error_exit(self):
        """Hook: called after ``error``."""

    def error(self):
        """Hook: render an ``error`` output."""

    def data_pdf(self):
        """Hook: render ``application/pdf`` data."""

    def data_svg(self):
        """Hook: render ``image/svg+xml`` data."""

    def data_png(self):
        """Hook: render ``image/png`` data."""

    def data_html(self):
        """Hook: render ``text/html`` data."""

    def data_markdown(self):
        """Hook: render ``text/markdown`` data."""

    def data_jpg(self):
        """Hook: render ``image/jpeg`` data."""

    def data_text(self):
        """Hook: render ``text/plain`` data."""

    def data_latex(self):
        """Hook: render ``text/latex`` data."""

    def data_javascript(self):
        """Hook: render ``application/javascript`` data."""

    def data_widget_state(self):
        """Hook: render Jupyter widget-state data."""

    def data_widget_view(self):
        """Hook: render Jupyter widget-view data."""

    def data_other(self):
        """Hook: render display data whose mime types are not in the priority list."""
class DocBase:
    """Walk a notebook and assemble a document from hook-method fragments.

    ``process_notebook`` visits every cell, calls the hooks that apply to it
    and joins the non-empty results with newlines. Almost all hooks return
    ``None`` here; subclasses override the ones they need and read the
    current state from :attr:`nb`, :attr:`resources` and :attr:`cell`.
    """

    # Handler class instantiated when the constructor is given none.
    output_handler_cls = OutputHandlerBase

    def __init__(self, output_handler_cls=None):
        """Create the document writer and its code-cell output handler.

        :param output_handler_cls: callable taking this document and returning
            an output handler; defaults to the ``output_handler_cls`` class
            attribute.
        """
        self._notebook = None
        self._resources = None
        self._cell = None
        if output_handler_cls is None:
            output_handler_cls = self.output_handler_cls
        self.codecell_output = output_handler_cls(self)

    @property
    def nb(self):
        """The notebook being processed, or ``None`` outside a run."""
        return self._notebook

    @property
    def resources(self):
        """The resources of the current run, or ``None`` outside a run."""
        return self._resources

    @property
    def cell(self):
        """The cell being processed, or ``None`` outside a run."""
        return self._cell

    def process_notebook(self, notebook, resources):
        """Render ``notebook`` to a single string.

        :param notebook: object exposing ``cells``; every cell exposes
            ``cell_type`` and ``get`` (code cells also ``outputs``).
        :param resources: object holding ``global_content_filter`` with the
            ``include_*`` flags, readable by attribute or by key.
        :return: all non-empty fragments joined by newlines.
        """
        self._resources = resources
        self._notebook = notebook
        try:
            content = [self.header(), self.body_enter()]
            for cell in notebook.cells:
                self._cell = cell
                content.append(self.cell_enter())
                content.extend(self._render_cell(cell))
                content.append(self.cell_exit())
            content.append(self.body_exit())
            content.append(self.footer())
        finally:
            # Always drop the per-run state, even when a hook raises.
            self._resources = None
            self._notebook = None
            self._cell = None
        # Filter on each fragment: hooks return None when they have no output.
        return "\n".join(c for c in content if c)

    @staticmethod
    def _lookup(container, name):
        """Read ``name`` from ``container`` by attribute, else by key."""
        try:
            return getattr(container, name)
        except AttributeError:
            return container[name]

    def _include(self, flag):
        """Return the ``global_content_filter`` flag ``flag`` of the current run."""
        content_filter = self._lookup(self._resources, "global_content_filter")
        return self._lookup(content_filter, flag)

    def _source_removed(self):
        """Return whether the current cell is marked ``transient.remove_source``."""
        return self._cell.get("transient", {}).get("remove_source", False)

    def _render_cell(self, cell):
        """Return the fragments for one cell (without ``cell_enter``/``cell_exit``)."""
        if cell.cell_type == "code":
            if self._include("include_code"):
                return self._render_code_cell(cell)
            return []
        if cell.cell_type in ["markdown"]:
            flag, hook = "include_markdown", self.markdown_cell
        elif cell.cell_type in ["raw"]:
            flag, hook = "include_raw", self.raw_cell
        else:
            flag, hook = "include_unknown", self.unknown_cell
        if self._include(flag) and not self._source_removed():
            return [hook()]
        return []

    def _render_code_cell(self, cell):
        """Return the fragments for a code cell: input group, then output group."""
        content = [self.codecell_enter()]
        if self._include("include_input") and not self._source_removed():
            content.append(self.codecell_input_group_enter())
            if self._include("include_input_prompt"):
                content.append(self.input_prompt())
            content.append(self.codecell_input())
            content.append(self.codecell_input_group_exit())
        if cell.outputs and self._include("include_output"):
            content.append(self.codecell_output_group_enter())
            if self._include("include_output_prompt"):
                content.append(self.output_prompt())
            content.append(self.codecell_outputs_enter())
            for output_num, output in enumerate(cell.outputs):
                content.append(self.codecell_output_enter())
                content.append(self.codecell_output(output, output_num))
                content.append(self.codecell_output_exit())
            content.append(self.codecell_outputs_exit())
            content.append(self.codecell_output_group_exit())
        content.append(self.codecell_exit())
        return content

    def header(self):
        """Hook: text emitted before the document body."""

    def footer(self):
        """Hook: text emitted after the document body."""

    def body_enter(self):
        """Hook: opens the document body."""

    def body_exit(self):
        """Hook: closes the document body."""

    def cell_enter(self):
        """Hook: called before every cell."""

    def cell_exit(self):
        """Hook: called after every cell."""

    def codecell_enter(self):
        """Hook: opens a code cell."""

    def codecell_exit(self):
        """Hook: closes a code cell."""

    def codecell_input_group_enter(self):
        """Hook: opens the input group of a code cell."""

    def codecell_input_group_exit(self):
        """Hook: closes the input group of a code cell."""

    def codecell_in_prompt(self):
        """Hook: render the input prompt of a code cell."""

    def input_prompt(self):
        """Entry point used by ``process_notebook``; defers to ``codecell_in_prompt``."""
        return self.codecell_in_prompt()

    def codecell_input(self):
        """Hook: render the source of a code cell."""

    def codecell_output_group_enter(self):
        """Hook: opens the output group of a code cell."""

    def codecell_output_group_exit(self):
        """Hook: closes the output group of a code cell."""

    def codecell_out_prompt(self):
        """Hook: render the output prompt of a code cell."""

    def output_prompt(self):
        """Entry point used by ``process_notebook``; defers to ``codecell_out_prompt``."""
        return self.codecell_out_prompt()

    def codecell_outputs_enter(self):
        """Hook: called before the outputs of a code cell."""

    def codecell_outputs_exit(self):
        """Hook: called after the outputs of a code cell."""

    def codecell_output_enter(self):
        """Hook: called before each individual output."""

    def codecell_output_exit(self):
        """Hook: called after each individual output."""

    def markdown_cell(self):
        """Hook: render a markdown cell."""

    def raw_cell(self):
        """Return the raw cell source when its mime type is accepted.

        The cell's ``raw_mimetype`` metadata (lower-cased, default ``""``) is
        checked against ``resources["raw_mimetypes"]`` (default ``[""]``);
        ``None`` is returned when it is not listed.
        """
        if self.cell.metadata.get("raw_mimetype", "").lower() in self.resources.get(
            "raw_mimetypes", [""]
        ):
            return self.cell.source

    def unknown_cell(self):
        """Hook: render a cell of any other type."""
class LatexBaseOutputHandler(OutputHandlerBase):
    """Render code-cell outputs as LaTeX snippets.

    Stream output is produced in ``stream_enter``; the per-stream hooks are
    not overridden, so stdout and stderr are rendered identically.

    NOTE(review): the mime-type priority is inherited unchanged, so
    ``text/html`` (no hook here) is chosen ahead of ``text/plain``, and
    ``text/plain`` ahead of ``text/latex`` - confirm that order is intended
    for LaTeX output.
    """

    def execute_result(self):
        """Redirect execute_result to display data."""
        return self.display_data()

    def error(self):
        """Display python error text as-is.

        Every traceback line is indented, stripped of ANSI codes and
        LaTeX-escaped, then wrapped in a fancyvrb Verbatim environment.
        """
        content = [r"\begin{Verbatim}[commandchars=\\\{\}]"]
        content.extend(
            chain_funcs(line, indent, strip_ansi, escape_latex)
            for line in self.output.traceback
        )
        content.append(r"\end{Verbatim}")
        return "\n".join(content)

    def stream_enter(self):
        """Display stream output with coloring.

        The text is LaTeX-escaped and its ANSI codes are converted to LaTeX
        commands, so it has to live in a Verbatim environment that still
        interprets commands; a plain verbatim would print them literally.
        """
        return "\n".join(
            [
                r"\begin{Verbatim}[commandchars=\\\{\}]",
                chain_funcs(self.output.text, escape_latex, ansi2latex),
                r"\end{Verbatim}",
            ]
        )

    def data_text(self):
        """Display simple data text."""
        return "\n".join(
            [r"\begin{verbatim}", self.output.data["text/plain"], r"\end{verbatim}"]
        )

    def data_latex(self):
        """Return the ``text/latex`` data with any ``files/`` prefix stripped."""
        return strip_files_prefix(self.output.data["text/latex"])

    def data_markdown(self):
        """Convert the ``text/markdown`` data to LaTeX with pandoc."""
        return chain_funcs(
            self.output.data["text/markdown"],
            citation2latex,
            strip_files_prefix,
            # convert_pandoc takes the source text as its first argument.
            lambda source: convert_pandoc(
                source, "markdown+tex_math_double_backslash", "latex"
            ),
        )

    def data_png(self):
        """Include the extracted ``image/png`` file as a figure."""
        return self.draw_figure(self.output.metadata.filenames["image/png"])

    def data_jpg(self):
        """Include the extracted ``image/jpeg`` file as a figure."""
        return self.draw_figure(self.output.metadata.filenames["image/jpeg"])

    def data_svg(self):
        """Include the extracted ``image/svg+xml`` file as a figure."""
        return self.draw_figure(self.output.metadata.filenames["image/svg+xml"])

    def data_pdf(self):
        """Include the extracted ``application/pdf`` file as a figure."""
        return self.draw_figure(self.output.metadata.filenames["application/pdf"])

    def draw_figure(self, filename):
        """Return the LaTeX that includes ``filename`` as a centred image.

        :param filename: path of the image file; converted with
            ``posix_path`` before being written into the snippet.
        """
        filename = posix_path(filename)
        # Plain concatenation: the path must never be interpreted as a regex
        # replacement template.
        return "\n".join(
            [
                r"\begin{center}",
                r"\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{"
                + filename
                + "}",
                r"\end{center}",
                r"{ \hspace*{\fill} \\}",
            ]
        )
class LatexBaseDoc(DocBase):
    """Document writer producing a LaTeX article from a notebook."""

    output_handler_cls = LatexBaseOutputHandler

    def header(self):
        """Return the preamble: class, packages, definitions and commands."""
        return "\n".join(
            c
            for c in [
                self.head_docclass(),
                self.head_packages(),
                self.head_definitions(),
                self.head_commands(),
            ]
            if c
        )

    def head_docclass(self):
        """Return the document class declaration."""
        return r"\documentclass[11pt]{article}"

    def head_packages(self):
        """Return the package imports (module constant ``PACKAGES``)."""
        return PACKAGES

    def head_definitions(self):
        """Return ``DEFINITIONS`` followed by the title/date/author definitions."""
        return "\n".join(
            c
            for c in [
                DEFINITIONS,
                self.define_title(),
                self.define_date(),
                self.define_author(),
            ]
            if c
        )

    def define_title(self):
        """Document title.

        Uses the notebook's ``title`` metadata when it is non-empty, otherwise
        ``resources["metadata"]["name"]``; the value is LaTeX-escaped.
        """
        nb_title = (
            self.nb.metadata.get("title", "") or self.resources["metadata"]["name"]
        )
        return r"\title{{{}}}".format(escape_latex(nb_title))

    def define_date(self):
        """Document date (none is emitted by default)."""

    def define_author(self):
        """Document author.

        Joins the ``name`` of every entry of the notebook's ``authors``
        metadata with ``", "``; returns ``None`` when there is no such key.
        """
        if "authors" in self.nb.metadata:
            return r"\author{{{}}}".format(
                ", ".join(a["name"] for a in self.nb.metadata["authors"])
            )

    def head_commands(self):
        """Return the sloppy and hyperref setup commands plus the page margins."""
        # The literal opens with a real newline that is stripped afterwards; a
        # backslash there would stay in a raw string and end up in the preamble.
        string = dedent(
            r"""
            \sloppy
            % Setup hyperref package
            \hypersetup{
            breaklinks=true, % so long urls are correctly broken across lines
            colorlinks=true,
            urlcolor=urlcolor,
            linkcolor=linkcolor,
            citecolor=citecolor,
            }"""
        ).lstrip("\n")
        return "\n".join([string, self.cmnd_margins()])

    def cmnd_margins(self):
        """Slightly bigger margins than the latex defaults."""
        return r"\geometry{verbose,tmargin=1in,bmargin=1in,lmargin=1in,rmargin=1in}"

    def body_enter(self):
        """Open the document environment."""
        return r"\begin{document}"

    def body_exit(self):
        """Close the document environment."""
        return r"\end{document}"

    def markdown_cell(self):
        """Convert the current markdown cell to LaTeX.

        The source goes through citation conversion, ``files/`` prefix
        stripping, pandoc (markdown to JSON), reference resolution and pandoc
        again (JSON to LaTeX).
        """
        return chain_funcs(
            self.cell.source,
            citation2latex,
            strip_files_prefix,
            # convert_pandoc takes the source text as its first argument, so
            # each conversion step is wrapped to receive the piped value.
            lambda source: convert_pandoc(
                source, "markdown+tex_math_double_backslash", "json", extra_args=[]
            ),
            resolve_references,
            lambda source: convert_pandoc(source, "json", "latex"),
        )
# LaTeX package imports for the document preamble.
# The literal opens with a real newline that is stripped afterwards: in a raw
# string a backslash before the newline is NOT a line continuation, it would
# stay in the text and put a stray control space at the top of the preamble.
PACKAGES = r"""
\usepackage{iftex}
\ifPDFTeX
\usepackage[T1]{fontenc}
\usepackage{mathpazo}
\else
\usepackage{fontspec}
\fi
% Basic figure setup, for now with no caption control since it's done
% automatically by Pandoc (which extracts ![](path) syntax from Markdown).
\usepackage{graphicx}
% Maintain compatibility with old templates. Remove in nbconvert 6.0
\let\Oldincludegraphics\includegraphics
% Ensure that by default, figures have no caption (until we provide a
% proper Figure object with a Caption API and a way to capture that
% in the conversion process - todo).
\usepackage{caption}
\DeclareCaptionFormat{nocaption}{}
\captionsetup{format=nocaption,aboveskip=0pt,belowskip=0pt}
\usepackage[Export]{adjustbox} % Used to constrain images to a maximum size
\adjustboxset{max size={0.9\linewidth}{0.9\paperheight}}
\usepackage{float}
\floatplacement{figure}{H} % forces figures to be placed at the correct location
\usepackage{xcolor} % Allow colors to be defined
\usepackage{enumerate} % Needed for markdown enumerations to work
\usepackage{geometry} % Used to adjust the document margins
\usepackage{amsmath} % Equations
\usepackage{amssymb} % Equations
\usepackage{textcomp} % defines textquotesingle
% Hack from http://tex.stackexchange.com/a/47451/13684:
\AtBeginDocument{%
\def\PYZsq{\textquotesingle}% Upright quotes in Pygmentized code
}
\usepackage{upquote} % Upright quotes for verbatim code
\usepackage{eurosym} % defines \euro
\usepackage[mathletters]{ucs} % Extended unicode (utf-8) support
\usepackage{fancyvrb} % verbatim replacement that allows latex
\usepackage{grffile} % extends the file name processing of package graphics
% to support a larger range
\makeatletter % fix for grffile with XeLaTeX
\def\Gread@@xetex#1{%
\IfFileExists{"\Gin@base".bb}%
{\Gread@eps{\Gin@base.bb}}%
{\Gread@@xetex@aux#1}%
}
\makeatother
% The hyperref package gives us a pdf with properly built
% internal navigation ('pdf bookmarks' for the table of contents,
% internal cross-reference links, web links for URLs, etc.)
\usepackage{hyperref}
% The default LaTeX title has an obnoxious amount of whitespace. By default,
% titling removes some of it. It also provides customization options.
\usepackage{titling}
\usepackage{longtable} % longtable support required by pandoc >1.10
\usepackage{booktabs} % table support for pandoc > 1.12.2
\usepackage[inline]{enumitem} % IRkernel/repr support (it uses the enumerate* environment)
\usepackage[normalem]{ulem} % ulem is needed to support strikethroughs (\sout)
% normalem makes italics be italics, not underlines
\usepackage{mathrsfs}
""".lstrip("\n")
# LaTeX colour and command definitions for the document preamble.
# The literal opens with a real newline that is stripped afterwards: in a raw
# string a backslash before the newline is NOT a line continuation, it would
# stay in the text and put a stray control space into the preamble.
DEFINITIONS = r"""
% Colors for the hyperref package
\definecolor{urlcolor}{rgb}{0,.145,.698}
\definecolor{linkcolor}{rgb}{.71,0.21,0.01}
\definecolor{citecolor}{rgb}{.12,.54,.11}
% ANSI colors
\definecolor{ansi-black}{HTML}{3E424D}
\definecolor{ansi-black-intense}{HTML}{282C36}
\definecolor{ansi-red}{HTML}{E75C58}
\definecolor{ansi-red-intense}{HTML}{B22B31}
\definecolor{ansi-green}{HTML}{00A250}
\definecolor{ansi-green-intense}{HTML}{007427}
\definecolor{ansi-yellow}{HTML}{DDB62B}
\definecolor{ansi-yellow-intense}{HTML}{B27D12}
\definecolor{ansi-blue}{HTML}{208FFB}
\definecolor{ansi-blue-intense}{HTML}{0065CA}
\definecolor{ansi-magenta}{HTML}{D160C4}
\definecolor{ansi-magenta-intense}{HTML}{A03196}
\definecolor{ansi-cyan}{HTML}{60C6C8}
\definecolor{ansi-cyan-intense}{HTML}{258F8F}
\definecolor{ansi-white}{HTML}{C5C1B4}
\definecolor{ansi-white-intense}{HTML}{A1A6B2}
\definecolor{ansi-default-inverse-fg}{HTML}{FFFFFF}
\definecolor{ansi-default-inverse-bg}{HTML}{000000}
% commands and environments needed by pandoc snippets
% extracted from the output of `pandoc -s`
\providecommand{\tightlist}{%
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}}
% Add ',fontsize=\small' for more characters per line
\newenvironment{Shaded}{}{}
\newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}}
\newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.56,0.13,0.00}{{#1}}}
\newcommand{\DecValTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\FloatTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\CharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\StringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\CommentTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textit{{#1}}}}
\newcommand{\OtherTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{{#1}}}
\newcommand{\AlertTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
\newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.02,0.16,0.49}{{#1}}}
\newcommand{\RegionMarkerTok}[1]{{#1}}
\newcommand{\ErrorTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
\newcommand{\NormalTok}[1]{{#1}}
% Additional commands for more recent versions of Pandoc
\newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.53,0.00,0.00}{{#1}}}
\newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.73,0.40,0.53}{{#1}}}
\newcommand{\ImportTok}[1]{{#1}}
\newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.73,0.13,0.13}{\textit{{#1}}}}
\newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
\newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
\newcommand{\VariableTok}[1]{\textcolor[rgb]{0.10,0.09,0.49}{{#1}}}
\newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}}
\newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.40,0.40,0.40}{{#1}}}
\newcommand{\BuiltInTok}[1]{{#1}}
\newcommand{\ExtensionTok}[1]{{#1}}
\newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.74,0.48,0.00}{{#1}}}
\newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.49,0.56,0.16}{{#1}}}
\newcommand{\InformationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
\newcommand{\WarningTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
% Define a nice break command that doesn't care if a line doesn't already
% exist.
\def\br{\hspace*{\fill} \\* }
% Math Jax compatibility definitions
\def\gt{>}
\def\lt{<}
\let\Oldtex\TeX
\let\Oldlatex\LaTeX
\renewcommand{\TeX}{\textrm{\Oldtex}}
\renewcommand{\LaTeX}{\textrm{\Oldlatex}}
""".lstrip("\n")
if __name__ == "__main__":
    # Script entry point: build the LaTeX document writer and print what it
    # renders.
    # NOTE(review): process_notebook is declared with two required
    # parameters (notebook, resources) and this call passes neither, so it
    # raises TypeError as written - load a notebook and its resources here.
    _latex_doc = LatexBaseDoc()
    print(_latex_doc.process_notebook())
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment