Last active
November 12, 2019 01:56
-
-
Save chrisjsewell/73f7d49423b0fd4e40efd5f51b651d73 to your computer and use it in GitHub Desktop.
Test rewriting https://github.com/jupyter/nbconvert/blob/master/share/jupyter/nbconvert/templates/latex/base.tex.j2 in pure Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from textwrap import dedent | |
import re | |
from nbconvert.filters import ( | |
indent, | |
strip_ansi, | |
escape_latex, | |
ansi2latex, | |
citation2latex, | |
strip_files_prefix, | |
convert_pandoc, | |
posix_path, | |
resolve_references, | |
) | |
def chain_funcs(arg, *funcs):
    """Pass ``arg`` through ``funcs`` from left to right.

    Each function is called with the previous function's return value, which
    mirrors how a chain of template filters is applied.

    :param arg: the initial value.
    :param funcs: one-argument callables applied in the order given.
    :return: the result of the last call, or ``arg`` unchanged when no
        functions are supplied.
    """
    for func in funcs:
        arg = func(arg)
    return arg
class OutputHandlerBase:
    """Dispatch one code-cell output to a hook method chosen by its type.

    Calling the handler with an output selects hooks from ``output_type``
    (and, for streams, ``name``; for display data, the keys of ``data``).
    Every hook in this base class returns ``None``; subclasses override the
    ones they support and return a string fragment.  Falsy fragments are
    dropped and the rest are joined with newlines.
    """

    def __init__(self, doc):
        """Remember the owning document, exposed through :attr:`doc`."""
        self._doc = doc
        # Only populated while ``__call__`` is running.
        self._output = None

    @property
    def doc(self):
        """The document object passed to the constructor."""
        return self._doc

    @property
    def output(self):
        """The output currently being rendered (``None`` outside a call)."""
        return self._output

    def __call__(self, output, output_num):
        """Render ``output`` and return the joined hook fragments.

        :param output: object exposing ``output_type`` and, depending on the
            type, ``name`` (streams) or ``data`` (display data).
        :param output_num: position of the output within its cell; accepted
            for the caller's convenience but not used by this base class.
        :return: newline-joined fragments; ``""`` when no hook produced text
            or the output type is not recognised.
        """
        self._output = output
        content = []
        try:
            if output.output_type == "execute_result":
                content.append(self.execute_result())
            elif output.output_type == "stream":
                content.append(self.stream_enter())
                if output.name == "stdout":
                    content.append(self.stream_stdout())
                elif output.name == "stderr":
                    content.append(self.stream_stderr())
                content.append(self.stream_exit())
            elif output.output_type == "display_data":
                content.append(self.display_data())
            elif output.output_type == "error":
                content.append(self.error_enter())
                content.append(self.error())
                content.append(self.error_exit())
        finally:
            # Never leave a stale output behind, even when a hook raises.
            self._output = None
        return "\n".join(c for c in content if c)

    def execute_result(self):
        pass

    def stream_enter(self):
        pass

    def stream_exit(self):
        pass

    def stream_stdout(self):
        pass

    def stream_stderr(self):
        pass

    def get_data_priority(self):
        """Return ``(mimetype, hook)`` pairs, most preferred first."""
        return (
            ("application/pdf", self.data_pdf),
            ("image/svg+xml", self.data_svg),
            ("image/png", self.data_png),
            ("text/html", self.data_html),
            ("text/markdown", self.data_markdown),
            ("image/jpeg", self.data_jpg),
            ("text/plain", self.data_text),
            ("text/latex", self.data_latex),
            ("application/javascript", self.data_javascript),
            ("application/vnd.jupyter.widget-state+json", self.data_widget_state),
            ("application/vnd.jupyter.widget-view+json", self.data_widget_view),
        )

    def display_data(self):
        """Render the current output with the first matching mimetype hook.

        Falls back to :meth:`data_other` when none of the prioritised
        mimetypes is present in ``self.output.data``.
        """
        available = self.output.data
        for mimetype, data_func in self.get_data_priority():
            if mimetype in available:
                return data_func()
        return self.data_other()

    def error_enter(self):
        pass

    def error_exit(self):
        pass

    def error(self):
        pass

    def data_pdf(self):
        pass

    def data_svg(self):
        pass

    def data_png(self):
        pass

    def data_html(self):
        pass

    def data_markdown(self):
        pass

    def data_jpg(self):
        pass

    def data_text(self):
        pass

    def data_latex(self):
        pass

    def data_javascript(self):
        pass

    def data_widget_state(self):
        pass

    def data_widget_view(self):
        pass

    def data_other(self):
        """Hook for display data with no prioritised mimetype."""
        pass
class DocBase:
    """Walk a notebook and assemble a document from overridable hooks.

    ``process_notebook`` visits every cell, calls the hook methods below in a
    fixed order and joins the non-empty fragments with newlines.  Hooks in
    this base class return ``None`` (apart from ``raw_cell``); subclasses
    override them to emit text.  Code-cell outputs are delegated to an
    instance of ``output_handler_cls``.
    """

    output_handler_cls = OutputHandlerBase

    def __init__(self, output_handler_cls=None):
        """Create the document and its code-cell output handler.

        :param output_handler_cls: callable invoked with this document to
            build the output handler; when omitted, the class attribute
            ``output_handler_cls`` is used.
        """
        self._notebook = None
        self._resources = None
        self._cell = None
        if output_handler_cls is None:
            output_handler_cls = self.output_handler_cls
        self.codecell_output = output_handler_cls(self)

    @property
    def nb(self):
        """The notebook being processed (``None`` outside a run)."""
        return self._notebook

    @property
    def resources(self):
        """The resources passed to ``process_notebook`` (``None`` outside a run)."""
        return self._resources

    @property
    def cell(self):
        """The cell currently being rendered (``None`` outside a run)."""
        return self._cell

    def process_notebook(self, notebook, resources):
        """Render ``notebook`` and return the assembled document text.

        :param notebook: object exposing ``cells``; each cell exposes
            ``cell_type``, ``get`` and, for code cells, ``outputs``.
        :param resources: mapping (or attribute-style object) providing
            ``global_content_filter`` with the ``include_*`` switches.
        :return: the non-empty hook fragments joined with newlines.
        """
        self._resources = resources
        self._notebook = notebook
        content = []
        try:
            content.append(self.header())
            content.append(self.body_enter())
            for cell in notebook.cells:
                self._cell = cell
                content.append(self.cell_enter())
                content.extend(self._render_cell(cell))
                content.append(self.cell_exit())
            content.append(self.body_exit())
            content.append(self.footer())
        finally:
            # Clear per-run state even when a hook raises.
            self._resources = None
            self._notebook = None
            self._cell = None
        # Hooks that produce nothing return None; drop those fragments.
        return "\n".join(c for c in content if c)

    @staticmethod
    def _lookup(container, key):
        """Read ``key`` from a mapping, or as an attribute otherwise."""
        try:
            return container[key]
        except (TypeError, KeyError):
            return getattr(container, key)

    def _include(self, flag):
        """Return the ``global_content_filter`` switch called ``flag``."""
        content_filter = self._lookup(self._resources, "global_content_filter")
        return self._lookup(content_filter, flag)

    @staticmethod
    def _source_removed(cell):
        """Tell whether the cell's transient data asks to drop its source."""
        return cell.get("transient", {}).get("remove_source", False)

    def _render_cell(self, cell):
        """Return the fragments for one cell, excluding cell_enter/cell_exit."""
        if cell.cell_type == "code":
            if self._include("include_code"):
                return self._render_code_cell(cell)
            return []
        if cell.cell_type == "markdown":
            flag, hook = "include_markdown", self.markdown_cell
        elif cell.cell_type == "raw":
            flag, hook = "include_raw", self.raw_cell
        else:
            flag, hook = "include_unknown", self.unknown_cell
        if self._include(flag) and not self._source_removed(cell):
            return [hook()]
        return []

    def _render_code_cell(self, cell):
        """Return the fragments for a code cell's input and output groups."""
        parts = [self.codecell_enter()]
        if self._include("include_input") and not self._source_removed(cell):
            parts.append(self.codecell_input_group_enter())
            if self._include("include_input_prompt"):
                parts.append(self.codecell_in_prompt())
            parts.append(self.codecell_input())
            parts.append(self.codecell_input_group_exit())
        if cell.outputs and self._include("include_output"):
            parts.append(self.codecell_output_group_enter())
            if self._include("include_output_prompt"):
                parts.append(self.codecell_out_prompt())
            parts.append(self.codecell_outputs_enter())
            for output_num, output in enumerate(cell.outputs):
                parts.append(self.codecell_output_enter())
                parts.append(self.codecell_output(output, output_num))
                parts.append(self.codecell_output_exit())
            parts.append(self.codecell_outputs_exit())
            parts.append(self.codecell_output_group_exit())
        parts.append(self.codecell_exit())
        return parts

    def header(self):
        pass

    def footer(self):
        pass

    def body_enter(self):
        pass

    def body_exit(self):
        pass

    def cell_enter(self):
        pass

    def cell_exit(self):
        pass

    def codecell_enter(self):
        pass

    def codecell_exit(self):
        pass

    def codecell_input_group_enter(self):
        pass

    def codecell_input_group_exit(self):
        pass

    def codecell_in_prompt(self):
        pass

    def codecell_input(self):
        pass

    def codecell_output_group_enter(self):
        pass

    def codecell_output_group_exit(self):
        pass

    def codecell_out_prompt(self):
        pass

    def codecell_outputs_enter(self):
        pass

    def codecell_outputs_exit(self):
        pass

    def codecell_output_enter(self):
        pass

    def codecell_output_exit(self):
        pass

    def markdown_cell(self):
        pass

    def raw_cell(self):
        """Return the raw cell's source when its mimetype is accepted.

        The cell's ``raw_mimetype`` metadata (lower-cased, default ``""``) is
        checked against ``resources["raw_mimetypes"]`` (default ``[""]``);
        ``None`` is returned when it is not listed.
        """
        if self.cell.metadata.get("raw_mimetype", "").lower() in self.resources.get(
            "raw_mimetypes", [""]
        ):
            return self.cell.source

    def unknown_cell(self):
        pass
class LatexBaseOutputHandler(OutputHandlerBase):
    """Render code-cell outputs as LaTeX fragments."""

    def execute_result(self):
        """Redirect execute_result to display data."""
        return self.display_data()

    def error(self):
        """Display python error text as-is."""
        content = [r"\begin{Verbatim}[commandchars=\\\{\}]"]
        content.extend(
            chain_funcs(line, indent, strip_ansi, escape_latex)
            for line in self.output.traceback
        )
        content.append(r"\end{Verbatim}")
        return "\n".join(content)

    def stream_enter(self):
        """Display stream output with coloring."""
        return "\n".join(
            [
                # ansi2latex emits LaTeX colour commands; they are only
                # interpreted inside a Verbatim with commandchars enabled
                # (plain ``verbatim`` would print them literally).
                r"\begin{Verbatim}[commandchars=\\\{\}]",
                chain_funcs(self.output.text, escape_latex, ansi2latex),
                r"\end{Verbatim}",
            ]
        )

    def data_text(self):
        """Display simple data text."""
        return "\n".join(
            [r"\begin{verbatim}", self.output.data["text/plain"], r"\end{verbatim}"]
        )

    def data_latex(self):
        """Emit LaTeX data after ``strip_files_prefix`` has been applied."""
        return strip_files_prefix(self.output.data["text/latex"])

    def data_markdown(self):
        """Convert markdown data to LaTeX with pandoc."""
        return chain_funcs(
            self.output.data["text/markdown"],
            citation2latex,
            strip_files_prefix,
            # convert_pandoc takes the source as its first argument, so it
            # must be wrapped to act as a one-argument step of the chain.
            lambda text: convert_pandoc(
                text, "markdown+tex_math_double_backslash", "latex"
            ),
        )

    def data_png(self):
        """Draw the figure extracted for the PNG representation."""
        return self._figure_for("image/png")

    def data_jpg(self):
        """Draw the figure extracted for the JPEG representation."""
        return self._figure_for("image/jpeg")

    def data_svg(self):
        """Draw the figure extracted for the SVG representation."""
        return self._figure_for("image/svg+xml")

    def data_pdf(self):
        """Draw the figure extracted for the PDF representation."""
        return self._figure_for("application/pdf")

    def _figure_for(self, mimetype):
        """Draw the file recorded for ``mimetype`` in the output metadata."""
        return self.draw_figure(self.output.metadata.filenames[mimetype])

    def draw_figure(self, filename):
        """Return LaTeX that includes ``filename`` as a centred image.

        :param filename: path of the image file; it is normalised with
            ``posix_path`` before being inserted.
        """
        filename = posix_path(filename)
        # The path is inserted by plain concatenation so that backslashes or
        # group references in it are never interpreted as regex escapes.
        return "\n".join(
            [
                r"\begin{center}",
                r"\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{"
                + filename
                + "}",
                r"\end{center}",
                r"{ \hspace*{\fill} \\}",
            ]
        )
class LatexBaseDoc(DocBase):
    """Assemble a LaTeX ``article`` document from a notebook."""

    output_handler_cls = LatexBaseOutputHandler

    def header(self):
        """Return the preamble: class, packages, definitions and commands."""
        return "\n".join(
            c
            for c in [
                self.head_docclass(),
                self.head_packages(),
                self.head_definitions(),
                self.head_commands(),
            ]
            if c
        )

    def head_docclass(self):
        """Return the ``\\documentclass`` line."""
        return r"\documentclass[11pt]{article}"

    def head_packages(self):
        """Return the package-loading part of the preamble."""
        return PACKAGES

    def head_definitions(self):
        """Return shared definitions followed by title, date and author."""
        return "\n".join(
            c
            for c in [
                DEFINITIONS,
                self.define_title(),
                self.define_date(),
                self.define_author(),
            ]
            if c
        )

    def define_title(self):
        """Document title."""
        # Prefer the notebook's own title, else the name stored in resources.
        nb_title = (
            self.nb.metadata.get("title", "") or self.resources["metadata"]["name"]
        )
        return r"\title{{{}}}".format(escape_latex(nb_title))

    def define_date(self):
        """Document date."""
        pass

    def define_author(self):
        """Document author."""
        if "authors" in self.nb.metadata:
            # Join the names of the entries in the ``authors`` metadata list.
            return r"\author{{{}}}".format(
                ", ".join(author["name"] for author in self.nb.metadata["authors"])
            )

    def head_commands(self):
        """Prevent overflowing lines due to hard-to-break entities."""
        # Built line by line: a raw triple-quoted string opened with a
        # trailing backslash would keep that backslash in the output.
        string = "\n".join(
            [
                r"\sloppy",
                r"% Setup hyperref package",
                r"\hypersetup{",
                r"    breaklinks=true, % so long urls are correctly broken across lines",
                r"    colorlinks=true,",
                r"    urlcolor=urlcolor,",
                r"    linkcolor=linkcolor,",
                r"    citecolor=citecolor,",
                r"}",
            ]
        )
        return "\n".join([string, self.cmnd_margins()])

    def cmnd_margins(self):
        """Slightly bigger margins than the latex defaults."""
        return r"\geometry{verbose,tmargin=1in,bmargin=1in,lmargin=1in,rmargin=1in}"

    def body_enter(self):
        """Open the document body."""
        return r"\begin{document}"

    def body_exit(self):
        """Close the document body."""
        return r"\end{document}"

    def markdown_cell(self):
        """Convert the current markdown cell to LaTeX via pandoc's JSON form."""
        return chain_funcs(
            self.cell.source,
            citation2latex,
            strip_files_prefix,
            # convert_pandoc takes the source as its first argument, so each
            # conversion is wrapped to act as a one-argument chain step.
            lambda text: convert_pandoc(
                text, "markdown+tex_math_double_backslash", "json", extra_args=[]
            ),
            resolve_references,
            lambda text: convert_pandoc(text, "json", "latex"),
        )
# LaTeX preamble fragment that loads the packages used by the document.
# The text starts right after the opening quotes: in a raw string a backslash
# followed by a newline is kept literally (it is not a line continuation), and
# a stray "\<newline>" would be typeset as a control space in the preamble.
PACKAGES = r"""\usepackage{iftex}
\ifPDFTeX
    \usepackage[T1]{fontenc}
    \usepackage{mathpazo}
\else
    \usepackage{fontspec}
\fi
% Basic figure setup, for now with no caption control since it's done
% automatically by Pandoc (which extracts ![](path) syntax from Markdown).
\usepackage{graphicx}
% Maintain compatibility with old templates. Remove in nbconvert 6.0
\let\Oldincludegraphics\includegraphics
% Ensure that by default, figures have no caption (until we provide a
% proper Figure object with a Caption API and a way to capture that
% in the conversion process - todo).
\usepackage{caption}
\DeclareCaptionFormat{nocaption}{}
\captionsetup{format=nocaption,aboveskip=0pt,belowskip=0pt}
\usepackage[Export]{adjustbox} % Used to constrain images to a maximum size
\adjustboxset{max size={0.9\linewidth}{0.9\paperheight}}
\usepackage{float}
\floatplacement{figure}{H} % forces figures to be placed at the correct location
\usepackage{xcolor} % Allow colors to be defined
\usepackage{enumerate} % Needed for markdown enumerations to work
\usepackage{geometry} % Used to adjust the document margins
\usepackage{amsmath} % Equations
\usepackage{amssymb} % Equations
\usepackage{textcomp} % defines textquotesingle
% Hack from http://tex.stackexchange.com/a/47451/13684:
\AtBeginDocument{%
    \def\PYZsq{\textquotesingle}% Upright quotes in Pygmentized code
}
\usepackage{upquote} % Upright quotes for verbatim code
\usepackage{eurosym} % defines \euro
\usepackage[mathletters]{ucs} % Extended unicode (utf-8) support
\usepackage{fancyvrb} % verbatim replacement that allows latex
\usepackage{grffile} % extends the file name processing of package graphics
                     % to support a larger range
\makeatletter % fix for grffile with XeLaTeX
\def\Gread@@xetex#1{%
    \IfFileExists{"\Gin@base".bb}%
    {\Gread@eps{\Gin@base.bb}}%
    {\Gread@@xetex@aux#1}%
}
\makeatother
% The hyperref package gives us a pdf with properly built
% internal navigation ('pdf bookmarks' for the table of contents,
% internal cross-reference links, web links for URLs, etc.)
\usepackage{hyperref}
% The default LaTeX title has an obnoxious amount of whitespace. By default,
% titling removes some of it. It also provides customization options.
\usepackage{titling}
\usepackage{longtable} % longtable support required by pandoc >1.10
\usepackage{booktabs} % table support for pandoc > 1.12.2
\usepackage[inline]{enumitem} % IRkernel/repr support (it uses the enumerate* environment)
\usepackage[normalem]{ulem} % ulem is needed to support strikethroughs (\sout)
                            % normalem makes italics be italics, not underlines
\usepackage{mathrsfs}
"""
# LaTeX preamble fragment with colour definitions, the highlighting macros
# used by pandoc output, and MathJax compatibility macros.
# The text starts right after the opening quotes: in a raw string a backslash
# followed by a newline is kept literally (it is not a line continuation), and
# a stray "\<newline>" would be typeset as a control space in the preamble.
DEFINITIONS = r"""% Colors for the hyperref package
\definecolor{urlcolor}{rgb}{0,.145,.698}
\definecolor{linkcolor}{rgb}{.71,0.21,0.01}
\definecolor{citecolor}{rgb}{.12,.54,.11}
% ANSI colors
\definecolor{ansi-black}{HTML}{3E424D}
\definecolor{ansi-black-intense}{HTML}{282C36}
\definecolor{ansi-red}{HTML}{E75C58}
\definecolor{ansi-red-intense}{HTML}{B22B31}
\definecolor{ansi-green}{HTML}{00A250}
\definecolor{ansi-green-intense}{HTML}{007427}
\definecolor{ansi-yellow}{HTML}{DDB62B}
\definecolor{ansi-yellow-intense}{HTML}{B27D12}
\definecolor{ansi-blue}{HTML}{208FFB}
\definecolor{ansi-blue-intense}{HTML}{0065CA}
\definecolor{ansi-magenta}{HTML}{D160C4}
\definecolor{ansi-magenta-intense}{HTML}{A03196}
\definecolor{ansi-cyan}{HTML}{60C6C8}
\definecolor{ansi-cyan-intense}{HTML}{258F8F}
\definecolor{ansi-white}{HTML}{C5C1B4}
\definecolor{ansi-white-intense}{HTML}{A1A6B2}
\definecolor{ansi-default-inverse-fg}{HTML}{FFFFFF}
\definecolor{ansi-default-inverse-bg}{HTML}{000000}
% commands and environments needed by pandoc snippets
% extracted from the output of `pandoc -s`
\providecommand{\tightlist}{%
  \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}}
% Add ',fontsize=\small' for more characters per line
\newenvironment{Shaded}{}{}
\newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}}
\newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.56,0.13,0.00}{{#1}}}
\newcommand{\DecValTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\FloatTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\CharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\StringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\CommentTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textit{{#1}}}}
\newcommand{\OtherTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{{#1}}}
\newcommand{\AlertTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
\newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.02,0.16,0.49}{{#1}}}
\newcommand{\RegionMarkerTok}[1]{{#1}}
\newcommand{\ErrorTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
\newcommand{\NormalTok}[1]{{#1}}
% Additional commands for more recent versions of Pandoc
\newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.53,0.00,0.00}{{#1}}}
\newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.73,0.40,0.53}{{#1}}}
\newcommand{\ImportTok}[1]{{#1}}
\newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.73,0.13,0.13}{\textit{{#1}}}}
\newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
\newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
\newcommand{\VariableTok}[1]{\textcolor[rgb]{0.10,0.09,0.49}{{#1}}}
\newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}}
\newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.40,0.40,0.40}{{#1}}}
\newcommand{\BuiltInTok}[1]{{#1}}
\newcommand{\ExtensionTok}[1]{{#1}}
\newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.74,0.48,0.00}{{#1}}}
\newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.49,0.56,0.16}{{#1}}}
\newcommand{\InformationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
\newcommand{\WarningTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
% Define a nice break command that doesn't care if a line doesn't already
% exist.
\def\br{\hspace*{\fill} \\* }
% Math Jax compatibility definitions
\def\gt{>}
\def\lt{<}
\let\Oldtex\TeX
\let\Oldlatex\LaTeX
\renewcommand{\TeX}{\textrm{\Oldtex}}
\renewcommand{\LaTeX}{\textrm{\Oldlatex}}
"""
# Script entry point: build the LaTeX document object and print its output.
if __name__ == "__main__":
    _latex_doc = LatexBaseDoc()
    # NOTE(review): process_notebook is declared with two required
    # parameters (notebook, resources), so this argument-less call raises
    # TypeError as written; a loaded notebook and a resources mapping still
    # need to be supplied here.
    print(_latex_doc.process_notebook())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment