Skip to content

Instantly share code, notes, and snippets.

@chrisjsewell
Last active November 13, 2019 03:40
Show Gist options
  • Save chrisjsewell/90bd30d003894f78e3608d740c508a27 to your computer and use it in GitHub Desktop.
Save chrisjsewell/90bd30d003894f78e3608d740c508a27 to your computer and use it in GitHub Desktop.
A prototype implementation of an IMarkdown parser
from contextlib import contextmanager
import json

from nbconvert.preprocessors.execute import (
    ExecutePreprocessor,
    CellExecutionComplete,
    Empty,
)
from panflute import CodeBlock, convert_text, Div
from panflute.tools import meta2builtin
import yaml
class SourceExecuter(ExecutePreprocessor):
    """This is a first stab at an executor that runs directly on source code.

    Instead of walking the cells of a notebook, source strings are sent to the
    kernel one at a time with :meth:`run_cell`, which must be called inside
    the :meth:`setup_preprocessor` context.
    """

    @contextmanager
    def setup_preprocessor(self):
        """Start a kernel for the duration of the ``with`` block.

        Resets the display-id map and widget state, starts a new kernel and
        yields ``(self.km, self.kc)``.  On exit the client channels are
        stopped, the kernel is shut down and both attributes are removed.
        """
        self._display_id_map = {}
        self.widget_state = {}
        self.widget_buffers = {}
        self.km, self.kc = self.start_new_kernel(cwd=None)
        try:
            yield self.km, self.kc
        finally:
            self.kc.stop_channels()
            self.km.shutdown_kernel(now=self.shutdown_kernel == "immediate")
            delattr(self, "km")
            delattr(self, "kc")

    def run_cell(self, source, cell_index=None):
        """Execute ``source`` on the kernel and collect its outputs.

        :param source: the code to execute
        :param cell_index: passed through to the message handlers
        :return: ``(exec_reply, outputs)`` -- the reply obtained from
            ``_wait_for_reply`` and the list of outputs gathered from IOPub
        :raises RuntimeError: if no IOPub message arrives within
            ``iopub_timeout`` and ``raise_on_iopub_timeout`` is set
        """
        parent_msg_id = self.kc.execute(source)
        self.log.debug("Executing cell:\n%s", source)
        exec_reply = self._wait_for_reply(parent_msg_id)

        outputs = []
        self.clear_before_next_output = False

        while True:
            try:
                msg = self.kc.iopub_channel.get_msg(timeout=self.iopub_timeout)
            except Empty:
                self.log.warning("Timeout waiting for IOPub output")
                if self.raise_on_iopub_timeout:
                    raise RuntimeError("Timeout waiting for IOPub output")
                else:
                    break
            if msg["parent_header"].get("msg_id") != parent_msg_id:
                # not an output from our execution
                continue

            # Will raise CellExecutionComplete when completed
            try:
                self.process_message(msg, outputs, cell_index)
            except CellExecutionComplete:
                break

        return exec_reply, outputs

    def process_message(self, msg, outputs, cell_index):
        """Dispatch a single IOPub message belonging to the running cell.

        :param msg: the IOPub message
        :param outputs: list of outputs collected so far; handed on to the
            inherited handlers
        :param cell_index: passed through to the inherited handlers
        :return: the result of ``self.output(...)`` for messages treated as
            outputs, otherwise ``None``
        :raises CellExecutionComplete: when the kernel reports it is idle
        """
        msg_type = msg["msg_type"]
        self.log.debug("msg_type: %s", msg_type)
        content = msg["content"]
        self.log.debug("content: %s", content)

        display_id = content.get("transient", {}).get("display_id", None)
        if display_id and msg_type in {
            "execute_result",
            "display_data",
            "update_display_data",
        }:
            self._update_display_id(display_id, msg)

        if msg_type == "status":
            if content["execution_state"] == "idle":
                raise CellExecutionComplete()
        elif msg_type == "clear_output":
            # run_cell() resets ``clear_before_next_output``, but without this
            # branch a clear_output message fell through to self.output() and
            # was never honoured.
            # NOTE(review): relies on the inherited
            # ExecutePreprocessor.clear_output(outs, msg, cell_index) --
            # confirm it exists in the installed nbconvert version.
            self.clear_output(outputs, msg, cell_index)
        elif msg_type.startswith("comm"):
            self.handle_comm_msg(outputs, msg, cell_index)
        # Check for remaining messages we don't process
        elif msg_type not in ["execute_input", "update_display_data"]:
            # Assign output as our processed "result"
            return self.output(outputs, msg, display_id, cell_index)
def insert_missing_meta(content, doc_level_metadata):
    """Put a yaml metadata block at the front of ``content``.

    The block holds a YAML dump of ``doc_level_metadata["imd"]["global_meta"]``
    (an empty mapping when either key is absent) and carries the classes
    ``yaml`` and ``metadata``.  ``content`` is modified in place.
    """
    imd_section = doc_level_metadata.get("imd", {})
    global_meta = dict(imd_section.get("global_meta", {}))
    meta_block = CodeBlock(yaml.safe_dump(global_meta), classes=["yaml", "metadata"])
    content.insert(0, meta_block)
def _wrap_cell(content, number, kind):
    """Return a Div holding ``content``, tagged as cell ``number`` of type ``kind``."""
    return Div(
        *content,
        attributes={"cell-number": str(number)},
        classes=["nb-cell", kind]
    )


# Be explicit about the encoding rather than depending on the platform default.
with open("test.md", encoding="utf-8") as handle:
    doc = convert_text(handle.read(), input_format="markdown", standalone=True)

doc_metadata = meta2builtin(doc.metadata)

doc_content = []  # finished cells (Div elements), in document order
cell_content = []  # elements collected for the cell currently being built
found_metadata = False  # True once the current cell starts with a yaml block
cell_number = 1

executer = SourceExecuter(
    kernel_name=doc_metadata.get("kernelspec", {}).get("name", "python")
)

with executer.setup_preprocessor():
    for element in doc.content:
        if isinstance(element, CodeBlock) and "yaml" in element.classes:
            # A metadata block starts a new cell, so close the one in progress.
            # Nothing is pending at the start of the document or directly
            # after a code cell; skip the flush then, rather than emitting a
            # cell that contains nothing but generated metadata.
            if cell_content:
                if not found_metadata:
                    # add missing metadata element to previous cell
                    insert_missing_meta(cell_content, doc_metadata)
                # save previous cell
                doc_content.append(_wrap_cell(cell_content, cell_number, "markdown"))
                cell_number += 1

            # Merge the global metadata into the cell metadata (global values
            # win on a key clash).  An empty yaml block loads as None, so fall
            # back to an empty mapping.
            cell_meta = yaml.safe_load(element.text) or {}
            cell_meta.update(doc_metadata.get("imd", {}).get("global_meta", {}))
            element.text = yaml.safe_dump(cell_meta)

            # start a new cell
            cell_content = [element]
            found_metadata = True

        elif isinstance(element, CodeBlock):
            # Markdown is pending when there is content without metadata, or
            # content in addition to the leading metadata block.  This handles
            # a code cell, with no metadata, following a markdown cell.
            has_pending_markdown = (
                (cell_content and not found_metadata) or len(cell_content) > 1
            )
            if has_pending_markdown:
                if not found_metadata:
                    insert_missing_meta(cell_content, doc_metadata)
                doc_content.append(_wrap_cell(cell_content, cell_number, "markdown"))
                cell_number += 1
                cell_content = []
                insert_missing_meta(cell_content, doc_metadata)
            elif not found_metadata:
                insert_missing_meta(cell_content, doc_metadata)

            cell_content.append(element)

            # run cell, and add outputs to document
            if "python" in element.classes:
                exec_reply, outputs = executer.run_cell(element.text, cell_number)
                # The block is classed "json", so serialise it as JSON rather
                # than as a Python repr (which is not valid JSON).
                cell_content.append(
                    CodeBlock(json.dumps(outputs), classes=["json", "outputs"])
                )

            doc_content.append(_wrap_cell(cell_content, cell_number, "code"))

            # start a new cell
            cell_number += 1
            cell_content = []
            found_metadata = False

        else:
            cell_content.append(element)

# Elements after the last code block would otherwise be dropped: close them
# off as a final markdown cell.
if cell_content:
    if not found_metadata:
        insert_missing_meta(cell_content, doc_metadata)
    doc_content.append(_wrap_cell(cell_content, cell_number, "markdown"))

doc.content = doc_content

with open("test_step1.md", "w", encoding="utf-8") as handle:
    handle.write(
        convert_text(
            doc, input_format="panflute", output_format="markdown", standalone=True
        )
    )
kernelspec imd
name
python3
global_meta
centre
true

Header

Hallo there, how are you?

imd:
    type: note

Here's a note

a = 1
print("I've just been run :)")
a

The next code cell has metadata.

imd:
    type: figure
var i;
for (i = 0; i < cars.length; i++) {
  text += cars[i] + "<br>";
}
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
<title>Untitled</title>
<style>
code {
white-space: pre-wrap;
}
span.smallcaps {
font-variant: small-caps;
}
span.underline {
text-decoration: underline;
}
div.column {
display: inline-block;
vertical-align: top;
width: 50%;
}
</style>
<style>
code.sourceCode>span {
display: inline-block;
line-height: 1.25;
}
code.sourceCode>span {
color: inherit;
text-decoration: inherit;
}
code.sourceCode>span:empty {
height: 1.2em;
}
.sourceCode {
overflow: visible;
}
code.sourceCode {
white-space: pre;
position: relative;
}
div.sourceCode {
margin: 1em 0;
}
pre.sourceCode {
margin: 0;
}
@media screen {
div.sourceCode {
overflow: auto;
}
}
@media print {
code.sourceCode {
white-space: pre-wrap;
}
code.sourceCode>span {
text-indent: -5em;
padding-left: 5em;
}
}
pre.numberSource code {
counter-reset: source-line 0;
}
pre.numberSource code>span {
position: relative;
left: -4em;
counter-increment: source-line;
}
pre.numberSource code>span>a:first-child::before {
content: counter(source-line);
position: relative;
left: -1em;
text-align: right;
vertical-align: baseline;
border: none;
display: inline-block;
-webkit-touch-callout: none;
-webkit-user-select: none;
-khtml-user-select: none;
-moz-user-select: none;
-ms-user-select: none;
user-select: none;
padding: 0 4px;
width: 4em;
color: #aaaaaa;
}
pre.numberSource {
margin-left: 3em;
border-left: 1px solid #aaaaaa;
padding-left: 4px;
}
div.sourceCode {}
@media screen {
code.sourceCode>span>a:first-child::before {
text-decoration: underline;
}
}
code span.al {
color: #ff0000;
font-weight: bold;
}
/* Alert */
code span.an {
color: #60a0b0;
font-weight: bold;
font-style: italic;
}
/* Annotation */
code span.at {
color: #7d9029;
}
/* Attribute */
code span.bn {
color: #40a070;
}
/* BaseN */
code span.bu {}
/* BuiltIn */
code span.cf {
color: #007020;
font-weight: bold;
}
/* ControlFlow */
code span.ch {
color: #4070a0;
}
/* Char */
code span.cn {
color: #880000;
}
/* Constant */
code span.co {
color: #60a0b0;
font-style: italic;
}
/* Comment */
code span.cv {
color: #60a0b0;
font-weight: bold;
font-style: italic;
}
/* CommentVar */
code span.do {
color: #ba2121;
font-style: italic;
}
/* Documentation */
code span.dt {
color: #902000;
}
/* DataType */
code span.dv {
color: #40a070;
}
/* DecVal */
code span.er {
color: #ff0000;
font-weight: bold;
}
/* Error */
code span.ex {}
/* Extension */
code span.fl {
color: #40a070;
}
/* Float */
code span.fu {
color: #06287e;
}
/* Function */
code span.im {}
/* Import */
code span.in {
color: #60a0b0;
font-weight: bold;
font-style: italic;
}
/* Information */
code span.kw {
color: #007020;
font-weight: bold;
}
/* Keyword */
code span.op {
color: #666666;
}
/* Operator */
code span.ot {
color: #007020;
}
/* Other */
code span.pp {
color: #bc7a00;
}
/* Preprocessor */
code span.sc {
color: #4070a0;
}
/* SpecialChar */
code span.ss {
color: #bb6688;
}
/* SpecialString */
code span.st {
color: #4070a0;
}
/* String */
code span.va {
color: #19177c;
}
/* Variable */
code span.vs {
color: #4070a0;
}
/* VerbatimString */
code span.wa {
color: #60a0b0;
font-weight: bold;
font-style: italic;
}
/* Warning */
</style>
<!--[if lt IE 9]>
<script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
<![endif]-->
</head>
<body>
<div class="nb-cell markdown" data-cell-number="1">
<div class="sourceCode" id="cb1">
<pre
class="sourceCode yaml metadata"><code class="sourceCode yaml"><span id="cb1-1"><a href="#cb1-1"></a><span class="kw">{</span><span class="fu">centre</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span><span class="kw">}</span></span></code></pre>
</div>
<h1 id="header">Header</h1>
</div>
<div class="nb-cell markdown" data-cell-number="2">
<div class="sourceCode" id="cb2">
<pre
class="sourceCode yaml"><code class="sourceCode yaml"><span id="cb2-1"><a href="#cb2-1"></a><span class="fu">centre</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
<span id="cb2-2"><a href="#cb2-2"></a><span class="fu">imd</span><span class="kw">:</span><span class="at"> </span><span class="kw">{</span><span class="fu">type</span><span class="kw">:</span><span class="at"> other</span><span class="kw">}</span></span></code></pre>
</div>
<p>Hallo <em>there</em>, how are <strong>you</strong>?</p>
</div>
<div class="nb-cell markdown" data-cell-number="3">
<div class="sourceCode" id="cb3">
<pre
class="sourceCode yaml"><code class="sourceCode yaml"><span id="cb3-1"><a href="#cb3-1"></a><span class="fu">centre</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
<span id="cb3-2"><a href="#cb3-2"></a><span class="fu">imd</span><span class="kw">:</span><span class="at"> </span><span class="kw">{</span><span class="fu">type</span><span class="kw">:</span><span class="at"> note</span><span class="kw">}</span></span></code></pre>
</div>
<p>Here’s a note</p>
</div>
<div class="nb-cell code" data-cell-number="4">
<div class="sourceCode" id="cb4">
<pre
class="sourceCode yaml metadata"><code class="sourceCode yaml"><span id="cb4-1"><a href="#cb4-1"></a><span class="kw">{</span><span class="fu">centre</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span><span class="kw">}</span></span></code></pre>
</div>
<div class="sourceCode" id="cb5">
<pre class="sourceCode python"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1"></a>a <span class="op">=</span> <span class="dv">1</span></span>
<span id="cb5-2"><a href="#cb5-2"></a><span class="bu">print</span>(<span class="st">&quot;I&#39;ve just been run :)&quot;</span>)</span>
<span id="cb5-3"><a href="#cb5-3"></a>a</span></code></pre>
</div>
<div class="sourceCode" id="cb6">
<pre
class="sourceCode json outputs"><code class="sourceCode json"><span id="cb6-1"><a href="#cb6-1"></a><span class="ot">[</span><span class="fu">{</span><span class="er">&#39;output_type&#39;</span><span class="fu">:</span> <span class="er">&#39;stream&#39;</span><span class="fu">,</span> <span class="er">&#39;name&#39;</span><span class="fu">:</span> <span class="er">&#39;stdout&#39;</span><span class="fu">,</span> <span class="er">&#39;text&#39;</span><span class="fu">:</span> <span class="st">&quot;I&#39;ve just been run :)</span><span class="ch">\n</span><span class="st">&quot;</span><span class="fu">}</span><span class="ot">,</span> <span class="fu">{</span><span class="er">&#39;output_type&#39;</span><span class="fu">:</span> <span class="er">&#39;execute_result&#39;</span><span class="fu">,</span> <span class="er">&#39;metadata&#39;</span><span class="fu">:</span> <span class="fu">{},</span> <span class="er">&#39;data&#39;</span><span class="fu">:</span> <span class="fu">{</span><span class="er">&#39;text/plain&#39;</span><span class="fu">:</span> <span class="er">&#39;</span><span class="dv">1</span><span class="er">&#39;</span><span class="fu">},</span> <span class="er">&#39;execution_count&#39;</span><span class="fu">:</span> <span class="dv">1</span><span class="fu">}</span><span class="ot">]</span></span></code></pre>
</div>
</div>
<div class="nb-cell markdown" data-cell-number="5">
<div class="sourceCode" id="cb7">
<pre
class="sourceCode yaml metadata"><code class="sourceCode yaml"><span id="cb7-1"><a href="#cb7-1"></a><span class="kw">{</span><span class="fu">centre</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span><span class="kw">}</span></span></code></pre>
</div>
<p>The next code cell has metadata.</p>
</div>
<div class="nb-cell code" data-cell-number="6">
<div class="sourceCode" id="cb8">
<pre
class="sourceCode yaml"><code class="sourceCode yaml"><span id="cb8-1"><a href="#cb8-1"></a><span class="fu">centre</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
<span id="cb8-2"><a href="#cb8-2"></a><span class="fu">imd</span><span class="kw">:</span><span class="at"> </span><span class="kw">{</span><span class="fu">type</span><span class="kw">:</span><span class="at"> figure</span><span class="kw">}</span></span></code></pre>
</div>
<div class="sourceCode" id="cb9">
<pre class="sourceCode javascript"><code class="sourceCode javascript"><span id="cb9-1"><a href="#cb9-1"></a><span class="kw">var</span> i<span class="op">;</span></span>
<span id="cb9-2"><a href="#cb9-2"></a><span class="cf">for</span> (i <span class="op">=</span> <span class="dv">0</span><span class="op">;</span> i <span class="op">&lt;</span> <span class="va">cars</span>.<span class="at">length</span><span class="op">;</span> i<span class="op">++</span>) <span class="op">{</span></span>
<span id="cb9-3"><a href="#cb9-3"></a> text <span class="op">+=</span> cars[i] <span class="op">+</span> <span class="st">&quot;&lt;br&gt;&quot;</span><span class="op">;</span></span>
<span id="cb9-4"><a href="#cb9-4"></a><span class="op">}</span></span></code></pre>
</div>
</div>
</body>
</html>
imd kernelspec
global_meta
centre
true
name
python3

::: {.nb-cell .markdown cell-number="1"}

{centre: true}

Header

:::

::: {.nb-cell .markdown cell-number="2"}

centre: true
imd: {type: other}

Hallo there, how are you?

:::

::: {.nb-cell .markdown cell-number="3"}

centre: true
imd: {type: note}

Here's a note

:::

::: {.nb-cell .code cell-number="4"}

{centre: true}
a = 1
print("I've just been run :)")
a
[{'output_type': 'stream', 'name': 'stdout', 'text': "I've just been run :)\n"}, {'output_type': 'execute_result', 'metadata': {}, 'data': {'text/plain': '1'}, 'execution_count': 1}]

:::

::: {.nb-cell .markdown cell-number="5"}

{centre: true}

The next code cell has metadata.

:::

::: {.nb-cell .code cell-number="6"}

centre: true
imd: {type: figure}
var i;
for (i = 0; i < cars.length; i++) {
  text += cars[i] + "<br>";
}

:::

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment