Skip to content

Instantly share code, notes, and snippets.

@tonyfast
Created December 7, 2024 03:43
Show Gist options
  • Save tonyfast/8f86485d91b3d1129c9598ffc15db64c to your computer and use it in GitHub Desktop.
Save tonyfast/8f86485d91b3d1129c9598ffc15db64c to your computer and use it in GitHub Desktop.
import { program } from "commander";
import * as fs from 'fs';
import path from "path";
import markdownit from "markdown-it";
import * as d3 from 'd3';
import hljs from 'highlight.js';
// folx warned me about jsdom
import slugify from "slugify";
import { DOMParser, parseHTML } from 'linkedom';
import { fileURLToPath } from 'url';
// ES modules have no built-in __filename/__dirname, so both are derived
// from import.meta.url; they are used below to locate the template assets.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// CSS selector for everything treated as a heading (used for permalinks and the table of contents).
const HEADINGS = "h1,h2,h3,h4,h5,h6,[role=heading]";
// Command line: declares a <file> argument and parses process.argv.
// The parsed positional arguments are read from program.args further down.
program.version("2024-11-30").arguments("<file>").parse(process.argv)
// Short alias for console.log.
const print = console.log;
/**
 * Write a value to stderr as one line of JSON.
 * @param {*} object - value serialised with JSON.stringify.
 */
function err(object) {
  const line = `${JSON.stringify(object)}\n`;
  process.stderr.write(line);
}
// Attributes whose values are ids or space separated id references.
// enterCellIds rewrites these per cell.
const IDREF_SELECTORS = ["id", "aria-labelledby", "aria-describedby", "aria-controls", "aria-owns", "form"];
// Selector matching any element that carries at least one of the attributes above.
const IDREF_SELECTOR = IDREF_SELECTORS.map(x => `[${x}]`).join(",");
// Assets that live next to this script. readFileSync is called without an
// encoding, so each of these is a Buffer rather than a string.
const default_css = fs.readFileSync(path.resolve(__dirname, "./refnb.css"));
const default_settings = fs.readFileSync(path.resolve(__dirname, "./settings.js"));
const TPL_DOC = fs.readFileSync(path.resolve(__dirname, "./refnb.html"));
// A parsed copy of the page template, used only to pull out the fragments below.
const TPL_DOM = newDocument();
// Row/table prototypes taken from the `.content` of the elements with ids
// tpl:cell-row, tpl:output, tpl:output-row and tpl:toc (presumably <template>
// elements - confirm against refnb.html). They are cloned, never inserted directly.
const TPL_CELL_ROW = TPL_DOM.select("#tpl\\:cell-row").node().content.querySelector("tr");
const TPL_OUTPUT_TABLE = TPL_DOM.select("#tpl\\:output").node().content.querySelector("table");
const TPL_OUTPUT_ROW = TPL_DOM.select("#tpl\\:output-row").node().content.querySelector("tr");
const TPL_TOC_ROW = TPL_DOM.select("#tpl\\:toc").node().content.querySelector("tr");
// Cell types that become class names on a cell row (see enterCell).
const CELL_TYPES = ["code", "markdown", "raw"]
// Output types that enterOutput compares against `output_type`.
const OUTPUT_TYPES = ["display_data", "execute_result"]
// Matches a line that starts with optional whitespace, "%%" and then whitespace.
// enterCellCode adds the `markdown` class to a cell whose source matches.
const LITERATE = /^\s*%{2}\s+/m;
// Render each notebook file named on the command line into a fresh copy of
// the template and write the resulting <html> element to stdout.
for (const file of program.args) {
  const nb = JSON.parse(fs.readFileSync(file));
  const document = newDocument();
  notebook(nb, document);
  const html = document.select("html").node().outerHTML;
  process.stdout.write(html);
}
/**
 * Parse the page template into a new document.
 * @returns a d3 selection wrapping the freshly parsed document.
 */
function newDocument() {
  const parsed = parseHTML(String(TPL_DOC));
  return d3.select(parsed.document);
}
/**
 * Render a notebook into a template document.
 * @param nb - parsed notebook object; bound as the datum of the <section>.
 * @param document - d3 selection of the document to fill in.
 */
function notebook(nb, document) {
  const section = document.select("section");
  section.datum(nb).call(enterNotebook);
  exitNotebook(document);
}
/**
 * Key function for the cell data join.
 * @param d - notebook cell.
 * @param i - index of the cell (unused).
 * @returns {Array} the cell id paired with the execution count for code
 *   cells, or with the source for every other cell type.
 */
function cellsKey(d, i) {
  const discriminator = d.cell_type == "code" ? d.execution_count : d.source;
  return [d.id, discriminator];
}
/**
 * Key function for output rows: rows are identified purely by position.
 * @param _datum - the row datum (unused).
 * @param index - position of the row.
 * @returns the position, unchanged.
 */
function outputKey(_datum, index) {
  return index;
}
/**
 * Create one table row per notebook cell.
 * @param enter - d3 selection whose datum is the notebook; its
 *   `table.cells > tbody` receives the rows.
 */
function enterNotebook(enter) {
  const nb = enter.datum();

  // Clone the row template, bind a copy of the cell (plus its position)
  // and let enterCell fill the clone in before it is appended.
  const buildRow = (cell, i) => {
    const row = d3.select(TPL_CELL_ROW.cloneNode(true));
    row.datum({ ...cell, position: i });
    row.call(enterCell);
    return row.node();
  };

  enter
    .select("table.cells > tbody")
    .selectAll("tr.cell")
    .data(nb.cells, cellsKey)
    .join(
      // the enter callback returns nothing, exactly as before
      (entering) => {
        entering.append(buildRow);
      },
      (updating) => updating,
      (exiting) => exiting.remove(),
    );
}
/**
 * Fill in one cloned cell row from its datum.
 * @param cell_row - d3 selection of the row; its datum is the cell with an
 *   extra zero-based `position` property.
 */
function enterCell(cell_row) {
  const cell = cell_row.datum();
  const position = cell.position;
  const cell_id = `c${position}`;

  // aria-posinset is one-based, the generated cell id is zero-based
  cell_row.attr("aria-posinset", position + 1);
  for (const kind of CELL_TYPES) {
    cell_row.classed(kind, cell.cell_type == kind);
  }
  cell_row.select("td.cell_type>label").text(cell.cell_type);
  cell_row.select(`option[value="${cell.cell_type}"]`).attr("selected", "");

  enterCellCode(cell_id, cell_row, cell, position);
  enterCellIds(cell_id, cell_row);

  // a markdown cell is displayed as a synthetic output built from its own source
  if (cell.cell_type == "markdown") {
    cell.outputs = [{ output_type: "display_data", data: { "text/markdown": cell.source } }];
  }
  if (cell.outputs?.length > 0) {
    cell_row.call(enterOutputs);
  }
}
/**
 * Fill in the position, id, execution count, line count and source parts of
 * a cell row.
 *
 * `cell.source` may be a list of lines or a single string; both are accepted.
 * Previously a string source threw on `.join`.
 *
 * @param cell_id - generated id of the cell (`c` + zero-based position).
 * @param cell_row - d3 selection of the row being filled in.
 * @param cell - the notebook cell.
 * @param i - zero-based position of the cell.
 */
function enterCellCode(cell_id, cell_row, cell, i) {
  // Normalise the source to a list of lines that keep their trailing newline,
  // which is the shape the list form already has.
  const lines = Array.isArray(cell.source)
    ? cell.source
    : String(cell.source ?? "").split(/(?<=\n)/);
  const source = lines.join("");
  // count of non-empty entries; a line holding only "\n" still counts
  const loc = lines.filter(Boolean).length;

  cell_row.select("th.pos>a").text(i + 1).attr("href", `#${cell_id}`);
  cell_row.select("th.id>a").text(cell.id).attr("href", `#${cell.id}`);
  if (cell.execution_count) {
    cell_row.select("td.execution_count>label>output").text(cell.execution_count);
  }
  cell_row.select(".loc>output").text(loc);
  cell_row.attr("data-loc", loc);
  cell_row.select("td.source>textarea").text(source);

  // a cell whose source matches LITERATE is flagged as markdown as well
  const isMarkdown = cell.cell_type === "markdown" || LITERATE.test(source);
  cell_row.classed("markdown", isMarkdown);

  // code cells are highlighted as python, every other cell type as markdown
  const language = cell.cell_type === "code" ? "python" : "markdown";
  const highlighted = hljs.highlight(source, { language });
  cell_row.select("td.source>section.highlight").html(
    `<pre><code>${highlighted.value}</code></pre>`
  );
}
/**
 * Append an output table to a cell row: one <tbody> per output and one row
 * per entry of that output.
 * @param selection - d3 selection of the cell row; its datum is the cell.
 */
function enterOutputs(selection) {
  const cell = selection.datum();

  const table = selection
    .select("td.outputs")
    .append(() => TPL_OUTPUT_TABLE.cloneNode(true));
  const bodies = table.selectAll("tbody").data(cell.outputs || []).join("tbody");

  // An output with a `data` bundle yields one [mime, payload, metadata] row
  // per mime type; any other output is passed through as a single row.
  const rowsFor = (output) => {
    if (!output.data) {
      return [output];
    }
    return Object.entries(output.data).map(
      ([mime, payload]) => [mime, payload, output.metadata]
    );
  };

  // Clone the row template, bind the datum and fill it in before it is appended.
  const buildRow = (output) => {
    const row = d3.select(TPL_OUTPUT_ROW.cloneNode(true));
    row.datum(output).call(enterOutput);
    return row.node();
  };

  bodies.selectAll("tr").data(rowsFor, outputKey).join(
    (entering) => entering.append(buildRow),
    // placeholder for an update step that markdown cells could use to avoid extra work
    (updating) => updating,
    (exiting) => exiting.remove(),
  );
}
/**
 * Fill in one output row.
 *
 * enterOutputs binds either a `[mime, payload, metadata]` array (one per
 * entry of an output's `data` bundle) or the raw output object when the
 * output has no `data`. Only the array form is rendered; the previous
 * `output_type` check never matched an array, so nothing was ever rendered.
 *
 * @param output_row - d3 selection of the cloned output row.
 */
function enterOutput(output_row) {
  const output = output_row.datum();
  if (Array.isArray(output)) {
    enterOutputBundle(output_row, output);
  }
}

/**
 * Join a bundle payload into one string.
 * A payload may be a list of string chunks or a single string.
 */
function bundleText(bundle) {
  return Array.isArray(bundle) ? bundle.join("") : String(bundle ?? "");
}

/**
 * Render one mime bundle entry into an output row.
 *
 * @param output_row - d3 selection of the output row.
 * @param output - `[mime type, payload, metadata]`.
 */
function enterOutputBundle(output_row, output) {
  const [type, bundle] = output;
  output_row.select("td.output_type>label").text(type);
  const data = output_row.select("td.data");

  if (type === "text/html") {
    data.html(bundleText(bundle));
  } else if (type === "text/markdown") {
    data.html(markdownify(bundleText(bundle))).call(replaceAttachments);
  } else if (type === "text/plain") {
    data.append("samp").text(bundleText(bundle));
  } else if (type.startsWith("image")) {
    const text = bundleText(bundle);
    // SVG payloads are markup rather than base64, so they are encoded here
    // to fit the same data URI form as the binary image types.
    const payload = type === "image/svg+xml"
      ? Buffer.from(text, "utf8").toString("base64")
      : text;
    // TODO: alt text
    data.append("img").attr("src", `data:${type};base64,${payload}`);
  }
}
/**
 * Make the template's ids and id references unique to one cell.
 *
 * Every attribute listed in IDREF_SELECTORS is split on spaces and each
 * token is rewritten: a token starting with ":" gets the cell id as a
 * prefix, the token "#" becomes the cell id, anything else is kept.
 *
 * @param cell_id - id of the cell, used as prefix/replacement.
 * @param cell_row - d3 selection of the row whose descendants are rewritten.
 */
function enterCellIds(cell_id, cell_row) {
  const expand = (token) => {
    if (token.startsWith(":")) {
      return cell_id + token;
    }
    return token === "#" ? cell_id : token;
  };
  // a separator is only added once something has been accumulated,
  // so leading empty tokens are dropped
  const glue = (joined, token) => (joined ? `${joined} ${token}` : token);

  cell_row.selectAll(IDREF_SELECTOR).each(function () {
    const element = d3.select(this);
    for (const name of IDREF_SELECTORS) {
      const current = element.attr(name);
      if (!current) {
        continue;
      }
      element.attr(name, current.split(" ").map(expand).reduce(glue, ""));
    }
  });
}
/**
 * Document-level finishing steps that are not tied to a single cell:
 * inline the stylesheet and settings script, give headings ids and
 * permalinks, then build the table of contents and the summary.
 *
 * @param document - d3 selection of the document being rendered.
 */
function exitNotebook(document) {
  // The assets were read without an encoding and are therefore Buffers.
  // Convert them explicitly, the same way newDocument does for the template,
  // instead of relying on the DOM implementation to coerce them.
  document.select("style[src='refnb.css']").attr("src", null).text(String(default_css));
  // the settings script is inlined so the page works in read-only mode
  document.select("script[src='settings.js']").attr("src", null).text(String(default_settings));

  // make sure every heading has an id and a permalink (ids are not checked for uniqueness)
  const dom = document.node();
  for (const node of Array.from(document.selectAll(HEADINGS))) {
    headingToId(d3.select(node), dom);
  }

  enterToc(document);
  enterSummary(document);
  // NOTE(review): only the first permalink receives the id "h1" - confirm this is intended
  document.select("a.perma").attr("id", "h1");
}
/**
 * Build the table of contents from the headings found inside cell rows.
 *
 * Each entry records the heading text, the one-based position of the cell
 * that contains it (`aria-posinset`), the heading level and the heading id.
 *
 * @param document - d3 selection of the document being rendered.
 */
function enterToc(document) {
  const entries = [];
  for (const element of Array.from(document.selectAll(HEADINGS))) {
    // Headings that are not inside a cell row have no cell to point at;
    // they are skipped instead of failing on a null `closest` result.
    const row = element.closest("tr.cell[aria-posinset]");
    if (!row) {
      continue;
    }
    // h1-h6 carry their level in the tag name. Other elements matched by
    // HEADINGS (role=heading) use aria-level, falling back to 2, the ARIA
    // default for that role.
    const tag = /^h([1-6])$/i.exec(element.tagName);
    const level = tag
      ? Number(tag[1])
      : Number(element.getAttribute("aria-level")) || 2;
    entries.push({
      heading: element.innerText,
      cell: Number(row.getAttribute("aria-posinset")),
      level,
      id: element.getAttribute("id"),
    });
  }

  document.select("table.nb.toc").select("tbody").selectAll("tr").data(entries).join(
    enterTocRow,
    (update) => update,
    (exit) => exit.remove(),
  );
}
/**
 * Append one table-of-contents row per entering entry.
 *
 * @param selection - d3 enter selection whose data are the entries built by
 *   enterToc (`level`, `heading`, `id`, and the one-based `cell` position).
 */
function enterTocRow(selection) {
  selection.append((entry) => {
    const toc_row = d3.select(TPL_TOC_ROW.cloneNode(true));
    toc_row.select(".level").text(entry.level);
    toc_row.select(".heading > a").text(entry.heading).attr("href", `#${entry.id}`);
    // `entry.cell` is the one-based aria-posinset, while cell ids are built
    // from the zero-based position (`c0` is cell 1). The link target is
    // shifted by one so it points at the cell that holds the heading, not
    // the next one.
    toc_row.select(".cell > a").text(entry.cell).attr("href", `#c${entry.cell - 1}`);
    return toc_row.node();
  });
}
/**
 * Describe how the code cells of a notebook were executed.
 *
 * Only code cells are considered. A code cell with no non-blank source line
 * is ignored unless it has an execution count, which counts as out of order.
 * The remaining cells must carry the counts 1, 2, 3, ... in document order
 * to be "in order"; a cell without a count makes the result "partially ...".
 * Scanning stops at the first out-of-order cell.
 *
 * @param {Array} cells - notebook cells.
 * @returns {string} the state label shown in the summary.
 */
function summarizeExecutionState(cells) {
  let expected = 1;
  let monotonic = true;
  let partial = false;
  for (const cell of cells) {
    if (cell.cell_type !== "code") {
      continue;
    }
    const lines = Array.isArray(cell.source) ? cell.source : [String(cell.source ?? "")];
    const trivial = lines.every((line) => line.trim() === "");
    if (trivial) {
      if (cell.execution_count) {
        monotonic = false;
        break;
      }
      continue;
    }
    if (!cell.execution_count) {
      partial = true;
      continue;
    }
    if (cell.execution_count !== expected) {
      monotonic = false;
      break;
    }
    expected += 1;
  }
  const state = monotonic ? "executed in order" : "executed out of order";
  return partial ? `partially ${state}` : state;
}

/**
 * Fill in the summary counters of the document: lines of code, number of
 * cells (total, markdown, code), number of outputs and the execution state.
 *
 * @param document - d3 selection of the document; the notebook is read from
 *   the datum of its <section>.
 */
function enterSummary(document) {
  const nb = document.select("section").datum();

  // seeded with 0 so a notebook without cells does not throw
  const loc = Array.from(document.selectAll(".cell[data-loc]")).reduce(
    (total, element) => total + Number(element.dataset.loc),
    0,
  );
  const countOf = (type) => nb.cells.filter((cell) => cell.cell_type === type).length;
  const outputs = nb.cells.reduce((total, cell) => total + (cell.outputs?.length ?? 0), 0);

  document.select("#cells\\:loc").text(loc);
  document.select("#cells\\:total").text(nb.cells.length);
  document.select("#cells\\:md").text(countOf("markdown"));
  document.select("#cells\\:code").text(countOf("code"));
  document.select("#cells\\:outputs").text(outputs);
  document.select("#cells\\:state").text(summarizeExecutionState(nb.cells));
}
/**
 * Render markdown to HTML with raw HTML and link detection enabled.
 * Fenced code is highlighted when highlight.js knows the language.
 *
 * @param {string} body - markdown source.
 * @returns {string} rendered HTML.
 */
function markdownify(body) {
  const highlight = (str, lang) => {
    const known = lang && hljs.getLanguage(lang);
    if (!known) {
      return ''; // use external default escaping
    }
    try {
      return hljs.highlight(str, { language: lang }).value;
    } catch (__) {
      // best effort: fall back to the default escaping
      return '';
    }
  };
  const renderer = markdownit({ html: true, linkify: true, highlight });
  return renderer.render(body);
}
/**
 * Replace `attachment:<name>` image sources inside rendered markdown with
 * data URIs built from the attachments of the enclosing cell.
 *
 * The cell is looked up through the closest `tr.cell` ancestor. If there is
 * none (for example because the node is not attached under a cell row yet),
 * or the cell has no attachments, nothing is changed.
 *
 * @param selection - d3 selection of the element holding the rendered markdown.
 */
function replaceAttachments(selection) {
  const PREFIX = "attachment:";
  const row = selection.node()?.closest("tr.cell");
  if (!row) {
    return;
  }
  // the cell is the datum bound to the row; DOM nodes have no datum() of
  // their own, so the row is wrapped in a selection first
  const attachments = d3.select(row).datum()?.attachments;
  if (!attachments) {
    return;
  }
  selection.selectAll("img").each((_, j, nodes) => {
    const img = d3.select(nodes[j]);
    const src = img.attr("src");
    // images without a src, or with an ordinary URL, are left alone
    if (!src?.startsWith(PREFIX)) {
      return;
    }
    const bundle = attachments[src.slice(PREFIX.length)];
    if (!bundle) {
      return;
    }
    // an attachment is a mime bundle; the first entry is used
    const [entry] = Object.entries(bundle);
    if (!entry) {
      return;
    }
    const [mime, payload] = entry;
    const data = Array.isArray(payload) ? payload.join("") : payload;
    img.attr("src", `data:${mime};base64,${data}`);
  });
}
/**
 * Make sure a heading has an id and put a permalink in front of it.
 *
 * The id is the heading's existing id or, failing that, a slug of its text.
 * The permalink is an `a.perma` element that points at the id and is
 * labelled by the heading.
 *
 * @param heading - d3 selection of the heading element.
 * @param document - the DOM document used to create the link element.
 */
function headingToId(heading, document) {
  const id = heading.attr("id") || slugify(heading.text());
  heading.attr("id", id);

  const a = document.createElement("a");
  d3.select(a)
    .attr("href", `#${id}`)
    .attr("aria-labelledby", id)
    .text("🔗")
    .classed("perma", true);

  const h = heading.node();
  // The link goes before the heading, so that moving forward in the document
  // confirms the location. It has to be inserted into the heading's parent:
  // the heading cannot be the parent and the reference node at the same time.
  h.parentNode.insertBefore(a, h);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment