Created
December 7, 2024 03:43
-
-
Save tonyfast/8f86485d91b3d1129c9598ffc15db64c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { program } from "commander";
import * as fs from 'fs';
import path from "path";
import markdownit from "markdown-it";
import * as d3 from 'd3';
import hljs from 'highlight.js';
// folx warned me about jsdom
import slugify from "slugify";
import { DOMParser, parseHTML } from 'linkedom';
import { fileURLToPath } from 'url';

// ES modules do not define __filename/__dirname; rebuild them from
// import.meta.url so sibling assets can be resolved relative to this file.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// CSS selector matching every heading: native h1-h6 plus ARIA headings.
const HEADINGS = "h1,h2,h3,h4,h5,h6,[role=heading]";

// Parse the command line immediately; the positional arguments are read
// back later through `program.args`.
program.version("2024-11-30").arguments("<file>").parse(process.argv);

// Shorthand for writing to stdout.
const print = console.log;
/**
 * Write a value to stderr as a single line of JSON.
 *
 * @param {*} object - value to serialise with `JSON.stringify`.
 */
function err(object) {
  process.stderr.write(`${JSON.stringify(object)}\n`);
}
// Attributes whose values are rewritten per cell by enterCellIds: `id` itself
// plus the attributes that point at ids.
const IDREF_SELECTORS = ["id", "aria-labelledby", "aria-describedby", "aria-controls", "aria-owns", "form"];
// One selector matching any element that carries at least one of them.
const IDREF_SELECTOR = IDREF_SELECTORS.map((name) => `[${name}]`).join(",");

// Assets shipped next to this module. They are read as UTF-8 strings (not
// Buffers) because they are later injected into the document as text.
const default_css = fs.readFileSync(path.resolve(__dirname, "./refnb.css"), "utf8");
const default_settings = fs.readFileSync(path.resolve(__dirname, "./settings.js"), "utf8");
const TPL_DOC = fs.readFileSync(path.resolve(__dirname, "./refnb.html"), "utf8");

// A parsed copy of the template document, used only as a source of prototype
// nodes. Each prototype is taken from the `content` of a node looked up by id
// and is cloned whenever a cell, output table, output row or TOC row is needed.
const TPL_DOM = newDocument();
const TPL_CELL_ROW = TPL_DOM.select("#tpl\\:cell-row").node().content.querySelector("tr");
const TPL_OUTPUT_TABLE = TPL_DOM.select("#tpl\\:output").node().content.querySelector("table");
const TPL_OUTPUT_ROW = TPL_DOM.select("#tpl\\:output-row").node().content.querySelector("tr");
const TPL_TOC_ROW = TPL_DOM.select("#tpl\\:toc").node().content.querySelector("tr");

const CELL_TYPES = ["code", "markdown", "raw"];
const OUTPUT_TYPES = ["display_data", "execute_result"];
// Matches a line that starts (after optional whitespace) with `%%` followed by
// whitespace; cells whose source matches are additionally classed as markdown.
const LITERATE = /^\s*%{2}\s+/m;
// Entry point: render every notebook named on the command line and write the
// resulting <html> element's markup to stdout.
program.args.forEach((file) => {
  const nb = JSON.parse(fs.readFileSync(file, "utf8"));
  // Each notebook gets its own freshly parsed copy of the template.
  const document = newDocument();
  notebook(nb, document);
  process.stdout.write(document.select("html").node().outerHTML);
});
/**
 * Parse the HTML template into a fresh document.
 *
 * @returns a d3 selection wrapping the newly parsed document node.
 */
function newDocument() {
  const { document } = parseHTML(String(TPL_DOC));
  return d3.select(document);
}
/**
 * Render a notebook into a template document.
 *
 * Binds the notebook to the document's `section`, renders the cells, then
 * runs the document-level finishing pass.
 *
 * @param nb - parsed notebook JSON.
 * @param document - d3 selection of the document to fill in (mutated).
 */
function notebook(nb, document) {
  document.select("section").datum(nb).call(enterNotebook);
  exitNotebook(document);
}
/**
 * Key function for the cell data join.
 *
 * @param d - a notebook cell.
 * @param i - index of the cell (unused).
 * @returns {Array} the cell id paired with its execution count (code cells)
 *   or its source (all other cells).
 */
function cellsKey(d, i) {
  const discriminator = d.cell_type === "code" ? d.execution_count : d.source;
  return [d.id, discriminator];
}
/**
 * Key function for the output-row data join: rows are matched by position.
 *
 * @param d - the datum (unused).
 * @param i - index of the datum.
 * @returns the index, unchanged.
 */
function outputKey(d, i) {
  return i;
}
/**
 * Render one table row per notebook cell.
 *
 * @param enter - d3 selection whose datum is the notebook; its
 *   `table.cells > tbody` receives the `tr.cell` rows.
 */
function enterNotebook(enter) {
  const rows = enter.select("table.cells > tbody").selectAll("tr.cell");
  rows.data(enter.datum().cells, cellsKey).join(
    (entering) => {
      // Each new row is a clone of the cell template, bound to a shallow copy
      // of the cell that also records its position, and filled in by
      // enterCell before being inserted. Nothing is returned on purpose, so
      // join() does not merge or reorder the entered rows.
      entering.append((cell, i) => {
        const row = d3.select(TPL_CELL_ROW.cloneNode(true));
        return row.datum({ ...cell, position: i }).call(enterCell).node();
      });
    },
    (update) => update,
    (exit) => exit.remove(),
  );
}
/**
 * Fill in a cloned cell row from the cell bound to it.
 *
 * @param cell_row - d3 selection of the row; its datum is the cell plus a
 *   `position` property (0-based index in the notebook).
 */
function enterCell(cell_row) {
  const cell = cell_row.datum();
  const i = cell.position;
  // Ids are 0-based (`c0`, `c1`, ...) while aria-posinset is 1-based.
  const cell_id = `c${i}`;
  cell_row.attr("aria-posinset", i + 1);
  for (const type of CELL_TYPES) {
    cell_row.classed(type, cell.cell_type === type);
  }
  cell_row.select("td.cell_type>label").text(cell.cell_type);
  cell_row.select(`option[value="${cell.cell_type}"]`).attr("selected", "");
  enterCellCode(cell_id, cell_row, cell, i);
  enterCellIds(cell_id, cell_row);
  // Markdown cells have no outputs of their own: render their source through
  // the same pipeline as a synthetic text/markdown display output. The cell's
  // attachments ride along on the output metadata so that `attachment:` image
  // references can be resolved without walking back up to the cell row.
  if (cell.cell_type === "markdown") {
    cell.outputs = [{
      output_type: "display_data",
      data: { "text/markdown": cell.source },
      metadata: { attachments: cell.attachments },
    }];
  }
  if (cell.outputs?.length > 0) {
    cell_row.call(enterOutputs);
  }
}
/**
 * Fill in the position, id, execution count, line count and source columns
 * of a cell row.
 *
 * @param cell_id - generated id of the cell (target of the position link).
 * @param cell_row - d3 selection of the row being filled in.
 * @param cell - the notebook cell.
 * @param i - 0-based index of the cell.
 */
function enterCellCode(cell_id, cell_row, cell, i) {
  cell_row.select("th.pos>a").text(i + 1).attr("href", `#${cell_id}`);
  cell_row.select("th.id>a").text(cell.id).attr("href", `#${cell.id}`);
  if (cell.execution_count) {
    cell_row.select("td.execution_count>label>output").text(cell.execution_count);
  }
  // `source` may be a list of line strings or one plain string; normalise to
  // a list of lines (line endings kept) so both forms are handled alike.
  const lines = Array.isArray(cell.source)
    ? cell.source
    : String(cell.source ?? "").split(/(?<=\n)/);
  const source = lines.join("");
  const loc = lines.filter(Boolean).length;
  cell_row.select(".loc>output").text(loc);
  cell_row.attr("data-loc", loc);
  cell_row.select("td.source>textarea").text(source);
  // Code cells containing a `%% ` marker line are also classed as markdown.
  cell_row.classed("markdown", cell.cell_type === "markdown" || Boolean(source.match(LITERATE)));
  // Code cells are highlighted as Python; every other cell type as markdown.
  const highlighted = hljs.highlight(source, {
    language: cell.cell_type === "code" ? "python" : "markdown",
  });
  cell_row.select("td.source>section.highlight").html(
    `<pre><code>${highlighted.value}</code></pre>`
  );
}
/**
 * Render the outputs of a cell into its `td.outputs` column.
 *
 * A clone of the output table is appended, one `tbody` is joined per output,
 * and one row per entry of the output's mime bundle. Outputs without `data`
 * get a single row bound to the output object itself.
 *
 * @param selection - d3 selection of the cell row; its datum is the cell.
 */
function enterOutputs(selection) {
  const cell = selection.datum();
  const table = selection.select("td.outputs").append(() => TPL_OUTPUT_TABLE.cloneNode(true));
  const bodies = table.selectAll("tbody").data(cell.outputs ?? []).join("tbody");
  bodies.selectAll("tr").data(
    // Row datum for bundle entries: [mimeType, payload, outputMetadata].
    (d) => (d.data
      ? Object.entries(d.data).map(([mime, payload]) => [mime, payload, d.metadata])
      : [d]),
    outputKey,
  ).join(
    // Rows are cloned from the template and filled in before insertion.
    (enter) => enter.append(
      (output) => d3.select(TPL_OUTPUT_ROW.cloneNode(true))
        .datum(output)
        .call(enterOutput)
        .node(),
    ),
    (update) => update, // updateOutput method that will be used with markdown cells to avoid extra work
    (exit) => exit.remove(),
  );
}
/**
 * Fill in one output row.
 *
 * Rows that represent a mime-bundle entry are bound to a
 * `[mimeType, payload, metadata]` array; only those are rendered. Rows bound
 * to anything else (an output object without `data`) are left as cloned.
 *
 * @param output_row - d3 selection of the row; its datum decides rendering.
 */
function enterOutput(output_row) {
  const output = output_row.datum();
  if (Array.isArray(output)) {
    enterOutputBundle(output_row, output);
  }
}

/**
 * Render a single mime-bundle entry into an output row.
 *
 * @param output_row - d3 selection of the row to fill in.
 * @param output - `[mimeType, payload, metadata]`; the payload may be a
 *   string or a list of strings.
 */
function enterOutputBundle(output_row, output) {
  const [type, bundle] = output;
  output_row.select("td.output_type>label").text(type);
  const target = output_row.select("td.data");
  const text = bundleText(bundle);
  if (type === "text/html") {
    // Inserted verbatim: the notebook's own HTML output is trusted as-is.
    target.html(text);
  } else if (type === "text/markdown") {
    target.html(markdownify(text)).call(replaceAttachments);
  } else if (type === "text/plain") {
    target.append("samp").text(text);
  } else if (type === "image/svg+xml") {
    // SVG payloads are XML text, not base64, so they need a text data URI.
    // alt text?
    target.append("img").attr("src", `data:${type};charset=utf-8,${encodeURIComponent(text)}`);
  } else if (type.startsWith("image")) {
    // Remaining image payloads are base64; drop the line breaks they may carry.
    // alt text?
    target.append("img").attr("src", `data:${type};base64,${text.replace(/\s+/g, "")}`);
  }
}

/**
 * Collapse a bundle payload to one string.
 *
 * @param bundle - a string or a list of strings.
 * @returns {string} the concatenated payload.
 */
function bundleText(bundle) {
  return Array.isArray(bundle) ? bundle.join("") : String(bundle);
}
/**
 * Make the template's ids and id references unique to one cell.
 *
 * For every descendant carrying one of the IDREF_SELECTORS attributes, each
 * whitespace-separated token of the attribute value is rewritten:
 *   - a token starting with `:` is prefixed with the cell id (`:x` -> `c3:x`);
 *   - the token `#` becomes the cell id itself;
 *   - any other token is kept unchanged.
 *
 * @param cell_id - id generated for the cell.
 * @param cell_row - d3 selection of the cell row to rewrite.
 */
function enterCellIds(cell_id, cell_row) {
  cell_row.selectAll(IDREF_SELECTOR).each((data, i, nodes) => {
    const node = d3.select(nodes[i]);
    for (const ref of IDREF_SELECTORS) {
      const value = node.attr(ref);
      if (!value) {
        continue;
      }
      const rewritten = value
        .split(/\s+/)
        .filter(Boolean) // ignore leading/trailing/repeated whitespace
        .map((label) => {
          if (label.startsWith(":")) {
            return cell_id + label;
          }
          return label === "#" ? cell_id : label;
        })
        .join(" ");
      node.attr(ref, rewritten);
    }
  });
}
/**
 * Document-level finishing pass, run once all cells are rendered.
 *
 * @param document - d3 selection of the document (mutated in place).
 */
function exitNotebook(document) {
  // document level / non-notebook tasks
  // replace the style placeholder with the actual css for this template
  document.select("style[src='refnb.css']").attr("src", null).text(String(default_css));
  // replace the script placeholder with the source required to operate the settings in readonly mode
  document.select("script[src='settings.js']").attr("src", null).text(String(default_settings));
  // ensure headings have ids (and a permalink); a heading that already has an
  // id keeps it, since stealing someone's id would break their links.
  // Snapshot the headings first because headingToId inserts new nodes.
  for (const node of Array.from(document.selectAll(HEADINGS))) {
    headingToId(d3.select(node), document.node());
  }
  enterToc(document);
  enterSummary(document);
  // Give the first permalink anchor in the document the id `h1`.
  // NOTE(review): presumably a fixed landing target used by the template; confirm.
  document.select("a.perma").attr("id", "h1");
}
/**
 * Build the table of contents from every heading in the document.
 *
 * Each heading becomes one entry `{ heading, cell, level, id }` joined to a
 * row of `table.nb.toc`. `cell` is the 1-based position (aria-posinset) of
 * the enclosing cell row, or null for headings that are not inside a cell.
 *
 * @param document - d3 selection of the document.
 */
function enterToc(document) {
  const entries = Array.from(document.selectAll(HEADINGS)).map((element) => {
    // Headings that belong to the page chrome have no enclosing cell row.
    const row = element.closest("tr.cell[aria-posinset]");
    // h1-h6 carry their level in the tag name; ARIA headings use aria-level,
    // whose default is 2 when the attribute is absent or invalid.
    const tagLevel = /^h([1-6])$/i.exec(element.tagName);
    const level = tagLevel
      ? Number(tagLevel[1])
      : Number(element.getAttribute("aria-level")) || 2;
    return {
      heading: element.innerText ?? element.textContent,
      cell: row ? Number(row.getAttribute("aria-posinset")) : null,
      level,
      id: element.getAttribute("id"),
    };
  });
  document.select("table.nb.toc").select("tbody").selectAll("tr").data(entries).join(
    enterTocRow,
    (update) => update,
    (exit) => exit.remove(),
  );
}
/**
 * Enter handler for the table-of-contents join: append one row per entry.
 *
 * @param selection - d3 enter selection whose data are
 *   `{ heading, cell, level, id }` entries; `cell` is the 1-based position
 *   of the cell containing the heading, or null/undefined if there is none.
 */
function enterTocRow(selection) {
  selection.append((entry) => {
    const toc_row = d3.select(TPL_TOC_ROW.cloneNode(true));
    toc_row.select(".level").text(entry.level);
    toc_row.select(".heading > a").text(entry.heading).attr("href", `#${entry.id}`);
    if (entry.cell != null) {
      // The label shows the 1-based position, but cell ids are 0-based
      // (`c0` is the first cell), so the link target is one less.
      toc_row.select(".cell > a").text(entry.cell).attr("href", `#c${entry.cell - 1}`);
    }
    return toc_row.node();
  });
}
/**
 * Fill in the notebook summary: line and cell counts plus execution state.
 *
 * Writes to the elements with ids `cells:loc`, `cells:total`, `cells:md`,
 * `cells:code`, `cells:outputs` and `cells:state`.
 *
 * Execution state only looks at code cells that contain something other than
 * whitespace: the notebook is "executed in order" when their execution counts
 * run 1, 2, 3, ... in document order, and "partially ..." when at least one
 * of them has no execution count.
 *
 * @param document - d3 selection of the document; the notebook is the datum
 *   bound to its `section`.
 */
function enterSummary(document) {
  const nb = document.select("section").datum();
  const cells = nb.cells ?? [];

  // Sum the per-row line counts recorded in data-loc; 0 for an empty notebook.
  let loc = 0;
  for (const element of document.selectAll(".cell[data-loc]")) {
    loc += Number(element.dataset.loc);
  }
  const countOfType = (type) => cells.filter((cell) => cell.cell_type === type).length;
  const outputCount = cells
    .filter((cell) => cell.outputs)
    .reduce((total, cell) => total + cell.outputs.length, 0);

  document.select("#cells\\:loc").text(loc);
  document.select("#cells\\:total").text(cells.length);
  document.select("#cells\\:md").text(countOfType("markdown"));
  document.select("#cells\\:code").text(countOfType("code"));
  document.select("#cells\\:outputs").text(outputCount);

  let current_execution_count = 0;
  let monotonic = true;
  let partial = false;
  for (const cell of cells) {
    // Only code cells are ever executed.
    if (cell.cell_type !== "code") {
      continue;
    }
    // `source` may be a list of lines or a single string.
    const lines = Array.isArray(cell.source) ? cell.source : [String(cell.source ?? "")];
    const isTrivial = lines.every((line) => line.trim() === "");
    if (isTrivial) {
      continue; // blank cells say nothing about execution order
    }
    if (!cell.execution_count) {
      partial = true;
      continue;
    }
    if (cell.execution_count === current_execution_count + 1) {
      current_execution_count += 1;
    } else {
      monotonic = false;
      break;
    }
  }
  let state = monotonic ? "executed in order" : "executed out of order";
  if (partial) {
    state = "partially " + state;
  }
  document.select("#cells\\:state").text(state);
}
/**
 * Render markdown to HTML, with raw HTML and autolinking enabled and fenced
 * code highlighted by highlight.js when the language is known.
 *
 * @param {string} body - markdown source.
 * @returns {string} rendered HTML.
 */
function markdownify(body) {
  // The renderer is built once and kept on the function itself rather than in
  // a module-level `let`: this function is hoisted and called from top-level
  // code that runs before later declarations in the file are initialised.
  if (!markdownify.renderer) {
    markdownify.renderer = markdownit({
      html: true,
      linkify: true,
      highlight(str, lang) {
        if (lang && hljs.getLanguage(lang)) {
          try {
            return hljs.highlight(str, { language: lang }).value;
          } catch (__) {
            // best effort: fall through to the default escaping below
          }
        }
        return ''; // use external default escaping
      },
    });
  }
  return markdownify.renderer.render(body);
}
/**
 * Replace `attachment:<name>` image sources in rendered markdown with data
 * URIs built from the cell's attachments.
 *
 * Attachments are taken from `metadata.attachments` of the
 * `[type, bundle, metadata]` datum bound to the selection when present;
 * otherwise from the datum bound to the enclosing `tr.cell` row, if the
 * selection is attached to one. Without attachments nothing is changed.
 *
 * @param selection - d3 selection of the element holding the rendered HTML.
 */
function replaceAttachments(selection) {
  const [, , metadata] = selection.datum();
  let attachments = metadata?.attachments;
  if (!attachments) {
    const cellRow = selection.node()?.closest("tr.cell");
    attachments = cellRow ? d3.select(cellRow).datum()?.attachments : undefined;
  }
  if (!attachments) {
    return;
  }
  const prefix = "attachment:";
  selection.selectAll("img").each((_, j, nodes) => {
    const img = nodes[j];
    const src = img.getAttribute("src");
    if (!src || !src.startsWith(prefix)) {
      return;
    }
    // Everything after the prefix is the name (it may itself contain `:`).
    // The markdown renderer may have percent-encoded it, so try both forms.
    const name = src.slice(prefix.length);
    const bundle = attachments[name] ?? attachments[decodeAttachmentName(name)];
    if (!bundle) {
      return;
    }
    // An attachment maps mime types to base64 payloads; use the first entry.
    const [first] = Object.entries(bundle);
    if (first) {
      const [mime, payload] = first;
      img.setAttribute("src", `data:${mime};base64,${payload}`);
    }
  });
}

/**
 * Percent-decode an attachment name, returning it unchanged when it is not
 * valid percent-encoding.
 *
 * @param {string} name - possibly percent-encoded attachment name.
 * @returns {string} the decoded name.
 */
function decodeAttachmentName(name) {
  try {
    return decodeURIComponent(name);
  } catch {
    return name;
  }
}
/**
 * Make sure a heading has an id and put a permalink anchor right before it.
 *
 * An existing id is kept as is. Otherwise the id is the slug of the heading
 * text (or `heading` when the slug is empty), with `-2`, `-3`, ... appended
 * until no element in the document already uses it.
 *
 * @param heading - d3 selection of the heading element.
 * @param document - the DOM document node that owns the heading.
 */
function headingToId(heading, document) {
  let id = heading.attr("id");
  if (!id) {
    const base = slugify(heading.text()) || "heading";
    id = base;
    for (let n = 2; document.getElementById(id); n += 1) {
      id = `${base}-${n}`;
    }
  }
  heading.attr("id", id);
  const a = document.createElement("a");
  a.setAttribute("href", `#${id}`);
  a.setAttribute("aria-labelledby", id);
  a.setAttribute("class", "perma");
  a.textContent = "🔗";
  const h = heading.node();
  // it makes sense for the link to be before the header.
  // moving forward in the document confirms the location.
  // insertBefore must be called on the parent, with the heading as the
  // reference child; a detached heading has nowhere to put the link.
  h.parentNode?.insertBefore(a, h);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment