Skip to content

Instantly share code, notes, and snippets.

@shaunlebron
Last active November 16, 2021 00:38
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shaunlebron/309f585a90d34614811e3dffe9a6e677 to your computer and use it in GitHub Desktop.
Save shaunlebron/309f585a90d34614811e3dffe9a6e677 to your computer and use it in GitHub Desktop.
Print beautiful articles (with HTML to LaTeX)

Print Beautiful Articles (with HTML to LaTeX)

It’s hard to read long articles on a computer. Printing them with beautiful typesetting helps.

This solution converts the whole article (or selected part) to LaTeX, then opens it for printing (or editing) in Overleaf. You need an account there, but no installs.

Images

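Images are referenced in the generated LaTeX code, but they won’t show up until you upload each image file to Overleaf yourself. Look for the \includegraphics commands, which are commented out in the code; uncomment them once the files are uploaded, and refer to Overleaf’s documentation on inserting images for the proper syntax.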

Sample

printing this: https://medium.com/deep-code/understanding-the-blue-church-e4781b2bd9b5

Screen Shot 2019-10-30 at 12 59 40 AM

See Also

(async() => {
// Remove any page-provided module-loader hooks so the libraries loaded below
// export themselves as plain globals instead of registering with a loader.
delete window.define;
delete window.module;

// Downloads a script's source text and fails loudly on an HTTP error, so that
// an error page is never handed to eval.
const fetchSource = async (url) => {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Could not load ${url} (HTTP ${response.status})`);
  }
  return response.text();
};

// NOTE(security): this evaluates code fetched over the network. Both URLs are
// pinned (a release tag and a commit hash), but review them before changing.
// Keep these as direct `eval` calls in this scope: `findAndReplaceDOMText` and
// `Readability` are used below and are only defined by the evaluated sources.
// load findAndReplaceDOMText
eval(await fetchSource("https://raw.githubusercontent.com/padolsey/findAndReplaceDOMText/0.4.6/src/findAndReplaceDOMText.js"));
// load Readability
eval(await fetchSource("https://raw.githubusercontent.com/mozilla/readability/d6fc38c4b45d9bc337e3579b53ffa7c188cc4d77/Readability.js"));

// main
// Readability mutates the document it is given, so it gets a clone.
const documentClone = document.cloneNode(true);
// parse() may return null when no article can be extracted.
const article = new Readability(documentClone).parse();
const { title, byline } = article || {};

// A non-empty selection wins over the extracted article.
const sourceNode = selectedArticleNode() || (article ? wholeArticleNode(article) : null);
if (!sourceNode) {
  throw new Error("Nothing to print: no text is selected and Readability could not extract an article");
}

const node = latexifyNode(sourceNode);
// textContent (rather than joining each child's innerText) also keeps text
// nodes that sit directly under `node`, as happens with a partial selection.
const latex = tidyLatex(node.textContent);

// The title and byline are plain text, so LaTeX's reserved characters must be
// escaped before they are placed inside \title{} and \author{}.
const LATEX_TEXT_ESCAPES = {
  "\\": "\\textbackslash{}",
  "^": "\\textasciicircum{}",
  "~": "\\textasciitilde{}",
};
const escapeLatexText = (text) =>
  String(text || "").replace(/[&_%#$^~\\{}]/g, (ch) => LATEX_TEXT_ESCAPES[ch] || `\\${ch}`);

openInOverleaf(`
\\documentclass{article}
\\setlength{\\parskip}{1em}
\\setlength{\\parindent}{0em}
\\usepackage[utf8]{inputenc}
\\usepackage{libertine}
\\usepackage[all]{nowidow}
\\PassOptionsToPackage{hyphens}{url}\\usepackage{hyperref}
\\title{${escapeLatexText(title)}}
\\author{${escapeLatexText(byline)}}
\\date{}
\\begin{document}
\\maketitle
${latex}
\\end{document}`);
// utils
/**
 * Rewrites a DOM subtree in place so that its text content reads as LaTeX.
 *
 * Text is escaped first; afterwards LaTeX markup is inserted as plain text
 * around (or inside) the elements it corresponds to. The order matters:
 * markup added later must not be seen by the escaping passes.
 *
 * @param {Element|DocumentFragment} node - detached subtree to convert (mutated).
 * @returns {Element|DocumentFragment} the same `node`, for chaining.
 */
function latexifyNode(node) {
  const ESCAPE_CHARS = /[\&\_\%\#\$\^\~\\\{\}]/g;
  // A leading backslash is wrong for these three in running text: `\\` is a
  // line break and `\^` / `\~` are accent commands, so use the text commands.
  const TEXT_ESCAPES = {
    "\\": "\\textbackslash{}",
    "^": "\\textasciicircum{}",
    "~": "\\textasciitilde{}",
  };
  findAndReplaceDOMText(node, {
    find: ESCAPE_CHARS,
    replace: (_, match) => TEXT_ESCAPES[match[0]] || `\\${match[0]}`,
  });
  // Brace-wrap square brackets so they are never parsed as optional arguments.
  findAndReplaceDOMText(node, { find: "[", replace: "{[}" });
  findAndReplaceDOMText(node, { find: "]", replace: "{]}" });

  // Tagged-template helpers: the literal text before and after the single
  // ${element} placeholder is inserted at the start and end of that element.
  function wrap([before, after], element) {
    element.prepend(before);
    element.append(after);
  }
  // Same as wrap, but padded with blank lines so the markup starts a new paragraph.
  function wrapBlock([before, after], element) {
    return wrap([`\n\n${before}`, `${after}\n\n`], element);
  }

  node.querySelectorAll("em").forEach(e => wrap`\\emph{${e}}`);
  node.querySelectorAll("strong").forEach(e => wrap`\\textbf{${e}}`);
  node.querySelectorAll("h1, h2, h3, h4, h5, h6").forEach(e => wrapBlock`\\section*{${e}}`);
  node.querySelectorAll("sup").forEach(e => wrap`\\textsuperscript{${e}}`);
  node.querySelectorAll("blockquote").forEach(e => wrapBlock`\\begin{quote}${e}\\end{quote}`);
  node.querySelectorAll("li").forEach(e => wrapBlock`\\item ${e}`);
  node.querySelectorAll("ul").forEach(e => wrapBlock`\\begin{itemize}${e}\\end{itemize}`);
  node.querySelectorAll("ol").forEach(e => wrapBlock`\\begin{enumerate}${e}\\end{enumerate}`);
  node.querySelectorAll("p").forEach(e => wrapBlock`${e}\n\n`);
  // textContent, not innerText: the innerText setter turns every newline into a
  // <br> element, which would drop the line breaks these snippets rely on.
  node.querySelectorAll("br").forEach(e => { e.textContent = "\\newline\n"; });
  node.querySelectorAll("figure").forEach(e => wrapBlock`\n\n\\begin{figure}[h]${e}\\end{figure}`);
  node.querySelectorAll("figcaption").forEach(e => wrapBlock`\\caption{${e}}`);
  // Only anchors that actually carry a URL get a footnote.
  node.querySelectorAll("a[href]").forEach(e => e.append(`\\footnote{\\url{${e.href.replace(ESCAPE_CHARS, m => "\\"+m)}}}`));
  node.querySelectorAll("hr").forEach(e => { e.textContent = "\n\n\\begin{center}.\\hspace{3mm}.\\hspace{3mm}.\\end{center}\n\n"; });
  // Images are emitted as a commented-out \includegraphics line. The backslash
  // before "textwidth" must be doubled, otherwise "\t" becomes a TAB character.
  node.querySelectorAll("img").forEach(e => { e.textContent = `\n\n% \\includegraphics[width=\\textwidth]{${e.src}}\n\n`; });
  node.querySelectorAll("noscript, script").forEach(e => e.remove());
  return node;
}
/**
 * Normalizes whitespace in generated LaTeX and applies simple smart-quote
 * heuristics to straight quotes.
 *
 * @param {string} latex - raw LaTeX text.
 * @returns {string} the cleaned-up LaTeX text.
 */
function tidyLatex(latex) {
  return latex
    // Non-breaking spaces (seen in TinyLetter) become ordinary spaces. Written
    // as an escape so the invisible character survives copy/paste.
    .replace(/\u00a0/g, " ")
    .split("\n").map((line) => line.trim()).join("\n")
    // A bare \par is a paragraph break; the lookahead keeps longer command
    // names that merely start with "par" (\partial, \parbox, ...) intact.
    .replace(/\\par(?![A-Za-z])\n*/g, "\n\n")
    // A \newline next to a paragraph break, or doubled, is just a paragraph break.
    .replace(/\n\n\\newline/g, "\n\n")
    .replace(/\\newline\n\n/g, "\n\n")
    .replace(/\\newline\n?\\newline/g, "\n\n")
    // Never more than one blank line in a row.
    .replace(/\n{3,}/g, "\n\n")
    // Smart quote heuristics: a quote at a line start or after a space opens,
    // a quote at a line end or before a space closes.
    .replace(/^"/gm, "“")
    .replace(/"$/gm, "”")
    .replace(/" /g, "” ")
    .replace(/ "/g, " “")
    .replace(/^'/gm, "‘")
    .replace(/ '/g, " ‘")
    .replace(/'$/gm, "’")
    .replace(/' /g, "’ ");
}
/**
 * Opens a LaTeX snippet as a new Overleaf document in a new tab by POSTing it
 * to Overleaf's /docs endpoint through a temporary form.
 *
 * @param {string} snip - complete LaTeX source to send.
 */
function openInOverleaf(snip) {
  const holder = document.createElement("div");
  // Keep the helper form out of the visible page while it exists.
  holder.hidden = true;
  holder.innerHTML = `
<form action="https://www.overleaf.com/docs" method="post" target="_blank">
<textarea name="snip"></textarea>
</form>`;
  // The form must be connected to the document for submit() to take effect.
  document.body.appendChild(holder);
  // Assign through .value so the LaTeX is never parsed as HTML.
  holder.querySelector('[name="snip"]').value = snip;
  holder.querySelector("form").submit();
  // The form data has been handed off; do not leave the helper in the page.
  holder.remove();
}
/**
 * Builds a detached <div> whose markup is the article's HTML content.
 *
 * @param {{content: string}} article - object carrying the article HTML in `content`.
 * @returns {HTMLDivElement} a new, unattached div containing that HTML.
 */
function wholeArticleNode(article) {
  return Object.assign(document.createElement("div"), { innerHTML: article.content });
}
/**
 * Returns a copy of the user's current selection, if there is one.
 *
 * @returns {DocumentFragment|null} cloned contents of the first selected
 *   range, or null when the selection is collapsed.
 */
function selectedArticleNode() {
  const selection = getSelection();
  if (selection.isCollapsed) {
    return null;
  }
  const firstRange = selection.getRangeAt(0);
  return firstRange.cloneContents();
}
})()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment