Created
September 1, 2020 17:18
-
-
Save sschuldenzucker/6ff5ce2e55d67da3f2c1afda365e0d6b to your computer and use it in GitHub Desktop.
roam2tex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
Input: Roam Research's non-standard markdown (copied from a Roam page or export).
Output: LaTeX.
The goal of this is to draft papers and other documents in Roam.
Thus, we export regular text, *not* a bullet list!
Reads from stdin, writes to stdout.
Example input: https://roamresearch.com/#/app/publicpages/page/WP7tXRTgK
Example output:
  https://www.dropbox.com/s/pn42k8nyujs9udp/roam2tex_example.tex?dl=0
  https://www.dropbox.com/s/ymojtqydeukvhbi/roam2tex_example.pdf?dl=0
Supported features:
- Headings, bold, italic, LaTeX formulas
- Citations!
  You can write citations as Roam page links.
  A link in standard bibtex key format (schuldenzucker2020 or schuldenzucker2020default) is mapped to a LaTeX \cite{...} command.
- Nesting!
  Use the following nesting scheme:
    sections/subsections > topic sentences of paragraphs > other sentences of paragraphs
  This will automatically insert correct paragraph breaks.
Unsupported features (ping me if you need them):
- Highlight formatting is just removed.
- Aliases and other links
- If you're using natbib, we can't differentiate between \citet and \citep. You need to fix these later.
- Itemize / enumerate / ...
- Block references. These are not currently exported correctly by Roam, so there's nothing we can do.
Written by Steffen Schuldenzucker (@sschuldenzucker / steffen.schuldenzucker@gmail.com), 2020
License: WTFL
*/
/**
 * Render the content of a Roam page link as LaTeX.
 *
 * Only links that look like a standard bibtex key (letters, then digits, then
 * optional letters, e.g. `schuldenzucker2020` or `Schuldenzucker2020default`,
 * as generated by Google Scholar for instance) are mapped to a citation.
 * Any other link is rendered as regular text.
 *
 * Customization:
 * - To render *all* links as citations, remove the bibtex-key check below.
 * - To render non-citation links with double brackets instead, return
 *   `[[${text}]]` from the check instead of `text`.
 *
 * @param {string} text - Content of a Roam link (the part between `[[` and `]]`).
 * @returns {string} `\cite{text}` for bibtex-style keys, otherwise `text` unchanged.
 */
function renderLink(text) {
  const isBibtexKey = /^[a-zA-Z]+[0-9]+[a-zA-Z]*$/.test(text);
  if (!isBibtexKey) {
    return text;
  }
  return `\\cite{${text}}`;
}
const readline = require('readline'); | |
/**
 * Replace every span enclosed by a pair of delimiter matches with the result
 * of a callback, dropping the delimiters themselves.
 *
 * The text is split on `rx`; the pieces at odd positions are the ones that sat
 * between an opening and a closing delimiter, so those are passed through
 * `f_via`. An unmatched opening delimiter therefore wraps everything up to the
 * end of the text.
 *
 * @param {string} text - Input text.
 * @param {RegExp} rx - Delimiter pattern (expected to have no capture groups,
 *   since captures would be included in the split result).
 * @param {(inner: string) => string} f_via - Maps the enclosed text to its replacement.
 * @returns {string} The text with all enclosed spans replaced.
 */
function replaceMarkupVia(text, rx, f_via) {
  return text
    .split(rx)
    // Local bindings only: the pieces and the index must not leak as globals.
    .map((chunk, index) => (index % 2 === 1 ? f_via(chunk) : chunk))
    .join("");
}
/**
 * Convert Roam Research markdown read from stdin into LaTeX written to stdout.
 *
 * Input is processed line by line. For each line the leading indentation and
 * bullet symbol are stripped, section headers / emphasis / formulas / page
 * links are translated, and a blank line is emitted before the line whenever
 * its indentation shows that a new paragraph starts.
 *
 * @returns {Promise<void>} Resolves once the input stream has ended.
 */
async function go() {
  // No `output` stream is attached: this is a plain stdin -> stdout filter, and
  // with `output: process.stdout` readline would default to terminal mode
  // whenever stdout is a TTY.
  const rl = readline.createInterface({
    input: process.stdin,
    crlfDelay: Infinity,
  });

  // Indent level of the line that started the current paragraph.
  // `undefined` means "no paragraph open yet": at the very start and directly
  // after a section header.
  let parIndent;

  for await (const rawLine of rl) {
    // Detect the indentation level, then remove it together with the bullet symbol.
    const indent = rawLine.match(/^ */)[0].length;
    let line = rawLine.replace(/^ *(- )?/, "");

    // A section header closes the current paragraph; the first line after it
    // (or the first line of the input) opens a new one at its own indent.
    const isSection = /^#/.test(line);
    if (isSection) {
      parIndent = undefined;
    } else if (parIndent === undefined) {
      parIndent = indent;
    }

    // Section headers. Each pattern requires a space right after its hashes,
    // so at most one of the three replacements applies to a given line.
    line = line.replace(/^# (.*)/, "\n\\section{$1}");
    line = line.replace(/^## (.*)/, "\n\\subsection{$1}");
    line = line.replace(/^### (.*)/, "\n\\subsubsection{$1}");

    // Italic / bold markup.
    line = replaceMarkupVia(line, /__/g, (text) => `\\emph{${text}}`);
    line = replaceMarkupVia(line, /\*\*/g, (text) => `\\textbf{${text}}`);

    // Highlight markers are simply removed (not rendered in any way).
    line = line.replace(/\^\^/g, "");

    // Roam's `$$...$$` formulas become inline `$...$` math.
    line = line.replace(/\$\$/g, "$");

    // Page links.
    // KNOWN BUG: Nested page links and links containing ']' don't work.
    line = line.replace(/\[\[([^\]]*)\]\]/g, (match, linkText) => renderLink(linkText));

    // A line at the same or a shallower indent than the paragraph start begins
    // a new paragraph: emit a blank line and track the new paragraph indent.
    // Deeper lines continue the current paragraph. Section lines never get
    // here with an open paragraph, so they are written without the extra break.
    if (parIndent !== undefined && indent <= parIndent) {
      process.stdout.write("\n");
      parIndent = indent;
    }
    process.stdout.write(`${line}\n`);
  }
}
// Entry point: run the converter. A failure is reported on stderr and turned
// into a non-zero exit code instead of leaving the promise rejection unhandled.
go().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment