sschuldenzucker/roam2tex.js

## roam2tex.js
/*
Input: Roam Research's non-standard Markdown (copied from a Roam page or export).
Output: LaTeX

The goal of this is to draft papers and other documents in Roam.
Thus, we export regular text, *not* a bullet list!

Reads from stdin, writes to stdout.

Example input: https://roamresearch.com/#/app/publicpages/page/WP7tXRTgK
Example output:
https://www.dropbox.com/s/pn42k8nyujs9udp/roam2tex_example.tex?dl=0
https://www.dropbox.com/s/ymojtqydeukvhbi/roam2tex_example.pdf?dl=0

Supported features:
- Headings, bold, italic, latex formulas
- Citations!
  You can write citations as Roam page links.
  A link in standard bibtex key format (schuldenzucker2020 or schuldenzucker2020default) is mapped to a latex \cite{...} command.
- Nesting!
  Use the following nesting scheme:
    sections/subsections > topic sentences of paragraphs > other sentences of paragraphs
  This will automatically insert correct paragraph breaks.

Non-supported features (ping me if you need them):
- Highlight formatting is just removed.
- Aliases and other links
- If you're using natbib, we can't differentiate between \citet and \citep. You need to fix these later.
- Itemize / enumerate / ...
- Block references. These are not currently exported correctly by Roam, so there's nothing we can do.

Written by Steffen Schuldenzucker (@sschuldenzucker / steffen.schuldenzucker@gmail.com), 2020
License: WTFL
*/


/**
 * Render the content of a Roam page link as LaTeX.
 *
 * Link text shaped like a standard bibtex key (letters, then digits, then
 * optional letters, e.g. `schuldenzucker2020` or `Schuldenzucker2020default`,
 * as generated by Google Scholar) becomes a `\cite{...}` command. Any other
 * link text is returned unchanged, i.e. rendered as regular text.
 *
 * @param {string} text - The text between `[[` and `]]` of a Roam link.
 * @returns {string} The LaTeX rendering of the link.
 */
function renderLink(text) {
    const bibtexKeyShape = /^[a-zA-Z]+[0-9]+[a-zA-Z]*$/;
    const isCitation = text.match(bibtexKeyShape) !== null;

    // To render *all* links as citations, skip the isCitation check and
    // always return the \cite form.
    if (isCitation) {
        return "\\cite{" + text + "}";
    }

    // To keep non-citation links in double brackets instead, use:
    // return `[[${text}]]`
    return text;
}

const readline = require('readline');

/**
 * Replace every delimited span of `text` by the result of `f_via`.
 *
 * `text` is split on `rx`. The pieces at odd positions (those lying between
 * one delimiter match and the next) are passed through `f_via`; all pieces
 * are then joined back together without the delimiters.
 *
 * Note: an unclosed trailing delimiter still yields an odd-position piece, so
 * the text following it is transformed as well.
 *
 * @param {string} text - The line to process.
 * @param {RegExp} rx - Pattern matching the markup delimiter (e.g. `/__/g`).
 * @param {function(string): string} f_via - Maps the inner text of a span to its replacement.
 * @returns {string} The text with all delimited spans replaced.
 */
function replaceMarkupVia(text, rx, f_via) {
    // No shared state: the original assigned to undeclared `chunks` and `i`,
    // which leaks globals and throws under strict mode / ES modules.
    return text
        .split(rx)
        .map((chunk, index) => (index % 2 === 1 ? f_via(chunk) : chunk))
        .join("");
}

/**
 * Convert Roam markdown read from stdin into LaTeX written to stdout.
 *
 * Each input line is processed independently: leading spaces and the bullet
 * marker are stripped, `#`/`##`/`###` headings become
 * `\section`/`\subsection`/`\subsubsection`, `__x__` becomes `\emph{x}`,
 * `**x**` becomes `\textbf{x}`, `^^` highlight markers are dropped, `$$` is
 * turned into `$`, and `[[page links]]` are rendered via `renderLink`.
 *
 * A blank line (paragraph break) is emitted before every non-heading line
 * whose indent is at most the indent of the current paragraph start.
 *
 * @returns {Promise<void>} Resolves once all of stdin has been consumed.
 */
async function go() {
    const rl = readline.createInterface({
      input: process.stdin,
      output: process.stdout,
      crlfDelay: Infinity
    });

    // Indent level of the line that started the current paragraph.
    // `undefined` means the previous line was a section header (or we are at
    // the very start), so the next non-header line opens a new paragraph.
    let parIndent;

    for await (const rawLine of rl) {
        // detect & remove indentation level & bullet symbol
        const indent = rawLine.match(/^ */)[0].length;
        let line = rawLine.replace(/^ *(- )?/, "");

        // detect & replace section headers
        const isSection = /^#/.test(line);
        if (isSection) {
            parIndent = undefined;
        } else if (parIndent === undefined) {
            // previous line was a section header
            parIndent = indent;
        }
        line = line.replace(/^# (.*)/, "\n\\section{$1}");
        line = line.replace(/^## (.*)/, "\n\\subsection{$1}");
        line = line.replace(/^### (.*)/, "\n\\subsubsection{$1}");

        // replace italic/bold markup
        line = replaceMarkupVia(line, /__/g, text => "\\emph{" + text + "}");
        line = replaceMarkupVia(line, /\*\*/g, text => "\\textbf{" + text + "}");
        // remove highlight (currently not doing anything with this)
        line = line.replace(/\^\^/g, "");

        // replace latex
        line = line.replace(/\$\$/g, "$");

        // replace page links
        // KNOWN BUG: Nested page links and links containing ']' don't work.
        line = line.replace(/\[\[([^\]]*)\]\]/g, (match, linktext) => renderLink(linktext));

        // output: a line at or above the paragraph's indent level starts a
        // new paragraph, so separate it with a blank line. Never true while
        // parIndent is undefined (i.e. for section headers).
        if (parIndent !== undefined && indent <= parIndent) {
            process.stdout.write("\n");
            parIndent = indent;
        }
        process.stdout.write(line + "\n");
    }
}

// Run the converter. Handle a rejected promise explicitly instead of leaving
// it floating: report the error and signal failure through the exit code.
go().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});
	/*
	Input: Roam Research's non-standard Markdown (copied from a Roam page or export).
	Output: LaTeX

	The goal of this is to draft papers and other documents in Roam.
	Thus, we export regular text, not a bullet list!

	Reads from stdin, writes to stdout.

	Example input: https://roamresearch.com/#/app/publicpages/page/WP7tXRTgK
	Example output:
	https://www.dropbox.com/s/pn42k8nyujs9udp/roam2tex_example.tex?dl=0
	https://www.dropbox.com/s/ymojtqydeukvhbi/roam2tex_example.pdf?dl=0

	Supported features:
	- Headings, bold, italic, latex formulas
	- Citations!
	You can write citations as Roam page links.
	A link in standard bibtex key format (schuldenzucker2020 or schuldenzucker2020default) is mapped to a latex \cite{...} command.
	- Nesting!
	Use the following nesting scheme:
	sections/subsections > topic sentences of paragraphs > other sentences of paragraphs
	This will automatically insert correct paragraph breaks.

	Non-supported features (ping me if you need them):
	- Highlight formatting is just removed.
	- Aliases and other links
	- If you're using natbib, we can't differentiate between \citet and \citep. You need to fix these later.
	- Itemize / enumerate / ...
	- Block references. These are not currently exported correctly by Roam, so there's nothing we can do.

	Written by Steffen Schuldenzucker (@sschuldenzucker / steffen.schuldenzucker@gmail.com), 2020
	License: WTFL
	*/


	/**
	 * Render the content of a Roam page link as LaTeX.
	 *
	 * Link text shaped like a standard bibtex key (letters, then digits, then
	 * optional letters, e.g. `schuldenzucker2020` or `Schuldenzucker2020default`,
	 * as generated by Google Scholar) becomes a `\cite{...}` command. Any other
	 * link text is returned unchanged, i.e. rendered as regular text.
	 *
	 * @param {string} text - The text between `[[` and `]]` of a Roam link.
	 * @returns {string} The LaTeX rendering of the link.
	 */
	function renderLink(text) {
		const bibtexKeyShape = /^[a-zA-Z]+[0-9]+[a-zA-Z]*$/;
		const isCitation = text.match(bibtexKeyShape) !== null;

		// To render all links as citations, skip the isCitation check and
		// always return the \cite form.
		if (isCitation) {
			return "\\cite{" + text + "}";
		}

		// To keep non-citation links in double brackets instead, use:
		// return `[[${text}]]`
		return text;
	}

	const readline = require('readline');

	/**
	 * Replace every delimited span of `text` by the result of `f_via`.
	 *
	 * `text` is split on `rx`. The pieces at odd positions (those lying between
	 * one delimiter match and the next) are passed through `f_via`; all pieces
	 * are then joined back together without the delimiters.
	 *
	 * Note: an unclosed trailing delimiter still yields an odd-position piece,
	 * so the text following it is transformed as well.
	 *
	 * @param {string} text - The line to process.
	 * @param {RegExp} rx - Pattern matching the markup delimiter (e.g. `/__/g`).
	 * @param {function(string): string} f_via - Maps the inner text of a span to its replacement.
	 * @returns {string} The text with all delimited spans replaced.
	 */
	function replaceMarkupVia(text, rx, f_via) {
		// No shared state: the original assigned to undeclared `chunks` and `i`,
		// which leaks globals and throws under strict mode / ES modules.
		return text
			.split(rx)
			.map((chunk, index) => (index % 2 === 1 ? f_via(chunk) : chunk))
			.join("");
	}

	/**
	 * Convert Roam markdown read from stdin into LaTeX written to stdout.
	 *
	 * Each input line is processed independently: leading spaces and the bullet
	 * marker are stripped, `#`/`##`/`###` headings become
	 * `\section`/`\subsection`/`\subsubsection`, `__x__` becomes `\emph{x}`,
	 * `**x**` becomes `\textbf{x}`, `^^` highlight markers are dropped, `$$` is
	 * turned into `$`, and `[[page links]]` are rendered via `renderLink`.
	 *
	 * A blank line (paragraph break) is emitted before every non-heading line
	 * whose indent is at most the indent of the current paragraph start.
	 *
	 * @returns {Promise<void>} Resolves once all of stdin has been consumed.
	 */
	async function go() {
		const rl = readline.createInterface({
			input: process.stdin,
			output: process.stdout,
			crlfDelay: Infinity
		});

		// Indent level of the line that started the current paragraph.
		// `undefined` means the previous line was a section header (or we are
		// at the very start), so the next non-header line opens a new paragraph.
		let parIndent;

		for await (const rawLine of rl) {
			// detect & remove indentation level & bullet symbol
			const indent = rawLine.match(/^ */)[0].length;
			let line = rawLine.replace(/^ *(- )?/, "");

			// detect & replace section headers
			const isSection = /^#/.test(line);
			if (isSection) {
				parIndent = undefined;
			} else if (parIndent === undefined) {
				// previous line was a section header
				parIndent = indent;
			}
			line = line.replace(/^# (.*)/, "\n\\section{$1}");
			line = line.replace(/^## (.*)/, "\n\\subsection{$1}");
			line = line.replace(/^### (.*)/, "\n\\subsubsection{$1}");

			// replace italic/bold markup
			line = replaceMarkupVia(line, /__/g, text => "\\emph{" + text + "}");
			// Bold is delimited by a literal `**`; the pattern must escape both
			// asterisks. (It was previously `/\\/g`, which split on backslashes.)
			line = replaceMarkupVia(line, /\*\*/g, text => "\\textbf{" + text + "}");
			// remove highlight (currently not doing anything with this)
			line = line.replace(/\^\^/g, "");

			// replace latex
			line = line.replace(/\$\$/g, "$");

			// replace page links
			// KNOWN BUG: Nested page links and links containing ']' don't work.
			line = line.replace(/\[\[([^\]]*)\]\]/g, (match, linktext) => renderLink(linktext));

			// output: a line at or above the paragraph's indent level starts a
			// new paragraph, so separate it with a blank line. Never true while
			// parIndent is undefined (i.e. for section headers).
			if (parIndent !== undefined && indent <= parIndent) {
				process.stdout.write("\n");
				parIndent = indent;
			}
			process.stdout.write(line + "\n");
		}
	}

	// Run the converter. Handle a rejected promise explicitly instead of leaving
	// it floating: report the error and signal failure through the exit code.
	go().catch((err) => {
		console.error(err);
		process.exitCode = 1;
	});