adrianjost/md2tex.js

## md2tex.js
// Copyright (C) 2019 Adrian Jost
// This code is licensed under MIT license (see https://tldrlegal.com/license/mit-license for details)

const fs = require("fs");

/**
 * Promise wrapper around the callback-based `fs.readFile`, reading as UTF-8.
 *
 * @param {string} filename - path passed straight to `fs.readFile`
 * @returns {Promise<string>} resolves with the file contents, rejects with the
 *   error reported by `fs.readFile`
 */
const readFile = filename => {
	return new Promise((resolve, reject) => {
		fs.readFile(filename, "utf8", (err, data) => {
			if (err) {
				// stop here so a failed read never falls through to resolve()
				reject(err);
				return;
			}
			resolve(data);
		});
	});
};

/**
 * Escape characters that have a special meaning in LaTeX.
 *
 * `& % $ # _ { }` are prefixed with a backslash; `~`, `^` and `\` are replaced
 * by their `\text...` commands (each followed by a space).
 *
 * Everything happens in ONE pass over the input: escaping in several passes
 * would let the backslash pass re-escape the backslashes inserted by the
 * earlier passes (e.g. `&` would end up as `\textbackslash &`).
 *
 * @param {string} text - raw text
 * @returns {string} text that is safe to place inside a LaTeX argument
 */
const escapeSpecialChars = text => {
	const commands = {
		"~": "\\textasciitilde ",
		"^": "\\textasciicircum ",
		"\\": "\\textbackslash "
	};
	return text.replace(
		/[&%$#_{}~^\\]/g,
		char => commands[char] || `\\${char}`
	);
};
/**
 * Strip every LaTeX special character (`& % $ # _ { } ~ ^ \`) from a string.
 *
 * @param {string} text - raw text
 * @returns {string} the text without any of those characters
 */
const removeSpecialChars = text => {
	const latexSpecialChars = /([\&\%\$\#\_\{\}\~\^\\])/g;
	const stripped = text.replace(latexSpecialChars, "");
	return stripped;
};

/**
 * Convert a markdown headline (`#` .. `####`) into its LaTeX counterpart.
 *
 * Level 1 becomes a `\chapter` framed by comment rules; levels 2-4 become
 * `\section`, `\subsection` and `\subsubsection` wrapped in a `\hypertarget`.
 * Lines that do not start with `#` are returned untouched. Deeper levels are
 * reported via `console.error` and returned untouched as well.
 *
 * @param {string} line - a single markdown line
 * @param {number} index - only used in the "unsupported heading" message
 * @returns {string} LaTeX for the headline, or the unchanged line
 */
const mapHeadlines = (line, index) => {
	const isHeadline = line.startsWith("#");
	if (!isHeadline) {
		return line;
	}

	const title = line.replace(/^[#]+\W/, "");
	const text = escapeSpecialChars(title);
	// label/anchor: lowercased title, special chars dropped, non-word chars -> "-"
	const identifier = removeSpecialChars(title.toLowerCase()).replace(/\W/g, "-");
	const depth = line.match(/^[#]+/g)[0].length;

	if (depth === 1) {
		const rule = `%${"*".repeat(48)}`;
		return `${rule}\n\\chapter{${text}}\n\\label{ch:${identifier}}\n${rule}\n`;
	}

	const commandByDepth = { 2: "section", 3: "subsection", 4: "subsubsection" };
	const command = commandByDepth[depth];
	if (command === undefined) {
		console.error(`unsupported heading ${index}: "${line}"`);
		return line;
	}
	return `\\hypertarget{${identifier}}{\n\\${command}{${text}}\\label{${identifier}}}\n`;
};

/**
 * Translate a markdown code fence line into the start or end of a
 * `listing`/`minted` environment.
 *
 * - a line not starting with three backticks is returned unchanged
 * - a bare fence closes the environment and adds a caption and label
 * - a fence followed by text opens the environment, using the lowercased,
 *   trimmed text as the minted language
 *
 * @param {string} line - a single markdown line
 * @param {number} index - number used in the caption and label of a closing fence
 * @returns {string} LaTeX for the fence, or the unchanged line
 */
const mapCodeblocks = (line, index) => {
	if (!line.startsWith("```")) {
		return line;
	}
	if (line.trim() === "```") {
		return [
			"\t\\end{minted}",
			`\t\\caption{Code Snippet ${index}}`,
			// label keeps its historic spelling so existing \ref{}s stay valid
			`\t\\label{lst:code-snipped-${index}}`,
			"\\end{listing}"
		].join("\n");
	}
	const language = line
		.slice(3)
		.trim()
		.toLowerCase();

	return `\\begin{listing}[H]\n\t\\begin{minted}{${language}}`;
};

/**
 * Replace every `inline code` span of a line with a gray `\colorbox`
 * containing the code as `\texttt`.
 *
 * @param {string} line - a single markdown line
 * @returns {string} the line with all backtick spans converted
 */
const mapInlineCodeblocks = line => {
	const inlineCode = /\`([^\`]*)\`/g;
	const toColorbox = (_match, code) =>
		`\\colorbox{gray-light!}{\\texttt{${code}}}`;
	return line.replace(inlineCode, toColorbox);
};

/**
 * Convert markdown emphasis to LaTeX: `**x**` / `__x__` become `\textbf{x}`,
 * `*x*` / `_x_` become `\textit{x}`. Delimiters preceded by an odd number of
 * backslashes are treated as escaped and left alone.
 *
 * @param {string} line - a single markdown line
 * @returns {string} the line with bold/italic markers converted
 */
const mapTextStyle = line => {
	/**
	 * Build a `String.prototype.replace` callback that fills the `$n`
	 * placeholders of `newString` with the n-th match/capture group, after
	 * removing the backslash in front of `*`, `_` and `\`.
	 *
	 * All placeholders are filled in ONE pass and the inserted text is never
	 * re-scanned, so matched text that itself contains `$4` or `$&` is kept
	 * literally.
	 */
	const replaceWithUnescapedMatch = newString => (...args) => {
		// replace() calls back with (match, group1..groupN, offset, input, ...):
		// the first numeric argument is the offset, everything before it is
		// the match and its capture groups
		const offsetIndex = args.findIndex(arg => typeof arg === "number");
		return newString.replace(/\$(\d+)/g, (placeholder, digits) => {
			const position = Number(digits);
			const value = args[position];
			if (position >= offsetIndex || typeof value !== "string") {
				return placeholder;
			}
			// unescape
			return value.replace(/\\([\*\_\\])/g, "$1");
		});
	};

	// 1. escape all _ and * inside urls
	// https://regex101.com/r/TJ2xDV/3
	line = line.replace(/(?<=[^\!]\[.*\]\(.*)([\\\_\*])(?=.*\))/g, "\\$1");

	// 2. apply bold when not escaped. then unescape the text
	line = line
		// check regex for single character below for details
		.replace(
			/(?<=(?:^|[^\\])(?:\\\\)*)(?<!\!\[.*\].*)(\*\*|\_\_)((?:[^\\]|\\[^\\]|\\\\)*?)\1/g,
			replaceWithUnescapedMatch("\\textbf{$2}")
		);
	// 3. apply italic when not escaped. then unescape the text
	line = line
		// https://regex101.com/r/jocMku/2
		.replace(
			/(?<=(?:^|[^\\])(?:\\\\)*)(?<!\!\[.*\].*)(\*|\_)((?:[^\\]|\\[^\\]|\\\\)*?)\1/g,
			replaceWithUnescapedMatch("\\textit{$2}")
		);

	return line;
};

/**
 * Turn every markdown image `![alt](path)` of a line into a LaTeX `figure`.
 *
 * The path is used for `\includegraphics`, for both caption arguments and for
 * the `fig:` label.
 * NOTE(review): the alt text is captured by the pattern but never used -
 * confirm whether the caption was meant to show it.
 *
 * @param {string} line - a single markdown line
 * @returns {string} the line with all images replaced by figure environments
 */
const mapImages = line => {
	const imagePattern = /\!\[([^\]]*)\]\(([^\)]+)\)/g;
	const toFigure = (_match, _alt, path) =>
		[
			"",
			"\\begin{figure}[H]",
			"\t\\centering",
			`\t\\includegraphics[width=\\textwidth]{${path}}`,
			`\t\\caption[${path}]{${path}}`,
			`\t\\label{fig:${path}}`,
			"\\end{figure}",
			""
		].join("\n");
	return line.replace(imagePattern, toFigure);
};

/**
 * Turn every markdown link `[text](url)` of a line into the link text
 * followed by a `\footnote{url}`. Images (`![alt](path)`) are left alone.
 *
 * The "not an image" check is a lookbehind, so the character in front of the
 * link is kept in the output, and links at the very start of the line or
 * directly after another link are converted too.
 *
 * @param {string} line - a single markdown line
 * @returns {string} the line with all links replaced by text + footnote
 */
const mapFootnotes = line => {
	const footnote = `$1\n\\footnote{$2}\n`;
	return line.replace(/(?<!!)\[([^\]]*)\]\(([^)]+)\)/g, footnote);
};

/**
 * Convert `./in.md` to LaTeX line by line, print the result to stdout and
 * write it to `./out.tex`.
 *
 * Lines inside a fenced code block are passed through unchanged. Every other
 * line runs through the inline/headline/image/footnote mappers and is then
 * checked for ordered (`1. `) and unordered (`- `) list items.
 *
 * @returns {Promise<void>} settles once the input is converted and the write
 *   is started; rejects when `./in.md` cannot be read
 */
const main = async () => {
	let inCodeBlock = false;
	// false while no list is open, otherwise the \end{...} tag of the open list
	let inList = false;

	// Prefix an \item with \begin{...} when it starts a list; when the list
	// type changes without a separating line, the previous list is closed first
	const toListItem = (item, environment) => {
		const endTag = `\\end{${environment}}`;
		let prefix = "";
		if (inList !== endTag) {
			const closePrevious = inList ? `${inList}\n` : "";
			prefix = `${closePrevious}\\begin{${environment}}\n`;
		}
		inList = endTag;
		return `${prefix}${item}`;
	};

	const content = await readFile("./in.md");
	const lines = content
		.split("\n")
		.map(a => a.trimEnd())
		.map((a, i) => {
			// block styles
			if (a.startsWith("```")) {
				inCodeBlock = !inCodeBlock;
			}
			// the line index numbers the caption/label of the code listing
			a = mapCodeblocks(a, i);
			if (inCodeBlock) {
				return a;
			}

			// text styles
			a = mapInlineCodeblocks(a);
			a = mapTextStyle(a);
			// headlines
			a = mapHeadlines(a, i);
			a = mapImages(a);
			a = mapFootnotes(a);

			// Lists (\d+ so that items like "10." are recognized as well)
			if (/^\d+\.\s/.test(a)) {
				a = toListItem(a.replace(/^\d+\.(\s)/, "\t\\item$1"), "enumerate");
			} else if (/^\-\s/.test(a)) {
				a = toListItem(a.replace(/^\-(\s)/, "\t\\item$1"), "itemize");
			} else if (inList) {
				a = `${a}${inList}\n`;
				inList = false;
			}

			// Return Result
			return a;
		});

	// a list that runs until the last line still needs its closing tag
	if (inList) {
		lines.push(inList);
		inList = false;
	}

	const output = lines.join("\n");
	console.log("**********************\nOUTPUT START\n**********************");
	console.log(output);
	console.log("**********************\nOUTPUT END\n**********************");
	fs.writeFile("./out.tex", output, err => {
		if (err) {
			return console.error(err);
		}
		console.log("The file was saved!");
	});
};

// Run the conversion; report a failure (e.g. unreadable ./in.md) instead of
// leaving the rejected promise unhandled.
main().catch(err => {
	console.error(err);
	process.exitCode = 1;
});
	// Copyright (C) 2019 Adrian Jost
	// This code is licensed under MIT license (see https://tldrlegal.com/license/mit-license for details)

	const fs = require("fs");

	/**
	 * Promise wrapper around the callback-based `fs.readFile`, reading as UTF-8.
	 *
	 * @param {string} filename - path passed straight to `fs.readFile`
	 * @returns {Promise<string>} resolves with the file contents, rejects with the
	 *   error reported by `fs.readFile`
	 */
	const readFile = filename => {
		return new Promise((resolve, reject) => {
			fs.readFile(filename, "utf8", (err, data) => {
				if (err) {
					// stop here so a failed read never falls through to resolve()
					reject(err);
					return;
				}
				resolve(data);
			});
		});
	};

	/**
	 * Escape characters that have a special meaning in LaTeX.
	 *
	 * `& % $ # _ { }` are prefixed with a backslash; `~`, `^` and `\` are replaced
	 * by their `\text...` commands (each followed by a space).
	 *
	 * Everything happens in ONE pass over the input: escaping in several passes
	 * would let the backslash pass re-escape the backslashes inserted by the
	 * earlier passes (e.g. `&` would end up as `\textbackslash &`).
	 *
	 * @param {string} text - raw text
	 * @returns {string} text that is safe to place inside a LaTeX argument
	 */
	const escapeSpecialChars = text => {
		const commands = {
			"~": "\\textasciitilde ",
			"^": "\\textasciicircum ",
			"\\": "\\textbackslash "
		};
		return text.replace(
			/[&%$#_{}~^\\]/g,
			char => commands[char] || `\\${char}`
		);
	};
	/**
	 * Strip every LaTeX special character (`& % $ # _ { } ~ ^ \`) from a string.
	 *
	 * @param {string} text - raw text
	 * @returns {string} the text without any of those characters
	 */
	const removeSpecialChars = text => {
		const latexSpecialChars = /([\&\%\$\#\_\{\}\~\^\\])/g;
		const stripped = text.replace(latexSpecialChars, "");
		return stripped;
	};

	/**
	 * Convert a markdown headline (`#` .. `####`) into its LaTeX counterpart.
	 *
	 * Level 1 becomes a `\chapter` framed by comment rules; levels 2-4 become
	 * `\section`, `\subsection` and `\subsubsection` wrapped in a `\hypertarget`.
	 * Lines that do not start with `#` are returned untouched. Deeper levels are
	 * reported via `console.error` and returned untouched as well.
	 *
	 * @param {string} line - a single markdown line
	 * @param {number} index - only used in the "unsupported heading" message
	 * @returns {string} LaTeX for the headline, or the unchanged line
	 */
	const mapHeadlines = (line, index) => {
		const isHeadline = line.startsWith("#");
		if (!isHeadline) {
			return line;
		}

		const title = line.replace(/^[#]+\W/, "");
		const text = escapeSpecialChars(title);
		// label/anchor: lowercased title, special chars dropped, non-word chars -> "-"
		const identifier = removeSpecialChars(title.toLowerCase()).replace(/\W/g, "-");
		const depth = line.match(/^[#]+/g)[0].length;

		if (depth === 1) {
			const rule = `%${"*".repeat(46)}`;
			return `${rule}\n\\chapter{${text}}\n\\label{ch:${identifier}}\n${rule}\n`;
		}

		const commandByDepth = { 2: "section", 3: "subsection", 4: "subsubsection" };
		const command = commandByDepth[depth];
		if (command === undefined) {
			console.error(`unsupported heading ${index}: "${line}"`);
			return line;
		}
		return `\\hypertarget{${identifier}}{\n\\${command}{${text}}\\label{${identifier}}}\n`;
	};

	/**
	 * Translate a markdown code fence line into the start or end of a
	 * `listing`/`minted` environment.
	 *
	 * - a line not starting with three backticks is returned unchanged
	 * - a bare fence closes the environment and adds a caption and label
	 * - a fence followed by text opens the environment, using the lowercased,
	 *   trimmed text as the minted language
	 *
	 * @param {string} line - a single markdown line
	 * @param {number} index - number used in the caption and label of a closing fence
	 * @returns {string} LaTeX for the fence, or the unchanged line
	 */
	const mapCodeblocks = (line, index) => {
		if (!line.startsWith("```")) {
			return line;
		}
		if (line.trim() === "```") {
			return [
				" \\end{minted}",
				`\t\\caption{Code Snippet ${index}}`,
				// label keeps its historic spelling so existing \ref{}s stay valid
				`\t\\label{lst:code-snipped-${index}}`,
				"\t\\end{listing}"
			].join("\n");
		}
		const language = line
			.slice(3)
			.trim()
			.toLowerCase();

		return `\\begin{listing}[H]\n\t\\begin{minted}{${language}}`;
	};

	/**
	 * Replace every `inline code` span of a line with a gray `\colorbox`
	 * containing the code as `\texttt`.
	 *
	 * @param {string} line - a single markdown line
	 * @returns {string} the line with all backtick spans converted
	 */
	const mapInlineCodeblocks = line => {
		const inlineCode = /\`([^\`]*)\`/g;
		const toColorbox = (_match, code) =>
			`\\colorbox{gray-light!}{\\texttt{${code}}}`;
		return line.replace(inlineCode, toColorbox);
	};

	/**
	 * Convert markdown emphasis to LaTeX: `**x**` / `__x__` become `\textbf{x}`,
	 * `*x*` / `_x_` become `\textit{x}`. Delimiters preceded by an odd number of
	 * backslashes are treated as escaped and left alone.
	 *
	 * @param {string} line - a single markdown line
	 * @returns {string} the line with bold/italic markers converted
	 */
	const mapTextStyle = line => {
		/**
		 * Build a `String.prototype.replace` callback that fills the `$n`
		 * placeholders of `newString` with the n-th match/capture group, after
		 * removing the backslash in front of `*`, `_` and `\`.
		 *
		 * All placeholders are filled in ONE pass and the inserted text is never
		 * re-scanned, so matched text that itself contains `$4` or `$&` is kept
		 * literally.
		 */
		const replaceWithUnescapedMatch = newString => (...args) => {
			// replace() calls back with (match, group1..groupN, offset, input, ...):
			// the first numeric argument is the offset, everything before it is
			// the match and its capture groups
			const offsetIndex = args.findIndex(arg => typeof arg === "number");
			return newString.replace(/\$(\d+)/g, (placeholder, digits) => {
				const position = Number(digits);
				const value = args[position];
				if (position >= offsetIndex || typeof value !== "string") {
					return placeholder;
				}
				// unescape
				return value.replace(/\\([\*\_\\])/g, "$1");
			});
		};

		// 1. escape all _ and * inside urls
		// https://regex101.com/r/TJ2xDV/3
		line = line.replace(/(?<=[^\!]\[.*\]\(.*)([\\\_\*])(?=.*\))/g, "\\$1");

		// 2. apply bold when not escaped. then unescape the text
		line = line
			// check regex for single character below for details
			.replace(
				/(?<=(?:^|[^\\])(?:\\\\)*)(?<!\!\[.*\].*)(\*\*|\_\_)((?:[^\\]|\\[^\\]|\\\\)*?)\1/g,
				replaceWithUnescapedMatch("\\textbf{$2}")
			);
		// 3. apply italic when not escaped. then unescape the text
		line = line
			// https://regex101.com/r/jocMku/2
			.replace(
				/(?<=(?:^|[^\\])(?:\\\\)*)(?<!\!\[.*\].*)(\*|\_)((?:[^\\]|\\[^\\]|\\\\)*?)\1/g,
				replaceWithUnescapedMatch("\\textit{$2}")
			);

		return line;
	};

	/**
	 * Turn every markdown image `![alt](path)` of a line into a LaTeX `figure`.
	 *
	 * The path is used for `\includegraphics`, for both caption arguments and for
	 * the `fig:` label.
	 * NOTE(review): the alt text is captured by the pattern but never used -
	 * confirm whether the caption was meant to show it.
	 *
	 * @param {string} line - a single markdown line
	 * @returns {string} the line with all images replaced by figure environments
	 */
	const mapImages = line => {
		const imagePattern = /\!\[([^\]]*)\]\(([^\)]+)\)/g;
		const toFigure = (_match, _alt, path) =>
			[
				"",
				"\\begin{figure}[H]",
				"\t\\centering",
				`\t\\includegraphics[width=\\textwidth]{${path}}`,
				`\t\\caption[${path}]{${path}}`,
				`\t\\label{fig:${path}}`,
				"\t\\end{figure}",
				""
			].join("\n");
		return line.replace(imagePattern, toFigure);
	};

	/**
	 * Turn every markdown link `[text](url)` of a line into the link text
	 * followed by a `\footnote{url}`. Images (`![alt](path)`) are left alone.
	 *
	 * The "not an image" check is a lookbehind, so the character in front of the
	 * link is kept in the output, and links at the very start of the line or
	 * directly after another link are converted too.
	 *
	 * @param {string} line - a single markdown line
	 * @returns {string} the line with all links replaced by text + footnote
	 */
	const mapFootnotes = line => {
		const footnote = `$1\n\\footnote{$2}\n`;
		return line.replace(/(?<!!)\[([^\]]*)\]\(([^)]+)\)/g, footnote);
	};

	/**
	 * Convert `./in.md` to LaTeX line by line, print the result to stdout and
	 * write it to `./out.tex`.
	 *
	 * Lines inside a fenced code block are passed through unchanged. Every other
	 * line runs through the inline/headline/image/footnote mappers and is then
	 * checked for ordered (`1. `) and unordered (`- `) list items.
	 *
	 * @returns {Promise<void>} settles once the input is converted and the write
	 *   is started; rejects when `./in.md` cannot be read
	 */
	const main = async () => {
		let inCodeBlock = false;
		// false while no list is open, otherwise the \end{...} tag of the open list
		let inList = false;

		// Prefix an \item with \begin{...} when it starts a list; when the list
		// type changes without a separating line, the previous list is closed first
		const toListItem = (item, environment) => {
			const endTag = `\\end{${environment}}`;
			let prefix = "";
			if (inList !== endTag) {
				const closePrevious = inList ? `${inList}\n` : "";
				prefix = `${closePrevious}\\begin{${environment}}\n`;
			}
			inList = endTag;
			return `${prefix}${item}`;
		};

		const content = await readFile("./in.md");
		const lines = content
			.split("\n")
			.map(a => a.trimEnd())
			.map((a, i) => {
				// block styles
				if (a.startsWith("```")) {
					inCodeBlock = !inCodeBlock;
				}
				// the line index numbers the caption/label of the code listing
				a = mapCodeblocks(a, i);
				if (inCodeBlock) {
					return a;
				}

				// text styles
				a = mapInlineCodeblocks(a);
				a = mapTextStyle(a);
				// headlines
				a = mapHeadlines(a, i);
				a = mapImages(a);
				a = mapFootnotes(a);

				// Lists (\d+ so that items like "10." are recognized as well)
				if (/^\d+\.\s/.test(a)) {
					a = toListItem(a.replace(/^\d+\.(\s)/, "\t\\item$1"), "enumerate");
				} else if (/^\-\s/.test(a)) {
					a = toListItem(a.replace(/^\-(\s)/, "\t\\item$1"), "itemize");
				} else if (inList) {
					a = `${a}${inList}\n`;
					inList = false;
				}

				// Return Result
				return a;
			});

		// a list that runs until the last line still needs its closing tag
		if (inList) {
			lines.push(inList);
			inList = false;
		}

		const output = lines.join("\n");
		console.log("********************\nOUTPUT START\n********************");
		console.log(output);
		console.log("********************\nOUTPUT END\n********************");
		fs.writeFile("./out.tex", output, err => {
			if (err) {
				return console.error(err);
			}
			console.log("The file was saved!");
		});
	};

	// Run the conversion; report a failure (e.g. unreadable ./in.md) instead of
	// leaving the rejected promise unhandled.
	main().catch(err => {
		console.error(err);
		process.exitCode = 1;
	});