Skip to content

Instantly share code, notes, and snippets.

@Pamblam
Last active February 16, 2023 03:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Pamblam/94f6ad8ade6dc0e9582ece64fbb0aee1 to your computer and use it in GitHub Desktop.
Save Pamblam/94f6ad8ade6dc0e9582ece64fbb0aee1 to your computer and use it in GitHub Desktop.
Convert specific allowed MD elements to HTML, as used in the Minibeast app and website.
/**
 * Default tag renderer: builds the HTML string for one converted element.
 * @param {String} tagname - One of "b", "i", "a", "img", "code", "pre", "ul", "ol" or "br"
 * @param {Object} attrs - Tag attributes ("href" for links, "src" and "alt" for images)
 * @param {String|String[]|null} innerText - Tag content; an array of item strings for "ul"/"ol"
 * @return {String} - The HTML for the tag, or "" when the tag name is not recognised
 */
function defaultRenderTag(tagname, attrs, innerText){
  switch(tagname){
    case "b":
      return `<b>${innerText}</b>`;
    case "i":
      return `<i>${innerText}</i>`;
    case "a":
      // Neutralise a literal closing tag so the link text cannot end the anchor early
      return `<a href="${encodeURI(attrs.href)}">${innerText.replace(/<\/a>/gmi, '&lt;/a>')}</a>`;
    case "img":
      return `<img src="${encodeURI(attrs.src)}" alt="${attrs.alt.replace(/\\/gmi, '&#8726;').replace(/"/gmi, '&quot;')}" />`;
    case "code":
      return `<code>${innerText.replace(/<\/code>/gmi, '&lt;/code>')}</code>`;
    case "pre":
      return `<pre><code>${innerText.replace(/<\/code>/gmi, '&lt;/code>').replace(/<\/pre>/gmi, '&lt;/pre>')}</code></pre>`;
    case "ul":
      return `<ul>${innerText.map(text=>`<li>${text}</li>`).join('')}</ul>`;
    case "ol":
      return `<ol>${innerText.map(text=>`<li>${text}</li>`).join('')}</ol>`;
    case "br":
      return `<br />`;
    default:
      return "";
  }
}

/**
 * Find runs of consecutive lines that match a list-item pattern.
 * @param {String[]} lines - The input split into lines
 * @param {RegExp} item_regex - Pattern for one list item; capture group 1 is the item text
 * @param {String} type - List type to record on each run ("ul" or "ol")
 * @return {Object[]} - One {type, line_pos, items} entry per run; line_pos is the index of the run's first line
 */
function collectLists(lines, item_regex, type){
  const lists = [];
  let items = [];
  lines.forEach((line, i)=>{
    const item = line.match(item_regex);
    if(item !== null){
      items.push(item[1]);
    }else if(items.length){
      lists.push({type, line_pos: i - items.length, items});
      items = [];
    }
  });
  // A run that reaches the last line is only closed here
  if(items.length){
    lists.push({type, line_pos: lines.length - items.length, items});
  }
  return lists;
}

/**
 * Replace every delimiter-enclosed span (e.g. **text**) with the rendered tag.
 * Delimiters are paired left to right; an unpaired trailing delimiter is left untouched.
 * @param {String} str - The text to scan
 * @param {String} delimiter - The marker that both opens and closes a span
 * @param {String} tagname - Tag name handed to the renderer
 * @param {Function} render - Renderer with the render_tag signature (tagname, attrs, innerText)
 * @return {String} - The text with each paired span replaced
 */
function wrapDelimited(str, delimiter, tagname, render){
  let open_pos = -1;
  let from = 0;
  for(;;){
    const pos = str.indexOf(delimiter, from);
    if(pos === -1) return str;
    if(open_pos === -1){
      open_pos = pos;
      from = pos + delimiter.length;
      continue;
    }
    const html = render(tagname, {}, str.substring(open_pos + delimiter.length, pos));
    str = str.substring(0, open_pos) + html + str.substring(pos + delimiter.length);
    // Resume after the inserted HTML so its content is not rescanned by this pass
    from = open_pos + html.length;
    open_pos = -1;
  }
}

/**
 * Convert a string containing Markdown lists, links, images, codeblocks, inline code, bold and italic text to an HTML string.
 * 1. Convert <br> to linebreaks, collapse repeated spaces and limit consecutive linebreaks to two
 * 2. Convert runs of "- item" lines to <ul> and runs of "1. item" / "1) item" lines to <ol>
 * 3. Convert the remaining linebreaks to <br>
 * 4. Convert URLs that are not part of a markdown tag to markdown link tags
 * 5. Convert ``` MD code blocks to <pre><code> blocks
 * 6. Convert ` inline code to <code> tags
 * 7. Convert ** bold tags to <b> tags
 * 8. Convert * italics tags to <i> tags
 * 9. Convert ![]() image tags to <img> tags
 * 10. Convert []() links (including the ones from step 4) to <a> tags
 * The passes run one after another over the whole string, so a later pass also sees the output of the earlier ones.
 * @param {Object} [options] - Named arguments; when omitted the function resolves with ""
 * @param {String} options.str - The Markdown string to convert to HTML; any non-string resolves with ""
 * @param {Function} [options.url_callback] - A function that is called for each URL
 * The callback function is provided two params - a URL and the type of tag it was called from ("image" or "link")
 * If the function returns a promise, it will be awaited and the result used
 * If the function returns a string that is a complete, valid URL, the string will be used.
 * If the function returns anything else (or throws), the tag will not be processed at all.
 * @param {Function} [options.render_tag] - A function (tagname, attrs, innerText) that returns the HTML for one tag
 * When it is not a function the built-in renderer is used.
 * @return {Promise<String>} - A promise that resolves with the string containing the HTML converted Markdown
 */
async function simpleMDParser({str, url_callback, render_tag} = {}){
  if(typeof str !== "string") return "";

  const render = typeof render_tag === 'function' ? render_tag : defaultRenderTag;

  // Resolves with the URL to use, or false when the URL is rejected or is not absolute
  const validate_url = async (url, type) => {
    try{
      if('function' === typeof url_callback){
        url = await Promise.resolve(url_callback(url, type));
      }
      new URL(url); // throws on anything that is not a complete URL
      return url;
    }catch(e){
      // Deliberate: an invalid URL or a failing callback means "leave this tag alone"
      return false;
    }
  };

  const url_regex = /(https?:\/\/)?((www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,4}|localhost)\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/gmi;
  const img_regex = /!\[([^\]]*)\]\(((https?:\/\/)?((www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,4}|localhost)\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))\)/gmi;
  const link_regex = /\[([^\]]*)\]\(((https?:\/\/)?((www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,4}|localhost)\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))\)/gmi;

  // Convert <br> to \n
  // Remove extra spaces
  // Ensure no more than two consecutive line breaks
  str = str.replace(/<br[^>]*>/gmi, "\n");
  str = str.replace(/ +/gmi, ' ');
  str = str.replace(/\n{2,}/gmi, "\n\n");

  // Parse lists. \d+ lets ordered lists continue past item 9.
  const lines = str.split(/\n/);
  const lists = [
    ...collectLists(lines, /^\s*-\s?([^\n]*)$/, 'ul'),
    ...collectLists(lines, /^\s*\d+[.)]\s?([^\n]*)$/, 'ol')
  ];
  // Work from the bottom up so splicing one list does not shift the positions of the lists above it
  lists.sort((a, b)=>b.line_pos - a.line_pos);
  for(const {type, line_pos, items} of lists){
    lines.splice(line_pos, items.length);
    // Glue the list to the line that followed it, and to a non-empty line before it,
    // so no <br> is emitted directly around the list
    lines[line_pos] = render(type, {}, items) + (lines[line_pos] || '');
    if(lines[line_pos-1]){
      lines[line_pos-1] += lines[line_pos];
      lines.splice(line_pos, 1);
    }
  }

  // Convert line breaks (back) to <br>
  str = lines.join(render('br', {}, null));

  // Find all URLs that are not part of an image or link tag and convert them to a link tag.
  // Matches are handled last to first so earlier match indexes stay valid while str grows.
  const url_matches = [...str.matchAll(url_regex)].reverse();
  for(const url_match of url_matches){
    const url = await validate_url(url_match[0], 'link');
    if(false === url) continue;
    const contains_url = (tag)=>url_match.index > tag.index && url_match.index < tag.index + tag[0].length;
    if([...str.matchAll(link_regex)].some(contains_url)) continue;
    if([...str.matchAll(img_regex)].some(contains_url)) continue;
    const url_end = url_match.index + url_match[0].length;
    str = str.substring(0, url_match.index) + '[' + url_match[0] + '](' + url + ')' + str.substring(url_end);
  }

  // block code, inline code, bold, italic - in this order so longer delimiters are consumed first
  str = wrapDelimited(str, "```", 'pre', render);
  str = wrapDelimited(str, "`", 'code', render);
  str = wrapDelimited(str, "**", 'b', render);
  str = wrapDelimited(str, "*", 'i', render);

  // Convert img tags to <img>
  for(const img_match of [...str.matchAll(img_regex)].reverse()){
    const url = await validate_url(img_match[2], 'image');
    if(false === url) continue;
    const html = render('img', {src: url, alt: img_match[1]}, null);
    str = str.substring(0, img_match.index) + html + str.substring(img_match.index + img_match[0].length);
  }

  // Convert links to <a> tags
  for(const link_match of [...str.matchAll(link_regex)].reverse()){
    const url = await validate_url(link_match[2], 'link');
    if(false === url) continue;
    const html = render('a', {href: url}, link_match[1]);
    str = str.substring(0, link_match.index) + html + str.substring(link_match.index + link_match[0].length);
  }

  return str;
}
<!DOCTYPE html>
<html>
<head>
<title>TODO supply a title</title>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body>
<textarea id="ta" style='width: 40em; height: 40em;'>***Hello!***, I'm *Robert*.
I **like** kitties.
![MLK Google Doodle](https://www.google.com/logos/doodles/2023/dr-martin-luther-king-jr-day-2023-6753651837109836.2-l.webp)<br><br><br><br>
Today is [MLK Day](https://www.google.com/search?q=Dr.+Martin+Luther+King+Jr.+Day).
`Here's sone inline code`.
```()=>{
alert('Here's a code block');
};```
and here's a raw link... https://www.instacart.com</textarea><br>
<button id="btn">parse markdown</button>
<div id="result"></div>
<script>
// Demo wiring: on click, parse the textarea's Markdown with a custom renderer, then show
// the rendered HTML followed by the same HTML with "<" escaped so the markup can be read.
document.getElementById('btn').addEventListener('click', async (event)=>{
  event.preventDefault();

  // Shared by the "ul" and "ol" renderers: wrap every item in <li>
  const listItems = (items)=>items.map(text=>`<li>${text}</li>`).join('');

  // One renderer per supported tag; any other tag name renders as an empty string
  const renderers = new Map([
    ["b", (attrs, innerText)=>`<b style='color:pink;'>${innerText}</b>`],
    ["i", (attrs, innerText)=>`<i>${innerText}</i>`],
    ["a", (attrs, innerText)=>`<a href="${encodeURI(attrs.href)}">${innerText.replace(/<\/a>/gmi, '&lt;/a>')}</a>`],
    ["img", (attrs)=>`<img src="${encodeURI(attrs.src)}" alt="${attrs.alt.replace(/\\/gmi, '&#8726;').replace(/"/gmi, '&quot;')}" />`],
    ["code", (attrs, innerText)=>`<code>${innerText.replace(/<\/code>/gmi, '&lt;/code>')}</code>`],
    ["pre", (attrs, innerText)=>`<pre><code>${innerText.replace(/<\/code>/gmi, '&lt;/code>').replace(/<\/pre>/gmi, '&lt;/pre>')}</code></pre>`],
    ["br", ()=>`<br />`],
    ["ul", (attrs, innerText)=>`<ul>${listItems(innerText)}</ul>`],
    ["ol", (attrs, innerText)=>`<ol>${listItems(innerText)}</ol>`]
  ]);

  const markdown = document.getElementById('ta').value;
  const parsed = await simpleMDParser({
    str: markdown,
    render_tag: (tagname, attrs, innerText)=>{
      const renderer = renderers.get(tagname);
      return renderer ? renderer(attrs, innerText) : "";
    }
  });

  document.getElementById('result').innerHTML = `${parsed}<hr>${parsed.replace(/</gmi, '&lt;')}`;
});
/**
 * Default tag renderer: builds the HTML string for one converted element.
 * @param {String} tagname - One of "b", "i", "a", "img", "code", "pre", "ul", "ol" or "br"
 * @param {Object} attrs - Tag attributes ("href" for links, "src" and "alt" for images)
 * @param {String|String[]|null} innerText - Tag content; an array of item strings for "ul"/"ol"
 * @return {String} - The HTML for the tag, or "" when the tag name is not recognised
 */
function defaultRenderTag(tagname, attrs, innerText){
  switch(tagname){
    case "b":
      return `<b>${innerText}</b>`;
    case "i":
      return `<i>${innerText}</i>`;
    case "a":
      // Neutralise a literal closing tag so the link text cannot end the anchor early
      return `<a href="${encodeURI(attrs.href)}">${innerText.replace(/<\/a>/gmi, '&lt;/a>')}</a>`;
    case "img":
      return `<img src="${encodeURI(attrs.src)}" alt="${attrs.alt.replace(/\\/gmi, '&#8726;').replace(/"/gmi, '&quot;')}" />`;
    case "code":
      return `<code>${innerText.replace(/<\/code>/gmi, '&lt;/code>')}</code>`;
    case "pre":
      return `<pre><code>${innerText.replace(/<\/code>/gmi, '&lt;/code>').replace(/<\/pre>/gmi, '&lt;/pre>')}</code></pre>`;
    case "ul":
      return `<ul>${innerText.map(text=>`<li>${text}</li>`).join('')}</ul>`;
    case "ol":
      return `<ol>${innerText.map(text=>`<li>${text}</li>`).join('')}</ol>`;
    case "br":
      return `<br />`;
    default:
      return "";
  }
}

/**
 * Find runs of consecutive lines that match a list-item pattern.
 * @param {String[]} lines - The input split into lines
 * @param {RegExp} item_regex - Pattern for one list item; capture group 1 is the item text
 * @param {String} type - List type to record on each run ("ul" or "ol")
 * @return {Object[]} - One {type, line_pos, items} entry per run; line_pos is the index of the run's first line
 */
function collectLists(lines, item_regex, type){
  const lists = [];
  let items = [];
  lines.forEach((line, i)=>{
    const item = line.match(item_regex);
    if(item !== null){
      items.push(item[1]);
    }else if(items.length){
      lists.push({type, line_pos: i - items.length, items});
      items = [];
    }
  });
  // A run that reaches the last line is only closed here
  if(items.length){
    lists.push({type, line_pos: lines.length - items.length, items});
  }
  return lists;
}

/**
 * Replace every delimiter-enclosed span (e.g. **text**) with the rendered tag.
 * Delimiters are paired left to right; an unpaired trailing delimiter is left untouched.
 * @param {String} str - The text to scan
 * @param {String} delimiter - The marker that both opens and closes a span
 * @param {String} tagname - Tag name handed to the renderer
 * @param {Function} render - Renderer with the render_tag signature (tagname, attrs, innerText)
 * @return {String} - The text with each paired span replaced
 */
function wrapDelimited(str, delimiter, tagname, render){
  let open_pos = -1;
  let from = 0;
  for(;;){
    const pos = str.indexOf(delimiter, from);
    if(pos === -1) return str;
    if(open_pos === -1){
      open_pos = pos;
      from = pos + delimiter.length;
      continue;
    }
    const html = render(tagname, {}, str.substring(open_pos + delimiter.length, pos));
    str = str.substring(0, open_pos) + html + str.substring(pos + delimiter.length);
    // Resume after the inserted HTML so its content is not rescanned by this pass
    from = open_pos + html.length;
    open_pos = -1;
  }
}

/**
 * Convert a string containing Markdown lists, links, images, codeblocks, inline code, bold and italic text to an HTML string.
 * 1. Convert <br> to linebreaks, collapse repeated spaces and limit consecutive linebreaks to two
 * 2. Convert runs of "- item" lines to <ul> and runs of "1. item" / "1) item" lines to <ol>
 * 3. Convert the remaining linebreaks to <br>
 * 4. Convert URLs that are not part of a markdown tag to markdown link tags
 * 5. Convert ``` MD code blocks to <pre><code> blocks
 * 6. Convert ` inline code to <code> tags
 * 7. Convert ** bold tags to <b> tags
 * 8. Convert * italics tags to <i> tags
 * 9. Convert ![]() image tags to <img> tags
 * 10. Convert []() links (including the ones from step 4) to <a> tags
 * The passes run one after another over the whole string, so a later pass also sees the output of the earlier ones.
 * @param {Object} [options] - Named arguments; when omitted the function resolves with ""
 * @param {String} options.str - The Markdown string to convert to HTML; any non-string resolves with ""
 * @param {Function} [options.url_callback] - A function that is called for each URL
 * The callback function is provided two params - a URL and the type of tag it was called from ("image" or "link")
 * If the function returns a promise, it will be awaited and the result used
 * If the function returns a string that is a complete, valid URL, the string will be used.
 * If the function returns anything else (or throws), the tag will not be processed at all.
 * @param {Function} [options.render_tag] - A function (tagname, attrs, innerText) that returns the HTML for one tag
 * When it is not a function the built-in renderer is used.
 * @return {Promise<String>} - A promise that resolves with the string containing the HTML converted Markdown
 */
async function simpleMDParser({str, url_callback, render_tag} = {}){
  if(typeof str !== "string") return "";

  const render = typeof render_tag === 'function' ? render_tag : defaultRenderTag;

  // Resolves with the URL to use, or false when the URL is rejected or is not absolute
  const validate_url = async (url, type) => {
    try{
      if('function' === typeof url_callback){
        url = await Promise.resolve(url_callback(url, type));
      }
      new URL(url); // throws on anything that is not a complete URL
      return url;
    }catch(e){
      // Deliberate: an invalid URL or a failing callback means "leave this tag alone"
      return false;
    }
  };

  const url_regex = /(https?:\/\/)?((www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,4}|localhost)\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/gmi;
  const img_regex = /!\[([^\]]*)\]\(((https?:\/\/)?((www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,4}|localhost)\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))\)/gmi;
  const link_regex = /\[([^\]]*)\]\(((https?:\/\/)?((www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,4}|localhost)\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))\)/gmi;

  // Convert <br> to \n
  // Remove extra spaces
  // Ensure no more than two consecutive line breaks
  str = str.replace(/<br[^>]*>/gmi, "\n");
  str = str.replace(/ +/gmi, ' ');
  str = str.replace(/\n{2,}/gmi, "\n\n");

  // Parse lists. \d+ lets ordered lists continue past item 9.
  const lines = str.split(/\n/);
  const lists = [
    ...collectLists(lines, /^\s*-\s?([^\n]*)$/, 'ul'),
    ...collectLists(lines, /^\s*\d+[.)]\s?([^\n]*)$/, 'ol')
  ];
  // Work from the bottom up so splicing one list does not shift the positions of the lists above it
  lists.sort((a, b)=>b.line_pos - a.line_pos);
  for(const {type, line_pos, items} of lists){
    lines.splice(line_pos, items.length);
    // Glue the list to the line that followed it, and to a non-empty line before it,
    // so no <br> is emitted directly around the list
    lines[line_pos] = render(type, {}, items) + (lines[line_pos] || '');
    if(lines[line_pos-1]){
      lines[line_pos-1] += lines[line_pos];
      lines.splice(line_pos, 1);
    }
  }

  // Convert line breaks (back) to <br>
  str = lines.join(render('br', {}, null));

  // Find all URLs that are not part of an image or link tag and convert them to a link tag.
  // Matches are handled last to first so earlier match indexes stay valid while str grows.
  const url_matches = [...str.matchAll(url_regex)].reverse();
  for(const url_match of url_matches){
    const url = await validate_url(url_match[0], 'link');
    if(false === url) continue;
    const contains_url = (tag)=>url_match.index > tag.index && url_match.index < tag.index + tag[0].length;
    if([...str.matchAll(link_regex)].some(contains_url)) continue;
    if([...str.matchAll(img_regex)].some(contains_url)) continue;
    const url_end = url_match.index + url_match[0].length;
    str = str.substring(0, url_match.index) + '[' + url_match[0] + '](' + url + ')' + str.substring(url_end);
  }

  // block code, inline code, bold, italic - in this order so longer delimiters are consumed first
  str = wrapDelimited(str, "```", 'pre', render);
  str = wrapDelimited(str, "`", 'code', render);
  str = wrapDelimited(str, "**", 'b', render);
  str = wrapDelimited(str, "*", 'i', render);

  // Convert img tags to <img>
  for(const img_match of [...str.matchAll(img_regex)].reverse()){
    const url = await validate_url(img_match[2], 'image');
    if(false === url) continue;
    const html = render('img', {src: url, alt: img_match[1]}, null);
    str = str.substring(0, img_match.index) + html + str.substring(img_match.index + img_match[0].length);
  }

  // Convert links to <a> tags
  for(const link_match of [...str.matchAll(link_regex)].reverse()){
    const url = await validate_url(link_match[2], 'link');
    if(false === url) continue;
    const html = render('a', {href: url}, link_match[1]);
    str = str.substring(0, link_match.index) + html + str.substring(link_match.index + link_match[0].length);
  }

  return str;
}
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment