Skip to content

Instantly share code, notes, and snippets.

@andyjessop
Created April 28, 2022 12:15
Show Gist options
  • Save andyjessop/1405bc478d1d8ad66fd68edb9d40d5a2 to your computer and use it in GitHub Desktop.
Save andyjessop/1405bc478d1d8ad66fd68edb9d40d5a2 to your computer and use it in GitHub Desktop.
/** Options accepted by {@link stripMarkdown}. Every field is optional. */
interface StripMarkdownOptions {
  /** When truthy, each stripped list leader is replaced by this string plus a space. Default: `false`. */
  listUnicodeChar?: string | false;
  /** Strip list leaders (`*`, `-`, `+`, `1.`). Default: `true`. */
  stripListLeaders?: boolean;
  /** Strip GitHub-flavoured constructs (`===` underlines, `~~~`/``` fence lines, `~~`). Default: `true`. */
  gfm?: boolean;
  /** Replace images by their alt text instead of removing them entirely. Default: `true`. */
  useImgAltText?: boolean;
  /** Remove the first `*[ABBR]: definition` line. Default: `false`. */
  abbr?: boolean;
  /** Replace inline links by their URL instead of their link text. Default: `false`. */
  replaceLinksWithURL?: boolean;
  /**
   * HTML tag names whose opening and closing tags are kept in the output.
   * Names are interpolated into a regular expression without escaping.
   * Default: `[]`.
   */
  htmlTagsToSkip?: readonly string[];
}

/**
 * Removes Markdown (and HTML) markup from `md`, leaving plain text.
 *
 * The caller's `opts` object is only read, never modified. An explicit
 * `false` for an option whose default is `true` is honoured.
 *
 * @param md - Markdown source text.
 * @param opts - Optional behaviour switches; see {@link StripMarkdownOptions}.
 * @returns The stripped text; `undefined` when `md` is empty or missing; the
 *   original `md` when a replacement step throws (the error is logged).
 */
function stripMarkdown(md?: string, opts?: StripMarkdownOptions | null): string | undefined {
  if (!md) {
    return;
  }

  // Resolve defaults into locals so the caller's object is left untouched.
  // `??` (not `||` or a truthiness ternary) keeps an explicit `false`.
  const options: StripMarkdownOptions = opts ?? {};
  const listUnicodeChar = options.listUnicodeChar ?? false;
  const stripListLeaders = options.stripListLeaders ?? true;
  const gfm = options.gfm ?? true;
  const useImgAltText = options.useImgAltText ?? true;
  const abbr = options.abbr ?? false;
  const replaceLinksWithURL = options.replaceLinksWithURL ?? false;
  const htmlTagsToSkip = options.htmlTagsToSkip ?? [];

  let output: string = md;

  // Remove horizontal rules. This runs first because the list-leader rule
  // below would otherwise consume the leading `-` / `*` of a rule.
  output = output.replace(/^(-\s*?|\*\s*?|_\s*?){3,}\s*/gm, '');

  try {
    if (stripListLeaders) {
      output = listUnicodeChar
        ? output.replace(/^([\s\t]*)([*\-+]|\d+\.)\s+/gm, `${listUnicodeChar} $1`)
        : output.replace(/^([\s\t]*)([*\-+]|\d+\.)\s+/gm, '$1');
    }

    if (gfm) {
      output = output
        // Header underline (`====`)
        .replace(/\n={2,}/g, '\n')
        // Fenced code block markers (~~~)
        .replace(/~{3}.*\n/g, '')
        // Strikethrough
        .replace(/~~/g, '')
        // Fenced code block markers (```)
        .replace(/`{3}.*\n/g, '');
    }

    if (abbr) {
      // Remove abbreviations
      output = output.replace(/\*\[.*\]:.*\n/, '');
    }

    // Build the HTML-stripping regex once. Tags must not be stripped before
    // this point, otherwise `htmlTagsToSkip` could never take effect.
    let htmlReplaceRegex = new RegExp('<[^>]*>', 'g');
    if (htmlTagsToSkip.length > 0) {
      // Negative lookahead: e.g. /<(?!\/?(?:sup|sub)\b)[^>]*>/gi leaves both
      // <sup> and </sup> in place. The \b stops `sub` from also protecting
      // unrelated tags such as <subject>.
      const skipped = htmlTagsToSkip.join('|');
      htmlReplaceRegex = new RegExp(`<(?!\\/?(?:${skipped})\\b)[^>]*>`, 'gi');
    }

    output = output
      // Remove HTML tags (except the skipped ones)
      .replace(htmlReplaceRegex, '')
      // Remove setext-style headers
      .replace(/^[=-]{2,}\s*$/g, '')
      // Remove footnotes?
      .replace(/\[\^.+?\](: .*?$)?/g, '')
      .replace(/\s{0,2}\[.*?\]: .*?$/g, '')
      // Remove images
      .replace(/!\[(.*?)\][[(].*?[\])]/g, useImgAltText ? '$1' : '')
      // Remove inline links; group 1 is the link text, group 2 the target
      .replace(/\[([^\]]*?)\][[(](.*?)[\])]/g, replaceLinksWithURL ? '$2' : '$1')
      // Remove blockquotes
      .replace(/^\s{0,3}>\s?/gm, '')
      // Remove reference-style links?
      .replace(/^\s{1,2}\[(.*?)\]: (\S+)( ".*?")?\s*$/g, '')
      // Remove atx-style headers
      .replace(/^(\n)?\s{0,}#{1,6}\s+| {0,}(\n)?\s{0,}#{0,} #{0,}(\n)?\s{0,}$/gm, '$1$2$3')
      // Remove * emphasis
      .replace(/([*]+)(\S)(.*?\S)??\1/g, '$2$3')
      // Remove _ emphasis. Unlike *, _ emphasis gets rendered only if
      // 1. Either there is a whitespace character before opening _ and after closing _.
      // 2. Or _ is at the start/end of the string.
      .replace(/(^|\W)([_]+)(\S)(.*?\S)??\2($|\W)/g, '$1$3$4$5')
      // Remove code blocks
      .replace(/(`{3,})(.*?)\1/gm, '$2')
      // Remove inline code
      .replace(/`(.+?)`/g, '$1')
      // Replace strike through
      .replace(/~(.*?)~/g, '$1');
  } catch (e) {
    // Best effort: log the failure and hand back the untouched input.
    console.error(e);
    return md;
  }

  return output;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment