Skip to content

Instantly share code, notes, and snippets.

@mrclay
Last active August 25, 2023 15:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrclay/2b307d8cd8c33a6f33abd29842f9dc89 to your computer and use it in GitHub Desktop.
Save mrclay/2b307d8cd8c33a6f33abd29842f9dc89 to your computer and use it in GitHub Desktop.
JS functions for parsing valid HTML
/**
 * Build a global RegExp that finds every `tagname` element in an HTML string.
 *
 * Each match is [fullElement, attributesStr] when `selfClosing` is true, and
 * [fullElement, attributesStr, innerHtml] otherwise.
 *
 * Note: `tagname` is interpolated into the pattern unescaped, so it should be
 * a plain tag name. A literal `>` inside an attribute value is not supported.
 *
 * @param {string} tagname - Tag name to match, e.g. 'script' or 'img'.
 * @param {boolean} [selfClosing=false] - Match a lone tag (`<img ...>` or
 *   `<img ... />`) instead of an open/close pair.
 * @returns {RegExp} A fresh RegExp with the `g` flag set.
 */
function makeElementExpr(tagname, selfClosing = false) {
  if (selfClosing) {
    // The attribute capture is lazy and followed by `\s*/?` so the optional
    // trailing slash (and the whitespace before it) is NOT part of
    // attributesStr. A greedy capture would swallow the "/" and hand
    // `src="x"/` to the attribute parser.
    return new RegExp(`<${tagname}\\b([^>]*?)\\s*/?>`, 'g');
  }
  // innerHtml is lazy so adjacent elements are matched separately. HTML permits
  // whitespace between the end tag's name and its ">", hence the `\s*`.
  return new RegExp(
    `<${tagname}\\b([^>]*)>([\\s\\S]*?)</${tagname}\\s*>`,
    'g',
  );
}
/**
 * Parse an HTML attribute string into an object shaped like React props.
 *
 * Supported forms: `name` (boolean, stored as `true`), `name="value"`,
 * `name='value'` and `name=value`, with optional whitespace around `=`.
 * Attribute names may contain any character except whitespace, quotes,
 * `<`, `>`, `/` and `=`, so `data-id` and `aria-label` work. If a name is
 * repeated, the last occurrence wins.
 *
 * Note: if you don't provide htmlDecoder, string attribute values are
 * returned as HTML-escaped values rather than text.
 *
 * @param {string} str - Raw attribute text, e.g. ` cool bar="3"`.
 * @param {(value: string) => string} [htmlDecoder] - Optional decoder applied
 *   to each string value (not to boolean attributes).
 * @returns {Object<string, string|true>} Attribute name to value map.
 */
function parseAttrStr(str, htmlDecoder) {
  // Groups: 1 = name, 2 = double-quoted, 3 = single-quoted, 4 = unquoted value.
  // Quoted values are consumed as a whole, so words inside them can never be
  // mistaken for separate boolean attributes.
  const attrExpr =
    /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;
  const attrs = {};
  for (const [, name, dblQuoted, sglQuoted, unquoted] of str.matchAll(attrExpr)) {
    // `??` rather than `||`: an empty value such as alt="" must stay ''.
    const val = dblQuoted ?? sglQuoted ?? unquoted;
    if (typeof val !== 'string') {
      attrs[name] = true;
    } else {
      attrs[name] = htmlDecoder ? htmlDecoder(val) : val;
    }
  }
  return attrs;
}
/**
 * Strip every <script> element out of an HTML string.
 *
 * @param {string} html - Markup to process.
 * @returns {{ newHtml: string, scripts: Array<{ innerHtml: string, attrs: Object }> }}
 *   `newHtml` is the input with each matched script element replaced by an
 *   empty string; `scripts` lists the removed elements in document order,
 *   each with its inner HTML and its attributes as parsed by parseAttrStr
 *   (no HTML decoder is passed).
 */
function removeScripts(html) {
  const scriptExpr = makeElementExpr('script');

  // First pass: record what is about to be removed.
  const scripts = Array.from(
    html.matchAll(scriptExpr),
    ([, attrsStr, innerHtml]) => ({
      innerHtml,
      attrs: parseAttrStr(attrsStr),
    }),
  );

  // Second pass: drop the matched elements from the markup.
  const newHtml = html.replace(scriptExpr, '');

  return { newHtml, scripts };
}
// Demo 1: pull a <script> element (one boolean attribute, one string
// attribute) out of a snippet and log the stripped markup plus the scripts.
const test = `
<script cool bar="3">
foo;
</script>
`;
const scriptDemo = removeScripts(test);
console.log(scriptDemo);

// Demo 2: match <img> tags written both without and with a trailing slash.
const test2 = `
<img src="effe">
<img src="effe" alt="" />
`;
const imgExpr = makeElementExpr('img', true);
console.log(Array.from(test2.matchAll(imgExpr)));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment