Skip to content

Instantly share code, notes, and snippets.

@DarrenSem
Created November 29, 2022 18:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DarrenSem/c9314f9442fa7ce78a39c7305e79ea8c to your computer and use it in GitHub Desktop.
Save DarrenSem/c9314f9442fa7ce78a39c7305e79ea8c to your computer and use it in GitHub Desktop.
htmlDecode.js htmlEncode.js - using modern browsers' built-in DOM manipulation (plus common escaping for htmlEncode)
// htmlDecode.js htmlEncode.js - using modern browsers' built-in DOM manipulation (plus common escaping for htmlEncode)
// Minified equivalent:
// let htmlDecode=a=>Object.assign(document.createElement("textarea"),{innerHTML:null==a?"":String(a).replace(/<\s*br\s*\/?\s*>/gi,"\n")}).value;
/**
 * Decodes HTML character references by letting the browser parse the markup
 * as the content of a detached <textarea> and reading the result back from
 * its `value`. Requires a DOM (`document`).
 *
 * Before parsing, every `<br>` tag (any letter case, optional inner
 * whitespace, optional self-closing slash) is replaced by "\n".
 *
 * @param {*} [html] Markup to decode. `null`/`undefined` yield "". Any other
 *   value is converted with `String()` first, so numbers and other
 *   non-strings no longer throw on `.replace`.
 * @returns {string} The decoded text.
 */
let htmlDecode = html => {
  const markup = html == null
    ? ""
    : String(html).replace(/<\s*br\s*\/?\s*>/gi, "\n");
  const textarea = document.createElement("textarea");
  textarea.innerHTML = markup;
  return textarea.value;
};
// let htmlEncode=a=>Object.assign(document.createElement("div"),{innerText:null==a?"":a}).innerHTML.replace(/<br>/g,"\n").replace(/['"\t\f\r\n\u00A0-\u2666]/g,a=>({"'":"&#039;",'"':"&quot;"})[a]||`&#${a.charCodeAt(0)};`);
/**
 * Encodes text for HTML using the browser's own serializer, then converts a
 * further set of characters to numeric character references.
 *
 * The value is assigned to `innerText` of a detached <div> and read back via
 * `innerHTML`. Any "<br>" in that serialization is turned back into "\n",
 * after which quotes, tab, form feed, CR, LF and every character in
 * U+00A0..U+2666 still present are replaced.
 *
 * @param {*} [string] Text to encode; `null`/`undefined` are treated as "".
 * @returns {string} The encoded markup.
 */
let htmlEncode = string => {
  const holder = document.createElement("div");
  holder.innerText = string == null ? "" : string;

  // \u00A0-\u2666 based on https://stackoverflow.com/questions/1354064/how-to-convert-characters-to-html-entities-using-plain-javascript/1354715#1354715
  // ("will not properly handle Unicode characters >= U+10000 -- UTF-16 pairs")
  // cf. http://www.w3.org/TR/html4/sgml/entities.html
  const toReference = match => {
    switch (match) {
      case "'":
        return "&#039;";
      case '"':
        return "&quot;";
      default:
        return `&#${match.charCodeAt(0)};`;
    }
  };

  const serialized = holder.innerHTML;
  const withNewlines = serialized.replace(/<br>/g, "\n");
  return withNewlines.replace(/['"\t\f\r\n\u00A0-\u2666]/g, toReference);
};
// Clear the console before the self-tests below run (presumably so that only
// assertion failures from this run are visible).
console.clear();
// let assert=(...t)=>!!t.reduce((p,f,i,a,m=Array.isArray(f)?f:[f])=>("function"==typeof m[0]?m[0]():m[0])?p:console.assert(0,...m),t.length)
/**
 * Minimal console-based test runner.
 *
 * Each argument is either a test value or an array whose first element is
 * the test value and whose remaining elements are extra details to log. A
 * test value that is a function is called and its return value is used;
 * anything else is used as-is. For every falsy outcome, `console.assert(0,
 * ...)` is called with the whole entry (test value plus details). All
 * entries are evaluated even after a failure.
 *
 * @param {...*} tests Test entries as described above.
 * @returns {boolean} `true` only when at least one test was given and none
 *   of them failed.
 */
let assert = (...tests) => {
  // Starts as the test count (falsy when no tests were supplied) and is
  // overwritten with console.assert's return value on any failure.
  let pass = tests.length;
  for (const entry of tests) {
    const messages = Array.isArray(entry) ? entry : [entry];
    const outcome = typeof messages[0] === "function" ? messages[0]() : messages[0];
    if (!outcome) {
      pass = console.assert(0, ...messages);
    }
  }
  return !!pass;
};
// test via https://stackoverflow.com/questions/7394748/whats-the-right-way-to-decode-a-string-that-has-special-html-entities-in-it/36138117#36138117
// Encode, decode and round-trip checks for text containing <, >, &, both
// quote characters and (in the last three cases) a no-break space (\xa0).
// Entries written as [fn, details] hand `details` to console.assert when fn()
// is falsy; note that the `actual` values inside `details` are computed
// eagerly, when this assert(...) call is evaluated.
assert(
() => htmlEncode(`<img src onerror="alert('0','')">`) === '&lt;img src onerror=&quot;alert(&#039;0&#039;,&#039;&#039;)&quot;&gt;',
() => htmlDecode('&lt;img src onerror=&quot;alert(&#039;0&#039;,&#039;&#039;)&quot;&gt;') === `<img src onerror="alert('0','')">`,
() => htmlDecode(htmlEncode(`<img src onerror="alert('0','')">`)) === `<img src onerror="alert('0','')">`,
// The no-break space is expected to come back as the named reference &nbsp;.
[() => htmlEncode(`< \xa0'&">`) === `&lt; &nbsp;&#039;&amp;&quot;&gt;`, {actual: htmlEncode(`< \xa0'&">`), expected: '&lt; &nbsp;&#039;&amp;&quot;&gt;'}],
[() => htmlDecode(htmlEncode(`< \xa0'&">`)) === `< \xa0'&">`, {actual: htmlDecode(htmlEncode(`< \xa0'&">`)), expected: `< \xa0'&">`}],
[() => htmlEncode(htmlDecode(`&lt; &nbsp;&#039;&amp;&quot;&gt;`)) === `&lt; &nbsp;&#039;&amp;&quot;&gt;`, {actual: htmlEncode(htmlDecode(`&lt; &nbsp;&#039;&amp;&quot;&gt;`)), expected: `&lt; &nbsp;&#039;&amp;&quot;&gt;`}],
);
// test via https://stackoverflow.com/questions/1354064/how-to-convert-characters-to-html-entities-using-plain-javascript
// German text with umlauts and sharp s: named references (&Uuml;, &szlig;, ...)
// are expected to decode to the characters, while encoding is expected to
// produce decimal numeric references (&#220;, &#223;, ...) rather than names.
assert(
[() => htmlDecode("&lt;&Uuml;bergro&szlig;e &Auml;pfel mit W&uuml;rmern&gt;") === "<Übergroße Äpfel mit Würmern>", {actual: htmlDecode("&lt;&Uuml;bergro&szlig;e &Auml;pfel mit W&uuml;rmern&gt;")}],
[() => htmlEncode("<Übergroße Äpfel mit Würmern>") === '&lt;&#220;bergro&#223;e &#196;pfel mit W&#252;rmern&gt;', {actual: htmlEncode("<Übergroße Äpfel mit Würmern>")}],
[() => htmlDecode(htmlEncode("<Übergroße Äpfel mit Würmern>")) === "<Übergroße Äpfel mit Würmern>", {actual: htmlDecode(htmlEncode("<Übergroße Äpfel mit Würmern>"))}],
[() => htmlEncode(htmlDecode("&lt;&Uuml;bergro&szlig;e &Auml;pfel mit W&uuml;rmern&gt;")) === '&lt;&#220;bergro&#223;e &#196;pfel mit W&#252;rmern&gt;', {actual: htmlEncode(htmlDecode("&lt;&Uuml;bergro&szlig;e &Auml;pfel mit W&uuml;rmern&gt;"))}],
);
// test via https://stackoverflow.com/questions/7394748/whats-the-right-way-to-decode-a-string-that-has-special-html-entities-in-it/29824550#29824550
// Fixtures: `html1` is the expected htmlEncode() output for `string`, and
// `html2` spells the same text with mixed decimal/hex/named references plus a
// leading <br>, which htmlDecode() is expected to turn into "\n".
let html1 = `&#9;&#12;&lt;&quot;&quot;&amp;&#039;&nbsp;&nbsp;&nbsp;高级程序设计&nbsp;&nbsp;&#039;&amp;&quot;&gt;`;
// The gaps around the CJK text are no-break spaces (three before, two after,
// matching the &nbsp; runs in html1). They are written as \xa0 escapes because
// literal U+00A0 characters are invisible and easily turned into ordinary
// spaces by copy/paste, which would silently break the comparisons below.
let string = `\t\f<""&'\xa0\xa0\xa0高级程序设计\xa0\xa0'&">`; // `<"&'  advanced programming  '&">`
let html2 = "<br>\t\f&lt;&#34;&#x22;&amp;&#039;&#160;&#xa0;&nbsp;&#39640;&#32423;&#31243;&#24207;&#35774;&#35745;&nbsp;&#00160;&apos;&amp;&quot;&gt;";
assert(
[() => htmlDecode(html1) === string, {"htmlDecode(html1)": htmlDecode(html1), expected: string}],
[() => htmlEncode(string) === html1, {"htmlEncode(string)": htmlEncode(string), expected: html1}],
[() => htmlDecode(html2) === "\n" + string, {"htmlDecode(html2)": htmlDecode(html2), expected: "\n" + string}],
// Missing and null arguments are expected to yield the empty string.
[() => htmlDecode() === "", {"htmlDecode()": htmlDecode()}, ""],
[() => htmlEncode() === "", {"htmlEncode()": htmlEncode()}, ""],
[() => htmlDecode(null) === "", {"htmlDecode(null)": htmlDecode(null)}, ""],
[() => htmlEncode(null) === "", {"htmlEncode(null)": htmlEncode(null)}, ""],
// A lone surrogate in the input is expected to pass through unchanged, next
// to a mix of numeric and less common named references.
[() => htmlDecode('foo\uD800bar&#x61;&amp;&#x62;&#x31;&#x32;&#x33;&semi;&plus;&copy;&nvgt;&nvlt;&NewLine;&fjlig;&#x61;')
=== 'foo\ud800bara&b123;+©>\u20D2<\u20D2\nfja'],
);
// test via https://stackoverflow.com/questions/1354064/how-to-convert-characters-to-html-entities-using-plain-javascript/42437350#42437350
// Mixed literal, named and numeric references, including numeric references
// without a trailing semicolon (&#0169, &#169). The astral character 𝌆 is
// expected to pass through htmlEncode() unencoded.
assert(
() => htmlDecode('foo ©&copy;&#0169 bar ≠&ne;&#8800; baz 𝌆&#x1D306; qux') === 'foo ©©© bar ≠≠≠ baz 𝌆𝌆 qux',
() => htmlEncode('foo ©©© bar ≠≠≠ baz 𝌆𝌆 qux') === 'foo &#169;&#169;&#169; bar &#8800;&#8800;&#8800; baz 𝌆𝌆 qux',
[() => htmlEncode('foo ©©© bar ≠≠≠ baz 𝌆𝌆 qux') === htmlEncode(htmlDecode('foo ©&#169&copy; bar ≠&ne;&#8800; baz 𝌆&#x1D306; qux'))],
[() => htmlDecode('foo ©&copy;&#169 bar ≠&ne;&#8800; baz 𝌆&#x1D306; qux') === htmlDecode(htmlEncode('foo ©©© bar ≠≠≠ baz 𝌆𝌆 qux'))],
// An existing "&amp;" is expected to be encoded again (to "&amp;amp;"), and
// characters up to U+2666 are expected as decimal numeric references.
[() => htmlEncode(`&amp;<>\\&'""'\u2646\u2656\u2666`) === '&amp;amp;&lt;&gt;\\&amp;&#039;&quot;&quot;&#039;&#9798;&#9814;&#9830;', {actual: htmlEncode(`&amp;<>\\&'""'\u2646\u2656\u2666`)}],
);
// test via https://sanzon.wordpress.com/2008/05/01/neat-little-html-encoding-trick-in-javascript/#comment-244
// Quotes, backticks, tabs and line breaks. htmlEncode() is expected to emit
// tab and newline as &#9; / &#10;, and htmlDecode() is expected to turn <br>
// tags (including the spaced-out "< Br / >" form) into "\n". A literal "<br>"
// in the text passed to htmlEncode() is expected to be escaped as
// &lt;br&gt; and to survive a round trip unchanged.
assert(
[() => htmlEncode(`"\'\`a->\t\n\n\n\t<-b\`\'"`) === '&quot;&#039;`a-&gt;&#9;&#10;&#10;&#10;&#9;&lt;-b`&#039;&quot;', {actual: htmlEncode(`"'\`a->\t\n\n\n\t<-b\`'"`)}],
[() => htmlDecode('&quot;&#039;`a-&gt;\t<br>\n< Br / >&#009&lt;-b`&#039;&quot;') === `"\'\`a->\t\n\n\n\t<-b\`\'"`, {actual: htmlDecode('&quot;&#039;`a-&gt;\t<br>\n< Br / >&#009&lt;-b`&#039;&quot;')}],
[() => htmlEncode('"\'\`a->\t<br>\t<-b\`\'"') === '&quot;&#039;`a-&gt;&#9;&lt;br&gt;&#9;&lt;-b`&#039;&quot;', {actual: htmlEncode('"\'\`a->\t<br>\t<-b\`\'"')}],
[() => htmlDecode(htmlEncode('"\'\`a->\t<br>\t<-b\`\'"')) === '"\'\`a->\t<br>\t<-b\`\'"', {actual: htmlDecode(htmlEncode('"\'\`a->\t<br>\t<-b\`\'"'))}],
[() => htmlEncode(htmlDecode('&quot;&#039;`a-&gt;&#9;&lt;br&gt;&#9;&lt;-b`&#039;&quot;')) === '&quot;&#039;`a-&gt;&#9;&lt;br&gt;&#9;&lt;-b`&#039;&quot;', {actual: htmlEncode(htmlDecode('&quot;&#039;`a-&gt;&#9;&lt;br&gt;&#9;&lt;-b`&#039;&quot;'))}],
);
// OTHER tests (lots!) https://github.com/mathiasbynens/he/tree/master/tests
// ^ MISTAKES that "he.js" claims to avoid: https://stackoverflow.com/questions/1354064/how-to-convert-characters-to-html-entities-using-plain-javascript/23831239#23831239
assert(
() => htmlDecode('&PrecedesSlantEqual;') === '≼',
() => htmlDecode('&#x1D306;&#119558;') === '𝌆'.repeat(2),
() => htmlDecode('&#xD306') === '팆',
() => htmlDecode('&#x80;&#x20AC') === '€'.repeat(2),
() => htmlDecode('&#x26;amp;gt;') === '&amp;gt;',
() => htmlDecode('&lt;img src=&#x27;x&#x27; onerror=&quot;prompt(1)&quot;&gt;&lt;script&gt;alert(1)&lt;/script&gt;&lt;img src=&quot;x&#x60; &#x60;&lt;script&gt;alert(1)&lt;/script&gt;&quot;&#x60; &#x60;&gt;')
=== '<img src=\'x\' onerror="prompt(1)"><script>alert(1)</script><img src="x` `<script>alert(1)</script>"` `>',
() => htmlDecode('a&Zeta;ba&ZeroWidthSpace;ba&zeta;b') === 'a\u0396ba\u200Bba\u03B6b',
() => htmlDecode('a&zscr;ba&zwj;ba&zwnj;b') === 'a\uD835\uDCCFba\u200Dba\u200Cb',
() => htmlDecode('a&foololthisdoesntexist;b') === 'a&foololthisdoesntexist;b',
() => htmlDecode('foo &lolwat; bar') === 'foo &lolwat; bar',
() => htmlDecode('&notin; &noti &notin &copy123') === '\u2209 \xACi \xACin \xA9123',
() => htmlDecode('&amp;xxx; &amp;xxx &amp;thorn; &amp;thorn &amp;curren;t &amp;current') === '&xxx; &xxx &thorn; &thorn &curren;t &current',
() => htmlDecode('&amp;xxx; &amp;xxx &ampthorn; &ampthorn &ampcurren;t &ampcurrent') === '&xxx; &xxx &thorn; &thorn &curren;t &current',
() => htmlDecode('a&#x1D306;b&#X0000000000001d306;c') === 'a\uD834\uDF06b\uD834\uDF06c',
() => htmlDecode('a&#119558;b&#169;c&#00000000000000000169;d') === 'a\uD834\uDF06b\xA9c\xA9d',
() => htmlDecode('a&#xD834;&#xDF06;b&#55348;&#57094;c a&#x0;b&#0;c') === 'a\uFFFD\uFFFDb\uFFFD\uFFFDc a\uFFFDb\uFFFDc',
() => htmlDecode('&#0039;&#x22;&#x27;&amp;amp;amp;&#x26;amp;a&#x110000;b&#39;') === `'"'&amp;amp;&amp;a\uFFFDb'`,
() => htmlDecode('&#x10FFFF;&#196605;&#0128;&#00') === '\uDBFF\uDFFF\uD87F\uDFFD\u20AC\uFFFD',
[() => htmlEncode('<img src=\'x\' onerror="prompt(1)"><script>alert(1)</script><img src="x` `<script>alert(1)</script>"` `>')
=== '&lt;img src=&#039;x&#039; onerror=&quot;prompt(1)&quot;&gt;&lt;script&gt;alert(1)&lt;/script&gt;&lt;img src=&quot;x` `&lt;script&gt;alert(1)&lt;/script&gt;&quot;` `&gt;'],
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment