Skip to content

Instantly share code, notes, and snippets.

@erkobridee
Last active December 3, 2023 07:56
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save erkobridee/d2f04e4ce5c6844a1e16788d58107e7e to your computer and use it in GitHub Desktop.
Save erkobridee/d2f04e4ce5c6844a1e16788d58107e7e to your computer and use it in GitHub Desktop.
/*
Useful references:

- String.prototype.replace()
  https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace
  https://alligator.io/js/string-replace/

- Encode and decode HTML entities using pure JavaScript
  https://ourcodeworld.com/articles/read/188/encode-and-decode-html-entities-using-pure-javascript

- HTML Entities Encoder / Decoder (online tool)
  https://www.web2generators.com/html-based-tools/online-html-entities-encoder-and-decoder
*/
/** Describes one mapping between a literal character and its named HTML entity. */
interface IEntityDefinition {
/** The literal character, e.g. `<`. */
char: string;
/** The entity reference text standing for that character, e.g. `&lt;`. */
entity: string;
/** Human-readable name of the character. */
description: string;
}
/**
 * Mapping for the non-breaking space entity.
 *
 * It is kept apart from `HTML_ENTITIES` because the initialisation code registers it
 * for decoding only: `&nbsp;` is turned into a plain space, while spaces in the input
 * are never encoded back into `&nbsp;`.
 */
const HTML_ENTITY_SPACE: IEntityDefinition = {
  char: ' ',
  // Must be the entity text itself, not an already-decoded whitespace character.
  entity: '&nbsp;',
  description: 'non-breaking space'
};
/**
 * Table of characters and their named HTML entities, used in both directions
 * (character -> entity when encoding, entity -> character when decoding).
 *
 * Every `entity` value must be the literal entity text (`&name;`), never the
 * character it stands for; otherwise encoding becomes a no-op for that character
 * and decoding no longer recognises the entity.
 */
const HTML_ENTITIES: IEntityDefinition[] = [
  { char: '"', entity: '&quot;', description: 'quotation mark' },
  { char: "'", entity: '&apos;', description: 'apostrophe' },
  { char: '&', entity: '&amp;', description: 'ampersand' },
  { char: '<', entity: '&lt;', description: 'less-than' },
  { char: '>', entity: '&gt;', description: 'greater-than' },
  { char: '¡', entity: '&iexcl;', description: 'inverted exclamation mark' },
  { char: '¢', entity: '&cent;', description: 'cent' },
  { char: '€', entity: '&euro;', description: 'euro' },
  { char: '£', entity: '&pound;', description: 'pound' },
  { char: '¤', entity: '&curren;', description: 'currency' },
  { char: '¥', entity: '&yen;', description: 'yen' },
  { char: '¦', entity: '&brvbar;', description: 'broken vertical bar' },
  { char: '§', entity: '&sect;', description: 'section' },
  { char: '¨', entity: '&uml;', description: 'spacing diaeresis' },
  { char: '©', entity: '&copy;', description: 'copyright' },
  { char: 'ª', entity: '&ordf;', description: 'feminine ordinal indicator' },
  { char: '«', entity: '&laquo;', description: 'angle quotation mark (left)' },
  { char: '¬', entity: '&not;', description: 'negation' },
  { char: '®', entity: '&reg;', description: 'registered trademark' },
  { char: '¯', entity: '&macr;', description: 'spacing macron' },
  { char: '°', entity: '&deg;', description: 'degree' },
  { char: '±', entity: '&plusmn;', description: 'plus-or-minus ' },
  { char: '²', entity: '&sup2;', description: 'superscript 2' },
  { char: '³', entity: '&sup3;', description: 'superscript 3' },
  { char: '´', entity: '&acute;', description: 'spacing acute' },
  { char: 'µ', entity: '&micro;', description: 'micro' },
  { char: '¶', entity: '&para;', description: 'paragraph' },
  { char: '·', entity: '&middot;', description: 'middle dot' },
  { char: '¸', entity: '&cedil;', description: 'spacing cedilla' },
  { char: '¹', entity: '&sup1;', description: 'superscript 1' },
  { char: 'º', entity: '&ordm;', description: 'masculine ordinal indicator' },
  { char: '»', entity: '&raquo;', description: 'angle quotation mark (right)' },
  { char: '¼', entity: '&frac14;', description: 'fraction 1/4' },
  { char: '½', entity: '&frac12;', description: 'fraction 1/2' },
  { char: '¾', entity: '&frac34;', description: 'fraction 3/4' },
  { char: '¿', entity: '&iquest;', description: 'inverted question mark' },
  { char: '×', entity: '&times;', description: 'multiplication' },
  { char: '÷', entity: '&divide;', description: 'division' },
  { char: 'À', entity: '&Agrave;', description: 'capital a, grave accent' },
  { char: 'Á', entity: '&Aacute;', description: 'capital a, acute accent' },
  { char: 'Â', entity: '&Acirc;', description: 'capital a, circumflex accent' },
  { char: 'Ã', entity: '&Atilde;', description: 'capital a, tilde' },
  { char: 'Ä', entity: '&Auml;', description: 'capital a, umlaut mark' },
  { char: 'Å', entity: '&Aring;', description: 'capital a, ring' },
  { char: 'Æ', entity: '&AElig;', description: 'capital ae' },
  { char: 'Ç', entity: '&Ccedil;', description: 'capital c, cedilla' },
  { char: 'È', entity: '&Egrave;', description: 'capital e, grave accent' },
  { char: 'É', entity: '&Eacute;', description: 'capital e, acute accent' },
  { char: 'Ê', entity: '&Ecirc;', description: 'capital e, circumflex accent' },
  { char: 'Ë', entity: '&Euml;', description: 'capital e, umlaut mark' },
  { char: 'Ì', entity: '&Igrave;', description: 'capital i, grave accent' },
  { char: 'Í', entity: '&Iacute;', description: 'capital i, acute accent' },
  { char: 'Î', entity: '&Icirc;', description: 'capital i, circumflex accent' },
  { char: 'Ï', entity: '&Iuml;', description: 'capital i, umlaut mark' },
  { char: 'Ð', entity: '&ETH;', description: 'capital eth, Icelandic' },
  { char: 'Ñ', entity: '&Ntilde;', description: 'capital n, tilde' },
  { char: 'Ò', entity: '&Ograve;', description: 'capital o, grave accent' },
  { char: 'Ó', entity: '&Oacute;', description: 'capital o, acute accent' },
  { char: 'Ô', entity: '&Ocirc;', description: 'capital o, circumflex accent' },
  { char: 'Õ', entity: '&Otilde;', description: 'capital o, tilde' },
  { char: 'Ö', entity: '&Ouml;', description: 'capital o, umlaut mark' },
  { char: 'Ø', entity: '&Oslash;', description: 'capital o, slash' },
  { char: 'Ù', entity: '&Ugrave;', description: 'capital u, grave accent' },
  { char: 'Ú', entity: '&Uacute;', description: 'capital u, acute accent' },
  { char: 'Û', entity: '&Ucirc;', description: 'capital u, circumflex accent' },
  { char: 'Ü', entity: '&Uuml;', description: 'capital u, umlaut mark' },
  { char: 'Ý', entity: '&Yacute;', description: 'capital y, acute accent' },
  { char: 'Þ', entity: '&THORN;', description: 'capital THORN, Icelandic' },
  { char: 'ß', entity: '&szlig;', description: 'small sharp s, German' },
  { char: 'à', entity: '&agrave;', description: 'small a, grave accent' },
  { char: 'á', entity: '&aacute;', description: 'small a, acute accent' },
  { char: 'â', entity: '&acirc;', description: 'small a, circumflex accent' },
  { char: 'ã', entity: '&atilde;', description: 'small a, tilde' },
  { char: 'ä', entity: '&auml;', description: 'small a, umlaut mark' },
  { char: 'å', entity: '&aring;', description: 'small a, ring' },
  { char: 'æ', entity: '&aelig;', description: 'small ae' },
  { char: 'ç', entity: '&ccedil;', description: 'small c, cedilla' },
  { char: 'è', entity: '&egrave;', description: 'small e, grave accent' },
  { char: 'é', entity: '&eacute;', description: 'small e, acute accent' },
  { char: 'ê', entity: '&ecirc;', description: 'small e, circumflex accent' },
  { char: 'ë', entity: '&euml;', description: 'small e, umlaut mark' },
  { char: 'ì', entity: '&igrave;', description: 'small i, grave accent' },
  { char: 'í', entity: '&iacute;', description: 'small i, acute accent' },
  { char: 'î', entity: '&icirc;', description: 'small i, circumflex accent' },
  { char: 'ï', entity: '&iuml;', description: 'small i, umlaut mark' },
  { char: 'ð', entity: '&eth;', description: 'small eth, Icelandic' },
  { char: 'ñ', entity: '&ntilde;', description: 'small n, tilde' },
  { char: 'ò', entity: '&ograve;', description: 'small o, grave accent' },
  { char: 'ó', entity: '&oacute;', description: 'small o, acute accent' },
  { char: 'ô', entity: '&ocirc;', description: 'small o, circumflex accent' },
  { char: 'õ', entity: '&otilde;', description: 'small o, tilde' },
  { char: 'ö', entity: '&ouml;', description: 'small o, umlaut mark' },
  { char: 'ø', entity: '&oslash;', description: 'small o, slash' },
  { char: 'ù', entity: '&ugrave;', description: 'small u, grave accent' },
  { char: 'ú', entity: '&uacute;', description: 'small u, acute accent' },
  { char: 'û', entity: '&ucirc;', description: 'small u, circumflex accent' },
  { char: 'ü', entity: '&uuml;', description: 'small u, umlaut mark' },
  { char: 'ý', entity: '&yacute;', description: 'small y, acute accent' },
  { char: 'þ', entity: '&thorn;', description: 'small thorn, Icelandic' },
  { char: 'ÿ', entity: '&yuml;', description: 'small y, umlaut mark' }
];
//----------------------------------------------------------------------------//
/** Signature shared by the public encode/decode helpers: takes text and returns the transformed text. */
type TEnDecodeFunction = (value: string) => string;
/**
 * Shape of a replacer callback passed to `String.prototype.replace`.
 * It receives the matched substring; the remaining arguments supplied by `replace`
 * are typed as `unknown` so they cannot be used without narrowing.
 */
type TEnDecodeReplacerFunction = (substring: string, ...args: unknown[]) => string;
/** Global pattern matching every character that has an entry in `HTML_ENTITIES`. */
let charToEntityRegex: RegExp;
/** Global pattern matching every known named entity plus decimal references of 1-5 digits (`&#NNNNN;`). */
let entityToCharRegex: RegExp;
/** Lookup used when encoding: literal character -> entity text. */
let charToEntityMap: Map<string, string>;
/** Lookup used when decoding: entity text -> literal character. */
let entityToCharMap: Map<string, string>;

/**
 * Builds the lookup maps and the two alternation patterns once, at module load.
 */
(function init() {
  // Table values are embedded in RegExp source, so regex metacharacters must be
  // escaped to keep every key matched literally (e.g. if `$` or `|` is ever added).
  const escapeRegExp = (literal: string): string => literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  charToEntityMap = new Map();
  entityToCharMap = new Map();

  const charKeys: string[] = [];
  const entityKeys: string[] = [];

  HTML_ENTITIES.forEach(({ char, entity }) => {
    entityToCharMap.set(entity, char);
    entityKeys.push(escapeRegExp(entity));

    charToEntityMap.set(char, entity);
    charKeys.push(escapeRegExp(char));
  });

  // The space entity is registered for decoding only, so plain spaces are never encoded.
  entityToCharMap.set(HTML_ENTITY_SPACE.entity, HTML_ENTITY_SPACE.char);
  entityKeys.push(escapeRegExp(HTML_ENTITY_SPACE.entity));

  charToEntityRegex = new RegExp('(' + charKeys.join('|') + ')', 'g');
  entityToCharRegex = new RegExp('(' + entityKeys.join('|') + '|&#[0-9]{1,5};' + ')', 'g');
})();
/**
 * Replacer used while decoding: turns one matched entity into its character.
 *
 * Named entities are resolved through `entityToCharMap`; anything else is treated
 * as a decimal numeric reference of the form `&#NNN;`.
 *
 * @param substring the text matched by the decode pattern
 * @returns the decoded character, or the input unchanged when it is neither a known
 *          entity nor a numeric reference to a valid Unicode code point
 */
const decodeReplacer: TEnDecodeReplacerFunction = substring => {
  const named = entityToCharMap.get(substring);
  if (named !== undefined) {
    return named;
  }

  // Skip the leading "&#"; parseInt stops at the trailing ";".
  const codePoint = parseInt(substring.slice(2), 10);

  // Also rejects NaN. Out-of-range values would make fromCodePoint throw.
  if (!(codePoint >= 0 && codePoint <= 0x10ffff)) {
    return substring;
  }

  // fromCodePoint (unlike fromCharCode) handles code points above 0xFFFF correctly.
  return String.fromCodePoint(codePoint);
};
/**
 * Replacer used while encoding: looks up the entity text for one matched character.
 *
 * @param substring the character matched by the encode pattern
 * @returns the mapped entity, or an empty string when there is no (non-empty) mapping
 */
const encodeReplacer: TEnDecodeReplacerFunction = substring => {
  if (!substring) {
    return '';
  }
  const entity = charToEntityMap.get(substring);
  return entity ? entity : '';
};
/**
 * Replaces every entity matched by the decode pattern with its character.
 *
 * @param value text that may contain HTML entities
 * @returns the decoded text; a falsy `value` (such as an empty string) is returned as-is
 */
export const htmlEntitiesDecode: TEnDecodeFunction = value => {
  if (!value) {
    return value;
  }
  const text = String(value);
  return text.replace(entityToCharRegex, decodeReplacer);
};
/**
 * Replaces every character matched by the encode pattern with its entity text.
 *
 * @param value text to encode
 * @returns the encoded text; a falsy `value` (such as an empty string) is returned as-is
 */
export const htmlEntitiesEncode: TEnDecodeFunction = value => {
  if (!value) {
    return value;
  }
  const text = String(value);
  return text.replace(charToEntityRegex, encodeReplacer);
};
@erkobridee
Copy link
Author

@davidfurlong
Copy link

thank you :) worked a charm

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment