erkobridee/htmlentities.ts

## htmlentities.ts
/*
    useful references:

    String.prototype.replace()
    https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace
    https://alligator.io/js/string-replace/

    Encode and Decode HTML entities using pure Javascript
    https://ourcodeworld.com/articles/read/188/encode-and-decode-html-entities-using-pure-javascript

    HTML Entities Encoder / Decoder - online tool
    https://www.web2generators.com/html-based-tools/online-html-entities-encoder-and-decoder
*/

/**
 * One row of the entity table: a literal character paired with the HTML
 * entity text that represents it.
 */
interface IEntityDefinition {
    /** The literal character (for example `<`). */
    char: string;
    /** The entity text, including the leading `&` and trailing `;` (for example `&lt;`). */
    entity: string;
    /** Human-readable label for the character. */
    description: string;
}

/**
 * Entity definition for `&nbsp;`.
 *
 * Kept apart from HTML_ENTITIES because the init routine in this module only
 * registers it for decoding; it is never added to the encoding table.
 */
const HTML_ENTITY_SPACE: IEntityDefinition = { char: ' ', entity: '&nbsp;', description: 'non-breaking space' };

/**
 * Table of characters and their named HTML entities.
 *
 * Each row is written as `[char, entity, description]` and expanded into an
 * IEntityDefinition object; row order is significant only in that it defines
 * the order in which keys are registered by the init routine.
 */
const HTML_ENTITIES: IEntityDefinition[] = (
    [
        // markup-significant characters
        ['"', '&quot;', 'quotation mark'],
        ["'", '&apos;', 'apostrophe'],
        ['&', '&amp;', 'ampersand'],
        ['<', '&lt;', 'less-than'],
        ['>', '&gt;', 'greater-than'],
        // symbols and punctuation
        ['¡', '&iexcl;', 'inverted exclamation mark'],
        ['¢', '&cent;', 'cent'],
        ['€', '&euro;', 'euro'],
        ['£', '&pound;', 'pound'],
        ['¤', '&curren;', 'currency'],
        ['¥', '&yen;', 'yen'],
        ['¦', '&brvbar;', 'broken vertical bar'],
        ['§', '&sect;', 'section'],
        ['¨', '&uml;', 'spacing diaeresis'],
        ['©', '&copy;', 'copyright'],
        ['ª', '&ordf;', 'feminine ordinal indicator'],
        ['«', '&laquo;', 'angle quotation mark (left)'],
        ['¬', '&not;', 'negation'],
        ['®', '&reg;', 'registered trademark'],
        ['¯', '&macr;', 'spacing macron'],
        ['°', '&deg;', 'degree'],
        ['±', '&plusmn;', 'plus-or-minus '],
        ['²', '&sup2;', 'superscript 2'],
        ['³', '&sup3;', 'superscript 3'],
        ['´', '&acute;', 'spacing acute'],
        ['µ', '&micro;', 'micro'],
        ['¶', '&para;', 'paragraph'],
        ['·', '&middot;', 'middle dot'],
        ['¸', '&cedil;', 'spacing cedilla'],
        ['¹', '&sup1;', 'superscript 1'],
        ['º', '&ordm;', 'masculine ordinal indicator'],
        ['»', '&raquo;', 'angle quotation mark (right)'],
        ['¼', '&frac14;', 'fraction 1/4'],
        ['½', '&frac12;', 'fraction 1/2'],
        ['¾', '&frac34;', 'fraction 3/4'],
        ['¿', '&iquest;', 'inverted question mark'],
        ['×', '&times;', 'multiplication'],
        ['÷', '&divide;', 'division'],
        // capital letters
        ['À', '&Agrave;', 'capital a, grave accent'],
        ['Á', '&Aacute;', 'capital a, acute accent'],
        ['Â', '&Acirc;', 'capital a, circumflex accent'],
        ['Ã', '&Atilde;', 'capital a, tilde'],
        ['Ä', '&Auml;', 'capital a, umlaut mark'],
        ['Å', '&Aring;', 'capital a, ring'],
        ['Æ', '&AElig;', 'capital ae'],
        ['Ç', '&Ccedil;', 'capital c, cedilla'],
        ['È', '&Egrave;', 'capital e, grave accent'],
        ['É', '&Eacute;', 'capital e, acute accent'],
        ['Ê', '&Ecirc;', 'capital e, circumflex accent'],
        ['Ë', '&Euml;', 'capital e, umlaut mark'],
        ['Ì', '&Igrave;', 'capital i, grave accent'],
        ['Í', '&Iacute;', 'capital i, acute accent'],
        ['Î', '&Icirc;', 'capital i, circumflex accent'],
        ['Ï', '&Iuml;', 'capital i, umlaut mark'],
        ['Ð', '&ETH;', 'capital eth, Icelandic'],
        ['Ñ', '&Ntilde;', 'capital n, tilde'],
        ['Ò', '&Ograve;', 'capital o, grave accent'],
        ['Ó', '&Oacute;', 'capital o, acute accent'],
        ['Ô', '&Ocirc;', 'capital o, circumflex accent'],
        ['Õ', '&Otilde;', 'capital o, tilde'],
        ['Ö', '&Ouml;', 'capital o, umlaut mark'],
        ['Ø', '&Oslash;', 'capital o, slash'],
        ['Ù', '&Ugrave;', 'capital u, grave accent'],
        ['Ú', '&Uacute;', 'capital u, acute accent'],
        ['Û', '&Ucirc;', 'capital u, circumflex accent'],
        ['Ü', '&Uuml;', 'capital u, umlaut mark'],
        ['Ý', '&Yacute;', 'capital y, acute accent'],
        ['Þ', '&THORN;', 'capital THORN, Icelandic'],
        // small letters
        ['ß', '&szlig;', 'small sharp s, German'],
        ['à', '&agrave;', 'small a, grave accent'],
        ['á', '&aacute;', 'small a, acute accent'],
        ['â', '&acirc;', 'small a, circumflex accent'],
        ['ã', '&atilde;', 'small a, tilde'],
        ['ä', '&auml;', 'small a, umlaut mark'],
        ['å', '&aring;', 'small a, ring'],
        ['æ', '&aelig;', 'small ae'],
        ['ç', '&ccedil;', 'small c, cedilla'],
        ['è', '&egrave;', 'small e, grave accent'],
        ['é', '&eacute;', 'small e, acute accent'],
        ['ê', '&ecirc;', 'small e, circumflex accent'],
        ['ë', '&euml;', 'small e, umlaut mark'],
        ['ì', '&igrave;', 'small i, grave accent'],
        ['í', '&iacute;', 'small i, acute accent'],
        ['î', '&icirc;', 'small i, circumflex accent'],
        ['ï', '&iuml;', 'small i, umlaut mark'],
        ['ð', '&eth;', 'small eth, Icelandic'],
        ['ñ', '&ntilde;', 'small n, tilde'],
        ['ò', '&ograve;', 'small o, grave accent'],
        ['ó', '&oacute;', 'small o, acute accent'],
        ['ô', '&ocirc;', 'small o, circumflex accent'],
        ['õ', '&otilde;', 'small o, tilde'],
        ['ö', '&ouml;', 'small o, umlaut mark'],
        ['ø', '&oslash;', 'small o, slash'],
        ['ù', '&ugrave;', 'small u, grave accent'],
        ['ú', '&uacute;', 'small u, acute accent'],
        ['û', '&ucirc;', 'small u, circumflex accent'],
        ['ü', '&uuml;', 'small u, umlaut mark'],
        ['ý', '&yacute;', 'small y, acute accent'],
        ['þ', '&thorn;', 'small thorn, Icelandic'],
        ['ÿ', '&yuml;', 'small y, umlaut mark']
    ] as Array<[string, string, string]>
).map(([char, entity, description]) => ({ char, entity, description }));

//----------------------------------------------------------------------------//

/** Signature shared by the exported encode/decode helpers: takes a string, returns a string. */
type TEnDecodeFunction = (value: string) => string;
/**
 * Shape of a replacer callback passed to `String.prototype.replace`: the
 * matched substring comes first; the replacers in this file ignore the
 * remaining arguments.
 */
type TEnDecodeReplacerFunction = (substring: string, ...args: any[]) => string;

// Lookup tables and matchers used by the encode/decode helpers in this module.
// All four are assigned once by the init() IIFE right after their declaration.
let charToEntityRegex: RegExp;
let entityToCharRegex: RegExp;
let charToEntityMap: Map<string, string>;
let entityToCharMap: Map<string, string>;

/**
 * Builds the char->entity and entity->char maps plus the two global regexes
 * that find encodable characters and decodable entities.
 */
(function init() {
    // HTML_ENTITY_SPACE takes part in decoding only, so it is appended to the
    // decode-side list and never registered in the encode-side structures.
    const decodable = [...HTML_ENTITIES, HTML_ENTITY_SPACE];

    charToEntityMap = new Map(HTML_ENTITIES.map(({ char, entity }): [string, string] => [char, entity]));
    entityToCharMap = new Map(decodable.map(({ char, entity }): [string, string] => [entity, char]));

    const charAlternation = HTML_ENTITIES.map(({ char }) => char).join('|');
    const entityAlternation = decodable.map(({ entity }) => entity).join('|');

    charToEntityRegex = new RegExp(`(${charAlternation})`, 'g');
    // Besides the named entities, also match decimal references of 1 to 5 digits.
    entityToCharRegex = new RegExp(`(${entityAlternation}|&#[0-9]{1,5};)`, 'g');
})();

/**
 * `String.prototype.replace` callback used when decoding.
 *
 * Named entities are resolved through `entityToCharMap`. Any other match is
 * treated as a decimal numeric reference (`&#NNN;`) and converted to the
 * character with that code point.
 *
 * @param substring - the text matched by the decode regex
 * @returns the decoded character, or `substring` unchanged when it is neither
 *          a registered entity nor a valid decimal code point
 */
const decodeReplacer: TEnDecodeReplacerFunction = substring => {
    const named = entityToCharMap.get(substring);
    if (named !== undefined) {
        return named;
    }

    // Skip the leading "&#"; parseInt stops at the trailing ";".
    const codePoint = parseInt(substring.slice(2), 10);
    if (Number.isNaN(codePoint) || codePoint < 0 || codePoint > 0x10ffff) {
        return substring;
    }

    // fromCodePoint, unlike fromCharCode, does not truncate values above
    // 0xFFFF, so references such as &#66304; decode to the right character.
    return String.fromCodePoint(codePoint);
};

/**
 * `String.prototype.replace` callback used when encoding.
 *
 * @param substring - the character matched by the encode regex
 * @returns the entity registered for that character in `charToEntityMap`,
 *          or an empty string when there is none
 */
const encodeReplacer: TEnDecodeReplacerFunction = substring => {
    if (!substring) {
        return '';
    }
    return charToEntityMap.get(substring) || '';
};

/**
 * Decodes HTML entities in `value` by running `entityToCharRegex` over it and
 * substituting every match through `decodeReplacer`.
 *
 * @param value - text that may contain entities
 * @returns the decoded text; a falsy `value` is handed back untouched
 */
export const htmlEntitiesDecode: TEnDecodeFunction = value => {
    if (!value) {
        return value;
    }
    return String(value).replace(entityToCharRegex, decodeReplacer);
};

/**
 * Encodes characters in `value` by running `charToEntityRegex` over it and
 * substituting every match through `encodeReplacer`.
 *
 * @param value - text to encode
 * @returns the encoded text; a falsy `value` is handed back untouched
 */
export const htmlEntitiesEncode: TEnDecodeFunction = value => {
    if (!value) {
        return value;
    }
    return String(value).replace(charToEntityRegex, encodeReplacer);
};
	/*
	useful references:

	String.prototype.replace()
	https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace
	https://alligator.io/js/string-replace/

	Encode and Decode HTML entities using pure Javascript
	https://ourcodeworld.com/articles/read/188/encode-and-decode-html-entities-using-pure-javascript

	HTML Entities Encoder / Decoder - online tool
	https://www.web2generators.com/html-based-tools/online-html-entities-encoder-and-decoder
	*/

	/**
	 * One row of the entity table: a literal character paired with the HTML
	 * entity text that represents it.
	 */
	interface IEntityDefinition {
	/** The literal character. */
	char: string;
	/** The HTML entity text associated with `char`. */
	entity: string;
	/** Human-readable label for the character. */
	description: string;
	}

	/**
	 * Entity definition for `&nbsp;`.
	 *
	 * The entity text must be the literal reference `&nbsp;` (not the character
	 * it renders to), otherwise the decode lookup keyed on it can never match.
	 */
	const HTML_ENTITY_SPACE: IEntityDefinition = {
		char: ' ',
		entity: '&nbsp;',
		description: 'non-breaking space'
	};

	/**
	 * Table of characters and their named HTML entities.
	 *
	 * Each row is written as `[char, entity, description]` and expanded into an
	 * IEntityDefinition object. The entity column holds the literal reference
	 * text (`&name;`), never the rendered character.
	 */
	const HTML_ENTITIES: IEntityDefinition[] = (
		[
			// markup-significant characters
			['"', '&quot;', 'quotation mark'],
			["'", '&apos;', 'apostrophe'],
			['&', '&amp;', 'ampersand'],
			['<', '&lt;', 'less-than'],
			['>', '&gt;', 'greater-than'],
			// symbols and punctuation
			['¡', '&iexcl;', 'inverted exclamation mark'],
			['¢', '&cent;', 'cent'],
			['€', '&euro;', 'euro'],
			['£', '&pound;', 'pound'],
			['¤', '&curren;', 'currency'],
			['¥', '&yen;', 'yen'],
			['¦', '&brvbar;', 'broken vertical bar'],
			['§', '&sect;', 'section'],
			['¨', '&uml;', 'spacing diaeresis'],
			['©', '&copy;', 'copyright'],
			['ª', '&ordf;', 'feminine ordinal indicator'],
			['«', '&laquo;', 'angle quotation mark (left)'],
			['¬', '&not;', 'negation'],
			['®', '&reg;', 'registered trademark'],
			['¯', '&macr;', 'spacing macron'],
			['°', '&deg;', 'degree'],
			['±', '&plusmn;', 'plus-or-minus '],
			['²', '&sup2;', 'superscript 2'],
			['³', '&sup3;', 'superscript 3'],
			['´', '&acute;', 'spacing acute'],
			['µ', '&micro;', 'micro'],
			['¶', '&para;', 'paragraph'],
			['·', '&middot;', 'middle dot'],
			['¸', '&cedil;', 'spacing cedilla'],
			['¹', '&sup1;', 'superscript 1'],
			['º', '&ordm;', 'masculine ordinal indicator'],
			['»', '&raquo;', 'angle quotation mark (right)'],
			['¼', '&frac14;', 'fraction 1/4'],
			['½', '&frac12;', 'fraction 1/2'],
			['¾', '&frac34;', 'fraction 3/4'],
			['¿', '&iquest;', 'inverted question mark'],
			['×', '&times;', 'multiplication'],
			['÷', '&divide;', 'division'],
			// capital letters
			['À', '&Agrave;', 'capital a, grave accent'],
			['Á', '&Aacute;', 'capital a, acute accent'],
			['Â', '&Acirc;', 'capital a, circumflex accent'],
			['Ã', '&Atilde;', 'capital a, tilde'],
			['Ä', '&Auml;', 'capital a, umlaut mark'],
			['Å', '&Aring;', 'capital a, ring'],
			['Æ', '&AElig;', 'capital ae'],
			['Ç', '&Ccedil;', 'capital c, cedilla'],
			['È', '&Egrave;', 'capital e, grave accent'],
			['É', '&Eacute;', 'capital e, acute accent'],
			['Ê', '&Ecirc;', 'capital e, circumflex accent'],
			['Ë', '&Euml;', 'capital e, umlaut mark'],
			['Ì', '&Igrave;', 'capital i, grave accent'],
			['Í', '&Iacute;', 'capital i, acute accent'],
			['Î', '&Icirc;', 'capital i, circumflex accent'],
			['Ï', '&Iuml;', 'capital i, umlaut mark'],
			['Ð', '&ETH;', 'capital eth, Icelandic'],
			['Ñ', '&Ntilde;', 'capital n, tilde'],
			['Ò', '&Ograve;', 'capital o, grave accent'],
			['Ó', '&Oacute;', 'capital o, acute accent'],
			['Ô', '&Ocirc;', 'capital o, circumflex accent'],
			['Õ', '&Otilde;', 'capital o, tilde'],
			['Ö', '&Ouml;', 'capital o, umlaut mark'],
			['Ø', '&Oslash;', 'capital o, slash'],
			['Ù', '&Ugrave;', 'capital u, grave accent'],
			['Ú', '&Uacute;', 'capital u, acute accent'],
			['Û', '&Ucirc;', 'capital u, circumflex accent'],
			['Ü', '&Uuml;', 'capital u, umlaut mark'],
			['Ý', '&Yacute;', 'capital y, acute accent'],
			['Þ', '&THORN;', 'capital THORN, Icelandic'],
			// small letters
			['ß', '&szlig;', 'small sharp s, German'],
			['à', '&agrave;', 'small a, grave accent'],
			['á', '&aacute;', 'small a, acute accent'],
			['â', '&acirc;', 'small a, circumflex accent'],
			['ã', '&atilde;', 'small a, tilde'],
			['ä', '&auml;', 'small a, umlaut mark'],
			['å', '&aring;', 'small a, ring'],
			['æ', '&aelig;', 'small ae'],
			['ç', '&ccedil;', 'small c, cedilla'],
			['è', '&egrave;', 'small e, grave accent'],
			['é', '&eacute;', 'small e, acute accent'],
			['ê', '&ecirc;', 'small e, circumflex accent'],
			['ë', '&euml;', 'small e, umlaut mark'],
			['ì', '&igrave;', 'small i, grave accent'],
			['í', '&iacute;', 'small i, acute accent'],
			['î', '&icirc;', 'small i, circumflex accent'],
			['ï', '&iuml;', 'small i, umlaut mark'],
			['ð', '&eth;', 'small eth, Icelandic'],
			['ñ', '&ntilde;', 'small n, tilde'],
			['ò', '&ograve;', 'small o, grave accent'],
			['ó', '&oacute;', 'small o, acute accent'],
			['ô', '&ocirc;', 'small o, circumflex accent'],
			['õ', '&otilde;', 'small o, tilde'],
			['ö', '&ouml;', 'small o, umlaut mark'],
			['ø', '&oslash;', 'small o, slash'],
			['ù', '&ugrave;', 'small u, grave accent'],
			['ú', '&uacute;', 'small u, acute accent'],
			['û', '&ucirc;', 'small u, circumflex accent'],
			['ü', '&uuml;', 'small u, umlaut mark'],
			['ý', '&yacute;', 'small y, acute accent'],
			['þ', '&thorn;', 'small thorn, Icelandic'],
			['ÿ', '&yuml;', 'small y, umlaut mark']
		] as Array<[string, string, string]>
	).map(([char, entity, description]) => ({ char, entity, description }));

	//----------------------------------------------------------------------------//

	/** Signature shared by the exported encode/decode helpers: takes a string, returns a string. */
	type TEnDecodeFunction = (value: string) => string;
	/**
	 * Shape of a replacer callback passed to `String.prototype.replace`: the
	 * matched substring comes first, followed by any further arguments.
	 */
	type TEnDecodeReplacerFunction = (substring: string, ...args: any[]) => string;

	// Lookup tables and matchers used by the encode/decode helpers in this module.
	// All four are assigned once by the init() IIFE right after their declaration.
	let charToEntityRegex: RegExp;
	let entityToCharRegex: RegExp;
	let charToEntityMap: Map<string, string>;
	let entityToCharMap: Map<string, string>;

	/**
	 * Builds the char->entity and entity->char maps plus the two global regexes
	 * that find encodable characters and decodable entities.
	 */
	(function init() {
		charToEntityMap = new Map();
		entityToCharMap = new Map();

		const charKeys: string[] = [];
		const entityKeys: string[] = [];

		for (const { char, entity } of HTML_ENTITIES) {
			entityToCharMap.set(entity, char);
			entityKeys.push(entity);

			charToEntityMap.set(char, entity);
			charKeys.push(char);
		}

		// HTML_ENTITY_SPACE takes part in decoding only; it is deliberately not
		// registered in the encode-side map or regex.
		entityToCharMap.set(HTML_ENTITY_SPACE.entity, HTML_ENTITY_SPACE.char);
		entityKeys.push(HTML_ENTITY_SPACE.entity);

		// Plain '|' alternation: the backslash-escaped pipes were stray markup
		// escapes, not part of the pattern.
		charToEntityRegex = new RegExp('(' + charKeys.join('|') + ')', 'g');
		// Besides the named entities, also match decimal references of 1 to 5 digits.
		entityToCharRegex = new RegExp('(' + entityKeys.join('|') + '|&#[0-9]{1,5};' + ')', 'g');
	})();

	/**
	 * `String.prototype.replace` callback used when decoding.
	 *
	 * Named entities are resolved through `entityToCharMap`. Any other match is
	 * treated as a decimal numeric reference (`&#NNN;`) and converted to the
	 * character with that code point.
	 *
	 * @param substring - the text matched by the decode regex
	 * @returns the decoded character, or `substring` unchanged when it is neither
	 *          a registered entity nor a valid decimal code point
	 */
	const decodeReplacer: TEnDecodeReplacerFunction = substring => {
		const named = entityToCharMap.get(substring);
		if (named !== undefined) {
			return named;
		}

		// Skip the leading "&#"; parseInt stops at the trailing ";".
		const codePoint = parseInt(substring.slice(2), 10);
		if (Number.isNaN(codePoint) || codePoint < 0 || codePoint > 0x10ffff) {
			return substring;
		}

		// fromCodePoint, unlike fromCharCode, does not truncate values above
		// 0xFFFF, so references such as &#66304; decode to the right character.
		return String.fromCodePoint(codePoint);
	};

	/**
	 * `String.prototype.replace` callback used when encoding.
	 *
	 * @param substring - the character matched by the encode regex
	 * @returns the entity registered for that character in `charToEntityMap`,
	 *          or an empty string when there is none
	 */
	const encodeReplacer: TEnDecodeReplacerFunction = substring => {
		if (!substring) {
			return '';
		}
		// `||` here is the logical-or fallback; the escaped form was a syntax error.
		return charToEntityMap.get(substring) || '';
	};

	/**
	 * Decodes HTML entities in `value` by running `entityToCharRegex` over it and
	 * substituting every match through `decodeReplacer`.
	 *
	 * @param value - text that may contain entities
	 * @returns the decoded text; a falsy `value` is handed back untouched
	 */
	export const htmlEntitiesDecode: TEnDecodeFunction = value => {
		if (!value) {
			return value;
		}
		return String(value).replace(entityToCharRegex, decodeReplacer);
	};

	/**
	 * Encodes characters in `value` by running `charToEntityRegex` over it and
	 * substituting every match through `encodeReplacer`.
	 *
	 * @param value - text to encode
	 * @returns the encoded text; a falsy `value` is handed back untouched
	 */
	export const htmlEntitiesEncode: TEnDecodeFunction = value => {
		if (!value) {
			return value;
		}
		return String(value).replace(charToEntityRegex, encodeReplacer);
	};