Skip to content

Instantly share code, notes, and snippets.

@mikedeboer
Last active July 28, 2020 23:28
Show Gist options
  • Save mikedeboer/1aa7cd2bbcb8e0abc16a to your computer and use it in GitHub Desktop.
Save mikedeboer/1aa7cd2bbcb8e0abc16a to your computer and use it in GitHub Desktop.
Encode/decode XML entities
/**
 * Lookup table from lower-cased entity name to decimal code point(s).
 *
 * Every code point is stored as a string. A plain string value means the name
 * has a single code point. An array value holds two code points that share the
 * same lower-cased name (for example 192 and 224, "À" and "à", under "agrave").
 * `escapeXML` emits the first element of such a pair; `unescapeXML` maps both
 * elements back to the one lower-cased name.
 */
const xmlEntityMap = {
  // Markup-significant characters (34-62).
  quot: "34", amp: "38", apos: "39", lt: "60", gt: "62",

  // Symbols and punctuation (160-191).
  nbsp: "160", iexcl: "161", cent: "162", pound: "163", curren: "164",
  yen: "165", brvbar: "166", sect: "167", uml: "168", copy: "169",
  ordf: "170", laquo: "171", not: "172", shy: "173", reg: "174",
  macr: "175", deg: "176", plusmn: "177", sup2: "178", sup3: "179",
  acute: "180", micro: "181", para: "182", middot: "183", cedil: "184",
  sup1: "185", ordm: "186", raquo: "187", frac14: "188", frac12: "189",
  frac34: "190", iquest: "191",

  // Accented letters (192-376), mostly stored as two-element pairs.
  agrave: ["192", "224"],
  aacute: ["193", "225"],
  acirc: ["194", "226"],
  atilde: ["195", "227"],
  auml: ["196", "228"],
  aring: ["197", "229"],
  aelig: ["198", "230"],
  ccedil: ["199", "231"],
  egrave: ["200", "232"],
  eacute: ["201", "233"],
  ecirc: ["202", "234"],
  euml: ["203", "235"],
  igrave: ["204", "236"],
  iacute: ["205", "237"],
  icirc: ["206", "238"],
  iuml: ["207", "239"],
  eth: ["208", "240"],
  ntilde: ["209", "241"],
  ograve: ["210", "242"],
  oacute: ["211", "243"],
  ocirc: ["212", "244"],
  otilde: ["213", "245"],
  ouml: ["214", "246"],
  times: "215",
  oslash: ["216", "248"],
  ugrave: ["217", "249"],
  uacute: ["218", "250"],
  ucirc: ["219", "251"],
  uuml: ["220", "252"],
  yacute: ["221", "253"],
  thorn: ["222", "254"],
  szlig: "223",
  divide: "247",
  yuml: ["255", "376"],
  oelig: ["338", "339"],
  scaron: ["352", "353"],

  // Modifier-like letters (402-732).
  fnof: "402", circ: "710", tilde: "732",

  // Greek letters and variants (913-982).
  alpha: ["913", "945"],
  beta: ["914", "946"],
  gamma: ["915", "947"],
  delta: ["916", "948"],
  epsilon: ["917", "949"],
  zeta: ["918", "950"],
  eta: ["919", "951"],
  theta: ["920", "952"],
  iota: ["921", "953"],
  kappa: ["922", "954"],
  lambda: ["923", "955"],
  mu: ["924", "956"],
  nu: ["925", "957"],
  xi: ["926", "958"],
  omicron: ["927", "959"],
  pi: ["928", "960"],
  rho: ["929", "961"],
  sigma: ["931", "963"],
  tau: ["932", "964"],
  upsilon: ["933", "965"],
  phi: ["934", "966"],
  chi: ["935", "967"],
  psi: ["936", "968"],
  omega: ["937", "969"],
  sigmaf: "962", thetasym: "977", upsih: "978", piv: "982",

  // Spacing, directional marks and general punctuation (8194-8364).
  ensp: "8194", emsp: "8195", thinsp: "8201",
  zwnj: "8204", zwj: "8205", lrm: "8206", rlm: "8207",
  ndash: "8211", mdash: "8212",
  lsquo: "8216", rsquo: "8217", sbquo: "8218",
  ldquo: "8220", rdquo: "8221", bdquo: "8222",
  dagger: ["8224", "8225"],
  bull: "8226", hellip: "8230", permil: "8240",
  prime: ["8242", "8243"],
  lsaquo: "8249", rsaquo: "8250", oline: "8254", frasl: "8260", euro: "8364",

  // Letter-like symbols and arrows (8465-8660).
  image: "8465", weierp: "8472", real: "8476", trade: "8482", alefsym: "8501",
  larr: ["8592", "8656"],
  uarr: ["8593", "8657"],
  rarr: ["8594", "8658"],
  darr: ["8595", "8659"],
  harr: ["8596", "8660"],
  crarr: "8629",

  // Mathematical operators (8704-8901).
  forall: "8704", part: "8706", exist: "8707", empty: "8709", nabla: "8711",
  isin: "8712", notin: "8713", ni: "8715", prod: "8719", sum: "8721",
  minus: "8722", lowast: "8727", radic: "8730", prop: "8733", infin: "8734",
  ang: "8736", and: "8743", or: "8744", cap: "8745", cup: "8746",
  int: "8747", there4: "8756", sim: "8764", cong: "8773", asymp: "8776",
  ne: "8800", equiv: "8801", le: "8804", ge: "8805", sub: "8834",
  sup: "8835", nsub: "8836", sube: "8838", supe: "8839", oplus: "8853",
  otimes: "8855", perp: "8869", sdot: "8901",

  // Brackets and miscellaneous shapes (8968-9830).
  lceil: "8968", rceil: "8969", lfloor: "8970", rfloor: "8971",
  lang: "9001", rang: "9002", loz: "9674",
  spades: "9824", clubs: "9827", hearts: "9829", diams: "9830"
};
/**
 * Escapes "&", "<", ">", double and single quotation marks into numeric XML
 * character references, and rewrites named entities found in `xmlEntityMap`
 * into their numeric form.
 *
 * Named entities are looked up case-insensitively. When a name maps to a pair
 * of code points the first one is used, so `&agrave;` and `&Agrave;` both
 * become `&#192;`. Names that are not in `xmlEntityMap` are left untouched.
 *
 * @param {String} str The XML string to escape. Non-string values are returned
 *   unchanged.
 * @param {Boolean} strictMode By default, this function attempts to NOT
 *   double-escape XML entities: an "&" that already starts `&#NN;` (2-5 digits)
 *   or `&name;` (2 or more letters) is kept as is. Setting this flag to `true`
 *   turns that behavior off, so every "&" is escaped.
 * @return {String} The escaped string
 */
const escapeXML = function(str, strictMode) {
  if (typeof str !== "string") {
    return str;
  }

  // In lenient mode the negative lookahead skips ampersands that already
  // introduce a decimal character reference or something shaped like a named
  // entity.
  const ampersandPattern = strictMode
    ? /&/g
    : /&(?!#[0-9]{2,5};|[a-zA-Z]{2,};)/g;

  return str
    .replace(ampersandPattern, "&#38;")
    .replace(/"/g, "&#34;")
    .replace(/</g, "&#60;")
    .replace(/>/g, "&#62;")
    .replace(/'/g, "&#39;")
    .replace(/&([a-zA-Z]+);/g, function(entity, name) {
      const key = name.toLowerCase();
      // Own-property check: a bare `xmlEntityMap[key]` would also resolve
      // inherited members such as "constructor" or "tostring".
      if (!Object.prototype.hasOwnProperty.call(xmlEntityMap, key)) {
        return entity;
      }
      const codes = xmlEntityMap[key];
      return `&#${Array.isArray(codes) ? codes[0] : codes};`;
    });
};
/**
 * Unescapes `"&#38;"` and other similar numeric XML character references into
 * named HTML entities, and then replaces the 'special' ones (`&apos;`, `&gt;`,
 * `&lt;`, `&quot;`, `&amp;`) with the characters they stand for
 * (`'`, `>`, `<`, `"`, `&`).
 *
 * Only decimal references of 2 to 5 digits whose code point appears in
 * `xmlEntityMap` are rewritten; all others are left as they are. Because
 * `xmlEntityMap` keys are lower-case and a pair of code points shares one key,
 * both members of a pair come back as the same name (`&#192;` and `&#224;`
 * both yield `&agrave;`).
 *
 * The code point to name lookup is built from `xmlEntityMap` on the first call
 * and kept in a closure, so the function does not depend on `this` and can be
 * called as a plain function in strict mode or an ES module.
 *
 * @param {String} str The XML string to unescape. Non-string values are
 *   returned unchanged.
 * @return {String} The unescaped string
 */
const unescapeXML = (function() {
  // Code point (decimal string) -> entity name; filled lazily on first use.
  let reverseMap = null;

  // Inverts `xmlEntityMap` once and returns the cached result afterwards.
  const getReverseMap = function() {
    if (reverseMap !== null) {
      return reverseMap;
    }
    reverseMap = {};
    for (const name of Object.keys(xmlEntityMap)) {
      const value = xmlEntityMap[name];
      const codes = Array.isArray(value) ? value : [value];
      for (const code of codes) {
        reverseMap[code] = name;
      }
    }
    return reverseMap;
  };

  return function(str) {
    if (typeof str !== "string") {
      return str;
    }
    const map = getReverseMap();
    return str
      .replace(/&#([0-9]{2,5});/g, function(reference, code) {
        const name = map[code];
        return name ? `&${name};` : reference;
      })
      .replace(/&apos;/gi, "'")
      .replace(/&gt;/gi, ">")
      .replace(/&lt;/gi, "<")
      .replace(/&quot;/gi, "\"")
      // "&amp;" goes last so that "&amp;lt;" ends up as the text "&lt;" rather
      // than being unescaped a second time into "<".
      .replace(/&amp;/gi, "&");
  };
})();
@Yash-Singh1
Copy link

Thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment