Skip to content

Instantly share code, notes, and snippets.

@atifaziz
Created September 23, 2008 21:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save atifaziz/12423 to your computer and use it in GitHub Desktop.
Save atifaziz/12423 to your computer and use it in GitHub Desktop.
HTML encoder using named entities
#region Imports
using System;
using System.Globalization;
using System.Text;
using System.Text.RegularExpressions;
#endregion
/// <summary>
/// HTML-encodes text, and rewrites numbered character references, using
/// the named character entity references held in this class's lookup
/// table (for example <c>&amp;uuml;</c> for U+00FC).
/// </summary>
public static class HtmlEncoder
{
    // Lookup table indexed by UTF-16 code unit. An element holds the
    // complete named reference (for example "&uuml;") or null when the
    // table has no name for that code unit.
    private static readonly string[] _entityNames;

    // Matches a numbered character reference: either "&#x" followed by
    // 1-4 hex digits (group 1) or "&#" followed by 1-5 decimal digits
    // (group 2), terminated by ";".
    private static readonly Regex _entityExpression = new Regex(
        @"&\#(?:(?:x([0-9a-f]{1,4}))|([0-9]{1,5}));",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>
    /// Replaces as many numbered entity references in the input HTML
    /// string with their named counterparts as possible. For
    /// example, &amp;#252; (U+00FC) will be converted to &amp;uuml;.
    /// </summary>
    /// <param name="html">The HTML source text; may be null.</param>
    /// <returns>
    /// The rewritten text, or <paramref name="html"/> itself when it is
    /// null or contains no numbered entity references.
    /// </returns>
    /// <remarks>
    /// If a numbered entity reference has no name then it is left as
    /// in the source string. Numbered entity references for TAB
    /// (U+0009), LF (U+000A), CR (U+000D) and for the characters
    /// U+0020 through U+007F other than the quotation mark, ampersand,
    /// less-than and greater-than signs are converted to the native
    /// character.
    /// </remarks>
    public static string NameNumberedEntities(string html)
    {
        var sb = NameNumberedEntitiesImpl(html, null);
        return sb != null ? sb.ToString() : html;
    }

    /// <summary>
    /// Same conversion as <see cref="NameNumberedEntities"/>, except
    /// that the result is appended to a caller-supplied
    /// <see cref="StringBuilder"/> instead of being returned.
    /// </summary>
    /// <param name="html">The HTML source text; null appends nothing.</param>
    /// <param name="sb">The builder that receives the converted text.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="sb"/> is null.
    /// </exception>
    public static void NameNumberedEntitiesTo(string html, StringBuilder sb)
    {
        if (sb == null) throw new ArgumentNullException("sb");
        NameNumberedEntitiesImpl(html, sb);
    }

    /// <summary>
    /// Shared implementation of the numbered-to-named conversion.
    /// </summary>
    /// <param name="html">The HTML source text; may be null.</param>
    /// <param name="sb">
    /// The builder to append to, or null to have one allocated lazily
    /// when the first numbered reference is found.
    /// </param>
    /// <returns>
    /// The builder holding the output, or null when
    /// <paramref name="html"/> is null or when no builder was supplied
    /// and no numbered reference was found.
    /// </returns>
    private static StringBuilder NameNumberedEntitiesImpl(string html, StringBuilder sb)
    {
        // Mirror EncodeNamedImpl: null input produces no output.
        if (html == null)
            return null;

        var start = 0;
        foreach (Match match in _entityExpression.Matches(html))
        {
            var hex = match.Groups[1];
            var dec = match.Groups[2];
            var code = int.Parse(
                hex.Success ? hex.Value : dec.Value,
                hex.Success ? NumberStyles.AllowHexSpecifier : NumberStyles.None,
                NumberFormatInfo.InvariantInfo);

            string replacement;
            if (code > char.MaxValue)
            {
                // Five decimal digits can exceed 0xFFFF. Casting such a
                // value to char would wrap around onto an unrelated
                // character, so the reference is kept verbatim.
                replacement = match.Value;
            }
            else if (IsPlainCharacter((char)code, false))
            {
                replacement = ((char)code).ToString();
            }
            else
            {
                replacement = code < _entityNames.Length ? _entityNames[code] : null;
                // No name known: keep the reference exactly as written
                // rather than dropping it from the output.
                if (replacement == null)
                    replacement = match.Value;
            }

            if (sb == null)
                sb = new StringBuilder(html.Length + 10);

            // Copy the text between the previous match and this one,
            // then the replacement for this match.
            sb.Append(html, start, match.Index - start);
            sb.Append(replacement);
            start = match.Index + match.Length;
        }

        if (sb == null)
            return null;

        if (start < html.Length)
            sb.Append(html, start, html.Length - start);
        return sb;
    }

    /// <summary>
    /// HTML-encodes the input string. It behaves similar to
    /// <a href="http://msdn.microsoft.com/en-us/library/system.web.httputility.htmlencode.aspx">HttpUtility.HtmlEncode</a>
    /// except it uses named entity references wherever possible. For
    /// example, a small letter U with umlaut (U+00FC) in the source
    /// string will be encoded as &amp;uuml; instead of &amp;#252;.
    /// </summary>
    /// <param name="input">The text to encode; may be null.</param>
    /// <returns>
    /// The encoded text, or <paramref name="input"/> itself when it is
    /// null or needs no encoding.
    /// </returns>
    public static string EncodeNamed(string input)
    {
        return EncodeNamed(input, false);
    }

    /// <summary>
    /// HTML-encodes the input string. It behaves similar to
    /// <a href="http://msdn.microsoft.com/en-us/library/system.web.httputility.htmlencode.aspx">HttpUtility.HtmlEncode</a>
    /// except it uses named entity references wherever possible. For
    /// example, a small letter U with umlaut (U+00FC) in the source
    /// string will be encoded as &amp;uuml; instead of &amp;#252;.
    /// </summary>
    /// <param name="input">The text to encode; may be null.</param>
    /// <param name="escaped">
    /// When true, the quotation mark, ampersand, less-than and
    /// greater-than characters are copied through unchanged instead of
    /// being encoded.
    /// </param>
    /// <returns>
    /// The encoded text, or <paramref name="input"/> itself when it is
    /// null or needs no encoding.
    /// </returns>
    public static string EncodeNamed(string input, bool escaped)
    {
        var sb = EncodeNamedImpl(input, escaped, null);
        return sb != null ? sb.ToString() : input;
    }

    /// <summary>
    /// Same encoding as <see cref="EncodeNamed(string, bool)"/>, except
    /// that the result is appended to a caller-supplied
    /// <see cref="StringBuilder"/> instead of being returned.
    /// </summary>
    /// <param name="input">The text to encode; null appends nothing.</param>
    /// <param name="escaped">
    /// When true, the quotation mark, ampersand, less-than and
    /// greater-than characters are copied through unchanged instead of
    /// being encoded.
    /// </param>
    /// <param name="sb">The builder that receives the encoded text.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="sb"/> is null.
    /// </exception>
    public static void EncodeNamedTo(string input, bool escaped, StringBuilder sb)
    {
        if (sb == null) throw new ArgumentNullException("sb");
        EncodeNamedImpl(input, escaped, sb);
    }

    /// <summary>
    /// Shared implementation of the encoder.
    /// </summary>
    /// <param name="input">The text to encode; may be null.</param>
    /// <param name="escaped">
    /// When true, the four markup characters are not encoded.
    /// </param>
    /// <param name="sb">
    /// The builder to append to, or null to have one allocated lazily
    /// when the first character needing encoding is found.
    /// </param>
    /// <returns>
    /// The builder holding the output, or null when
    /// <paramref name="input"/> is null or when no builder was supplied
    /// and nothing needed encoding.
    /// </returns>
    private static StringBuilder EncodeNamedImpl(string input, bool escaped, StringBuilder sb)
    {
        if (input == null)
            return null;

        var start = 0;
        for (var index = 0; index < input.Length; index++)
        {
            var ch = input[index];
            if (IsPlainCharacter(ch, escaped))
                continue;

            if (sb == null)
                sb = new StringBuilder(input.Length + 10);

            // Flush the run of plain characters preceding this one.
            sb.Append(input, start, index - start);

            int code = ch;
            if (char.IsHighSurrogate(ch)
                && index + 1 < input.Length
                && char.IsLowSurrogate(input[index + 1]))
            {
                // A well-formed surrogate pair is a single code point and
                // must be written as a single numeric reference; encoding
                // the two halves separately yields invalid references.
                code = char.ConvertToUtf32(ch, input[index + 1]);
                index++;
            }
            start = index + 1;

            var name = code < _entityNames.Length ? _entityNames[code] : null;
            if (name != null)
            {
                sb.Append(name);
            }
            else
            {
                sb.Append("&#")
                  .Append(code.ToString(NumberFormatInfo.InvariantInfo))
                  .Append(';');
            }
        }

        if (sb == null)
            return null;

        if (start < input.Length)
            sb.Append(input, start, input.Length - start);
        return sb;
    }

    /// <summary>
    /// Tells whether a character is written to the output as itself
    /// rather than as an entity reference: TAB, LF, CR, and U+0020
    /// through U+007F excluding (unless <paramref name="allowMarkup"/>
    /// is true) the quotation mark, ampersand, less-than and
    /// greater-than signs.
    /// </summary>
    private static bool IsPlainCharacter(char ch, bool allowMarkup)
    {
        if (ch == '\t' || ch == '\n' || ch == '\r')
            return true;
        if (ch < 0x20 || ch >= 0x80)
            return false;
        return allowMarkup
            || (ch != '"' && ch != '&' && ch != '<' && ch != '>');
    }

    /// <summary>
    /// Builds the code-unit-to-entity-name lookup table.
    /// </summary>
    static HtmlEncoder()
    {
        // Sized to hold the highest index assigned below (9830, diams).
        var e = new string[9831]; // Approx. 38.4K on a 32-bit machine
        e[34] = "&quot;"; // quot = quotation mark = APL quote, U+0022 ISOnum
        e[38] = "&amp;"; // amp = ampersand, U+0026 ISOnum
        e[60] = "&lt;"; // lt = less-than sign, U+003C ISOnum
        e[62] = "&gt;"; // gt = greater-than sign, U+003E ISOnum
        // Latin-1 character entities
        e[160] = "&nbsp;"; // nbsp = no-break space = non-breaking space, U+00A0 ISOnum
        e[161] = "&iexcl;"; // iexcl = inverted exclamation mark, U+00A1 ISOnum
        e[162] = "&cent;"; // cent = cent sign, U+00A2 ISOnum
        e[163] = "&pound;"; // pound = pound sign, U+00A3 ISOnum
        e[164] = "&curren;"; // curren = currency sign, U+00A4 ISOnum
        e[165] = "&yen;"; // yen = yen sign = yuan sign, U+00A5 ISOnum
        e[166] = "&brvbar;"; // brvbar = broken bar = broken vertical bar, U+00A6 ISOnum
        e[167] = "&sect;"; // sect = section sign, U+00A7 ISOnum
        e[168] = "&uml;"; // uml = diaeresis = spacing diaeresis, U+00A8 ISOdia
        e[169] = "&copy;"; // copy = copyright sign, U+00A9 ISOnum
        e[170] = "&ordf;"; // ordf = feminine ordinal indicator, U+00AA ISOnum
        e[171] = "&laquo;"; // laquo = left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
        e[172] = "&not;"; // not = not sign, U+00AC ISOnum
        e[173] = "&shy;"; // shy = soft hyphen = discretionary hyphen, U+00AD ISOnum
        e[174] = "&reg;"; // reg = registered sign = registered trade mark sign, U+00AE ISOnum
        e[175] = "&macr;"; // macr = macron = spacing macron = overline = APL overbar, U+00AF ISOdia
        e[176] = "&deg;"; // deg = degree sign, U+00B0 ISOnum
        e[177] = "&plusmn;"; // plusmn = plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
        e[178] = "&sup2;"; // sup2 = superscript two = superscript digit two = squared, U+00B2 ISOnum
        e[179] = "&sup3;"; // sup3 = superscript three = superscript digit three = cubed, U+00B3 ISOnum
        e[180] = "&acute;"; // acute = acute accent = spacing acute, U+00B4 ISOdia
        e[181] = "&micro;"; // micro = micro sign, U+00B5 ISOnum
        e[182] = "&para;"; // para = pilcrow sign = paragraph sign, U+00B6 ISOnum
        e[183] = "&middot;"; // middot = middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
        e[184] = "&cedil;"; // cedil = cedilla = spacing cedilla, U+00B8 ISOdia
        e[185] = "&sup1;"; // sup1 = superscript one = superscript digit one, U+00B9 ISOnum
        e[186] = "&ordm;"; // ordm = masculine ordinal indicator, U+00BA ISOnum
        e[187] = "&raquo;"; // raquo = right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
        e[188] = "&frac14;"; // frac14 = vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
        e[189] = "&frac12;"; // frac12 = vulgar fraction one half = fraction one half, U+00BD ISOnum
        e[190] = "&frac34;"; // frac34 = vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
        e[191] = "&iquest;"; // iquest = inverted question mark = turned question mark, U+00BF ISOnum
        e[192] = "&Agrave;"; // Agrave = latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
        e[193] = "&Aacute;"; // Aacute = latin capital letter A with acute, U+00C1 ISOlat1
        e[194] = "&Acirc;"; // Acirc = latin capital letter A with circumflex, U+00C2 ISOlat1
        e[195] = "&Atilde;"; // Atilde = latin capital letter A with tilde, U+00C3 ISOlat1
        e[196] = "&Auml;"; // Auml = latin capital letter A with diaeresis, U+00C4 ISOlat1
        e[197] = "&Aring;"; // Aring = latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
        e[198] = "&AElig;"; // AElig = latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
        e[199] = "&Ccedil;"; // Ccedil = latin capital letter C with cedilla, U+00C7 ISOlat1
        e[200] = "&Egrave;"; // Egrave = latin capital letter E with grave, U+00C8 ISOlat1
        e[201] = "&Eacute;"; // Eacute = latin capital letter E with acute, U+00C9 ISOlat1
        e[202] = "&Ecirc;"; // Ecirc = latin capital letter E with circumflex, U+00CA ISOlat1
        e[203] = "&Euml;"; // Euml = latin capital letter E with diaeresis, U+00CB ISOlat1
        e[204] = "&Igrave;"; // Igrave = latin capital letter I with grave, U+00CC ISOlat1
        e[205] = "&Iacute;"; // Iacute = latin capital letter I with acute, U+00CD ISOlat1
        e[206] = "&Icirc;"; // Icirc = latin capital letter I with circumflex, U+00CE ISOlat1
        e[207] = "&Iuml;"; // Iuml = latin capital letter I with diaeresis, U+00CF ISOlat1
        e[208] = "&ETH;"; // ETH = latin capital letter ETH, U+00D0 ISOlat1
        e[209] = "&Ntilde;"; // Ntilde = latin capital letter N with tilde, U+00D1 ISOlat1
        e[210] = "&Ograve;"; // Ograve = latin capital letter O with grave, U+00D2 ISOlat1
        e[211] = "&Oacute;"; // Oacute = latin capital letter O with acute, U+00D3 ISOlat1
        e[212] = "&Ocirc;"; // Ocirc = latin capital letter O with circumflex, U+00D4 ISOlat1
        e[213] = "&Otilde;"; // Otilde = latin capital letter O with tilde, U+00D5 ISOlat1
        e[214] = "&Ouml;"; // Ouml = latin capital letter O with diaeresis, U+00D6 ISOlat1
        e[215] = "&times;"; // times = multiplication sign, U+00D7 ISOnum
        e[216] = "&Oslash;"; // Oslash = latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
        e[217] = "&Ugrave;"; // Ugrave = latin capital letter U with grave, U+00D9 ISOlat1
        e[218] = "&Uacute;"; // Uacute = latin capital letter U with acute, U+00DA ISOlat1
        e[219] = "&Ucirc;"; // Ucirc = latin capital letter U with circumflex, U+00DB ISOlat1
        e[220] = "&Uuml;"; // Uuml = latin capital letter U with diaeresis, U+00DC ISOlat1
        e[221] = "&Yacute;"; // Yacute = latin capital letter Y with acute, U+00DD ISOlat1
        e[222] = "&THORN;"; // THORN = latin capital letter THORN, U+00DE ISOlat1
        e[223] = "&szlig;"; // szlig = latin small letter sharp s = ess-zed, U+00DF ISOlat1
        e[224] = "&agrave;"; // agrave = latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
        e[225] = "&aacute;"; // aacute = latin small letter a with acute, U+00E1 ISOlat1
        e[226] = "&acirc;"; // acirc = latin small letter a with circumflex, U+00E2 ISOlat1
        e[227] = "&atilde;"; // atilde = latin small letter a with tilde, U+00E3 ISOlat1
        e[228] = "&auml;"; // auml = latin small letter a with diaeresis, U+00E4 ISOlat1
        e[229] = "&aring;"; // aring = latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
        e[230] = "&aelig;"; // aelig = latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
        e[231] = "&ccedil;"; // ccedil = latin small letter c with cedilla, U+00E7 ISOlat1
        e[232] = "&egrave;"; // egrave = latin small letter e with grave, U+00E8 ISOlat1
        e[233] = "&eacute;"; // eacute = latin small letter e with acute, U+00E9 ISOlat1
        e[234] = "&ecirc;"; // ecirc = latin small letter e with circumflex, U+00EA ISOlat1
        e[235] = "&euml;"; // euml = latin small letter e with diaeresis, U+00EB ISOlat1
        e[236] = "&igrave;"; // igrave = latin small letter i with grave, U+00EC ISOlat1
        e[237] = "&iacute;"; // iacute = latin small letter i with acute, U+00ED ISOlat1
        e[238] = "&icirc;"; // icirc = latin small letter i with circumflex, U+00EE ISOlat1
        e[239] = "&iuml;"; // iuml = latin small letter i with diaeresis, U+00EF ISOlat1
        e[240] = "&eth;"; // eth = latin small letter eth, U+00F0 ISOlat1
        e[241] = "&ntilde;"; // ntilde = latin small letter n with tilde, U+00F1 ISOlat1
        e[242] = "&ograve;"; // ograve = latin small letter o with grave, U+00F2 ISOlat1
        e[243] = "&oacute;"; // oacute = latin small letter o with acute, U+00F3 ISOlat1
        e[244] = "&ocirc;"; // ocirc = latin small letter o with circumflex, U+00F4 ISOlat1
        e[245] = "&otilde;"; // otilde = latin small letter o with tilde, U+00F5 ISOlat1
        e[246] = "&ouml;"; // ouml = latin small letter o with diaeresis, U+00F6 ISOlat1
        e[247] = "&divide;"; // divide = division sign, U+00F7 ISOnum
        e[248] = "&oslash;"; // oslash = latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
        e[249] = "&ugrave;"; // ugrave = latin small letter u with grave, U+00F9 ISOlat1
        e[250] = "&uacute;"; // uacute = latin small letter u with acute, U+00FA ISOlat1
        e[251] = "&ucirc;"; // ucirc = latin small letter u with circumflex, U+00FB ISOlat1
        e[252] = "&uuml;"; // uuml = latin small letter u with diaeresis, U+00FC ISOlat1
        e[253] = "&yacute;"; // yacute = latin small letter y with acute, U+00FD ISOlat1
        e[254] = "&thorn;"; // thorn = latin small letter thorn, U+00FE ISOlat1
        e[255] = "&yuml;"; // yuml = latin small letter y with diaeresis, U+00FF ISOlat1
        // Extended Entities defined in HTML 4: Symbols
        e[402] = "&fnof;"; // fnof = latin small f with hook = function = florin, U+0192 ISOtech
        e[913] = "&Alpha;"; // Alpha = greek capital letter alpha, U+0391
        e[914] = "&Beta;"; // Beta = greek capital letter beta, U+0392
        e[915] = "&Gamma;"; // Gamma = greek capital letter gamma, U+0393 ISOgrk3
        e[916] = "&Delta;"; // Delta = greek capital letter delta, U+0394 ISOgrk3
        e[917] = "&Epsilon;"; // Epsilon = greek capital letter epsilon, U+0395
        e[918] = "&Zeta;"; // Zeta = greek capital letter zeta, U+0396
        e[919] = "&Eta;"; // Eta = greek capital letter eta, U+0397
        e[920] = "&Theta;"; // Theta = greek capital letter theta, U+0398 ISOgrk3
        e[921] = "&Iota;"; // Iota = greek capital letter iota, U+0399
        e[922] = "&Kappa;"; // Kappa = greek capital letter kappa, U+039A
        e[923] = "&Lambda;"; // Lambda = greek capital letter lambda, U+039B ISOgrk3
        e[924] = "&Mu;"; // Mu = greek capital letter mu, U+039C
        e[925] = "&Nu;"; // Nu = greek capital letter nu, U+039D
        e[926] = "&Xi;"; // Xi = greek capital letter xi, U+039E ISOgrk3
        e[927] = "&Omicron;"; // Omicron = greek capital letter omicron, U+039F
        e[928] = "&Pi;"; // Pi = greek capital letter pi, U+03A0 ISOgrk3
        e[929] = "&Rho;"; // Rho = greek capital letter rho, U+03A1
        e[931] = "&Sigma;"; // Sigma = greek capital letter sigma, U+03A3 ISOgrk3
        e[932] = "&Tau;"; // Tau = greek capital letter tau, U+03A4
        e[933] = "&Upsilon;"; // Upsilon = greek capital letter upsilon, U+03A5 ISOgrk3
        e[934] = "&Phi;"; // Phi = greek capital letter phi, U+03A6 ISOgrk3
        e[935] = "&Chi;"; // Chi = greek capital letter chi, U+03A7
        e[936] = "&Psi;"; // Psi = greek capital letter psi, U+03A8 ISOgrk3
        e[937] = "&Omega;"; // Omega = greek capital letter omega, U+03A9 ISOgrk3
        e[945] = "&alpha;"; // alpha = greek small letter alpha, U+03B1 ISOgrk3
        e[946] = "&beta;"; // beta = greek small letter beta, U+03B2 ISOgrk3
        e[947] = "&gamma;"; // gamma = greek small letter gamma, U+03B3 ISOgrk3
        e[948] = "&delta;"; // delta = greek small letter delta, U+03B4 ISOgrk3
        e[949] = "&epsilon;"; // epsilon = greek small letter epsilon, U+03B5 ISOgrk3
        e[950] = "&zeta;"; // zeta = greek small letter zeta, U+03B6 ISOgrk3
        e[951] = "&eta;"; // eta = greek small letter eta, U+03B7 ISOgrk3
        e[952] = "&theta;"; // theta = greek small letter theta, U+03B8 ISOgrk3
        e[953] = "&iota;"; // iota = greek small letter iota, U+03B9 ISOgrk3
        e[954] = "&kappa;"; // kappa = greek small letter kappa, U+03BA ISOgrk3
        e[955] = "&lambda;"; // lambda = greek small letter lambda, U+03BB ISOgrk3
        e[956] = "&mu;"; // mu = greek small letter mu, U+03BC ISOgrk3
        e[957] = "&nu;"; // nu = greek small letter nu, U+03BD ISOgrk3
        e[958] = "&xi;"; // xi = greek small letter xi, U+03BE ISOgrk3
        e[959] = "&omicron;"; // omicron = greek small letter omicron, U+03BF NEW
        e[960] = "&pi;"; // pi = greek small letter pi, U+03C0 ISOgrk3
        e[961] = "&rho;"; // rho = greek small letter rho, U+03C1 ISOgrk3
        e[962] = "&sigmaf;"; // sigmaf = greek small letter final sigma, U+03C2 ISOgrk3
        e[963] = "&sigma;"; // sigma = greek small letter sigma, U+03C3 ISOgrk3
        e[964] = "&tau;"; // tau = greek small letter tau, U+03C4 ISOgrk3
        e[965] = "&upsilon;"; // upsilon = greek small letter upsilon, U+03C5 ISOgrk3
        e[966] = "&phi;"; // phi = greek small letter phi, U+03C6 ISOgrk3
        e[967] = "&chi;"; // chi = greek small letter chi, U+03C7 ISOgrk3
        e[968] = "&psi;"; // psi = greek small letter psi, U+03C8 ISOgrk3
        e[969] = "&omega;"; // omega = greek small letter omega, U+03C9 ISOgrk3
        e[977] = "&thetasym;"; // thetasym = greek small letter theta symbol, U+03D1 NEW
        e[978] = "&upsih;"; // upsih = greek upsilon with hook symbol, U+03D2 NEW
        e[982] = "&piv;"; // piv = greek pi symbol, U+03D6 ISOgrk3
        e[8226] = "&bull;"; // bull = bullet = black small circle, U+2022 ISOpub
        e[8230] = "&hellip;"; // hellip = horizontal ellipsis = three dot leader, U+2026 ISOpub
        e[8242] = "&prime;"; // prime = prime = minutes = feet, U+2032 ISOtech
        e[8243] = "&Prime;"; // Prime = double prime = seconds = inches, U+2033 ISOtech
        e[8254] = "&oline;"; // oline = overline = spacing overscore, U+203E NEW
        e[8260] = "&frasl;"; // frasl = fraction slash, U+2044 NEW
        e[8472] = "&weierp;"; // weierp = script capital P = power set = Weierstrass p, U+2118 ISOamso
        e[8465] = "&image;"; // image = blackletter capital I = imaginary part, U+2111 ISOamso
        e[8476] = "&real;"; // real = blackletter capital R = real part symbol, U+211C ISOamso
        e[8482] = "&trade;"; // trade = trade mark sign, U+2122 ISOnum
        e[8501] = "&alefsym;"; // alefsym = alef symbol = first transfinite cardinal, U+2135 NEW
        e[8592] = "&larr;"; // larr = leftwards arrow, U+2190 ISOnum
        e[8593] = "&uarr;"; // uarr = upwards arrow, U+2191 ISOnum
        e[8594] = "&rarr;"; // rarr = rightwards arrow, U+2192 ISOnum
        e[8595] = "&darr;"; // darr = downwards arrow, U+2193 ISOnum
        e[8596] = "&harr;"; // harr = left right arrow, U+2194 ISOamsa
        e[8629] = "&crarr;"; // crarr = downwards arrow with corner leftwards = carriage return, U+21B5 NEW
        e[8656] = "&lArr;"; // lArr = leftwards double arrow, U+21D0 ISOtech
        e[8657] = "&uArr;"; // uArr = upwards double arrow, U+21D1 ISOamsa
        e[8658] = "&rArr;"; // rArr = rightwards double arrow, U+21D2 ISOtech
        e[8659] = "&dArr;"; // dArr = downwards double arrow, U+21D3 ISOamsa
        e[8660] = "&hArr;"; // hArr = left right double arrow, U+21D4 ISOamsa
        e[8704] = "&forall;"; // forall = for all, U+2200 ISOtech
        e[8706] = "&part;"; // part = partial differential, U+2202 ISOtech
        e[8707] = "&exist;"; // exist = there exists, U+2203 ISOtech
        e[8709] = "&empty;"; // empty = empty set = null set = diameter, U+2205 ISOamso
        e[8711] = "&nabla;"; // nabla = nabla = backward difference, U+2207 ISOtech
        e[8712] = "&isin;"; // isin = element of, U+2208 ISOtech
        e[8713] = "&notin;"; // notin = not an element of, U+2209 ISOtech
        e[8715] = "&ni;"; // ni = contains as member, U+220B ISOtech
        e[8719] = "&prod;"; // prod = n-ary product = product sign, U+220F ISOamsb
        e[8721] = "&sum;"; // sum = n-ary summation, U+2211 ISOamsb
        e[8722] = "&minus;"; // minus = minus sign, U+2212 ISOtech
        e[8727] = "&lowast;"; // lowast = asterisk operator, U+2217 ISOtech
        e[8730] = "&radic;"; // radic = square root = radical sign, U+221A ISOtech
        e[8733] = "&prop;"; // prop = proportional to, U+221D ISOtech
        e[8734] = "&infin;"; // infin = infinity, U+221E ISOtech
        e[8736] = "&ang;"; // ang = angle, U+2220 ISOamso
        e[8743] = "&and;"; // and = logical and = wedge, U+2227 ISOtech
        e[8744] = "&or;"; // or = logical or = vee, U+2228 ISOtech
        e[8745] = "&cap;"; // cap = intersection = cap, U+2229 ISOtech
        e[8746] = "&cup;"; // cup = union = cup, U+222A ISOtech
        e[8747] = "&int;"; // int = integral, U+222B ISOtech
        e[8756] = "&there4;"; // there4 = therefore, U+2234 ISOtech
        e[8764] = "&sim;"; // sim = tilde operator = varies with = similar to, U+223C ISOtech
        e[8773] = "&cong;"; // cong = approximately equal to, U+2245 ISOtech
        e[8776] = "&asymp;"; // asymp = almost equal to = asymptotic to, U+2248 ISOamsr
        e[8800] = "&ne;"; // ne = not equal to, U+2260 ISOtech
        e[8801] = "&equiv;"; // equiv = identical to, U+2261 ISOtech
        e[8804] = "&le;"; // le = less-than or equal to, U+2264 ISOtech
        e[8805] = "&ge;"; // ge = greater-than or equal to, U+2265 ISOtech
        e[8834] = "&sub;"; // sub = subset of, U+2282 ISOtech
        e[8835] = "&sup;"; // sup = superset of, U+2283 ISOtech
        e[8836] = "&nsub;"; // nsub = not a subset of, U+2284 ISOamsn
        e[8838] = "&sube;"; // sube = subset of or equal to, U+2286 ISOtech
        e[8839] = "&supe;"; // supe = superset of or equal to, U+2287 ISOtech
        e[8853] = "&oplus;"; // oplus = circled plus = direct sum, U+2295 ISOamsb
        e[8855] = "&otimes;"; // otimes = circled times = vector product, U+2297 ISOamsb
        e[8869] = "&perp;"; // perp = up tack = orthogonal to = perpendicular, U+22A5 ISOtech
        e[8901] = "&sdot;"; // sdot = dot operator, U+22C5 ISOamsb
        e[8968] = "&lceil;"; // lceil = left ceiling = apl upstile, U+2308 ISOamsc
        e[8969] = "&rceil;"; // rceil = right ceiling, U+2309 ISOamsc
        e[8970] = "&lfloor;"; // lfloor = left floor = apl downstile, U+230A ISOamsc
        e[8971] = "&rfloor;"; // rfloor = right floor, U+230B ISOamsc
        e[9001] = "&lang;"; // lang = left-pointing angle bracket = bra, U+2329 ISOtech
        e[9002] = "&rang;"; // rang = right-pointing angle bracket = ket, U+232A ISOtech
        e[9674] = "&loz;"; // loz = lozenge, U+25CA ISOpub
        e[9824] = "&spades;"; // spades = black spade suit, U+2660 ISOpub
        e[9827] = "&clubs;"; // clubs = black club suit = shamrock, U+2663 ISOpub
        e[9829] = "&hearts;"; // hearts = black heart suit = valentine, U+2665 ISOpub
        e[9830] = "&diams;"; // diams = black diamond suit, U+2666 ISOpub
        // Extended Entities defined in HTML 4: Special
        e[338] = "&OElig;"; // OElig = latin capital ligature OE, U+0152 ISOlat2
        e[339] = "&oelig;"; // oelig = latin small ligature oe, U+0153 ISOlat2
        e[352] = "&Scaron;"; // Scaron = latin capital letter S with caron, U+0160 ISOlat2
        e[353] = "&scaron;"; // scaron = latin small letter s with caron, U+0161 ISOlat2
        e[376] = "&Yuml;"; // Yuml = latin capital letter Y with diaeresis, U+0178 ISOlat2
        e[710] = "&circ;"; // circ = modifier letter circumflex accent, U+02C6 ISOpub
        e[732] = "&tilde;"; // tilde = small tilde, U+02DC ISOdia
        e[8194] = "&ensp;"; // ensp = en space, U+2002 ISOpub
        e[8195] = "&emsp;"; // emsp = em space, U+2003 ISOpub
        e[8201] = "&thinsp;"; // thinsp = thin space, U+2009 ISOpub
        e[8204] = "&zwnj;"; // zwnj = zero width non-joiner, U+200C NEW RFC 2070
        e[8205] = "&zwj;"; // zwj = zero width joiner, U+200D NEW RFC 2070
        e[8206] = "&lrm;"; // lrm = left-to-right mark, U+200E NEW RFC 2070
        e[8207] = "&rlm;"; // rlm = right-to-left mark, U+200F NEW RFC 2070
        e[8211] = "&ndash;"; // ndash = en dash, U+2013 ISOpub
        e[8212] = "&mdash;"; // mdash = em dash, U+2014 ISOpub
        e[8216] = "&lsquo;"; // lsquo = left single quotation mark, U+2018 ISOnum
        e[8217] = "&rsquo;"; // rsquo = right single quotation mark, U+2019 ISOnum
        e[8218] = "&sbquo;"; // sbquo = single low-9 quotation mark, U+201A NEW
        e[8220] = "&ldquo;"; // ldquo = left double quotation mark, U+201C ISOnum
        e[8221] = "&rdquo;"; // rdquo = right double quotation mark, U+201D ISOnum
        e[8222] = "&bdquo;"; // bdquo = double low-9 quotation mark, U+201E NEW
        e[8224] = "&dagger;"; // dagger = dagger, U+2020 ISOpub
        e[8225] = "&Dagger;"; // Dagger = double dagger, U+2021 ISOpub
        e[8240] = "&permil;"; // permil = per mille sign, U+2030 ISOtech
        e[8249] = "&lsaquo;"; // lsaquo = single left-pointing angle quotation mark, U+2039 ISO proposed
        e[8250] = "&rsaquo;"; // rsaquo = single right-pointing angle quotation mark, U+203A ISO proposed
        e[8364] = "&euro;"; // euro = euro sign, U+20AC NEW
        _entityNames = e;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment