Skip to content

Instantly share code, notes, and snippets.

@Seddryck
Created March 28, 2018 04:04
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Seddryck/36d24e05c233cccf90e6a88a1af39c68 to your computer and use it in GitHub Desktop.
Save Seddryck/36d24e05c233cccf90e6a88a1af39c68 to your computer and use it in GitHub Desktop.
Function to translate an HTML entity (such as &clubs; or &Agrave;) to the equivalent character
// Translates a named HTML character entity into the character it stands for.
//
// Parameter:
//   entity - either the full entity reference ("&eacute;") or just its name ("eacute").
//            The leading "&" and trailing ";" are stripped only when BOTH are present.
//
// Returns:
//   The single-character text for the entity. The name is first looked up with its
//   exact casing; if that finds nothing, a case-insensitive lookup is attempted and
//   succeeds only when it matches exactly one known entity (so "EURO" resolves, while
//   "AGRAVE" is ambiguous between "Agrave" and "agrave").
//   When no unique match exists, the function does NOT raise: it returns the record
//   built by Error.Record (Reason / Message / Detail) as an ordinary value.
//   NOTE(review): returning the error record instead of raising it looks unintended,
//   but it is kept as-is because callers may rely on it - confirm before changing.
//
// Numeric references ("&#9827;", "&#x2663;") are not handled by this function.
shared Character.FromHtmlEntity = (entity as text) =>
    let
        // Known entity names and their Unicode code points.
        entities =
        {
            // Markup-significant characters (previously missing from the table)
            [Name="quot", Number=34]
            , [Name="amp", Number=38]
            , [Name="apos", Number=39]
            , [Name="lt", Number=60]
            , [Name="gt", Number=62]
            // Latin-1 letters
            , [Name="Agrave", Number=192]
            , [Name="Aacute", Number=193]
            , [Name="Acirc", Number=194]
            , [Name="Atilde", Number=195]
            , [Name="Auml", Number=196]
            , [Name="Aring", Number=197]
            , [Name="AElig", Number=198]
            , [Name="Ccedil", Number=199]
            , [Name="Egrave", Number=200]
            , [Name="Eacute", Number=201]
            , [Name="Ecirc", Number=202]
            , [Name="Euml", Number=203]
            , [Name="Igrave", Number=204]
            , [Name="Iacute", Number=205]
            , [Name="Icirc", Number=206]
            , [Name="Iuml", Number=207]
            , [Name="ETH", Number=208]
            , [Name="Ntilde", Number=209]
            , [Name="Ograve", Number=210]
            , [Name="Oacute", Number=211]
            , [Name="Ocirc", Number=212]
            , [Name="Otilde", Number=213]
            , [Name="Ouml", Number=214]
            , [Name="Oslash", Number=216]
            , [Name="Ugrave", Number=217]
            , [Name="Uacute", Number=218]
            , [Name="Ucirc", Number=219]
            , [Name="Uuml", Number=220]
            , [Name="Yacute", Number=221]
            , [Name="THORN", Number=222]
            , [Name="szlig", Number=223]
            , [Name="agrave", Number=224]
            , [Name="aacute", Number=225]
            , [Name="acirc", Number=226]
            , [Name="atilde", Number=227]
            , [Name="auml", Number=228]
            , [Name="aring", Number=229]
            , [Name="aelig", Number=230]
            , [Name="ccedil", Number=231]
            , [Name="egrave", Number=232]
            , [Name="eacute", Number=233]
            , [Name="ecirc", Number=234]
            , [Name="euml", Number=235]
            , [Name="igrave", Number=236]
            , [Name="iacute", Number=237]
            , [Name="icirc", Number=238]
            , [Name="iuml", Number=239]
            , [Name="eth", Number=240]
            , [Name="ntilde", Number=241]
            , [Name="ograve", Number=242]
            , [Name="oacute", Number=243]
            , [Name="ocirc", Number=244]
            , [Name="otilde", Number=245]
            , [Name="ouml", Number=246]
            , [Name="oslash", Number=248]
            , [Name="ugrave", Number=249]
            , [Name="uacute", Number=250]
            , [Name="ucirc", Number=251]
            , [Name="uuml", Number=252]
            , [Name="yacute", Number=253]
            , [Name="thorn", Number=254]
            , [Name="yuml", Number=255]
            // Latin-1 symbols
            , [Name="nbsp", Number=160]
            , [Name="iexcl", Number=161]
            , [Name="cent", Number=162]
            , [Name="pound", Number=163]
            , [Name="curren", Number=164]
            , [Name="yen", Number=165]
            , [Name="brvbar", Number=166]
            , [Name="sect", Number=167]
            , [Name="uml", Number=168]
            , [Name="copy", Number=169]
            , [Name="ordf", Number=170]
            , [Name="laquo", Number=171]
            , [Name="not", Number=172]
            , [Name="shy", Number=173]
            , [Name="reg", Number=174]
            , [Name="macr", Number=175]
            , [Name="deg", Number=176]
            , [Name="plusmn", Number=177]
            , [Name="sup2", Number=178]
            , [Name="sup3", Number=179]
            , [Name="acute", Number=180]
            , [Name="micro", Number=181]
            , [Name="para", Number=182]
            , [Name="middot", Number=183]
            , [Name="cedil", Number=184]
            , [Name="sup1", Number=185]
            , [Name="ordm", Number=186]
            , [Name="raquo", Number=187]
            , [Name="frac14", Number=188]
            , [Name="frac12", Number=189]
            , [Name="frac34", Number=190]
            , [Name="iquest", Number=191]
            , [Name="times", Number=215]
            , [Name="divide", Number=247]
            // Mathematical operators
            , [Name="forall", Number=8704]
            , [Name="part", Number=8706]
            , [Name="exist", Number=8707]
            , [Name="empty", Number=8709]
            , [Name="nabla", Number=8711]
            , [Name="isin", Number=8712]
            , [Name="notin", Number=8713]
            , [Name="ni", Number=8715]
            , [Name="prod", Number=8719]
            , [Name="sum", Number=8721]
            , [Name="minus", Number=8722]
            , [Name="lowast", Number=8727]
            , [Name="radic", Number=8730]
            , [Name="prop", Number=8733]
            , [Name="infin", Number=8734]
            , [Name="ang", Number=8736]
            , [Name="and", Number=8743]
            , [Name="or", Number=8744]
            , [Name="cap", Number=8745]
            , [Name="cup", Number=8746]
            , [Name="int", Number=8747]
            , [Name="there4", Number=8756]
            , [Name="sim", Number=8764]
            , [Name="cong", Number=8773]
            , [Name="asymp", Number=8776]
            , [Name="ne", Number=8800]
            , [Name="equiv", Number=8801]
            , [Name="le", Number=8804]
            , [Name="ge", Number=8805]
            , [Name="sub", Number=8834]
            , [Name="sup", Number=8835]
            , [Name="nsub", Number=8836]
            , [Name="sube", Number=8838]
            , [Name="supe", Number=8839]
            , [Name="oplus", Number=8853]
            , [Name="otimes", Number=8855]
            , [Name="perp", Number=8869]
            , [Name="sdot", Number=8901]
            // Greek letters
            , [Name="Alpha", Number=913]
            , [Name="Beta", Number=914]
            , [Name="Gamma", Number=915]
            , [Name="Delta", Number=916]
            , [Name="Epsilon", Number=917]
            , [Name="Zeta", Number=918]
            , [Name="Eta", Number=919]
            , [Name="Theta", Number=920]
            , [Name="Iota", Number=921]
            , [Name="Kappa", Number=922]
            , [Name="Lambda", Number=923]
            , [Name="Mu", Number=924]
            , [Name="Nu", Number=925]
            , [Name="Xi", Number=926]
            , [Name="Omicron", Number=927]
            , [Name="Pi", Number=928]
            , [Name="Rho", Number=929]
            , [Name="Sigma", Number=931]
            , [Name="Tau", Number=932]
            , [Name="Upsilon", Number=933]
            , [Name="Phi", Number=934]
            , [Name="Chi", Number=935]
            , [Name="Psi", Number=936]
            , [Name="Omega", Number=937]
            , [Name="alpha", Number=945]
            , [Name="beta", Number=946]
            , [Name="gamma", Number=947]
            , [Name="delta", Number=948]
            , [Name="epsilon", Number=949]
            , [Name="zeta", Number=950]
            , [Name="eta", Number=951]
            , [Name="theta", Number=952]
            , [Name="iota", Number=953]
            , [Name="kappa", Number=954]
            , [Name="lambda", Number=955]
            , [Name="mu", Number=956]
            , [Name="nu", Number=957]
            , [Name="xi", Number=958]
            , [Name="omicron", Number=959]
            , [Name="pi", Number=960]
            , [Name="rho", Number=961]
            , [Name="sigmaf", Number=962]
            , [Name="sigma", Number=963]
            , [Name="tau", Number=964]
            , [Name="upsilon", Number=965]
            , [Name="phi", Number=966]
            , [Name="chi", Number=967]
            , [Name="psi", Number=968]
            , [Name="omega", Number=969]
            , [Name="thetasym", Number=977]
            , [Name="upsih", Number=978]
            , [Name="piv", Number=982]
            // Extended Latin, spacing, punctuation
            , [Name="OElig", Number=338]
            , [Name="oelig", Number=339]
            , [Name="Scaron", Number=352]
            , [Name="scaron", Number=353]
            , [Name="Yuml", Number=376]
            , [Name="fnof", Number=402]
            , [Name="circ", Number=710]
            , [Name="tilde", Number=732]
            , [Name="ensp", Number=8194]
            , [Name="emsp", Number=8195]
            , [Name="thinsp", Number=8201]
            , [Name="zwnj", Number=8204]
            , [Name="zwj", Number=8205]
            , [Name="lrm", Number=8206]
            , [Name="rlm", Number=8207]
            , [Name="ndash", Number=8211]
            , [Name="mdash", Number=8212]
            , [Name="lsquo", Number=8216]
            , [Name="rsquo", Number=8217]
            , [Name="sbquo", Number=8218]
            , [Name="ldquo", Number=8220]
            , [Name="rdquo", Number=8221]
            , [Name="bdquo", Number=8222]
            , [Name="dagger", Number=8224]
            , [Name="Dagger", Number=8225]
            , [Name="bull", Number=8226]
            , [Name="hellip", Number=8230]
            , [Name="permil", Number=8240]
            , [Name="prime", Number=8242]
            , [Name="Prime", Number=8243]
            , [Name="lsaquo", Number=8249]
            , [Name="rsaquo", Number=8250]
            , [Name="oline", Number=8254]
            , [Name="frasl", Number=8260]
            , [Name="euro", Number=8364]
            // Letter-like symbols
            , [Name="image", Number=8465]
            , [Name="weierp", Number=8472]
            , [Name="real", Number=8476]
            , [Name="trade", Number=8482]
            , [Name="alefsym", Number=8501]
            // Arrows
            , [Name="larr", Number=8592]
            , [Name="uarr", Number=8593]
            , [Name="rarr", Number=8594]
            , [Name="darr", Number=8595]
            , [Name="harr", Number=8596]
            , [Name="crarr", Number=8629]
            // Double arrows differ from the single arrows above only by casing; without
            // these entries "&rArr;" fell through to the case-insensitive lookup and was
            // silently translated to the single arrow.
            , [Name="lArr", Number=8656]
            , [Name="uArr", Number=8657]
            , [Name="rArr", Number=8658]
            , [Name="dArr", Number=8659]
            , [Name="hArr", Number=8660]
            // Miscellaneous technical and shapes
            , [Name="lceil", Number=8968]
            , [Name="rceil", Number=8969]
            , [Name="lfloor", Number=8970]
            , [Name="rfloor", Number=8971]
            , [Name="loz", Number=9674]
            , [Name="spades", Number=9824]
            , [Name="clubs", Number=9827]
            , [Name="hearts", Number=9829]
            , [Name="diams", Number=9830]
        },
        // Bare entity name: drop the "&" / ";" wrapper only when both are present.
        token =
            if Text.StartsWith(entity, "&") and Text.EndsWith(entity, ";")
            then Text.Range(entity, 1, Text.Length(entity) - 2)
            else entity,
        correctCasing = List.Select(entities, each [Name] = token),
        anyCasing = List.Select(entities, each Text.Upper([Name]) = Text.Upper(token)),
        // List.Single raises unless the list holds exactly one item, so an exact-case miss
        // falls back to the case-insensitive candidates; a miss or an ambiguity there
        // leaves 'single' in error, which is caught when 'char' is evaluated below.
        single = try List.Single(correctCasing)[Number] otherwise List.Single(anyCasing)[Number],
        handleError =
            Error.Record(
                "Invalid parameter",
                "Unable to translate the html entity &" & token & ";",
                if List.Count(anyCasing) = 0
                then "No match with the list of known entities."
                else "Multiple matches with the list of known entities. Check the casing of your entity's name."
            ),
        // On failure the error record is returned as a value (it is not raised).
        char = try Character.FromNumber(single) otherwise handleError
    in
        char;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment