Skip to content

Instantly share code, notes, and snippets.

@jone
Created October 1, 2010 13:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jone/606214 to your computer and use it in GitHub Desktop.
Save jone/606214 to your computer and use it in GitHub Desktop.
from htmlentitydefs import name2codepoint as n2cp
from htmlentitydefs import codepoint2name as cp2n
import re
def decode_htmlentities(string):
    """
    Decodes htmlentities or xmlentities

    Named entities known to ``htmlentitydefs`` and numeric character
    references, decimal (``&#38;``) or hexadecimal (``&#x26;``), are
    replaced by the character they denote.  Anything that cannot be
    resolved (unknown name, code point the interpreter cannot represent)
    is left in the text exactly as it was written.

    >>> decode_htmlentities('&quot;X&gt;Y&quot;')
    u'"X>Y"'
    >>> decode_htmlentities('m&#38;m')
    u'm&m'
    >>> decode_htmlentities('&#x41;')
    u'A'
    """
    try:
        to_char = unichr  # Python 2: chr() would only cover 0-255
    except NameError:
        to_char = chr  # Python 3: chr() already returns text

    # Exactly one of the three groups is set per match:
    # decimal reference, hexadecimal reference, or entity name.
    entity_re = re.compile(
        r"&(?:#(\d{1,8})|#[xX]([0-9a-fA-F]{1,6})|(\w{1,8}));")

    def substitute_entity(match):
        decimal, hexadecimal, name = match.groups()
        try:
            if name is not None:
                cp = n2cp.get(name)
                if cp is None:
                    # Unknown entity name: keep the original text.
                    return match.group()
            elif decimal is not None:
                cp = int(decimal)
            else:
                cp = int(hexadecimal, 16)
            return to_char(cp)
        except (ValueError, OverflowError):
            # Code point out of range for this interpreter; do not crash
            # on malformed input, just leave the reference untouched.
            return match.group()

    return entity_re.sub(substitute_entity, string)
def html2xmlentities(string):
    """
    Converts htmlentities to xmlentities

    Every named entity known to ``htmlentitydefs`` (``&amp;``) is rewritten
    as the equivalent decimal character reference (``&#38;``).  Unknown
    names and all other text are returned unchanged.

    >>> html2xmlentities('m&amp;m')
    'm&#38;m'
    >>> html2xmlentities('m&m')
    'm&m'
    """
    xpr = re.compile(r'&(\w{1,8});')

    def substitute_entity(match):
        # Single dict lookup; avoids building and scanning a key list.
        cp = n2cp.get(match.group(1))
        if cp is None:
            return match.group(0)
        return '&#%i;' % cp

    return xpr.sub(substitute_entity, string)
def xml2htmlentities(string):
    """
    Converts xmlentities to htmlentities

    Numeric character references, decimal (``&#38;``) or hexadecimal
    (``&#x26;``), whose code point has a name in ``htmlentitydefs`` are
    rewritten as the named entity (``&amp;``).  References without a
    named equivalent and all other text are returned unchanged.

    >>> xml2htmlentities('m&#38;m')
    'm&amp;m'
    >>> xml2htmlentities('m&#x26;m')
    'm&amp;m'
    >>> xml2htmlentities('&#65;')
    '&#65;'
    """
    # Group 1 is a decimal reference, group 2 a hexadecimal one.
    xpr = re.compile(r'&#(?:(\d{1,5})|[xX]([0-9a-fA-F]{1,4}));')

    def substitute_entity(match):
        decimal, hexadecimal = match.groups()
        if decimal is not None:
            cp = int(decimal)
        else:
            cp = int(hexadecimal, 16)
        # Single dict lookup; avoids building and scanning a key list.
        name = cp2n.get(cp)
        if name is None:
            return match.group(0)
        return '&%s;' % name

    return xpr.sub(substitute_entity, string)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment