Skip to content

Instantly share code, notes, and snippets.

@powdahound
Created August 19, 2010 18:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save powdahound/538547 to your computer and use it in GitHub Desktop.
Save powdahound/538547 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import sys
from twisted.words.xish import domish
def handleStart(elem):
    """No-op callback that stream() installs as ``DocumentStartEvent``.

    ``elem`` is accepted and deliberately ignored; nothing is returned.
    """
    # To trace parsing, print the element here, e.g.:
    #   print 'START: %r' % elem.toXml()
    return None
def handleElement(elem):
    """No-op callback that stream() installs as ``ElementEvent``.

    ``elem`` is accepted and deliberately ignored; nothing is returned.
    """
    # To trace parsing, print the element here, e.g.:
    #   print 'ELEM: %r' % elem.toXml()
    return None
def handleEnd():
    """No-op callback that stream() installs as ``DocumentEndEvent``.

    Takes no arguments and returns nothing.
    """
    # To trace parsing, announce the end of the document here, e.g.:
    #   print 'END'
    return None
def stream():
    """Create a new domish element stream wired to this file's handlers.

    Returns:
        The object returned by ``domish.elementStream()``, with its
        ``DocumentStartEvent``, ``ElementEvent`` and ``DocumentEndEvent``
        attributes set to handleStart, handleElement and handleEnd.
    """
    parser = domish.elementStream()
    # Attach the callbacks in the same order as the three event attributes
    # are listed above.
    for attr_name, callback in (
        ("DocumentStartEvent", handleStart),
        ("ElementEvent", handleElement),
        ("DocumentEndEvent", handleEnd),
    ):
        setattr(parser, attr_name, callback)
    return parser
print('HTML entities:')
# Entity references to feed to the parser, one per document. They must be
# written as references ("&amp;", not "&"): the point of this section is to
# see which references the parser accepts.
entities = [
    # XML defined
    "&amp;",
    "&quot;",
    "&lt;",
    "&gt;",
    "&apos;",
    # Not XML defined
    "&nbsp;",
    "&lsquo;",
    "&rsquo;",
    "&ldquo;",
    "&rdquo;",
    "&ndash;",
    "&mdash;",
    "&hellip;",
]
for entity in entities:
    # A new stream per entity, so a failure on one cannot affect the next.
    s = stream()
    # Write the label without a newline; the verdict is appended to the
    # same output line below.
    sys.stdout.write("%20r - " % entity)
    try:
        s.parse('<entity>%s</entity>' % entity)
    except Exception as e:
        # Deliberately broad: any parser error is reported as a result
        # instead of aborting the run.
        print("FAIL - %s" % e)
    else:
        print("OK")
print("\nUnicode chars:")
# Raw byte sequences to embed in a document, one per parse.
# NOTE(review): the multi-byte entries look like UTF-8 encodings and the last
# two like truncated sequences; they are bytes only under Python 2 ``str``
# semantics -- confirm before running on another interpreter.
chars = [
    "\x0a",  # \n
    "\x0b",
    "\x0c",
    "\x0d",  # \r
    "\x0e",
    "\x0f",
    "\xe2\x80\x93",
    "\xe2\x80\x94",
    "\xe2\x80\x98",
    "\xe2\x80\x99",
    "\xe2\x80\x9c",
    "\xe2\x80\x9d",
    "\xe2\x80\xa6",
    "\x80\x9c",
    "\x80\x9d",
]
for char in chars:
    # A new stream per sequence, so a failure on one cannot affect the next.
    s = stream()
    # Show both the repr and the raw value, without a newline; the verdict
    # is appended to the same output line below.
    sys.stdout.write("%20r (%s) - " % (char, char))
    try:
        s.parse('<char>%s</char>' % char)
    except Exception as e:
        # Deliberately broad: any parser error is reported as a result
        # instead of aborting the run.
        print("FAIL - %s" % e)
    else:
        print("OK")
print("\nAll Unicode chars in UTF-8:")
# Sweep code points 0..19999 and report only the ones the parser rejects.
# The loop variable IS the code point: re-reading its decimal digits as hex
# (the previous approach) skipped every code point whose hex form contains
# A-F and overshot the intended range.
for code_point in range(0, 20000):
    # A new stream per character, so a failure on one cannot affect the next.
    s = stream()
    char = unichr(code_point).encode('utf-8')
    try:
        s.parse('<char>%s</char>' % char)
        # Successes are silent; to list them too:
        #   print "%r, %s - OK" % (char, hex(code_point))
    except Exception as e:
        # Deliberately broad: any parser error is reported as a result
        # instead of aborting the sweep.
        print("%r, %s - FAIL: %s" % (char, hex(code_point), e))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment