# Filtering invalid XML characters in Python.
# Based on an answer by John Machin on Stack Overflow.
def isValidXMLChar(char):
    """Return True if *char* is a character allowed in an XML 1.0 document.

    The accepted code points are those of the XML 1.0 ``Char`` production:
    tab (U+0009), line feed (U+000A), carriage return (U+000D), and the
    ranges U+0020-U+D7FF, U+E000-U+FFFD and U+10000-U+10FFFF. Everything
    else (other C0 control characters, surrogates, U+FFFE and U+FFFF) is
    rejected.

    Args:
        char: A single-character string; it is handed to ``ord()``.

    Returns:
        bool: True when the code point lies in one of the ranges above.

    Raises:
        TypeError: Propagated from ``ord()`` when *char* is not exactly one
            character.
    """
    codepoint = ord(char)
    # The widest range is tested first because most text falls into it.
    return (
        0x20 <= codepoint <= 0xD7FF
        or codepoint in (0x9, 0xA, 0xD)
        or 0xE000 <= codepoint <= 0xFFFD
        or 0x10000 <= codepoint <= 0x10FFFF
    )
def filterInvalidXMLChars(input):
    """Return *input* with every character that is not valid in XML removed.

    Each character is tested with ``isValidXMLChar``; characters that pass
    are kept in their original order.

    Args:
        input: The text to clean. The name shadows the ``input`` builtin but
            is kept so existing keyword callers continue to work.

    Returns:
        str: The characters of *input* that ``isValidXMLChar`` accepts,
        joined into a single string.
    """
    # On Python 3, filter() yields a lazy iterator instead of a string (only
    # Python 2 returned a string for string input), so build the result
    # explicitly.
    return "".join(char for char in input if isValidXMLChar(char))
