Skip to content

Instantly share code, notes, and snippets.

@vadimii
Created May 28, 2012 17:06
Show Gist options
  • Save vadimii/2820112 to your computer and use it in GitHub Desktop.
Save vadimii/2820112 to your computer and use it in GitHub Desktop.
WSGI Sanitizer
import webapp2
import html5lib
from html5lib import sanitizer,treebuilders,treewalkers
from html5lib.serializer.htmlserializer import HTMLSerializer
class MainPage(webapp2.RequestHandler):
    """Handler that runs posted HTML through html5lib's sanitizer.

    On POST, the ``content`` request parameter is parsed with an
    ``html5lib.HTMLParser`` whose tokenizer is ``sanitizer.HTMLSanitizer``,
    and the resulting tree is serialized back into the response body.
    """

    def post(self):
        """Sanitize the ``content`` parameter and write the result out."""
        # The serialized markup is sent back with a text/plain content type.
        self.response.headers['Content-Type'] = 'text/plain'

        # One tree implementation name is shared by the builder and the
        # walker so that both agree on the tree type.
        tree_kind = 'lxml'
        raw_markup = self.request.get('content')

        # Parse with the sanitizer plugged in as the tokenizer.
        builder = treebuilders.getTreeBuilder(tree_kind)
        sanitizing_parser = html5lib.HTMLParser(
            tree=builder,
            tokenizer=sanitizer.HTMLSanitizer,
        )
        document = sanitizing_parser.parse(raw_markup)

        # Walk the parsed tree to produce the token stream for the serializer.
        make_walker = treewalkers.getTreeWalker(tree_kind)
        token_stream = make_walker(document)

        html_writer = HTMLSerializer(
            quote_attr_values=True,
            omit_optional_tags=True,
        )
        self.response.out.write(html_writer.render(token_stream))
# Route table: the site root is served by MainPage.
mappings = [
    ('/', MainPage),
]

# Module-level WSGI application object built from the route table.
app = webapp2.WSGIApplication(routes=mappings)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment