Last active
August 29, 2015 14:09
-
-
Save virusdefender/3e4d7f5bcd76a7f86837 to your computer and use it in GitHub Desktop.
xss filter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#coding=utf-8 | |
import re | |
from BeautifulSoup import BeautifulSoup | |
# Compiled patterns, keyed by the pattern string they were built from.
regex_cache = {}


def search(text, regex):
    """Return the first match of pattern *regex* in *text*, or None.

    Each distinct pattern string is compiled once and then reused from
    ``regex_cache`` on later calls.
    """
    try:
        compiled = regex_cache[regex]
    except KeyError:
        compiled = regex_cache[regex] = re.compile(regex)
    return compiled.search(text)
# XSS whitelist: maps each allowed tag name to its allowed attributes, and
# each allowed attribute to the regex its value is checked against.
VALID_TAGS = {
    'h1': {},
    'h2': {},
    'h3': {},
    'h4': {},
    'strong': {},
    'em': {},
    'p': {},
    'ul': {},
    'li': {},
    'br': {},
    'a': {'href': '^http://', 'title': '.*'},
    'img': {'src': '^http://', 'alt': '.*'},
}
def parsehtml(html):
    """Filter *html* against the VALID_TAGS whitelist and return the result.

    Tags whose name is not a key of VALID_TAGS are marked ``hidden``.
    For whitelisted tags, an attribute is deleted when it is not listed in
    the tag's rules or when its value does not match the rule's regex.

    :param html: markup to filter.
    :return: the filtered markup, as produced by ``soup.renderContents()``.
    """
    soup = BeautifulSoup(html)
    for tag in soup.findAll(True):
        attr_rules = VALID_TAGS.get(tag.name)
        if attr_rules is None:
            # NOTE(review): a hidden tag presumably still renders its
            # contents; confirm that raw-text elements such as <script>
            # cannot pass markup through unfiltered this way.
            tag.hidden = True
            continue
        # Iterate over a snapshot of the attributes: ``del tag[name]``
        # changes ``tag.attrs``, and mutating it during iteration would skip
        # the attribute that follows each deleted one, letting a second
        # disallowed attribute survive the filter.
        for attr_name, attr_value in list(tag.attrs):
            pattern = attr_rules.get(attr_name)
            # Drop attributes that are not whitelisted for this tag, or
            # whose value fails the whitelisted format.
            if pattern is None or not search(attr_value, pattern):
                del tag[attr_name]
    return soup.renderContents()
# Sample XSS payloads used to exercise the filter.
text = '''
<a onmouseover=alert(document.cookie)>xxs link</a>dsd'/>
<IMG """><SCRIPT>alert("XSS")</SCRIPT>">
<IMG SRC=/ onerror="alert(String.fromCharCode(88,83,83))"></img>
'''

if __name__ == "__main__":
    # Run the demo only when executed as a script, so importing this module
    # has no output side effect.
    print(parsehtml(text))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment