Skip to content

Instantly share code, notes, and snippets.

@tomerd
Created December 19, 2011 23:43
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tomerd/1499453 to your computer and use it in GitHub Desktop.
Save tomerd/1499453 to your computer and use it in GitHub Desktop.
a less simple HtmlSanitizer
/**
 * An {@code HtmlSanitizer} that lets a fixed whitelist of tags through and escapes
 * everything else with {@code SafeHtmlUtils.htmlEscapeAllowEntities}.
 *
 * <p>The input is split on {@code '<'}; each piece after the first is treated as a
 * candidate tag followed by trailing text. Candidates whose (lower-cased) name is in
 * {@link #SIMPLE_TAGS} or {@link #CONTAINER_TAGS} are re-emitted as markup; every other
 * candidate, including malformed ones (no closing {@code '>'}, empty tag name), is
 * escaped so that it renders as literal text.
 *
 * <p><strong>Security warning:</strong> attributes of whitelisted tags are copied to the
 * output verbatim. Event-handler attributes such as {@code onclick} are therefore NOT
 * neutralised, so the result is not safe for arbitrary untrusted input until attribute
 * filtering is implemented.
 */
public final class LessSimpleHtmlSanitizer implements HtmlSanitizer
{
    private static final String[] SIMPLE_TAGS = {"b", "em", "i", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "br", "ul", "ol", "li", "img"};
    private static final String[] CONTAINER_TAGS = {"html", "body", "table", "tbody", "tfoot", "th", "tr", "td", "p", "div", "span", "pre"};

    // The constructor is private, so this is the only way to obtain an instance.
    private static final LessSimpleHtmlSanitizer INSTANCE = new LessSimpleHtmlSanitizer();

    /**
     * Returns the shared sanitizer instance, for callers that need an
     * {@code HtmlSanitizer} object rather than the static {@link #sanitizeHtml(String)}.
     *
     * @return the singleton instance
     */
    public static LessSimpleHtmlSanitizer getInstance()
    {
        return INSTANCE;
    }

    /**
     * Sanitizes {@code html}; delegates to {@link #sanitizeHtml(String)}.
     *
     * @param html the markup to sanitize, must not be {@code null}
     * @return the sanitized markup wrapped as {@code SafeHtml}
     * @throws NullPointerException if {@code html} is {@code null}
     */
    public SafeHtml sanitize(String html)
    {
        return sanitizeHtml(html);
    }

    /**
     * Sanitizes {@code html}, keeping whitelisted tags and escaping everything else.
     *
     * @param html the markup to sanitize, must not be {@code null}; may be empty
     * @return the sanitized markup wrapped as {@code SafeHtml}
     * @throws NullPointerException if {@code html} is {@code null}
     */
    public static SafeHtml sanitizeHtml(String html)
    {
        if (html == null) throw new NullPointerException("html is null");
        return SafeHtmlUtils.fromTrustedString(sanitizeString(html));
    }

    /**
     * Produces the sanitized string for {@code text}.
     *
     * @param text the raw markup; {@code null} or empty yields an empty string
     * @return the sanitized markup, never {@code null}
     */
    private static String sanitizeString(String text)
    {
        // Return "" rather than null so the result can always be wrapped as SafeHtml.
        if (null == text || 0 == text.length()) return "";

        StringBuilder builder = new StringBuilder();
        // Limit -1 keeps trailing empty segments, so a trailing '<' is not silently lost.
        String[] segments = text.split("<", -1);

        // Everything before the first '<' is plain text.
        appendEscaped(builder, segments[0]);

        for (int i = 1; i < segments.length; i++)
        {
            String segment = segments[i];
            boolean endTag = segment.startsWith("/");
            int nameStart = endTag ? 1 : 0;
            int closeIndex = segment.indexOf('>');

            // No '>' at all (e.g. "1 < 2") or nothing between '<' and '>' (e.g. "<>"):
            // this is not a tag, render the '<' and the rest as literal text.
            if (closeIndex <= nameStart)
            {
                appendEscapedTag(builder, segment);
                continue;
            }

            // closeIndex > nameStart >= 0, so closeIndex - 1 is a valid index here.
            boolean selfClosing = '/' == segment.charAt(closeIndex - 1);
            int innerEnd = selfClosing ? closeIndex - 1 : closeIndex;

            // Tag name plus optional attributes, without the leading '/' or trailing '/'.
            String inner = segment.substring(nameStart, innerEnd);
            int space = inner.indexOf(' ');
            String tag = (space >= 0 ? inner.substring(0, space) : inner).toLowerCase();
            // FIXME: need to sanitize attributes as well (onclick, etc can be exploited)
            String attributes = space > 0 ? inner.substring(space + 1) : null;

            boolean valid = ArrayUtil.indexOf(SIMPLE_TAGS, tag) >= 0 || ArrayUtil.indexOf(CONTAINER_TAGS, tag) >= 0;
            if (!valid)
            {
                // escape it
                appendEscapedTag(builder, segment);
                continue;
            }

            if (endTag)
            {
                // close tag; any attributes on it are dropped
                builder.append("</").append(tag).append('>');
            }
            else
            {
                builder.append('<').append(tag);
                if (null != attributes) builder.append(' ').append(attributes);
                builder.append(selfClosing ? "/>" : ">");
            }

            // Text following the tag cannot contain '<' (the input was split on it),
            // so it only needs escaping. This applies to close tags too.
            appendEscaped(builder, segment.substring(closeIndex + 1));
        }
        return builder.toString();
    }

    /** Appends {@code text} to {@code builder} in escaped form; empty text is skipped. */
    private static void appendEscaped(StringBuilder builder, String text)
    {
        if (text.length() > 0) builder.append(SafeHtmlUtils.htmlEscapeAllowEntities(text));
    }

    /** Appends a rejected tag candidate as literal text, including its escaped leading '<'. */
    private static void appendEscapedTag(StringBuilder builder, String segment)
    {
        builder.append("&lt;");
        appendEscaped(builder, segment);
    }

    private LessSimpleHtmlSanitizer()
    {
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment