Created
October 25, 2010 14:31
-
-
Save edvakf/645040 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// inspired by http://code.google.com/p/google-caja/source/browse/trunk/src/com/google/caja/plugin/html-sanitizer.js | |
// instead of doing with text, only use DOM1 and DocumentFragment | |
var dom_html_sanitize; | |
(function(html4){ | |
// usage is similar to html_sanitize method of html-sanitizer.js | |
// but takes an html document and returns a DocumentFragment | |
// @param document a document that the output DocumentFragment belongs to (must implement createDocumentFragment method) | |
// @param htmlDoc an html document to sanitize (must implement DOM1) | |
dom_html_sanitize = function dom_html_sanitize(document, htmlDoc, opt_uriPolicy, opt_nmTokenPolicy) { | |
var df = document.createDocumentFragment(); | |
makeSanitizer(sanitizeAttribs)(htmlDoc, df); | |
return df; | |
// equivalent of html.makeHtmlSanitizer of html-sanitizer.js | |
function makeSanitizer(sanitizeAttributes) { | |
var current, stack, ignoring; | |
return makeWalker({ | |
startDoc : function(out) { | |
current = out; | |
stack = []; | |
ignoring = false; | |
}, | |
startElement : function(tagName, attribs, _) { | |
if (ignoring) { return; } | |
if (!html4.ELEMENTS.hasOwnProperty(tagName)) { return; } | |
var eflags = html4.ELEMENTS[tagName]; | |
if (eflags & html4.eflags.FOLDABLE) { | |
return; | |
} else if (eflags & html4.eflags.UNSAFE) { | |
ignoring = !(eflags & html4.eflags.EMPTY); | |
return; | |
} | |
attribs = sanitizeAttributes(tagName, attribs); | |
if (attribs) { | |
var element = document.createElement(tagName.toLowerCase()); | |
current.appendChild(element); | |
if (!(eflags & html4.eflags.EMPTY)) { | |
current = element; | |
stack.push(tagName); | |
} | |
for (var i = 0, n = attribs.length; i < n; i += 2) { | |
var attribName = attribs[i], | |
value = attribs[i + 1]; | |
if (value !== null && value !== void 0) { | |
element.setAttribute(attribName, value); | |
} | |
} | |
} | |
}, | |
endElement: function (tagName, _) { | |
if (ignoring) { | |
ignoring = false; | |
} | |
for (var index = stack.length; --index >= 0;) { | |
if (stack[index] === tagName) { break; } | |
} | |
if (index < 0) { return; } | |
for (var i = stack.length; --i > index;) { | |
current = current.parentNode; | |
} | |
stack.length = index; | |
current = current.parentNode; | |
}, | |
textNode: function (textContent, _) { | |
if (ignoring) { return; } | |
current.appendChild(document.createTextNode(textContent)); | |
}, | |
cDataSection: function (textContent, _) { | |
// TODO: append if the htmlDoc is an XHTML document | |
// do nothing | |
}, | |
commentNode: function (textContent, _) { | |
// do nothing | |
}, | |
endDoc: function (_) { | |
// do nothing | |
}, | |
}); | |
} | |
// sanitizeAttribs is taken from html-sanitizer.js | |
function sanitizeAttribs(tagName, attribs) { | |
for (var i = 0; i < attribs.length; i += 2) { | |
var attribName = attribs[i]; | |
var value = attribs[i + 1]; | |
var atype = null, attribKey; | |
if ((attribKey = tagName + '::' + attribName, | |
html4.ATTRIBS.hasOwnProperty(attribKey)) | |
|| (attribKey = '*::' + attribName, | |
html4.ATTRIBS.hasOwnProperty(attribKey))) { | |
atype = html4.ATTRIBS[attribKey]; | |
} | |
if (atype !== null) { | |
switch (atype) { | |
case html4.atype.NONE: break; | |
case html4.atype.SCRIPT: | |
case html4.atype.STYLE: | |
value = null; | |
break; | |
case html4.atype.ID: | |
case html4.atype.IDREF: | |
case html4.atype.IDREFS: | |
case html4.atype.GLOBAL_NAME: | |
case html4.atype.LOCAL_NAME: | |
case html4.atype.CLASSES: | |
value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value; | |
break; | |
case html4.atype.URI: | |
value = opt_uriPolicy && opt_uriPolicy(value); | |
break; | |
case html4.atype.URI_FRAGMENT: | |
if (value && '#' === value.charAt(0)) { | |
value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value; | |
if (value) { value = '#' + value; } | |
} else { | |
value = null; | |
} | |
break; | |
default: | |
value = null; | |
break; | |
} | |
} else { | |
value = null; | |
} | |
attribs[i + 1] = value; | |
} | |
return attribs; | |
} | |
}; | |
// equivalent of makeSaxParser of html-sanitizer.js | |
function makeWalker(handler) { | |
return function walker(doc, param) { | |
if (handler.startDoc) { handler.startDoc(param); } | |
subWalker(doc); | |
if (handler.endDoc) { handler.endDoc(param); } | |
}; | |
// recursive walker | |
function subWalker(root, param) { | |
var child = root.firstChild; | |
while(child) { | |
switch(child.nodeType) { | |
case 1: // element | |
var tagName = child.tagName.toLowerCase(); | |
var attrs = []; | |
var attributes = child.attributes; | |
for (var i = 0, l = attributes.length; i < l; i++) { | |
attrs.push(attributes[i].nodeName.toLowerCase(), attributes[i].nodeValue); | |
} | |
if (handler.startElement) { handler.startElement(tagName, attrs, param); } | |
subWalker(child, param); // recursion here! | |
if (handler.endElement) { handler.endElement(tagName, param); } | |
break; | |
case 3: // text | |
if (handler.textNode) { handler.textNode(child.nodeValue, param); } | |
break; | |
case 4: // cdata section | |
if (handler.cDataSection) { handler.cDataSection(child.nodeValue, param); } | |
break; | |
case 8: // comment | |
default: // should be nothing else | |
if (handler.commentNode) { handler.commentNode(child.nodeValue, param); } | |
break; | |
} | |
child = child.nextSibling; | |
} | |
} | |
} | |
}(html4)); | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// http://code.google.com/p/khanacademy/source/browse/trunk/javascript/html4-defs.js | |
/* Copyright Google Inc. | |
* Licensed under the Apache Licence Version 2.0 | |
* Autogenerated at Fri Aug 13 11:26:55 PDT 2010 | |
* @provides html4 | |
*/ | |
var html4 = {}; | |
html4 .atype = { | |
'NONE': 0, | |
'URI': 1, | |
'URI_FRAGMENT': 11, | |
'SCRIPT': 2, | |
'STYLE': 3, | |
'ID': 4, | |
'IDREF': 5, | |
'IDREFS': 6, | |
'GLOBAL_NAME': 7, | |
'LOCAL_NAME': 8, | |
'CLASSES': 9, | |
'FRAME_TARGET': 10 | |
}; | |
html4 .ATTRIBS = { | |
'*::class': 9, | |
'*::dir': 0, | |
'*::id': 4, | |
'*::lang': 0, | |
'*::onclick': 2, | |
'*::ondblclick': 2, | |
'*::onkeydown': 2, | |
'*::onkeypress': 2, | |
'*::onkeyup': 2, | |
'*::onload': 2, | |
'*::onmousedown': 2, | |
'*::onmousemove': 2, | |
'*::onmouseout': 2, | |
'*::onmouseover': 2, | |
'*::onmouseup': 2, | |
'*::style': 3, | |
'*::title': 0, | |
'a::accesskey': 0, | |
'a::coords': 0, | |
'a::href': 1, | |
'a::hreflang': 0, | |
'a::name': 7, | |
'a::onblur': 2, | |
'a::onfocus': 2, | |
'a::rel': 0, | |
'a::rev': 0, | |
'a::shape': 0, | |
'a::tabindex': 0, | |
'a::target': 10, | |
'a::type': 0, | |
'area::accesskey': 0, | |
'area::alt': 0, | |
'area::coords': 0, | |
'area::href': 1, | |
'area::nohref': 0, | |
'area::onblur': 2, | |
'area::onfocus': 2, | |
'area::shape': 0, | |
'area::tabindex': 0, | |
'area::target': 10, | |
'bdo::dir': 0, | |
'blockquote::cite': 1, | |
'br::clear': 0, | |
'button::accesskey': 0, | |
'button::disabled': 0, | |
'button::name': 8, | |
'button::onblur': 2, | |
'button::onfocus': 2, | |
'button::tabindex': 0, | |
'button::type': 0, | |
'button::value': 0, | |
'caption::align': 0, | |
'col::align': 0, | |
'col::char': 0, | |
'col::charoff': 0, | |
'col::span': 0, | |
'col::valign': 0, | |
'col::width': 0, | |
'colgroup::align': 0, | |
'colgroup::char': 0, | |
'colgroup::charoff': 0, | |
'colgroup::span': 0, | |
'colgroup::valign': 0, | |
'colgroup::width': 0, | |
'del::cite': 1, | |
'del::datetime': 0, | |
'dir::compact': 0, | |
'div::align': 0, | |
'dl::compact': 0, | |
'font::color': 0, | |
'font::face': 0, | |
'font::size': 0, | |
'form::accept': 0, | |
'form::action': 1, | |
'form::autocomplete': 0, | |
'form::enctype': 0, | |
'form::method': 0, | |
'form::name': 7, | |
'form::onreset': 2, | |
'form::onsubmit': 2, | |
'form::target': 10, | |
'h1::align': 0, | |
'h2::align': 0, | |
'h3::align': 0, | |
'h4::align': 0, | |
'h5::align': 0, | |
'h6::align': 0, | |
'hr::align': 0, | |
'hr::noshade': 0, | |
'hr::size': 0, | |
'hr::width': 0, | |
'iframe::align': 0, | |
'iframe::frameborder': 0, | |
'iframe::height': 0, | |
'iframe::marginheight': 0, | |
'iframe::marginwidth': 0, | |
'iframe::width': 0, | |
'img::align': 0, | |
'img::alt': 0, | |
'img::border': 0, | |
'img::height': 0, | |
'img::hspace': 0, | |
'img::ismap': 0, | |
'img::name': 7, | |
'img::src': 1, | |
'img::usemap': 11, | |
'img::vspace': 0, | |
'img::width': 0, | |
'input::accept': 0, | |
'input::accesskey': 0, | |
'input::align': 0, | |
'input::alt': 0, | |
'input::autocomplete': 0, | |
'input::checked': 0, | |
'input::disabled': 0, | |
'input::ismap': 0, | |
'input::maxlength': 0, | |
'input::name': 8, | |
'input::onblur': 2, | |
'input::onchange': 2, | |
'input::onfocus': 2, | |
'input::onselect': 2, | |
'input::readonly': 0, | |
'input::size': 0, | |
'input::src': 1, | |
'input::tabindex': 0, | |
'input::type': 0, | |
'input::usemap': 11, | |
'input::value': 0, | |
'ins::cite': 1, | |
'ins::datetime': 0, | |
'label::accesskey': 0, | |
'label::for': 5, | |
'label::onblur': 2, | |
'label::onfocus': 2, | |
'legend::accesskey': 0, | |
'legend::align': 0, | |
'li::type': 0, | |
'li::value': 0, | |
'map::name': 7, | |
'menu::compact': 0, | |
'ol::compact': 0, | |
'ol::start': 0, | |
'ol::type': 0, | |
'optgroup::disabled': 0, | |
'optgroup::label': 0, | |
'option::disabled': 0, | |
'option::label': 0, | |
'option::selected': 0, | |
'option::value': 0, | |
'p::align': 0, | |
'pre::width': 0, | |
'q::cite': 1, | |
'select::disabled': 0, | |
'select::multiple': 0, | |
'select::name': 8, | |
'select::onblur': 2, | |
'select::onchange': 2, | |
'select::onfocus': 2, | |
'select::size': 0, | |
'select::tabindex': 0, | |
'table::align': 0, | |
'table::bgcolor': 0, | |
'table::border': 0, | |
'table::cellpadding': 0, | |
'table::cellspacing': 0, | |
'table::frame': 0, | |
'table::rules': 0, | |
'table::summary': 0, | |
'table::width': 0, | |
'tbody::align': 0, | |
'tbody::char': 0, | |
'tbody::charoff': 0, | |
'tbody::valign': 0, | |
'td::abbr': 0, | |
'td::align': 0, | |
'td::axis': 0, | |
'td::bgcolor': 0, | |
'td::char': 0, | |
'td::charoff': 0, | |
'td::colspan': 0, | |
'td::headers': 6, | |
'td::height': 0, | |
'td::nowrap': 0, | |
'td::rowspan': 0, | |
'td::scope': 0, | |
'td::valign': 0, | |
'td::width': 0, | |
'textarea::accesskey': 0, | |
'textarea::cols': 0, | |
'textarea::disabled': 0, | |
'textarea::name': 8, | |
'textarea::onblur': 2, | |
'textarea::onchange': 2, | |
'textarea::onfocus': 2, | |
'textarea::onselect': 2, | |
'textarea::readonly': 0, | |
'textarea::rows': 0, | |
'textarea::tabindex': 0, | |
'tfoot::align': 0, | |
'tfoot::char': 0, | |
'tfoot::charoff': 0, | |
'tfoot::valign': 0, | |
'th::abbr': 0, | |
'th::align': 0, | |
'th::axis': 0, | |
'th::bgcolor': 0, | |
'th::char': 0, | |
'th::charoff': 0, | |
'th::colspan': 0, | |
'th::headers': 6, | |
'th::height': 0, | |
'th::nowrap': 0, | |
'th::rowspan': 0, | |
'th::scope': 0, | |
'th::valign': 0, | |
'th::width': 0, | |
'thead::align': 0, | |
'thead::char': 0, | |
'thead::charoff': 0, | |
'thead::valign': 0, | |
'tr::align': 0, | |
'tr::bgcolor': 0, | |
'tr::char': 0, | |
'tr::charoff': 0, | |
'tr::valign': 0, | |
'ul::compact': 0, | |
'ul::type': 0 | |
}; | |
html4 .eflags = { | |
'OPTIONAL_ENDTAG': 1, | |
'EMPTY': 2, | |
'CDATA': 4, | |
'RCDATA': 8, | |
'UNSAFE': 16, | |
'FOLDABLE': 32, | |
'SCRIPT': 64, | |
'STYLE': 128 | |
}; | |
html4 .ELEMENTS = { | |
'a': 0, | |
'abbr': 0, | |
'acronym': 0, | |
'address': 0, | |
'applet': 16, | |
'area': 2, | |
'b': 0, | |
'base': 18, | |
'basefont': 18, | |
'bdo': 0, | |
'big': 0, | |
'blockquote': 0, | |
'body': 49, | |
'br': 2, | |
'button': 0, | |
'caption': 0, | |
'center': 0, | |
'cite': 0, | |
'code': 0, | |
'col': 2, | |
'colgroup': 1, | |
'dd': 1, | |
'del': 0, | |
'dfn': 0, | |
'dir': 0, | |
'div': 0, | |
'dl': 0, | |
'dt': 1, | |
'em': 0, | |
'fieldset': 0, | |
'font': 0, | |
'form': 0, | |
'frame': 18, | |
'frameset': 16, | |
'h1': 0, | |
'h2': 0, | |
'h3': 0, | |
'h4': 0, | |
'h5': 0, | |
'h6': 0, | |
'head': 49, | |
'hr': 2, | |
'html': 49, | |
'i': 0, | |
'iframe': 4, | |
'img': 2, | |
'input': 2, | |
'ins': 0, | |
'isindex': 18, | |
'kbd': 0, | |
'label': 0, | |
'legend': 0, | |
'li': 1, | |
'link': 18, | |
'map': 0, | |
'menu': 0, | |
'meta': 18, | |
'noframes': 20, | |
'noscript': 20, | |
'object': 16, | |
'ol': 0, | |
'optgroup': 0, | |
'option': 1, | |
'p': 1, | |
'param': 18, | |
'pre': 0, | |
'q': 0, | |
's': 0, | |
'samp': 0, | |
'script': 84, | |
'select': 0, | |
'small': 0, | |
'span': 0, | |
'strike': 0, | |
'strong': 0, | |
'style': 148, | |
'sub': 0, | |
'sup': 0, | |
'table': 0, | |
'tbody': 1, | |
'td': 1, | |
'textarea': 8, | |
'tfoot': 1, | |
'th': 1, | |
'thead': 1, | |
'title': 24, | |
'tr': 1, | |
'tt': 0, | |
'u': 0, | |
'ul': 0, | |
'var': 0 | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment