Skip to content

Instantly share code, notes, and snippets.

@tetsuharuohzeki
Last active August 29, 2015 14:05
Show Gist options
  • Save tetsuharuohzeki/c7dc40746d693282fe2d to your computer and use it in GitHub Desktop.
Save tetsuharuohzeki/c7dc40746d693282fe2d to your computer and use it in GitHub Desktop.
DOMParser/DOMImplementation.createHTMLDocumentで安全なDOMの構築. とりあえずPrestoでは使えないし、IE8も確かダメ。
"use strict";
// MIT License
// author: Tetsuharu OHZEKI
/*
 * Parse HTML text into a DOM subtree that carries no scripting items.
 *
 * @param {string} aText
 *   HTML source text to parse.
 * @return {DocumentFragment}
 *   The result of `importSubTreeToFragment(document, <sanitized document>)`.
 * @throws {Error}
 *   When `window.opera` is truthy, i.e. on Opera (presto).
 *
 * XXX: This does not work on Opera (presto).
 * XXX: Don't serialize the returned DOM subtree back to a string.
 *      Serializing might accidentally recover a dangerous HTML text.
 *      e.g. `"<bar>&lt;foo&gt;"` -> this -> re-serialize -> `"<foo>"`.
 *
 * Design:
 *  1. Parse the given HTML text with `DOM::DOMParser`.
 *     `DOMParser.parseFromString()` disables scripting when `text/html`
 *     is passed.
 *     cf. http://domparsing.spec.whatwg.org/#dom-domparser-parsefromstring
 *
 *     Opera (presto) does not follow that behavior; it fires scripting :(
 *     That is unsafe for our purpose, so on presto we refuse to run at all
 *     and throw before any parsing happens.
 *
 *  2. Remove all scripting items (script elements, then `on*` attributes)
 *     from the parsed document.
 *
 *  3. Move the nodes that survived the previous step into a
 *     `DOM::DocumentFragment`, and return it.
 */
var parseHTMLToSafeDOM = function (aText) {
    // Opera (presto) fires scripting inside documents created through
    // DOMParser/DOMImplementation.createHTMLDocument(), so the strategy of
    // "parse first, sanitize afterwards" cannot be made safe there.
    var isPresto = Boolean(window.opera);
    if (isPresto) {
        throw new Error("Safety Reject: Opera (presto) will do accidental firing of scripting in DOMParser.");
    }

    var parsed = new DOMParser().parseFromString(aText, "text/html");

    // Script elements are stripped first, inline handlers second.
    var sanitized = removeEventHandlerAttr(removeScriptElement(parsed));

    return importSubTreeToFragment(document, sanitized);
};
/*
 * Detach every `script` element found in the given document.
 *
 * The document is modified in place and handed back so that calls can be
 * chained.
 *
 * @param {Document} aDoc
 * @return {Document}
 *   The same object that was passed in as `aDoc`.
 */
var removeScriptElement = function (aDoc) {
    // Copy into a plain array before mutating the tree, so that removals
    // cannot disturb the iteration.
    var scripts = Array.prototype.slice.call(aDoc.getElementsByTagName("script"));

    scripts.forEach(function (script) {
        script.parentNode.removeChild(script);
    });

    return aDoc;
};
/*
 * Strip every attribute that could be an inline event handler from all
 * elements matched by `aDoc.querySelectorAll("*")`.
 *
 * The document is modified in place and handed back so that calls can be
 * chained.
 *
 * @param {Document} aDoc
 * @return {Document}
 *   The same object that was passed in as `aDoc`.
 */
var removeEventHandlerAttr = function (aDoc) {
    // Deliberately over-eager: any attribute whose local name begins with
    // "on" is treated as a handler, so harmless names such as "only" are
    // removed as well (false positives are accepted).
    var handlerLike = /^on/;
    var toArray = Array.prototype.slice;

    toArray.call(aDoc.querySelectorAll("*")).forEach(function (node) {
        // Iterate over a copy of the attribute list, because attributes are
        // removed from the element while walking it.
        toArray.call(node.attributes).forEach(function (attribute) {
            if (handlerLike.test(attribute.localName)) {
                node.removeAttribute(attribute.name);
            }
        });
    });

    return aDoc;
};
/*
 * Move every child node of `aOther.body` into a new fragment created by
 * `aCurrent`.
 *
 * Each child is adopted into `aCurrent` and then appended to the fragment,
 * one child at a time and in their original order.
 *
 * @param {Document} aCurrent
 *   The document that creates the fragment and adopts the nodes.
 * @param {Document} aOther
 *   The document whose `body` children are transferred.
 * @return {DocumentFragment}
 */
var importSubTreeToFragment = function (aCurrent, aOther) {
    var target = aCurrent.createDocumentFragment();

    // Work on a copy of the child list, since the nodes are taken out of
    // `aOther` while walking it.
    var pending = Array.prototype.slice.call(aOther.body.childNodes);

    pending.forEach(function (node) {
        target.appendChild(aCurrent.adoptNode(node));
    });

    return target;
};
// Expose `parseHTMLToSafeDOM` on the `exports` object; it is the only
// function attached to `exports` here.
exports.parseHTMLToSafeDOM = parseHTMLToSafeDOM;
"use strict";
var assert = require("power-assert");
//var SafeDOM = require("safe-parse");
// Event type names, see:
// https://dvcs.w3.org/hg/dom3events/raw-file/tip/html/DOM3-Events.html#event-types-list
// XXX: This is a minimal test case.
// To be thorough, every event type in the relevant specs should be added.
//
// EVENT_ATTRS is one comma-separated selector list built from the names
// below: "[onabort],[onbeforeinput],...,[onwheel]".
var EVENT_ATTRS = (function () {
    var eventTypes = [
        "abort",
        "beforeinput",
        "blur",
        "click",
        "compositionstart",
        "compositionupdate",
        "compositionend",
        "dblclick",
        "error",
        "focus",
        "focusin",
        "focusout",
        "input",
        "keydown",
        "keyup",
        "load",
        "mousedown",
        "mouseenter",
        "mouseleave",
        "mousemove",
        "mouseout",
        "mouseover",
        "mouseup",
        "resize",
        "scroll",
        "select",
        "unload",
        "wheel"
    ];

    // Turn each event type into an attribute selector for its `on*` attribute.
    var selectors = [];
    for (var idx = 0; idx < eventTypes.length; idx += 1) {
        selectors.push("[on" + eventTypes[idx] + "]");
    }
    return selectors.join(",");
})();
/*
 * Specs for `SafeDOM.parseHTMLToSafeDOM`.
 *
 * The input mixes three things: a real `script` element, an `img` element
 * with an inline `onerror` handler, and escaped `script` markup.
 * Both executable pieces call `window.___test()`, which flips
 * `window.___testCalled`, so the last spec can check that nothing ran.
 */
describe("Safty parse from HTML text to DOM", function () {
    describe("parseHTMLToSafeDOM", function () {
        var UNTRUSTED_HTML = [
            "<script>window.___test()</script><img src='' onerror='window.___test()'/>",
            "&lt;script&gt;window.___test()&lt;/script&gt;"
        ].join("");

        var fragment = null;

        before(function () {
            // Install the tripwire before parsing, so any scripting fired
            // during the parse is recorded.
            window.___testCalled = false;
            window.___test = function () {
                window.___testCalled = true;
            };

            fragment = SafeDOM.parseHTMLToSafeDOM(UNTRUSTED_HTML);
        });

        after(function () {
            // Drop the parsed result and the tripwire globals.
            fragment = null;
            delete window.___testCalled;
            delete window.___test;
        });

        it("instance of DocumentFragment", function () {
            assert(fragment instanceof DocumentFragment);
        });

        it("the first element of result should be HTMLImageElement", function () {
            var head = fragment.firstChild;
            assert(head instanceof HTMLImageElement);
        });

        it("remove all `script` elements", function () {
            var scripts = fragment.querySelectorAll("script");
            assert.strictEqual(scripts.length, 0);
        });

        it("remove all event handlers", function () {
            var withHandlers = fragment.querySelectorAll(EVENT_ATTRS);
            assert.strictEqual(withHandlers.length, 0);
        });

        it("Don't fire scripting", function () {
            assert.strictEqual(window.___testCalled, false);
        });
    });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment