Last active
August 29, 2015 14:05
-
-
Save tetsuharuohzeki/c7dc40746d693282fe2d to your computer and use it in GitHub Desktop.
DOMParser/DOMImplementation.createHTMLDocumentで安全なDOMの構築. とりあえずPrestoでは使えないし、IE8も確かダメ。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"use strict"; | |
// MIT License
// author: Tetsuharu OHZEKI
/*
 * Parse HTML text into a DOM subtree which does not have scripting items.
 *
 * @param   {string}  aText
 *      The HTML source text to parse.
 * @return  {DocumentFragment}
 *      A fragment created by the current `document` that holds the
 *      sanitized child nodes of the parsed document's body.
 * @throws  {Error}
 *      When `window.opera` is truthy (Opera/Presto); see the design notes.
 *
 * XXX: This does not work on Opera (presto).
 * XXX: Don't serialize the DOM subtree produced by this function back to a string.
 *      If you serialize it, you might accidentally recover a dangerous HTML text.
 *      e.g. `"<bar><foo>"` -> this -> re-serialize -> `"<foo>"`.
 * XXX: Only `script` elements and attributes whose name starts with "on"
 *      are removed here. Nothing else (e.g. `javascript:` URLs) is filtered.
 *
 * Design:
 *  1. Use `DOM::DOMParser` to parse the given HTML text.
 *     `DOMParser.parseFromString()` disables scripting when `text/html` is passed.
 *     cf. http://domparsing.spec.whatwg.org/#dom-domparser-parsefromstring
 *
 *     But Opera (presto) doesn't follow this behavior. It fires scripting :(
 *     This is unsafe for our purpose, so we don't execute any step on presto
 *     and throw instead.
 *
 *  2. Remove all scripting items from the parsed document.
 *
 *  3. Move the nodes which were checked in the previous step
 *     into a `DOM::DocumentFragment`, and return it.
 */
var parseHTMLToSafeDOM = function (aText) {
    if (!!window.opera) {
        // XXX: Opera (presto) has the failure that fires scripting
        // in a document created from DOMParser/DOMImplementation.createHTMLDocument().
        // With that behavior this strategy (parsing HTML text into a DOM subtree)
        // cannot be made safe, so refuse to run at all.
        throw new Error("Safety Reject: Opera (presto) will do accidental firing of scripting in DOMParser.");
    }

    var parser = new DOMParser();
    var doc = parser.parseFromString(aText, "text/html");

    // Both sanitizers mutate the parsed document in place and return it.
    doc = removeScriptElement(doc);
    doc = removeEventHandlerAttr(doc);

    // Hand the sanitized body content over to the live `document`.
    var result = importSubTreeToFragment(document, doc);
    return result;
};
/*
 * Remove all script elements from the document.
 *
 * The document is modified in place and the same object is returned.
 *
 * @param   {Document}  aDoc
 * @return  {Document}
 */
var removeScriptElement = function (aDoc) {
    var list = aDoc.getElementsByTagName("script");
    // Copy the collection into a plain array first, so that removing
    // elements while iterating cannot shift the indexes we walk over.
    var elements = Array.prototype.slice.call(list);
    for (var i = 0, l = elements.length; i < l; ++i) {
        var element = elements[i];
        element.parentNode.removeChild(element);
    }
    return aDoc;
};
/*
 * Remove all attributes which might be an inline event handler.
 *
 * The document is modified in place and the same object is returned.
 *
 * @param   {Document}  aDoc
 * @return  {Document}
 */
var removeEventHandlerAttr = function (aDoc) {
    // This check has false positives on purpose: it matches every attribute
    // whose local name starts with "on" (e.g. "only"), not just the names of
    // known event handlers.
    var startsWithOn = /^on/;

    var list = aDoc.querySelectorAll("*");
    var elements = Array.prototype.slice.call(list);
    for (var i = 0, l = elements.length; i < l; ++i) {
        var elem = elements[i];
        // Snapshot the attribute list, because `removeAttribute()` below
        // changes `elem.attributes` while we are walking over it.
        var attributes = Array.prototype.slice.call(elem.attributes);
        for (var j = 0, k = attributes.length; j < k; ++j) {
            var attr = attributes[j];
            if (startsWithOn.test(attr.localName)) {
                elem.removeAttribute(attr.name);
            }
        }
    }
    return aDoc;
};
/*
 * Move every child node of `aOther`'s body into a new fragment
 * created by `aCurrent`.
 *
 * Each node is passed through `aCurrent.adoptNode()` and then appended
 * to the fragment, in the original order.
 *
 * @param   {Document}  aCurrent
 *      The document which creates the fragment and adopts the nodes.
 * @param   {Document}  aOther
 *      The document whose `body.childNodes` are moved.
 * @return  {DocumentFragment}
 */
var importSubTreeToFragment = function (aCurrent, aOther) {
    var fragment = aCurrent.createDocumentFragment();
    // Snapshot the child list first; adopting a node takes it out of
    // `aOther`, which would otherwise shift the list while we iterate.
    var children = Array.prototype.slice.call(aOther.body.childNodes);
    for (var i = 0, l = children.length; i < l; ++i) {
        var adopted = aCurrent.adoptNode(children[i]);
        fragment.appendChild(adopted);
    }
    return fragment;
};
// Public API of this module (CommonJS style export).
exports.parseHTMLToSafeDOM = parseHTMLToSafeDOM;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"use strict"; | |
var assert = require("power-assert"); | |
//var SafeDOM = require("safe-parse"); | |
// https://dvcs.w3.org/hg/dom3events/raw-file/tip/html/DOM3-Events.html#event-types-list
// XXX: This is a minimum testcase.
//      If we aim to do this perfectly, we should add all event types in relevant specs.
//
// The value is a single CSS selector string: each event type name below
// becomes an attribute selector "[on<type>]", and all of them are joined
// with "," so that one `querySelectorAll()` call matches any of them.
var EVENT_ATTRS = [
    "abort",
    "beforeinput",
    "blur",
    "click",
    "compositionstart",
    "compositionupdate",
    "compositionend",
    "dblclick",
    "error",
    "focus",
    "focusin",
    "focusout",
    "input",
    "keydown",
    "keyup",
    "load",
    "mousedown",
    "mouseenter",
    "mouseleave",
    "mousemove",
    "mouseout",
    "mouseover",
    "mouseup",
    "resize",
    "scroll",
    "select",
    "unload",
    "wheel"
].map(function (el) {
    return "[on" + el + "]";
}).join(",");
// Spec for `parseHTMLToSafeDOM`: parses a hostile snippet once, then checks
// the shape of the result and that none of the embedded scripts has run.
//
// NOTE(review): `SafeDOM` is referenced below, but its `require()` line at the
// top of this file is commented out. Presumably it is provided as a global by
// the test page; confirm before running this under a CommonJS loader.
describe("Safty parse from HTML text to DOM", function () {

    describe("parseHTMLToSafeDOM", function () {
        // Two `script` elements around an `img` whose `onerror` would fire,
        // because its `src` is empty.
        var TEST = "<script>window.___test()</script><img src='' onerror='window.___test()'/>" +
                   "<script>window.___test()</script>";
        var result = null;

        before(function(){
            // Install a probe that records whether any injected script ran.
            window.___testCalled = false;
            window.___test = function () {
                window.___testCalled = true;
            };

            result = SafeDOM.parseHTMLToSafeDOM(TEST);
        });

        after(function(){
            // Drop the probe so that it does not leak into other suites.
            result = null;
            delete window.___testCalled;
            delete window.___test;
        });

        it("instance of DocumentFragment", function () {
            assert(result instanceof DocumentFragment);
        });

        it("the first element of result should be HTMLImageElement", function () {
            var first = result.firstChild;
            assert(first instanceof HTMLImageElement);
        });

        it("remove all `script` elements", function () {
            var list = result.querySelectorAll("script");
            assert.strictEqual(list.length, 0);
        });

        it("remove all event handlers", function () {
            var list = result.querySelectorAll(EVENT_ATTRS);
            assert.strictEqual(list.length, 0);
        });

        it("Don't fire scripting", function () {
            assert.strictEqual(window.___testCalled, false);
        });
    });
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment