Created
August 17, 2013 00:34
-
-
Save arlolra/6254644 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 465d767c1b0a557bca474e4d97a32f6a24b1acb1 Mon Sep 17 00:00:00 2001 | |
From: Arlo Breault <abreault@wikimedia.org> | |
Date: Fri, 16 Aug 2013 11:32:58 -0700 | |
Subject: [PATCH] diff | |
Change-Id: I04e2a789d04ef2fcf13a694bd398d157e7d2c8b2 | |
--- | |
js/lib/html5/parser.js | 39 ++++++++++++------------ | |
js/lib/html5/parser/in_head_phase.js | 58 ++++++++++-------------------------- | |
js/lib/html5/parser/in_row_phase.js | 4 +-- | |
js/lib/html5/tokenizer.js | 4 +-- | |
js/lib/html5/treebuilder.js | 29 ++++-------------- | |
5 files changed, 44 insertions(+), 90 deletions(-) | |
diff --git a/js/lib/html5/parser.js b/js/lib/html5/parser.js | |
index c71d39f..1a791e8 100644 | |
--- a/js/lib/html5/parser.js | |
+++ b/js/lib/html5/parser.js | |
@@ -3,7 +3,7 @@ var HTML5 = exports.HTML5 = require('../html5'); | |
var events = require('events'); | |
require('./treebuilder'); | |
-require('../mediawiki.HTML5TreeBuilder.node'); | |
+require('./tokenizer'); | |
var Phase = require('./parser/phase').Phase; | |
@@ -28,10 +28,11 @@ var Parser = HTML5.Parser = function HTML5Parser(options) { | |
} | |
if(!this.document) { | |
- var domino = require('../domino') | |
- // pass in an explicit string of html, or else we get the | |
- // default document, which has a doctype node already present | |
- this.document = domino.createDocument('<html></html>'); | |
+ var l3, jsdom | |
+ jsdom = require('jsdom') | |
+ l3 = jsdom.dom.level3.core | |
+ var DOM = jsdom.browserAugmentation(l3) | |
+ this.document = new DOM.Document('html'); | |
} | |
this.tree = new HTML5.TreeBuilder(this.document); | |
@@ -39,25 +40,18 @@ var Parser = HTML5.Parser = function HTML5Parser(options) { | |
Parser.prototype = new events.EventEmitter; | |
-// normally accepts the source | |
-Parser.prototype.parse = function(tokenizer) { | |
- //if(!source) throw(new Error("No source to parse")); | |
- //HTML5.debug('parser.parse', source) | |
- //this.tokenizer = new HTML5.Tokenizer(source, this.document); | |
- | |
- this.tokenizer = tokenizer; | |
- | |
- this.tokenizer.addListener('token', this.do_token.bind(this)); | |
- this.tokenizer.addListener('end', this.emit.bind(this, 'end')); | |
- | |
+Parser.prototype.parse = function(source) { | |
+ if(!source) throw(new Error("No source to parse")); | |
+ HTML5.debug('parser.parse', source) | |
+ this.tokenizer = new HTML5.Tokenizer(source, this.document); | |
this.setup(); | |
- //this.tokenizer.tokenize(); | |
+ this.tokenizer.tokenize(); | |
} | |
Parser.prototype.parse_fragment = function(source, element) { | |
HTML5.debug('parser.parse_fragment', source, element) | |
// FIXME: Check to make sure element is inside document | |
- //this.tokenizer = new HTML5.Tokenizer(source, this.document); | |
+ this.tokenizer = new HTML5.Tokenizer(source, this.document); | |
if(element && element.ownerDocument) { | |
this.setup(element.tagName, null); | |
this.tree.open_elements.push(element); | |
@@ -73,7 +67,7 @@ Parser.prototype.parse_fragment = function(source, element) { | |
this.tree.open_elements.push(this.tree.body_pointer); | |
this.tree.root_pointer = this.tree.body_pointer; | |
} | |
- //this.tokenizer.tokenize(); | |
+ this.tokenizer.tokenize(); | |
} | |
Object.defineProperty(Parser.prototype, 'fragment', { | |
@@ -89,7 +83,6 @@ Parser.prototype.newPhase = function(name) { | |
} | |
Parser.prototype.do_token = function(token) { | |
- //console.warn('d_token: ' + JSON.stringify(token)); | |
var method = 'process' + token.type; | |
switch(token.type) { | |
@@ -123,6 +116,12 @@ Parser.prototype.do_token = function(token) { | |
} | |
Parser.prototype.setup = function(container, encoding) { | |
+ this.tokenizer.addListener('token', function(t) { | |
+ return function(token) { t.do_token(token); }; | |
+ }(this)); | |
+ this.tokenizer.addListener('end', function(t) { | |
+ return function() { t.emit('end'); }; | |
+ }(this)); | |
this.emit('setup', this); | |
var inner_html = !!container; | |
diff --git a/js/lib/html5/parser/in_head_phase.js b/js/lib/html5/parser/in_head_phase.js | |
index 497a471..4a85cf4 100644 | |
--- a/js/lib/html5/parser/in_head_phase.js | |
+++ b/js/lib/html5/parser/in_head_phase.js | |
@@ -72,20 +72,12 @@ p.prototype.startTagTitle = function(name, attributes) { | |
} | |
p.prototype.startTagStyle = function(name, attributes) { | |
- if (this.tree.insert_from_table) { | |
- // SSS FIXME: html5 library assumes that the style tag | |
- // only shows up in head and doesn't check if it needs | |
- // to be fostered. So, we are patching the html5 lib. | |
- this.tree.insert_element_from_table(name, attributes); | |
- this.tree.pop_element(); | |
+ if(this.tree.head_pointer && this.parser.phaseName == 'inHead') { | |
+ var element = this.tree.createElement(name, attributes); | |
+ this.appendToHead(element); | |
+ this.tree.open_elements.push(element); | |
} else { | |
- if(this.tree.head_pointer && this.parser.phaseName == 'inHead') { | |
- var element = this.tree.createElement(name, attributes); | |
- this.appendToHead(element); | |
- this.tree.open_elements.push(element); | |
- } else { | |
- this.tree.insert_element(name, attributes); | |
- } | |
+ this.tree.insert_element(name, attributes); | |
} | |
this.parser.tokenizer.content_model = HTML5.Models.CDATA; | |
} | |
@@ -103,44 +95,24 @@ p.prototype.startTagNoScript = function(name, attributes) { | |
} | |
p.prototype.startTagScript = function(name, attributes) { | |
- if (this.tree.insert_from_table) { | |
- // SSS FIXME: html5 library assumes that the style tag | |
- // only shows up in head and doesn't check if it needs | |
- // to be fostered. So, we are patching the html5 lib. | |
- this.tree.insert_element_from_table(name, attributes); | |
- this.tree.pop_element(); | |
+ // XXX Inner HTML case may be wrong | |
+ var element = this.tree.createElement(name, attributes); | |
+ //element.flags.push('parser-inserted'); | |
+ if(this.tree.head_pointer && this.parser.phaseName == 'inHead') { | |
+ this.appendToHead(element); | |
} else { | |
- // XXX Inner HTML case may be wrong | |
- var element = this.tree.createElement(name, attributes); | |
- //element.flags.push('parser-inserted'); | |
- if(this.tree.head_pointer && this.parser.phaseName == 'inHead') { | |
- this.appendToHead(element); | |
- } else { | |
- this.tree.open_elements.last().appendChild(element); | |
- } | |
+ this.tree.open_elements.last().appendChild(element); | |
} | |
this.tree.open_elements.push(element); | |
this.parser.tokenizer.content_model = HTML5.Models.SCRIPT_CDATA; | |
} | |
p.prototype.startTagBaseLinkMeta = function(name, attributes) { | |
- // SSS FIXME: html5 library assumes that base,link,meta | |
- // tags only show up in head and doesn't check if they need | |
- // to be fostered. So, we are patching the html5 lib. | |
- // | |
- // Right now, Parsoid exploits this bug for meta tags. | |
- // So, till Parsoid is fixed to not rely on this bug, | |
- // we'll continue to not to foster meta tags out of tables. | |
- if (name !== 'meta' && this.tree.insert_from_table) { | |
- this.tree.insert_element_from_table(name, attributes); | |
- this.tree.pop_element(); | |
+ var element = this.tree.createElement(name, attributes); | |
+ if(this.tree.head_pointer && this.parser.phaseName == 'inHead') { | |
+ this.appendToHead(element); | |
} else { | |
- var element = this.tree.createElement(name, attributes); | |
- if(this.tree.head_pointer && this.parser.phaseName == 'inHead') { | |
- this.appendToHead(element); | |
- } else { | |
- this.tree.open_elements.last().appendChild(element); | |
- } | |
+ this.tree.open_elements.last().appendChild(element); | |
} | |
} | |
diff --git a/js/lib/html5/parser/in_row_phase.js b/js/lib/html5/parser/in_row_phase.js | |
index 4d1d936..6be7e9d 100644 | |
--- a/js/lib/html5/parser/in_row_phase.js | |
+++ b/js/lib/html5/parser/in_row_phase.js | |
@@ -65,7 +65,7 @@ p.startTagOther = function(name, attributes) { | |
p.endTagTr = function(name) { | |
if(this.ignoreEndTagTr()) { | |
- //assert.ok(this.parser.inner_html); | |
+ assert.ok(this.parser.inner_html); | |
this.parse_error | |
} else { | |
this.clearStackToTableRowContext(); | |
@@ -79,7 +79,7 @@ p.endTagTable = function(name) { | |
this.endTagTr('tr'); | |
// Reprocess the current tag if the tr end tag was not ignored | |
// XXX how are we sure it's always ignored in the inner_html case? | |
- if(!ignoreEndTag) this.parser.phase.processEndTag(name) | |
+ if(!ignoreEndTag) this.parser.phase.processEndTag(name) | |
} | |
p.endTagTableRowGroup = function(name) { | |
diff --git a/js/lib/html5/tokenizer.js b/js/lib/html5/tokenizer.js | |
index bc507d2..6c9ed05 100644 | |
--- a/js/lib/html5/tokenizer.js | |
+++ b/js/lib/html5/tokenizer.js | |
@@ -861,7 +861,7 @@ var t = HTML5.Tokenizer = function HTML5Tokenizer(input, document) { | |
token.name = token.name.toLowerCase(); | |
if(token.data.length != 0) { | |
var data = {}; | |
- // reverse so that the first value for each key wins | |
+ // the first value for each key wins | |
token.data.reverse(); | |
token.data.forEach(function(e) { | |
data[e.nodeName.toLowerCase()] = e.nodeValue; | |
@@ -870,7 +870,7 @@ var t = HTML5.Tokenizer = function HTML5Tokenizer(input, document) { | |
for(var k in data) { | |
token.data.push({nodeName: k, nodeValue: data[k]}); | |
} | |
- // now restore the original attribute order | |
+ // restore original attribute order | |
token.data.reverse(); | |
} | |
} else if(token.type == 'EndTag') { | |
diff --git a/js/lib/html5/treebuilder.js b/js/lib/html5/treebuilder.js | |
index 558de78..bd555a7 100644 | |
--- a/js/lib/html5/treebuilder.js | |
+++ b/js/lib/html5/treebuilder.js | |
@@ -13,35 +13,18 @@ b.prototype.reset = function() { | |
} | |
b.prototype.copyAttributeToElement = function(element, attribute) { | |
- // attributes don't inherit from Node any longer in DOM level 4 | |
if(attribute.nodeType && attribute.nodeType == attribute.ATTRIBUTE_NODE) { | |
- // DOM 3 | |
element.setAttributeNode(attribute.cloneNode()); | |
+ } else { | |
+ try { | |
+ element.setAttribute(attribute.nodeName, attribute.nodeValue) | |
+ } catch(e) { | |
+ console.log("Can't set attribute '" + attribute.nodeName + "' to value '" + attribute.nodeValue + "': (" + e + ')'); | |
+ } | |
if(attribute.namespace) { | |
var at = element.getAttributeNode(attribute.nodeName); | |
at.namespace = attribute.namespace; | |
} | |
- } else { | |
- var name, value, namespace; | |
- try { | |
- if ('namespaceURI' in attribute) { // DOM 4 | |
- name = attribute.name; | |
- value = attribute.value; | |
- namespace = attribute.namespaceURI; | |
- } else { // token | |
- name = attribute.nodeName; | |
- value = attribute.nodeValue; | |
- namespace = attribute.namespace; | |
- } | |
- if (namespace) { | |
- element.setAttributeNS(namespace, name, value); | |
- } else { | |
- element.setAttribute(name, value); | |
- } | |
- } catch(e) { | |
- HTML5.debug('treebuilder.copyAttributes', | |
- "Can't set attribute", name, value, e); | |
- } | |
} | |
} | |
-- | |
1.8.3.4 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment