Skip to content

Instantly share code, notes, and snippets.

@arlolra
Created August 17, 2013 00:34
Show Gist options
  • Save arlolra/6254644 to your computer and use it in GitHub Desktop.
Save arlolra/6254644 to your computer and use it in GitHub Desktop.
From 465d767c1b0a557bca474e4d97a32f6a24b1acb1 Mon Sep 17 00:00:00 2001
From: Arlo Breault <abreault@wikimedia.org>
Date: Fri, 16 Aug 2013 11:32:58 -0700
Subject: [PATCH] diff
Change-Id: I04e2a789d04ef2fcf13a694bd398d157e7d2c8b2
---
js/lib/html5/parser.js | 39 ++++++++++++------------
js/lib/html5/parser/in_head_phase.js | 58 ++++++++++--------------------------
js/lib/html5/parser/in_row_phase.js | 4 +--
js/lib/html5/tokenizer.js | 4 +--
js/lib/html5/treebuilder.js | 29 ++++--------------
5 files changed, 44 insertions(+), 90 deletions(-)
diff --git a/js/lib/html5/parser.js b/js/lib/html5/parser.js
index c71d39f..1a791e8 100644
--- a/js/lib/html5/parser.js
+++ b/js/lib/html5/parser.js
@@ -3,7 +3,7 @@ var HTML5 = exports.HTML5 = require('../html5');
var events = require('events');
require('./treebuilder');
-require('../mediawiki.HTML5TreeBuilder.node');
+require('./tokenizer');
var Phase = require('./parser/phase').Phase;
@@ -28,10 +28,11 @@ var Parser = HTML5.Parser = function HTML5Parser(options) {
}
if(!this.document) {
- var domino = require('../domino')
- // pass in an explicit string of html, or else we get the
- // default document, which has a doctype node already present
- this.document = domino.createDocument('<html></html>');
+ var l3, jsdom
+ jsdom = require('jsdom')
+ l3 = jsdom.dom.level3.core
+ var DOM = jsdom.browserAugmentation(l3)
+ this.document = new DOM.Document('html');
}
this.tree = new HTML5.TreeBuilder(this.document);
@@ -39,25 +40,18 @@ var Parser = HTML5.Parser = function HTML5Parser(options) {
Parser.prototype = new events.EventEmitter;
-// normally accepts the source
-Parser.prototype.parse = function(tokenizer) {
- //if(!source) throw(new Error("No source to parse"));
- //HTML5.debug('parser.parse', source)
- //this.tokenizer = new HTML5.Tokenizer(source, this.document);
-
- this.tokenizer = tokenizer;
-
- this.tokenizer.addListener('token', this.do_token.bind(this));
- this.tokenizer.addListener('end', this.emit.bind(this, 'end'));
-
+Parser.prototype.parse = function(source) {
+ if(!source) throw(new Error("No source to parse"));
+ HTML5.debug('parser.parse', source)
+ this.tokenizer = new HTML5.Tokenizer(source, this.document);
this.setup();
- //this.tokenizer.tokenize();
+ this.tokenizer.tokenize();
}
Parser.prototype.parse_fragment = function(source, element) {
HTML5.debug('parser.parse_fragment', source, element)
// FIXME: Check to make sure element is inside document
- //this.tokenizer = new HTML5.Tokenizer(source, this.document);
+ this.tokenizer = new HTML5.Tokenizer(source, this.document);
if(element && element.ownerDocument) {
this.setup(element.tagName, null);
this.tree.open_elements.push(element);
@@ -73,7 +67,7 @@ Parser.prototype.parse_fragment = function(source, element) {
this.tree.open_elements.push(this.tree.body_pointer);
this.tree.root_pointer = this.tree.body_pointer;
}
- //this.tokenizer.tokenize();
+ this.tokenizer.tokenize();
}
Object.defineProperty(Parser.prototype, 'fragment', {
@@ -89,7 +83,6 @@ Parser.prototype.newPhase = function(name) {
}
Parser.prototype.do_token = function(token) {
- //console.warn('d_token: ' + JSON.stringify(token));
var method = 'process' + token.type;
switch(token.type) {
@@ -123,6 +116,12 @@ Parser.prototype.do_token = function(token) {
}
Parser.prototype.setup = function(container, encoding) {
+ this.tokenizer.addListener('token', function(t) {
+ return function(token) { t.do_token(token); };
+ }(this));
+ this.tokenizer.addListener('end', function(t) {
+ return function() { t.emit('end'); };
+ }(this));
this.emit('setup', this);
var inner_html = !!container;
diff --git a/js/lib/html5/parser/in_head_phase.js b/js/lib/html5/parser/in_head_phase.js
index 497a471..4a85cf4 100644
--- a/js/lib/html5/parser/in_head_phase.js
+++ b/js/lib/html5/parser/in_head_phase.js
@@ -72,20 +72,12 @@ p.prototype.startTagTitle = function(name, attributes) {
}
p.prototype.startTagStyle = function(name, attributes) {
- if (this.tree.insert_from_table) {
- // SSS FIXME: html5 library assumes that the style tag
- // only shows up in head and doesn't check if it needs
- // to be fostered. So, we are patching the html5 lib.
- this.tree.insert_element_from_table(name, attributes);
- this.tree.pop_element();
+ if(this.tree.head_pointer && this.parser.phaseName == 'inHead') {
+ var element = this.tree.createElement(name, attributes);
+ this.appendToHead(element);
+ this.tree.open_elements.push(element);
} else {
- if(this.tree.head_pointer && this.parser.phaseName == 'inHead') {
- var element = this.tree.createElement(name, attributes);
- this.appendToHead(element);
- this.tree.open_elements.push(element);
- } else {
- this.tree.insert_element(name, attributes);
- }
+ this.tree.insert_element(name, attributes);
}
this.parser.tokenizer.content_model = HTML5.Models.CDATA;
}
@@ -103,44 +95,24 @@ p.prototype.startTagNoScript = function(name, attributes) {
}
p.prototype.startTagScript = function(name, attributes) {
- if (this.tree.insert_from_table) {
- // SSS FIXME: html5 library assumes that the style tag
- // only shows up in head and doesn't check if it needs
- // to be fostered. So, we are patching the html5 lib.
- this.tree.insert_element_from_table(name, attributes);
- this.tree.pop_element();
+ // XXX Inner HTML case may be wrong
+ var element = this.tree.createElement(name, attributes);
+ //element.flags.push('parser-inserted');
+ if(this.tree.head_pointer && this.parser.phaseName == 'inHead') {
+ this.appendToHead(element);
} else {
- // XXX Inner HTML case may be wrong
- var element = this.tree.createElement(name, attributes);
- //element.flags.push('parser-inserted');
- if(this.tree.head_pointer && this.parser.phaseName == 'inHead') {
- this.appendToHead(element);
- } else {
- this.tree.open_elements.last().appendChild(element);
- }
+ this.tree.open_elements.last().appendChild(element);
}
this.tree.open_elements.push(element);
this.parser.tokenizer.content_model = HTML5.Models.SCRIPT_CDATA;
}
p.prototype.startTagBaseLinkMeta = function(name, attributes) {
- // SSS FIXME: html5 library assumes that base,link,meta
- // tags only show up in head and doesn't check if they need
- // to be fostered. So, we are patching the html5 lib.
- //
- // Right now, Parsoid exploits this bug for meta tags.
- // So, till Parsoid is fixed to not rely on this bug,
- // we'll continue to not to foster meta tags out of tables.
- if (name !== 'meta' && this.tree.insert_from_table) {
- this.tree.insert_element_from_table(name, attributes);
- this.tree.pop_element();
+ var element = this.tree.createElement(name, attributes);
+ if(this.tree.head_pointer && this.parser.phaseName == 'inHead') {
+ this.appendToHead(element);
} else {
- var element = this.tree.createElement(name, attributes);
- if(this.tree.head_pointer && this.parser.phaseName == 'inHead') {
- this.appendToHead(element);
- } else {
- this.tree.open_elements.last().appendChild(element);
- }
+ this.tree.open_elements.last().appendChild(element);
}
}
diff --git a/js/lib/html5/parser/in_row_phase.js b/js/lib/html5/parser/in_row_phase.js
index 4d1d936..6be7e9d 100644
--- a/js/lib/html5/parser/in_row_phase.js
+++ b/js/lib/html5/parser/in_row_phase.js
@@ -65,7 +65,7 @@ p.startTagOther = function(name, attributes) {
p.endTagTr = function(name) {
if(this.ignoreEndTagTr()) {
- //assert.ok(this.parser.inner_html);
+ assert.ok(this.parser.inner_html);
this.parse_error
} else {
this.clearStackToTableRowContext();
@@ -79,7 +79,7 @@ p.endTagTable = function(name) {
this.endTagTr('tr');
// Reprocess the current tag if the tr end tag was not ignored
// XXX how are we sure it's always ignored in the inner_html case?
- if(!ignoreEndTag) this.parser.phase.processEndTag(name)
+ if(!ignoreEndTag) this.parser.phase.processEndTag(name)
}
p.endTagTableRowGroup = function(name) {
diff --git a/js/lib/html5/tokenizer.js b/js/lib/html5/tokenizer.js
index bc507d2..6c9ed05 100644
--- a/js/lib/html5/tokenizer.js
+++ b/js/lib/html5/tokenizer.js
@@ -861,7 +861,7 @@ var t = HTML5.Tokenizer = function HTML5Tokenizer(input, document) {
token.name = token.name.toLowerCase();
if(token.data.length != 0) {
var data = {};
- // reverse so that the first value for each key wins
+ // the first value for each key wins
token.data.reverse();
token.data.forEach(function(e) {
data[e.nodeName.toLowerCase()] = e.nodeValue;
@@ -870,7 +870,7 @@ var t = HTML5.Tokenizer = function HTML5Tokenizer(input, document) {
for(var k in data) {
token.data.push({nodeName: k, nodeValue: data[k]});
}
- // now restore the original attribute order
+ // restore original attribute order
token.data.reverse();
}
} else if(token.type == 'EndTag') {
diff --git a/js/lib/html5/treebuilder.js b/js/lib/html5/treebuilder.js
index 558de78..bd555a7 100644
--- a/js/lib/html5/treebuilder.js
+++ b/js/lib/html5/treebuilder.js
@@ -13,35 +13,18 @@ b.prototype.reset = function() {
}
b.prototype.copyAttributeToElement = function(element, attribute) {
- // attributes don't inherit from Node any longer in DOM level 4
if(attribute.nodeType && attribute.nodeType == attribute.ATTRIBUTE_NODE) {
- // DOM 3
element.setAttributeNode(attribute.cloneNode());
+ } else {
+ try {
+ element.setAttribute(attribute.nodeName, attribute.nodeValue)
+ } catch(e) {
+ console.log("Can't set attribute '" + attribute.nodeName + "' to value '" + attribute.nodeValue + "': (" + e + ')');
+ }
if(attribute.namespace) {
var at = element.getAttributeNode(attribute.nodeName);
at.namespace = attribute.namespace;
}
- } else {
- var name, value, namespace;
- try {
- if ('namespaceURI' in attribute) { // DOM 4
- name = attribute.name;
- value = attribute.value;
- namespace = attribute.namespaceURI;
- } else { // token
- name = attribute.nodeName;
- value = attribute.nodeValue;
- namespace = attribute.namespace;
- }
- if (namespace) {
- element.setAttributeNS(namespace, name, value);
- } else {
- element.setAttribute(name, value);
- }
- } catch(e) {
- HTML5.debug('treebuilder.copyAttributes',
- "Can't set attribute", name, value, e);
- }
}
}
--
1.8.3.4
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment