HTML语法分析器模型
/**
 * Token type for an opening tag such as `<div id="x">`.
 *
 * The constructor creates an empty object; the tokenizer assigns `name`
 * and one property per attribute after construction. Consumers identify
 * the token through `constructor.name`, so the function name must stay
 * `StartTagToken`.
 */
function StartTagToken() {
}
/**
 * Token type for a closing tag such as `</div>`.
 *
 * The constructor creates an empty object; the tokenizer assigns `name`
 * after construction. Consumers identify the token through
 * `constructor.name`, so the function name must stay `EndTagToken`.
 */
function EndTagToken() {
}
/**
 * Scratch record for the attribute currently being tokenized.
 *
 * The constructor creates an empty object; the tokenizer assigns `name`
 * and `value` while it reads the attribute and then copies the pair onto
 * the current tag token.
 */
function Attribute() {
}
/**
 * Character-at-a-time HTML tokenizer implemented as a state machine.
 *
 * Every state is a function that takes one character and returns the state
 * to use for the next character. Text characters are forwarded to the
 * syntaxer one by one as plain strings; tags are forwarded as
 * StartTagToken / EndTagToken objects carrying a lower-cased `name` plus
 * one property per attribute.
 *
 * Note: attributes are stored directly on the token, so an attribute called
 * `name` overwrites the tag name.
 *
 * @param {{receiveInput: function(*): void}} syntaxer - consumer that is
 *     handed every emitted token.
 */
function HTMLLexicalParser(syntaxer) {
    var WHITESPACE = /[\t \f\n]/;
    var LETTER = /[A-Za-z]/;

    var token;      // tag token currently being built
    var attribute;  // attribute currently being built, or null when none is pending
    var state;

    function emitToken(tok) {
        syntaxer.receiveInput(tok);
    }

    function error() {
        console.log("error");
    }

    // Copies the pending attribute (if any) onto the current tag token.
    function commitAttribute() {
        if (attribute) {
            token[attribute.name] = attribute.value;
            attribute = null;
        }
    }

    // Finishes the current tag: flushes the pending attribute, emits the tag
    // and goes back to reading text.
    function emitTag() {
        commitAttribute();
        emitToken(token);
        return dataState;
    }

    // Starts a new tag token of the given type whose name begins with c.
    function startTag(TokenType, c) {
        token = new TokenType();
        token.name = c.toLowerCase();
        attribute = null; // never carry an attribute over from a previous tag
        return tagNameState;
    }

    // Starts a new attribute whose name begins with c.
    function startAttribute(c) {
        attribute = new Attribute();
        attribute.name = c.toLowerCase();
        attribute.value = "";
        return attributeNameState;
    }

    // State functions. consumeReference (character references) is not implemented.

    function dataState(c) {
        if (c === "<") {
            return tagOpenState;
        }
        emitToken(c);
        return dataState;
    }

    function tagOpenState(c) {
        if (c === "/") {
            return endTagOpenState;
        }
        if (LETTER.test(c)) {
            return startTag(StartTagToken, c);
        }
        if (c === "?") {
            return bogusCommentState;
        }
        // A "<" that does not open a tag is ordinary text: emit it and
        // re-process c as data so that no character is lost.
        error();
        emitToken("<");
        return dataState(c);
    }

    function endTagOpenState(c) {
        if (LETTER.test(c)) {
            return startTag(EndTagToken, c);
        }
        error();
        return c === ">" ? dataState : bogusCommentState;
    }

    function tagNameState(c) {
        if (WHITESPACE.test(c)) {
            return beforeAttributeNameState;
        }
        if (c === "/") {
            return selfClosingStartTagState;
        }
        if (c === ">") {
            return emitTag();
        }
        // Any other character (upper/lower-case letters, digits, ...) is
        // part of the tag name.
        token.name += c.toLowerCase();
        return tagNameState;
    }

    function beforeAttributeNameState(c) {
        if (WHITESPACE.test(c)) {
            return beforeAttributeNameState;
        }
        if (c === "/") {
            return selfClosingStartTagState;
        }
        if (c === ">") {
            return emitTag();
        }
        if (c === "\"" || c === "'" || c === "<" || c === "=") {
            // Report the problem but still treat c as the first character
            // of an attribute name so the machine keeps a valid state.
            error();
        }
        return startAttribute(c);
    }

    function attributeNameState(c) {
        if (c === "/") {
            commitAttribute();
            return selfClosingStartTagState;
        }
        if (c === ">") {
            return emitTag();
        }
        if (c === "=") {
            return beforeAttributeValueState;
        }
        if (WHITESPACE.test(c)) {
            return afterAttributeNameState;
        }
        attribute.name += c.toLowerCase();
        return attributeNameState;
    }

    function afterAttributeNameState(c) {
        if (c === "/") {
            commitAttribute();
            return selfClosingStartTagState;
        }
        if (c === ">") {
            return emitTag();
        }
        if (c === "=") {
            return beforeAttributeValueState;
        }
        if (WHITESPACE.test(c)) {
            return afterAttributeNameState;
        }
        // The previous attribute had no value; keep it (as "") before
        // starting the next one.
        commitAttribute();
        return startAttribute(c);
    }

    function beforeAttributeValueState(c) {
        if (c === "\"") {
            return attributeValueDoubleQuotedState;
        }
        if (c === "'") {
            return attributeValueSingleQuotedState;
        }
        if (WHITESPACE.test(c)) {
            return beforeAttributeValueState;
        }
        if (c === ">") {
            // "name=" followed directly by ">": missing value.
            error();
            return emitTag();
        }
        attribute.value += c;
        return attributeValueUnquotedState;
    }

    function attributeValueDoubleQuotedState(c) {
        if (c === "\"") {
            commitAttribute();
            return beforeAttributeNameState;
        }
        attribute.value += c;
        return attributeValueDoubleQuotedState;
    }

    function attributeValueSingleQuotedState(c) {
        if (c === "'") {
            commitAttribute();
            return beforeAttributeNameState;
        }
        attribute.value += c;
        return attributeValueSingleQuotedState;
    }

    function attributeValueUnquotedState(c) {
        if (WHITESPACE.test(c)) {
            commitAttribute();
            return beforeAttributeNameState;
        }
        if (c === ">") {
            return emitTag();
        }
        attribute.value += c;
        return attributeValueUnquotedState;
    }

    function selfClosingStartTagState(c) {
        if (c === ">") {
            // A self-closing tag is reported as a start tag immediately
            // followed by a matching end tag.
            emitToken(token);
            var endToken = new EndTagToken();
            endToken.name = token.name;
            emitToken(endToken);
            return dataState;
        }
        // "/" not followed by ">": report it and carry on reading attributes.
        error();
        return beforeAttributeNameState(c);
    }

    function bogusCommentState(c) {
        return c === ">" ? dataState : bogusCommentState;
    }

    state = dataState;

    /**
     * Feeds the next input character to the state machine.
     * @param {string} char - a single character of the HTML source.
     */
    this.receiveInput = function (char) {
        //visualizer.visualize(state.name);
        state = state(char);
    };

    /** Returns to the initial (text) state and drops any half-built tag. */
    this.reset = function () {
        state = dataState;
        token = undefined;
        attribute = null;
    };
}
/**
 * Tree node for an element.
 *
 * Copies every own enumerable property of the tag token (its `name` and
 * attributes) onto the node and then attaches an empty `childNodes` array.
 *
 * @param {Object} token - tag token (or plain object) to copy from.
 */
function Element(token) {
    var keys = Object.keys(token);
    for (var i = 0; i < keys.length; i++) {
        this[keys[i]] = token[keys[i]];
    }
    // Assigned last so a token property called "childNodes" cannot replace it.
    this.childNodes = [];
}
/**
 * Tree node for a run of character data.
 *
 * @param {string} [value] - initial text; any falsy value becomes "".
 */
function Text(value) {
    this.value = value || "";
}
/**
 * Tree builder that turns the token stream into a tree of Element and
 * Text nodes rooted at a synthetic element named "document".
 *
 * A stack of open nodes is kept; the innermost open node is on top. While
 * characters are arriving, the Text node collecting them sits on top of the
 * stack and is removed as soon as a tag token arrives.
 */
function HTMLSyntaticalParser() {
    var stack = [new Element({name: "document"})];

    function top() {
        return stack[stack.length - 1];
    }

    /**
     * Consumes one token.
     * @param {string|Object} token - a character (string), or a tag token
     *     whose constructor is named "StartTagToken" or "EndTagToken".
     */
    this.receiveInput = function (token) {
        if (typeof token === "string") {
            if (top() instanceof Text) {
                top().value += token;
            } else {
                var text = new Text(token);
                top().childNodes.push(text);
                stack.push(text);
            }
            return;
        }

        // A tag ends the current run of text.
        if (top() instanceof Text) {
            stack.pop();
        }

        var kind = token.constructor.name;
        if (kind === "StartTagToken") {
            var element = new Element(token);
            top().childNodes.push(element);
            stack.push(element);
        } else if (kind === "EndTagToken") {
            // Close the innermost open element with the same name together
            // with everything opened inside it. Index 0 (the document root)
            // is never closed, and an end tag without a matching open
            // element is ignored.
            for (var i = stack.length - 1; i > 0; i--) {
                if (stack[i].name === token.name) {
                    stack.length = i;
                    break;
                }
            }
        }
    };

    /**
     * @returns {Element} the root "document" element of the tree built so far.
     */
    this.getOutput = function () {
        return stack[0];
    };
}
This comment has been minimized.
This comment has been minimized.
连个注释都没有……
This comment has been minimized.
This comment has been minimized.
@Gaubee 这有什么好注释的,无非是机械的 if else
This comment has been minimized.
This comment has been minimized.
有幸研究研究……
This comment has been minimized.
This comment has been minimized.
唔,又是用 JavaScript 写的语法解析吗?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
沙发啊!