Skip to content

Instantly share code, notes, and snippets.

@wintercn
Last active August 5, 2019 11:16
Show Gist options
  • Star 40 You must be signed in to star a gist
  • Fork 8 You must be signed in to fork a gist
  • Save wintercn/5618683 to your computer and use it in GitHub Desktop.
Save wintercn/5618683 to your computer and use it in GitHub Desktop.
HTML语法分析器模型
/**
 * Token type for an opening tag.
 * The constructor itself sets nothing; HTMLLexicalParser creates instances
 * and then assigns `name` plus one property per parsed attribute.
 */
function StartTagToken(){
}
/**
 * Token type for a closing tag.
 * The constructor itself sets nothing; HTMLLexicalParser creates instances
 * and then assigns `name` on them.
 */
function EndTagToken(){
}
/**
 * Scratch record for the attribute currently being tokenized.
 * The constructor itself sets nothing; HTMLLexicalParser assigns `name` and
 * `value` on each instance while it reads the attribute.
 */
function Attribute(){
}
/**
 * Character-level HTML tokenizer implemented as a state machine.
 *
 * Each state is a function that takes one character and returns the state
 * that should handle the next character. Every state always returns a state
 * function, so malformed input can never leave the machine without a handler.
 *
 * Emitted tokens (delivered through `syntaxer.receiveInput(token)`):
 *   - a one-character string for every character of text outside tags;
 *   - a StartTagToken / EndTagToken carrying `name` (lower-cased) and one
 *     own property per attribute (`token[attributeName] = attributeValue`).
 * A self-closing start tag (`<br/>`) is emitted as a StartTagToken immediately
 * followed by a synthetic EndTagToken with the same name.
 *
 * NOTE(review): attributes are stored directly on the token, so an attribute
 * literally called `name` overwrites the tag name. Changing that would alter
 * the token shape consumers rely on, so it is left as is.
 *
 * @param {{receiveInput: function(*): void}} syntaxer - consumer of tokens.
 */
function HTMLLexicalParser(syntaxer){
    // CR is accepted in addition to tab/space/FF/LF so that CRLF input does
    // not leak "\r" into tag or attribute names.
    var WHITESPACE = /[\t \f\n\r]/;
    var ASCII_LETTER = /[A-Za-z]/;
    var ASCII_UPPER = /[A-Z]/;

    var token;      // tag token under construction
    var attribute;  // attribute under construction

    function emitToken(t){
        syntaxer.receiveInput(t);
    }

    function error(){
        console.log("error");
    }

    // Lower-case ASCII letters only; every other character is kept verbatim.
    function asciiLower(c){
        return ASCII_UPPER.test(c) ? c.toLowerCase() : c;
    }

    // Begin a new attribute whose name starts with `c`.
    function startAttribute(c){
        attribute = new Attribute();
        attribute.name = asciiLower(c);
        attribute.value = "";
        return attributeNameState;
    }

    // Store the finished attribute on the current tag token.
    function commitAttribute(){
        token[attribute.name] = attribute.value;
    }

    // Emit the current tag token and go back to reading text.
    function emitTag(){
        emitToken(token);
        return dataState;
    }

    // State functions.

    function dataState(c){
        if (c === "<") {
            return tagOpenState;
        }
        emitToken(c);
        return dataState;
    }

    function tagOpenState(c){
        if (c === "/") {
            return endTagOpenState;
        }
        if (ASCII_LETTER.test(c)) {
            token = new StartTagToken();
            token.name = asciiLower(c);
            return tagNameState;
        }
        if (c === "?") {
            return bogusCommentState;
        }
        error();
        return dataState;
    }

    function endTagOpenState(c){
        if (ASCII_LETTER.test(c)) {
            token = new EndTagToken();
            token.name = asciiLower(c);
            return tagNameState;
        }
        error();
        return c === ">" ? dataState : bogusCommentState;
    }

    function tagNameState(c){
        if (WHITESPACE.test(c)) {
            return beforeAttributeNameState;
        }
        if (c === "/") {
            return selfClosingStartTagState;
        }
        if (c === ">") {
            return emitTag();
        }
        // Any other character (upper-case letters, digits, "-", ...) is part
        // of the name, so tags such as <h1> or <DIV> are tokenized correctly.
        token.name += asciiLower(c);
        return tagNameState;
    }

    function beforeAttributeNameState(c){
        if (WHITESPACE.test(c)) {
            return beforeAttributeNameState;
        }
        if (c === "/") {
            return selfClosingStartTagState;
        }
        if (c === ">") {
            return emitTag();
        }
        if (c === "\"" || c === "'" || c === "<" || c === "=") {
            // Malformed, but recoverable: report it and treat the character
            // as the first character of an attribute name.
            error();
        }
        return startAttribute(c);
    }

    function attributeNameState(c){
        if (c === "/") {
            commitAttribute();
            return selfClosingStartTagState;
        }
        if (c === ">") {
            commitAttribute();
            return emitTag();
        }
        if (c === "=") {
            return beforeAttributeValueState;
        }
        if (WHITESPACE.test(c)) {
            return afterAttributeNameState;
        }
        attribute.name += asciiLower(c);
        return attributeNameState;
    }

    function afterAttributeNameState(c){
        if (c === "/") {
            commitAttribute();
            return selfClosingStartTagState;
        }
        if (c === ">") {
            commitAttribute();
            return emitTag();
        }
        if (c === "=") {
            return beforeAttributeValueState;
        }
        if (WHITESPACE.test(c)) {
            return afterAttributeNameState;
        }
        // A new attribute begins: keep the previous value-less one
        // (e.g. `disabled` in `<input disabled checked>`).
        commitAttribute();
        return startAttribute(c);
    }

    function beforeAttributeValueState(c){
        if (c === "\"") {
            return attributeValueDoubleQuotedState;
        }
        if (c === "'") {
            return attributeValueSingleQuotedState;
        }
        if (WHITESPACE.test(c)) {
            return beforeAttributeValueState;
        }
        if (c === ">") {
            // `<a href=>`: missing value; keep the empty value and close the tag.
            error();
            commitAttribute();
            return emitTag();
        }
        attribute.value += c;
        return attributeValueUnquotedState;
    }

    function attributeValueDoubleQuotedState(c){
        if (c === "\"") {
            commitAttribute();
            return beforeAttributeNameState;
        }
        attribute.value += c;
        return attributeValueDoubleQuotedState;
    }

    function attributeValueSingleQuotedState(c){
        if (c === "'") {
            commitAttribute();
            return beforeAttributeNameState;
        }
        attribute.value += c;
        return attributeValueSingleQuotedState;
    }

    function attributeValueUnquotedState(c){
        if (WHITESPACE.test(c)) {
            commitAttribute();
            return beforeAttributeNameState;
        }
        if (c === ">") {
            commitAttribute();
            return emitTag();
        }
        attribute.value += c;
        return attributeValueUnquotedState;
    }

    function selfClosingStartTagState(c){
        var endToken;
        if (c === ">") {
            emitToken(token);
            // Only a start tag needs a synthetic matching end tag.
            if (token instanceof StartTagToken) {
                endToken = new EndTagToken();
                endToken.name = token.name;
                emitToken(endToken);
            }
            return dataState;
        }
        // A "/" that is not followed by ">" is a stray slash: report it and
        // let the attribute machinery handle the character.
        error();
        return beforeAttributeNameState(c);
    }

    function bogusCommentState(c){
        return c === ">" ? dataState : bogusCommentState;
    }

    var state = dataState;

    /**
     * Feed the next input character to the tokenizer.
     * @param {string} char - exactly one character of input.
     */
    this.receiveInput = function(char) {
        state = state(char);
    };

    /** Return to the initial (text) state. */
    this.reset = function(){
        state = dataState;
    };
}
/**
 * Tree node built from a tag token.
 * Every enumerable property of `token` (inherited ones included) is copied
 * onto the node, after which `childNodes` is set to a fresh empty array.
 *
 * @param {Object} token - source of the properties to copy.
 */
function Element(token) {
    var self = this;
    var key;
    // for...in on purpose: it also walks inherited enumerable properties.
    for (key in token) {
        self[key] = token[key];
    }
    // Assigned last so a copied `childNodes` property never survives.
    self.childNodes = [];
}
/**
 * Tree node holding character data.
 *
 * @param {string} [value] - initial text; any falsy value becomes "".
 */
function Text(value) {
    this.value = value ? value : "";
}
/**
 * Tree builder: turns the token stream into a tree of Element / Text nodes
 * rooted at an Element named "document".
 *
 * Token kinds are recognised by `token.constructor.name`:
 *   - "String"        : character data, appended to the open Text node or
 *                       placed in a new Text child of the open element;
 *   - "StartTagToken" : opens a new Element child (copied from the token);
 *   - "EndTagToken"   : closes the innermost open element.
 * End tags are not matched by name; each one closes whatever is innermost.
 * An end tag with nothing left to close is ignored, so the root document
 * is never removed from the stack.
 */
function HTMLSyntaticalParser(){
    var root = new Element({name:"document"});
    // Open nodes, innermost last. stack[0] is always `root`.
    var stack = [root];

    function current(){
        return stack[stack.length-1];
    }

    // Append `node` to the innermost open node and make it the new innermost.
    function open(node){
        current().childNodes.push(node);
        stack.push(node);
    }

    /**
     * Consume one token from the lexer.
     * @param {*} token - a string, StartTagToken or EndTagToken.
     */
    this.receiveInput = function(token) {
        var kind = token.constructor.name;

        if (kind == "String") {
            if (current().constructor.name == "Text") {
                current().value += token;
            }
            else {
                open(new Text(token));
            }
            return;
        }

        // Any non-text token ends the currently open run of text.
        if (current().constructor.name == "Text") {
            stack.pop();
        }

        if (kind == "StartTagToken") {
            open(new Element(token));
        }
        else if (kind == "EndTagToken") {
            // Keep the root: a stray end tag must not empty the stack.
            if (stack.length > 1) {
                stack.pop();
            }
        }
    };

    /**
     * @returns {Element} the root "document" element.
     */
    this.getOutput = function(){
        return root;
    };
}
@yisibl
Copy link

yisibl commented May 21, 2013

沙发啊!

@Gaubee
Copy link

Gaubee commented May 27, 2013

连个注释都没有……

@wintercn
Copy link
Author

@Gaubee 这有什么好注释的 无非是机械的if else

@chibin58
Copy link

有幸研究研究。。。

@yuanyuanlife
Copy link

唔 又是用 JavaScript 写的语法解析吗?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment