Skip to content

Instantly share code, notes, and snippets.

@ichiriac
Created November 25, 2014 15:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ichiriac/f888c99da242fc2e9b52 to your computer and use it in GitHub Desktop.
Save ichiriac/f888c99da242fc2e9b52 to your computer and use it in GitHub Desktop.
Tryout of a manual implementation of a parser
var lex = require('./lexer');
var tokens = require('./tokens');
var names = require('./grammar/tokens');
/**
 * Tells whether a value is (or coerces to) a finite number.
 *
 * The literal characters '.' and ',' are rejected up front; any other value
 * must both start with something parseFloat understands and be finite as a
 * whole once coerced to a number.
 *
 * @param {*} n - value to test
 * @returns {boolean} true when n is numeric, false otherwise
 */
function isNumber(n) {
  if (n == '.' || n == ',') {
    return false;
  }
  var parsed = parseFloat(n);
  return !isNaN(parsed) && isFinite(n);
}
/**
 * Builds the display form of a token for error messages.
 *
 * Numeric tokens are looked up in the `names` table, except the value 1
 * which is reported as the end of file. Anything else is shown wrapped in
 * single quotes.
 *
 * @param {*} token - token to describe
 * @returns {*} the quoted token, the EOF label, or the `names` entry
 */
function getTokenName(token) {
  if (isNumber(token)) {
    return token == 1 ? 'the end of file (EOF)' : names[token];
  }
  return "'" + token + "'";
}
module.exports = {
parser: {
// le lexer
lexer: lex,
token: null,
/** main entry point : converts a source code to AST **/
parse: function(code) {
this.lexer.setInput(code);
var token = this.lexer.lex() || lex.EOF;
var ast = [];
while(token != lex.EOF) {
ast.push(this.read_start(token));
token = this.lexer.lex() || lex.EOF;
}
return ast;
}
/** handling errors **/
,error: function(token, expect) {
token = getTokenName(token);
var msgExpect = '';
if (expect) {
msgExpect = ', expecting ';
if (Array.isArray(expect)) {
for(var i = 0; i < expect.length; i++) {
expect[i] = getTokenName(expect[i]);
}
msgExpect += expect.join(', ');
} else {
msgExpect += getTokenName(expect);
}
}
throw new Error(
'Parse Error : unexpected ' + token + msgExpect,
'\nat line ' + this.lexer.yylloc.first_line
);
}
/** consume the next token **/
,next: function() {
this.token = this.lexer.lex() || this.error(lex.EOF);
return this.token;
}
/** convert an token to ast **/
,read_token: function(token) {
if (isNumber(token)) {
return [token, this.lexer.yytext, this.lexer.yylloc.first_line];
} else {
return token;
}
}
/** helper : reads a list of tokens / sample : T_STRING ',' T_STRING ... **/
,read_list: function(token, item, separator) {
var result = [];
if (token == separator) token = next; // trim separator
if (token != item) {
this.error(token, [item, separator]);
}
result.push(this.lexer.yytext);
this.token = this.lexer.lex() || lex.EOF;
while(this.token != lex.EOF) {
if (this.token != separator) break;
this.token = this.lexer.lex() || lex.EOF; // trim separator
if (this.token != item) break;
result.push(this.lexer.yytext);
this.token = this.lexer.lex() || lex.EOF;
}
return result;
}
/** main entry **/
,read_start: function(token) {
if (token == tokens.T_NAMESPACE) {
return this.read_namespace(token);
} else {
return this.read_top_statement(token);
}
}
/** reading namespaces **/
,read_namespace: function(token) {
if (token != tokens.T_NAMESPACE) this.error(token, tokens.T_NAMESPACE);
token = this.next();
if (token == '{') {
var body = this.read_top_statements(this.next());
if (this.token != '}') this.error(this.token, '}');
return ['namespace', [], body];
} else {
var name = this.read_namespace_name(token);
if (this.token == ';') {
var body = this.read_top_statements(this.next());
if (this.token != lex.EOF) this.error(this.token, lex.EOF);
return ['namespace', name, body];
} else if (this.token == '{') {
var body = this.read_top_statements(this.next());
if (this.token != '}') this.error(this.token, '}');
return ['namespace', name, body];
} else {
this.error(this.token, ['{', ';']);
}
}
}
/** reading a namespace **/
,read_namespace_name: function(token) {
return this.read_list(token, tokens.T_STRING, tokens.T_NS_SEPARATOR);
}
/** reading a list of top statements **/
,read_top_statements: function(token) {
var result = [];
if (token) this.token = token;
while(this.token != lex.EOF) {
result.push(this.read_top_statement(this.token));
this.token = this.lexer.lex() || lex.EOF;
}
return result;
}
/** reading a top statement **/
,read_top_statement: function(token) {
if (token == tokens.T_FUNCTION ) {
return this.read_function(token);
} else if ( token == tokens.T_FINAL) {
var next = this.read();
if (next == tokens.T_INTERFACE) {
return this.read_interface(token);
} else {
return this.read_class(token);
}
} else if ( token == tokens.T_ABSTRACT || token == tokens.T_CLASS) {
return this.read_class(token);
} else if ( token == tokens.T_INTERFACE ) {
return this.read_interface(token);
} else if ( token == tokens.T_TRAIT ) {
return this.read_trait(token);
} else {
return this.read_inner_statement(token);
}
}
/** reads a list of simple inner statements **/
,read_inner_statements: function(token) {
var result = [];
if (token) this.token = token;
while(this.token != lex.EOF) {
result.push(this.read_inner_statement(this.token));
this.token = this.lexer.lex() || lex.EOF;
}
return result;
}
/** reads a simple inner statement **/
,read_inner_statement: function(token) {
if (token == '{') {
var body = this.read_inner_statements(this.next());
if (this.token != '}') this.error(this.token, '}');
this.next();
return body;
} else if (token == '}' ) {
this.error(token);
} else {
return this.read_token(token);
}
}
/** checks if current token is a reference keyword **/
,is_reference: function(token) {
return (token == '&');
}
/** reading a function **/
,read_function: function(token) {
if (token != tokens.T_FUNCTION) this.error(token, tokens.T_FUNCTION);
var isRef = this.is_reference(token);
if (isRef) token = this.next();
if (token != tokens.T_STRING) this.error(token, tokens.T_STRING);
var name = this.lexer.yytext;
if (this.next() != '(') this.error(this.token, '(');
var params = this.read_parameter_list(this.next());
if (this.token != ')') this.error(this.token, ')');
if (this.next() != '{') this.error(this.token, '{');
var body = this.read_inner_statements(this.next());
if (this.token != '}') this.error(this.token, '}');
return ['function', name, params, body, isRef];
}
/** reads a list of parameters **/
,read_parameter_list: function(token) {
}
/** reading a class **/
,read_class: function(token) {
if (token != tokens.T_FINAL) this.error(token, tokens.T_FINAL);
}
/** **/
,read_class_scope: function(token) {
if (token == tokens.T_FINAL) this.error(token, tokens.T_FINAL);
}
/** reading an interface **/
,read_interface: function(token) {
}
/** reading a trait **/
,read_trait: function(token) {
}
}
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment