Skip to content

Instantly share code, notes, and snippets.

@marihachi
Last active March 21, 2023 15:07
Show Gist options
  • Save marihachi/9a2123aa8770ea05242b298033518cd6 to your computer and use it in GitHub Desktop.
Save marihachi/9a2123aa8770ea05242b298033518cd6 to your computer and use it in GitHub Desktop.
パーサーの例
const T_EOF = 'EOF'; // end of input
const T_NUM = 'NUM'; // [0-9]+
const T_ADD = 'ADD'; // "+"
const T_SUB = 'SUB'; // "-"
const T_MUL = 'MUL'; // "*"
const T_DIV = 'DIV'; // "/"
/**
* @param {string} input
*/
function tokenize(input) {
let pos = 0;
const tokens = [];
while (pos < input.length) {
if (input[pos] == '+') {
tokens.push([T_ADD]);
pos += 1;
continue;
}
if (input[pos] == '-') {
tokens.push([T_SUB]);
pos += 1;
continue;
}
if (input[pos] == '*') {
tokens.push([T_MUL]);
pos += 1;
continue;
}
if (input[pos] == '/') {
tokens.push([T_DIV]);
pos += 1;
continue;
}
const match = /^([0-9]+)/.exec(input.slice(pos));
if (match != null) {
const value = match[1];
tokens.push([T_NUM, value]);
pos += value.length;
continue;
}
throw new Error('invalid input');
}
tokens.push([T_EOF]);
return tokens;
}
function test() {
let tokens;
tokens = tokenize('1+2');
console.log(tokens);
// => [ [ 'NUM', '1' ], [ 'ADD' ], [ 'NUM', '2' ], ['EOF'] ]
tokens = tokenize('2-1');
console.log(tokens);
// => [ [ 'NUM', '2' ], [ 'SUB' ], [ 'NUM', '1' ], ['EOF'] ]
tokens = tokenize('2+2*3*2-10');
console.log(tokens);
/* => [
[ 'NUM', '2' ],
[ 'ADD' ],
[ 'NUM', '2' ],
[ 'MUL' ],
[ 'NUM', '3' ],
[ 'MUL' ],
[ 'NUM', '2' ],
[ 'SUB' ],
[ 'NUM', '10' ],
['EOF']
] */
}
test();
// tokens
const T_EOF = 'EOF'; // end of input
const T_EQ = 'EQ'; // "="
const T_SEMI = 'SEMI'; // ";"
const T_NUM = 'NUM'; // [0-9]+
const T_NAME = 'NAME'; // [A-Za-z0-9_]+
/**
* @param {string} input
* @returns {[string, string | undefined]}
*/
function tokenize(input) {
let pos = 0;
const tokens = [];
let match;
while (pos < input.length) {
// skip spaces
match = /^([ \r\n\t]+)/.exec(input.slice(pos));
if (match != null) {
pos += match[1].length;
continue;
}
// EQ
if (input[pos] == '=') {
tokens.push([T_EQ]);
pos += 1;
continue;
}
// SEMI
if (input[pos] == ';') {
tokens.push([T_SEMI]);
pos += 1;
continue;
}
// NUM
match = /^([0-9]+)/.exec(input.slice(pos));
if (match != null) {
const value = match[1];
tokens.push([T_NUM, value]);
pos += value.length;
continue;
}
// NAME
match = /^([A-Za-z0-9_]+)/.exec(input.slice(pos));
if (match != null) {
const value = match[1];
tokens.push([T_NAME, value]);
pos += value.length;
continue;
}
throw new Error('invalid input');
}
tokens.push([T_EOF]);
return tokens;
}
function testTokenize() {
let tokens;
tokens = tokenize('abc = 123;');
console.log(tokens);
// => [ [ 'NAME', 'abc' ], [ 'EQ' ], [ 'NUM', '123' ], [ 'SEMI' ], [ 'EOF' ] ]
}
// parse
class ParseContext {
/** @param {[string, string | undefined][]} tokens */
constructor(tokens) {
this.tokens = tokens;
this.pos = 0;
}
getToken() {
return this.tokens[this.pos];
}
/** @param {string} tokenKind */
expect(tokenKind) {
const token = this.getToken();
if (token[0] != tokenKind) {
throw new Error('unexpected token');
}
return token[1];
}
next() {
if (this.getToken()[0] != T_EOF) {
this.pos += 1;
}
}
}
/**
* @param {ParseContext} p
*/
function parse(p) {
const name = parseName(p);
// "=" token
p.expect(T_EQ);
p.next();
const value = parseNum(p);
// ";" token
p.expect(T_SEMI);
p.next();
return { kind: 'assign', name, value };
}
/**
* @param {ParseContext} p
* @return {string}
*/
function parseName(p) {
// name token
const value = p.expect(T_NAME);
p.next();
return value;
}
/**
* @param {ParseContext} p
* @return {number}
*/
function parseNum(p) {
// number token
const value = p.expect(T_NUM);
p.next();
return Number(value);
}
function testParse() {
const tokens = tokenize('abc = 123;');
const ast = parse(new ParseContext(tokens));
console.log(ast);
// => { kind: 'assign', name: 'abc', value: 123 }
}
testParse();
// * add block statement
// * change expression
// tokens
const T_EOF = 'EOF'; // end of input
const T_EQ = 'EQ'; // "="
const T_SEMI = 'SEMI'; // ";"
const T_BEGIN_BLOCK = 'BEGIN_BLOCK'; // "{"
const T_END_BLOCK = 'END_BLOCK'; // "}"
const T_NUM = 'NUM'; // [0-9]+
const T_NAME = 'NAME'; // [A-Za-z0-9_]+
/**
* @param {string} input
* @returns {[string, string | undefined]}
*/
function tokenize(input) {
let pos = 0;
const tokens = [];
let match;
while (pos < input.length) {
// skip spaces
match = /^([ \r\n\t]+)/.exec(input.slice(pos));
if (match != null) {
pos += match[1].length;
continue;
}
// EQ
if (input[pos] == '=') {
tokens.push([T_EQ]);
pos += 1;
continue;
}
// SEMI
if (input[pos] == ';') {
tokens.push([T_SEMI]);
pos += 1;
continue;
}
// BEGIN_BLOCK
if (input[pos] == '{') {
tokens.push([T_BEGIN_BLOCK]);
pos += 1;
continue;
}
// END_BLOCK
if (input[pos] == '}') {
tokens.push([T_END_BLOCK]);
pos += 1;
continue;
}
// NUM
match = /^([0-9]+)/.exec(input.slice(pos));
if (match != null) {
const value = match[1];
tokens.push([T_NUM, value]);
pos += value.length;
continue;
}
// NAME
match = /^([A-Za-z0-9_]+)/.exec(input.slice(pos));
if (match != null) {
const value = match[1];
tokens.push([T_NAME, value]);
pos += value.length;
continue;
}
throw new Error('invalid input');
}
tokens.push([T_EOF]);
return tokens;
}
function testTokenize() {
let tokens;
tokens = tokenize('abc = 123;');
console.log(tokens);
// => [ [ 'NAME', 'abc' ], [ 'EQ' ], [ 'NUM', '123' ], [ 'SEMI' ], ['EOF'] ]
}
// parse
class ParseContext {
/** @param {[string, string | undefined][]} tokens */
constructor(tokens) {
this.tokens = tokens;
this.pos = 0;
}
getToken() {
return this.tokens[this.pos];
}
/** @param {string} tokenKind */
expect(tokenKind) {
const token = this.getToken();
if (token[0] != tokenKind) {
throw new Error('unexpected token');
}
return token[1];
}
next() {
if (this.getToken()[0] != T_EOF) {
this.pos += 1;
}
}
}
/**
* @param {ParseContext} p
* @return {ReturnType<parseStatement>[]}
*/
function parse(p) {
// statement list
const body = [];
while (p.getToken()[0] != T_EOF) {
body.push(parseStatement(p));
}
return body;
}
/**
* @param {ParseContext} p
* @return {ReturnType<parseAssign> | ReturnType<parseBlock>}
*/
function parseStatement(p) {
switch (p.getToken()[0]) {
case T_NAME: {
return parseAssign(p);
}
case T_BEGIN_BLOCK: {
return parseBlock(p);
}
}
throw new Error('unexpected token');
}
/**
* @param {ParseContext} p
* @return {{ kind: 'assign', name: ReturnType<parseName>, value: ReturnType<parseExpr> }}
*/
function parseAssign(p) {
const name = parseName(p);
// "=" token
p.expect(T_EQ);
p.next();
const value = parseExpr(p);
// ";" token
p.expect(T_SEMI);
p.next();
return { kind: 'assign', name, value };
}
/**
* @param {ParseContext} p
* @return {{ kind: 'block', body: ReturnType<parseStatement>[] }}
*/
function parseBlock(p) {
// "{" token
p.expect(T_BEGIN_BLOCK);
p.next();
// statement list
const body = [];
while (p.getToken()[0] != T_END_BLOCK) {
body.push(parseStatement(p));
}
// "}" token
p.expect(T_END_BLOCK);
p.next();
return { kind: 'block', body };
}
/**
* @param {ParseContext} p
* @return {ReturnType<parseName> | ReturnType<parseNum>}
*/
function parseExpr(p) {
switch (p.getToken()[0]) {
case T_NAME: {
return parseName(p);
}
case T_NUM: {
return parseNum(p);
}
}
throw new Error('unexpected token');
}
/**
* @param {ParseContext} p
* @return {{ kind: 'name', value: string }}
*/
function parseName(p) {
// name token
const value = p.expect(T_NAME);
p.next();
return { kind: 'name', value };
}
/**
* @param {ParseContext} p
* @return {{ kind: 'num', value: number }}
*/
function parseNum(p) {
// number token
const value = p.expect(T_NUM);
p.next();
return { kind: 'num', value: Number(value) };
}
function testParse_1() {
const tokens = tokenize('abc = 123;');
const ast = parse(new ParseContext(tokens));
console.log(JSON.stringify(ast));
// => [{ "kind": "assign", "name": { "kind": "name", "value": "abc" }, "value": { "kind": "num", "value": 123 }}]
}
testParse_1();
function testParse_2() {
const tokens = tokenize('{ abc = 123; xyz = 456; aaa = xyz; }');
const ast = parse(new ParseContext(tokens));
console.log(JSON.stringify(ast));
/* => [{ "kind": "block", "body": [
{
"kind": "assign",
"name": { "kind": "name", "value": "abc" },
"value": { "kind": "num", "value": 123 }
},
{
"kind": "assign",
"name": { "kind": "name", "value": "xyz" },
"value": { "kind": "num", "value": 456 }
},
{
"kind": "assign",
"name": { "kind": "name", "value": "aaa" },
"value": { "kind": "name", "value": "xyz" }
}
]}] */
}
testParse_2();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment