Created
November 25, 2019 06:01
-
-
Save lilactown/98dbbde0c3a9ed70518efedcf9ac6dbb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Clojure tokenizer built on the generic LexicalGrammar/Lexer machinery.
import { LexicalGrammar } from "./lexer";

// Grammar used for everything outside of multi-line strings.
const toplevel = new LexicalGrammar();
// Maps every opening/closing delimiter token (including reader-dispatch
// forms like #( and #?@( ) to the canonical pair it belongs to, so that
// matching delimiters can be compared uniformly.
export const canonicalParens = {
    '#?(': '()',
    '#?@(': '()',
    '#(': '()',
    '(': '()',
    ')': '()',
    '#{': '{}',
    '{': '{}',
    '}': '{}',
    '[': '[]',
    ']': '[]'
};

/**
 * True when `open` and `close` are delimiters of the same canonical kind,
 * e.g. "#(" pairs with ")" and "[" pairs with "]".
 * @param {string} open - opening delimiter token
 * @param {string} close - closing delimiter token
 * @returns {boolean}
 */
export function validPair(open, close) {
    // Strict equality; both lookups yield a canonical-pair string (or
    // undefined for unknown tokens, matching the original behavior).
    return canonicalParens[open] === canonicalParens[close];
}
// Token rules for the top-level (outside-of-string) grammar. The lexer
// selects the longest match at each position, so rule order only breaks
// ties between equal-length matches.
toplevel.terminal(/[\t ,]+/, (l, m) => ({ type: "ws" })); // comma is whitespace in Clojure
toplevel.terminal(/(\r?\n)/, (l, m) => ({ type: "ws" }));
toplevel.terminal(/;.*/, (l, m) => ({ type: "comment" }));
// Opening delimiters, including reader-dispatch forms: ( [ { #( #?( #{ #?@(
toplevel.terminal(/\(|\[|\{|#\(|#\?\(|#\{|#\?@\(/, (l, m) => ({ type: "open" }));
toplevel.terminal(/\)|\]|\}/, (l, m) => ({ type: "close" }));
// Reader punctuation/macros: ~@ ~ ' #' #: #_ ^ ` # ^:
toplevel.terminal(/~@|~|'|#'|#:|#_|\^|`|#|\^:/, (l, m) => ({ type: "punc" }));
toplevel.terminal(/true|false|nil/, (l, m) => ({ type: "lit" }));
toplevel.terminal(/[0-9]+[rR][0-9a-zA-Z]+/, (l, m) => ({ type: "lit" })); // radix literals, e.g. 2r1010
toplevel.terminal(/[-+]?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/, (l, m) => ({ type: "lit" }));
toplevel.terminal(/:[^()[\]\{\}#,~@'`^\"\s;]*/, (l, m) => ({ type: "kw" }));
toplevel.terminal(/[^()[\]\{\}#,~@'`^\"\s:;][^()[\]\{\}#,~@'`^\"\s;]*/, (l, m) => ({ type: "id" }));
// String (or #"regex" literal) that opens and closes on the same line.
toplevel.terminal(/#?"([^"\\]|\\.)*"/, (l, m) => ({ type: "str" }));
// String that opens but does not close on this line; the Scanner switches
// into the in-string grammar when it sees this token.
toplevel.terminal(/#?"([^"\\]|\\.)*/, (l, m) => ({ type: "str-start" }));
toplevel.terminal(/./, (l, m) => ({ type: "junk" })); // catch-all so scanning never stalls
// Grammar used while inside a multi-line string: either the string
// terminates on this line ("str-end") or the whole rest of the line is
// string content ("str-inside").
const multstring = new LexicalGrammar();
multstring.terminal(/([^"\\]|\\.)*"/, (l, m) => ({ type: "str-end" }));
multstring.terminal(/([^"\\]|\\.)+/, (l, m) => ({ type: "str-inside" }));
/**
 * Line-oriented scanner that tokenizes one line at a time while tracking
 * whether a multi-line string remains open across lines. Every emitted
 * token carries a snapshot of the scanner state in effect after it.
 */
export class Scanner {
    constructor() {
        // inString: true while inside an unterminated (multi-line) string.
        this.state = { inString: false };
    }
    /**
     * Tokenize a single line (without its trailing newline).
     * @param {string} line - text of the line
     * @param {{inString: boolean}} [state] - state to resume from; defaults
     *   to the state left behind by the previously processed line.
     * @returns token objects for the line, each annotated with `state`,
     *   terminated by a synthetic "eol" token.
     */
    processLine(line, state = this.state) {
        let tks = [];
        this.state = state;
        // Resume in the in-string grammar if the previous line left a
        // string open; otherwise use the top-level grammar.
        let lex = (this.state.inString ? multstring : toplevel).lex(line);
        let tk;
        do {
            tk = lex.scan();
            if (tk) {
                let oldpos = lex.position;
                switch (tk.type) {
                    case "str-end":
                        // String closed: switch back to the top-level
                        // grammar at the current position.
                        this.state = Object.assign({}, this.state, { inString: false });
                        lex = toplevel.lex(line);
                        lex.position = oldpos;
                        break;
                    case "str-start":
                        // Unterminated string opened: continue scanning
                        // with the in-string grammar.
                        this.state = Object.assign({}, this.state, { inString: true });
                        lex = multstring.lex(line);
                        lex.position = oldpos;
                        break;
                }
                // Each token records the state in effect after it.
                tks.push(Object.assign({}, tk, { state: this.state }));
            }
        } while (tk);
        // Synthetic end-of-line marker so consumers always see a terminator.
        tks.push({ type: "eol", raw: "\n", offset: line.length, state: this.state });
        return tks;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Generic longest-match lexer. All rule regexes are pre-scanned over the
 * whole source in the constructor and their matches indexed by start
 * position, so scan() is usually a table lookup, with retrieve() as a
 * rescanning fallback.
 */
export class Lexer {
    /**
     * @param {string} source - text to tokenize
     * @param {{r: RegExp, fn: Function}[]} rules - global ("g") regexes
     *   paired with token-builder callbacks; the longest match at any
     *   given position wins.
     */
    constructor(source, rules) {
        this.source = source;
        this.rules = rules;
        this.position = 0;
        // Map: start position -> array of regex matches beginning there.
        this.positions = new Map();
        this.rules.forEach(rule => {
            rule.r.lastIndex = 0;
            let x = rule.r.exec(source);
            while (x) {
                if (x && x[0]) {
                    x.input = undefined; // drop the full-source backreference
                    x["rule"] = rule;
                    let position = rule.r.lastIndex - x[0].length;
                    let values = this.positions.get(position);
                    if (values) {
                        values.push(x);
                        this.positions.set(position, values);
                    }
                    else {
                        this.positions.set(position, [x]);
                    }
                }
                // BUGFIX: a zero-length match leaves lastIndex unchanged, so
                // exec() would return the same empty match forever. Advance
                // manually to guarantee termination for rules that can match
                // the empty string.
                if (x[0].length === 0) {
                    rule.r.lastIndex++;
                }
                x = rule.r.exec(source);
            }
        });
    }
    /**
     * Return the next token, advancing past it, or null at end of input.
     * @throws {Error} when no rule matches at the current position.
     */
    scan() {
        let [token, length] = this.lookup();
        if (token == null) {
            if (this.position == this.source.length) {
                return null;
            }
            // Fall back to rescanning in case the pre-scan missed a match
            // starting here (e.g. one overlapped by an earlier match).
            [token, length] = this.retrieve();
            if (token == null) {
                throw new Error("Unexpected character at " + this.position + ": " + JSON.stringify(this.source));
            }
        }
        this.position += length;
        return token;
    }
    /**
     * Find the longest pre-indexed match starting at the current position.
     * @returns {[object|null, number]} token (with offset/raw set) and its length.
     */
    lookup() {
        var token = null;
        var length = 0;
        let values = this.positions.get(this.position);
        if (values) {
            values.forEach(x => {
                if (x && x[0].length > length) {
                    token = x["rule"].fn(this, x);
                    token.offset = this.position;
                    token.raw = x[0];
                    length = x[0].length;
                }
            });
        }
        return ([token, length]);
    }
    /**
     * Re-run every rule anchored at the current position and keep the
     * longest match. Slower than lookup(); used only as a fallback.
     * @returns {[object|null, number]} token (with offset/raw set) and its length.
     */
    retrieve() {
        var token = null;
        var length = 0;
        this.rules.forEach(rule => {
            rule.r.lastIndex = this.position;
            var x = rule.r.exec(this.source);
            // Accept only matches that start exactly at this.position.
            if (x && x[0].length > length && this.position + x[0].length == rule.r.lastIndex) {
                token = rule.fn(this, x);
                token.offset = this.position;
                token.raw = x[0];
                length = x[0].length;
            }
        });
        return ([token, length]);
    }
}
/**
 * A collection of terminal rules from which Lexer instances are created.
 */
export class LexicalGrammar {
    constructor() {
        this.rules = [];
    }
    /**
     * Register a terminal rule.
     * @param {string | RegExp} pattern - terminal pattern; compiled to a
     *   global RegExp either way (the RegExp constructor accepts both a
     *   RegExp and a pattern string, so the original's identical-branch
     *   ternary was redundant).
     * @param {Function} fn - builds the token object for a match.
     */
    terminal(pattern, fn) {
        this.rules.push({
            r: new RegExp(pattern, "g"),
            fn: fn
        });
    }
    /**
     * Create a lexer over `source` using the registered rules.
     * @param {string} source - text to tokenize
     * @returns {Lexer}
     */
    lex(source) {
        return new Lexer(source, this.rules);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment