import { LexicalGrammar } from "./lexer";
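// Grammar for top-level (non-string) tokens; a second grammar below handles
// lines inside multi-line strings.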
let toplevel = new LexicalGrammar();
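// Maps each opening/closing token to its canonical delimiter pair so that the
// reader-macro openers (e.g. "#(" or "#?@(") can be matched against ")".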
export const canonicalParens = {
    '#?(': '()',
    '#?@(': '()',
    '#(': '()',
    '(': '()',
    ')': '()',
    '#{': '{}',
    '{': '{}',
    '}': '{}',
    '[': '[]',
    ']': '[]'
};
export function validPair(open, close) {
    return canonicalParens[open] == canonicalParens[close];
}
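// Whitespace: spaces, tabs and commas (commas are whitespace in Clojure), plus newlines.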
toplevel.terminal(/[\t ,]+/, (l, m) => ({ type: "ws" }));
toplevel.terminal(/(\r?\n)/, (l, m) => ({ type: "ws" }));
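// Line comments: ";" to end of line.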
toplevel.terminal(/;.*/, (l, m) => ({ type: "comment" }));
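// Opening delimiters (including the reader-macro openers #(, #?(, #?@( and #{) and closing delimiters.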
toplevel.terminal(/\(|\[|\{|#\(|#\?\(|#\{|#\?@\(/, (l, m) => ({ type: "open" }));
toplevel.terminal(/\)|\]|\}/, (l, m) => ({ type: "close" }));
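// Reader punctuation: unquote-splicing, unquote, quote, var-quote, namespaced map, discard, metadata and syntax-quote markers.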
toplevel.terminal(/~@|~|'|#'|#:|#_|\^|`|#|\^:/, (l, m) => ({ type: "punc" }));
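// Literals: true/false/nil, radix integers (e.g. 2r1010 or 16rFF), and decimal numbers with an optional exponent.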
toplevel.terminal(/true|false|nil/, (l, m) => ({ type: "lit" }));
toplevel.terminal(/[0-9]+[rR][0-9a-zA-Z]+/, (l, m) => ({ type: "lit" }));
toplevel.terminal(/[-+]?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/, (l, m) => ({ type: "lit" }));
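// Keywords start with ":"; any other run of non-delimiter characters is an identifier (symbol).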
toplevel.terminal(/:[^()[\]\{\}#,~@'`^\"\s;]*/, (l, m) => ({ type: "kw" }));
toplevel.terminal(/[^()[\]\{\}#,~@'`^\"\s:;][^()[\]\{\}#,~@'`^\"\s;]*/, (l, m) => ({ type: "id" }));
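// Strings, including #"regex" literals: either terminated on this line, or the opening segment of a multi-line string.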
toplevel.terminal(/#?"([^"\\]|\\.)*"/, (l, m) => ({ type: "str" }));
toplevel.terminal(/#?"([^"\\]|\\.)*/, (l, m) => ({ type: "str-start" }));
toplevel.terminal(/./, (l, m) => ({ type: "junk" }));
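// Grammar used while inside a multi-line string: a line either contains the
// closing quote or stays entirely inside the string.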
let multstring = new LexicalGrammar();
multstring.terminal(/([^"\\]|\\.)*"/, (l, m) => ({ type: "str-end" }));
multstring.terminal(/([^"\\]|\\.)+/, (l, m) => ({ type: "str-inside" }));
export class Scanner {
    constructor() {
        this.state = { inString: false };
    }
    // Tokenize a single line, threading the inString flag through `state` so
    // that a string opened on an earlier line keeps lexing correctly here.
    processLine(line, state = this.state) {
        let tks = [];
        this.state = state;
        let lex = (this.state.inString ? multstring : toplevel).lex(line);
        let tk;
        do {
            tk = lex.scan();
            if (tk) {
                let oldpos = lex.position;
                switch (tk.type) {
                    case "str-end":
                        // The string closed on this line: switch back to the
                        // top-level grammar at the current position.
                        this.state = { ...this.state, inString: false };
                        lex = toplevel.lex(line);
                        lex.position = oldpos;
                        break;
                    case "str-start":
                        // An unterminated string opened: lex the rest of the
                        // line with the in-string grammar.
                        this.state = { ...this.state, inString: true };
                        lex = multstring.lex(line);
                        lex.position = oldpos;
                        break;
                }
                tks.push({ ...tk, state: this.state });
            }
        } while (tk);
        tks.push({ type: "eol", raw: "\n", offset: line.length, state: this.state });
        return tks;
    }
}
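// Example usage (illustrative, not part of the original gist): the first line
// opens a string that the second line closes; every token carries the
// in-string state in effect after it was scanned.
//
//   const scanner = new Scanner();
//   const tks1 = scanner.processLine('(def greeting "hello');
//   const tks2 = scanner.processLine('world")');

// ---------------------------------------------------------------------------
// Presumably the contents of the "./lexer" module imported above: a generic
// longest-match lexer driven by the rules of a LexicalGrammar.
// ---------------------------------------------------------------------------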
export class Lexer {
    constructor(source, rules) {
        this.source = source;
        this.rules = rules;
        this.position = 0;
        // Pre-compute every match of every rule over the source, indexed by
        // the position at which the match starts.
        this.positions = new Map();
        this.rules.forEach(rule => {
            rule.r.lastIndex = 0;
            let x = rule.r.exec(source);
            while (x) {
                if (x && x[0]) {
                    // Drop the reference to the full input string and remember
                    // which rule produced this match.
                    x.input = undefined;
                    x["rule"] = rule;
                    let position = rule.r.lastIndex - x[0].length;
                    let values = this.positions.get(position);
                    if (values) {
                        values.push(x);
                        this.positions.set(position, values);
                    }
                    else {
                        this.positions.set(position, [x]);
                    }
                }
                x = rule.r.exec(source);
            }
        });
    }
    // Return the next token, or null at end of input.
    scan() {
        let [token, length] = this.lookup();
        if (token == null) {
            if (this.position == this.source.length) {
                return null;
            }
            // No pre-computed match starts exactly here (e.g. the position was
            // moved into the middle of a longer match), so re-scan from it.
            [token, length] = this.retrieve();
            if (token == null) {
                throw new Error("Unexpected character at " + this.position + ": " + JSON.stringify(this.source));
            }
        }
        this.position += length;
        return token;
    }
    // Pick the longest pre-computed match that starts at the current position.
    lookup() {
        var token = null;
        var length = 0;
        let values = this.positions.get(this.position);
        if (values) {
            values.forEach(x => {
                if (x && x[0].length > length) {
                    token = x["rule"].fn(this, x);
                    token.offset = this.position;
                    token.raw = x[0];
                    length = x[0].length;
                }
            });
        }
        return [token, length];
    }
    // Fallback: run every rule from the current position and keep the longest
    // match that begins exactly there.
    retrieve() {
        var token = null;
        var length = 0;
        this.rules.forEach(rule => {
            rule.r.lastIndex = this.position;
            var x = rule.r.exec(this.source);
            if (x && x[0].length > length && this.position + x[0].length == rule.r.lastIndex) {
                token = rule.fn(this, x);
                token.offset = this.position;
                token.raw = x[0];
                length = x[0].length;
            }
        });
        return [token, length];
    }
}
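// Collects terminal rules and builds Lexer instances over a given source string.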
export class LexicalGrammar {
    constructor() {
        this.rules = [];
    }
    // Register a terminal rule: a pattern plus a function that builds a token
    // from the lexer and the regex match.
    terminal(pattern, fn) {
        this.rules.push({
            r: new RegExp(pattern, "g"),
            fn: fn
        });
    }
    // Create a Lexer over `source` using the rules registered so far.
    lex(source) {
        return new Lexer(source, this.rules);
    }
}