Created
November 25, 2019 06:01
-
-
Save lilactown/98dbbde0c3a9ed70518efedcf9ac6dbb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Clojure tokenizer built on the generic LexicalGrammar/Lexer machinery.
import { LexicalGrammar } from "./lexer";

// Grammar used for everything outside of multi-line strings.
const toplevel = new LexicalGrammar();
// Maps every opening/closing delimiter token (including reader-dispatch
// forms like #( and #?@( ) to the canonical pair it belongs to, so that
// matching delimiters can be compared uniformly.
export const canonicalParens = {
    '#?(': '()',
    '#?@(': '()',
    '#(': '()',
    '(': '()',
    ')': '()',
    '#{': '{}',
    '{': '{}',
    '}': '{}',
    '[': '[]',
    ']': '[]'
};

/**
 * True when `open` and `close` are delimiters of the same canonical kind,
 * e.g. "#(" pairs with ")" and "[" pairs with "]".
 * @param {string} open - opening delimiter token
 * @param {string} close - closing delimiter token
 * @returns {boolean}
 */
export function validPair(open, close) {
    // Strict equality; both lookups yield a canonical-pair string (or
    // undefined for unknown tokens, matching the original behavior).
    return canonicalParens[open] === canonicalParens[close];
}
// Token rules for the top-level (outside-of-string) grammar. The lexer
// selects the longest match at each position, so rule order only breaks
// ties between equal-length matches.
toplevel.terminal(/[\t ,]+/, (l, m) => ({ type: "ws" })); // comma is whitespace in Clojure
toplevel.terminal(/(\r?\n)/, (l, m) => ({ type: "ws" }));
toplevel.terminal(/;.*/, (l, m) => ({ type: "comment" }));
// Opening delimiters, including reader-dispatch forms: ( [ { #( #?( #{ #?@(
toplevel.terminal(/\(|\[|\{|#\(|#\?\(|#\{|#\?@\(/, (l, m) => ({ type: "open" }));
toplevel.terminal(/\)|\]|\}/, (l, m) => ({ type: "close" }));
// Reader punctuation/macros: ~@ ~ ' #' #: #_ ^ ` # ^:
toplevel.terminal(/~@|~|'|#'|#:|#_|\^|`|#|\^:/, (l, m) => ({ type: "punc" }));
toplevel.terminal(/true|false|nil/, (l, m) => ({ type: "lit" }));
toplevel.terminal(/[0-9]+[rR][0-9a-zA-Z]+/, (l, m) => ({ type: "lit" })); // radix literals, e.g. 2r1010
toplevel.terminal(/[-+]?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/, (l, m) => ({ type: "lit" }));
toplevel.terminal(/:[^()[\]\{\}#,~@'`^\"\s;]*/, (l, m) => ({ type: "kw" }));
toplevel.terminal(/[^()[\]\{\}#,~@'`^\"\s:;][^()[\]\{\}#,~@'`^\"\s;]*/, (l, m) => ({ type: "id" }));
// String (or #"regex" literal) that opens and closes on the same line.
toplevel.terminal(/#?"([^"\\]|\\.)*"/, (l, m) => ({ type: "str" }));
// String that opens but does not close on this line; the Scanner switches
// into the in-string grammar when it sees this token.
toplevel.terminal(/#?"([^"\\]|\\.)*/, (l, m) => ({ type: "str-start" }));
toplevel.terminal(/./, (l, m) => ({ type: "junk" })); // catch-all so scanning never stalls
// Grammar used while inside a multi-line string: either the string
// terminates on this line ("str-end") or the whole rest of the line is
// string content ("str-inside").
const multstring = new LexicalGrammar();
multstring.terminal(/([^"\\]|\\.)*"/, (l, m) => ({ type: "str-end" }));
multstring.terminal(/([^"\\]|\\.)+/, (l, m) => ({ type: "str-inside" }));
/**
 * Line-oriented scanner that tokenizes one line at a time while tracking
 * whether a multi-line string remains open across lines. Every emitted
 * token carries a snapshot of the scanner state in effect after it.
 */
export class Scanner {
    constructor() {
        // inString: true while inside an unterminated (multi-line) string.
        this.state = { inString: false };
    }
    /**
     * Tokenize a single line (without its trailing newline).
     * @param {string} line - text of the line
     * @param {{inString: boolean}} [state] - state to resume from; defaults
     *   to the state left behind by the previously processed line.
     * @returns token objects for the line, each annotated with `state`,
     *   terminated by a synthetic "eol" token.
     */
    processLine(line, state = this.state) {
        let tks = [];
        this.state = state;
        // Resume in the in-string grammar if the previous line left a
        // string open; otherwise use the top-level grammar.
        let lex = (this.state.inString ? multstring : toplevel).lex(line);
        let tk;
        do {
            tk = lex.scan();
            if (tk) {
                let oldpos = lex.position;
                switch (tk.type) {
                    case "str-end":
                        // String closed: switch back to the top-level
                        // grammar at the current position.
                        this.state = Object.assign({}, this.state, { inString: false });
                        lex = toplevel.lex(line);
                        lex.position = oldpos;
                        break;
                    case "str-start":
                        // Unterminated string opened: continue scanning
                        // with the in-string grammar.
                        this.state = Object.assign({}, this.state, { inString: true });
                        lex = multstring.lex(line);
                        lex.position = oldpos;
                        break;
                }
                // Each token records the state in effect after it.
                tks.push(Object.assign({}, tk, { state: this.state }));
            }
        } while (tk);
        // Synthetic end-of-line marker so consumers always see a terminator.
        tks.push({ type: "eol", raw: "\n", offset: line.length, state: this.state });
        return tks;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Generic longest-match lexer. All rule regexes are pre-scanned over the
 * whole source in the constructor and their matches indexed by start
 * position, so scan() is usually a table lookup, with retrieve() as a
 * rescanning fallback.
 */
export class Lexer {
    /**
     * @param {string} source - text to tokenize
     * @param {{r: RegExp, fn: Function}[]} rules - global ("g") regexes
     *   paired with token-builder callbacks; the longest match at any
     *   given position wins.
     */
    constructor(source, rules) {
        this.source = source;
        this.rules = rules;
        this.position = 0;
        // Map: start position -> array of regex matches beginning there.
        this.positions = new Map();
        this.rules.forEach(rule => {
            rule.r.lastIndex = 0;
            let x = rule.r.exec(source);
            while (x) {
                if (x && x[0]) {
                    x.input = undefined; // drop the full-source backreference
                    x["rule"] = rule;
                    let position = rule.r.lastIndex - x[0].length;
                    let values = this.positions.get(position);
                    if (values) {
                        values.push(x);
                        this.positions.set(position, values);
                    }
                    else {
                        this.positions.set(position, [x]);
                    }
                }
                // BUGFIX: a zero-length match leaves lastIndex unchanged, so
                // exec() would return the same empty match forever. Advance
                // manually to guarantee termination for rules that can match
                // the empty string.
                if (x[0].length === 0) {
                    rule.r.lastIndex++;
                }
                x = rule.r.exec(source);
            }
        });
    }
    /**
     * Return the next token, advancing past it, or null at end of input.
     * @throws {Error} when no rule matches at the current position.
     */
    scan() {
        let [token, length] = this.lookup();
        if (token == null) {
            if (this.position == this.source.length) {
                return null;
            }
            // Fall back to rescanning in case the pre-scan missed a match
            // starting here (e.g. one overlapped by an earlier match).
            [token, length] = this.retrieve();
            if (token == null) {
                throw new Error("Unexpected character at " + this.position + ": " + JSON.stringify(this.source));
            }
        }
        this.position += length;
        return token;
    }
    /**
     * Find the longest pre-indexed match starting at the current position.
     * @returns {[object|null, number]} token (with offset/raw set) and its length.
     */
    lookup() {
        var token = null;
        var length = 0;
        let values = this.positions.get(this.position);
        if (values) {
            values.forEach(x => {
                if (x && x[0].length > length) {
                    token = x["rule"].fn(this, x);
                    token.offset = this.position;
                    token.raw = x[0];
                    length = x[0].length;
                }
            });
        }
        return ([token, length]);
    }
    /**
     * Re-run every rule anchored at the current position and keep the
     * longest match. Slower than lookup(); used only as a fallback.
     * @returns {[object|null, number]} token (with offset/raw set) and its length.
     */
    retrieve() {
        var token = null;
        var length = 0;
        this.rules.forEach(rule => {
            rule.r.lastIndex = this.position;
            var x = rule.r.exec(this.source);
            // Accept only matches that start exactly at this.position.
            if (x && x[0].length > length && this.position + x[0].length == rule.r.lastIndex) {
                token = rule.fn(this, x);
                token.offset = this.position;
                token.raw = x[0];
                length = x[0].length;
            }
        });
        return ([token, length]);
    }
}
/**
 * A collection of terminal rules from which Lexer instances are created.
 */
export class LexicalGrammar {
    constructor() {
        this.rules = [];
    }
    /**
     * Register a terminal rule.
     * @param {string | RegExp} pattern - terminal pattern; compiled to a
     *   global RegExp either way (the RegExp constructor accepts both a
     *   RegExp and a pattern string, so the original's identical-branch
     *   ternary was redundant).
     * @param {Function} fn - builds the token object for a match.
     */
    terminal(pattern, fn) {
        this.rules.push({
            r: new RegExp(pattern, "g"),
            fn: fn
        });
    }
    /**
     * Create a lexer over `source` using the registered rules.
     * @param {string} source - text to tokenize
     * @returns {Lexer}
     */
    lex(source) {
        return new Lexer(source, this.rules);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment