Skip to content

Instantly share code, notes, and snippets.

@JoshuaGrams
Last active September 16, 2017 21:08
Show Gist options
  • Save JoshuaGrams/84acba3f58410f9cef2d496d85bfa173 to your computer and use it in GitHub Desktop.
Save JoshuaGrams/84acba3f58410f9cef2d496d85bfa173 to your computer and use it in GitHub Desktop.
A wrapper around the moo lexer that adds indent and dedent tokens.
(function() {
'use strict';
/**
 * Wraps a tokenizer and turns changes in leading whitespace into `indent`
 * and `dedent` tokens.
 *
 * The wrapped lexer must provide `next`, `save`, `reset`, `formatError` and
 * `has` (the only methods called on it). Tokens of type 'ws' are treated as
 * runs of blanks and tokens of type 'nl' as line breaks.
 *
 * @param {Object} lexer - The underlying tokenizer.
 */
function IndentedLexer(lexer) {
  this.lexer = lexer;
  // Stack of the indentation strings currently open; '' is the base level.
  this.indents = [''];
  // Tokens generated ahead of time; handed out before more input is read.
  this.tokens = [];
  // True while the next token read will be the first one on its line.
  this.nextBlankIsIndent = true;
}

/**
 * Returns the next token, inserting `indent` / `dedent` tokens where the
 * indentation of a line differs from the previous one.
 *
 * A 'ws' token at the start of a line is consumed and replaced by the
 * indent/dedent tokens it implies (possibly none). A line that starts with any
 * other token except 'nl' has empty indentation, so every open level is closed
 * before that token is returned. A line consisting only of 'nl' leaves the
 * indentation untouched.
 *
 * No `dedent` tokens are produced when the wrapped lexer runs out of input.
 *
 * @returns {Object|undefined} The next token, or whatever falsy value the
 *     wrapped lexer returned when it had nothing more to give.
 */
IndentedLexer.prototype.next = function() {
  if (this.tokens.length) {
    return this.tokens.shift();
  }

  // The wrapper is still passed along as in the original code; this is
  // harmless for lexers whose next() takes no parameters.
  var token = this.lexer.next(this);
  if (!token) return token;

  if (!this.nextBlankIsIndent) {
    this.nextBlankIsIndent = (token.type === 'nl');
    return token;
  }

  // From here on `token` is the first token of its line.
  if (token.type === 'ws') {
    this.nextBlankIsIndent = false;
    this.indent(token.value);
    return this.next();
  }
  if (token.type === 'nl') {
    // Empty line: the next token is again the first one on its line.
    return token;
  }

  // The line starts without any whitespace, i.e. with indentation ''.
  // Close all open levels first, then deliver the token itself.
  this.nextBlankIsIndent = false;
  this.indent('');
  this.tokens.push(token);
  return this.tokens.shift();
};

/**
 * Compares `indent` with the current indentation and queues the resulting
 * tokens: one `indent` if it extends the current level, or one `dedent` per
 * level that has to be closed to get back to it.
 *
 * @param {string} indent - Leading whitespace of the current line.
 * @throws {Error} If neither string is a prefix of the other (for example
 *     spaces on one line and a tab on the next).
 */
IndentedLexer.prototype.indent = function(indent) {
  while (indent !== this.indents[this.indents.length - 1]) {
    var prev = this.indents[this.indents.length - 1];
    if (indent.startsWith(prev)) { // more indentation than we had.
      this.tokens.push({type: 'indent'});
      this.indents.push(indent);
      return;
    }
    if (!prev.startsWith(indent)) {
      throw new Error('Indentations cannot be compared: '
        + unicodeDebugString(prev) + ' and ' + unicodeDebugString(indent) + '.');
    }
    // Less indentation than we had: close this level, then check the
    // one below it on the next pass.
    this.tokens.push({type: 'dedent'});
    this.indents.pop();
  }
};

/**
 * Returns the saved state of the wrapped lexer. The indentation stack and the
 * queued tokens are not part of that state.
 *
 * @returns {*} Whatever the wrapped lexer's `save()` returns.
 */
IndentedLexer.prototype.save = function() {
  return this.lexer.save();
};

/**
 * Feeds new input to the wrapped lexer.
 *
 * When no `info` is given the input is treated as a fresh start, so the
 * indentation stack, the token queue and the line-start flag are reset too.
 * With `info` (a previously saved state) they are kept, so input can be fed
 * in several pieces.
 *
 * @param {string} data - The text to tokenize.
 * @param {*} [info] - State previously obtained from `save()`.
 * @returns {IndentedLexer} This wrapper, to allow chaining.
 */
IndentedLexer.prototype.reset = function(data, info) {
  this.lexer.reset(data, info);
  if (info == null) {
    this.indents = [''];
    this.tokens = [];
    this.nextBlankIsIndent = true;
  }
  return this;
};

/**
 * Formats an error message for `token` using the wrapped lexer.
 *
 * @param {Object} token - The offending token.
 * @returns {*} Whatever the wrapped lexer's `formatError()` returns.
 */
IndentedLexer.prototype.formatError = function(token) {
  return this.lexer.formatError(token);
};

/**
 * Asks the wrapped lexer whether it knows the token type `name`.
 *
 * @param {string} name - A token type name.
 * @returns {*} Whatever the wrapped lexer's `has()` returns.
 */
IndentedLexer.prototype.has = function(name) {
  return this.lexer.has(name);
};
// ---------------------------------------------------------
/**
 * Tells whether `str` begins with `prefix`.
 *
 * @param {string} str - The string to inspect.
 * @param {string} prefix - The expected beginning.
 * @returns {boolean} True if `str` starts with `prefix`.
 */
function startsWith(str, prefix) {
  // A backwards search that starts at index 0 can only match at index 0,
  // so the rest of the string is never scanned.
  var matchAt = str.lastIndexOf(prefix, 0);
  return matchAt === 0;
}
/**
 * Renders a string so that every UTF-16 code unit is visible, which makes
 * otherwise indistinguishable whitespace readable in error messages.
 *
 * @param {string} s - The string to render.
 * @returns {string} `s` written as a double-quoted sequence of `\uXXXX`
 *     escapes with four lowercase hex digits each, e.g. `"\u0020\u0009"`.
 */
function unicodeDebugString(s) {
  var escaped = '';
  for (let i = 0; i < s.length; ++i) {
    // charCodeAt yields 0..0xffff, so the hex form has at most four digits.
    var hex = s.charCodeAt(i).toString(16);
    escaped += '\\u' + ('0000' + hex).slice(-4);
  }
  return '"' + escaped + '"';
}
// ---------------------------------------------------------
// Publish the constructor: attach it to `window` unless a CommonJS-style
// `module.exports` is available, in which case export it from the module.
if(!(typeof module === 'object' && module.exports)) {
  window.IndentedLexer = IndentedLexer;
} else {
  module.exports = IndentedLexer;
}
})();
// Demo: tokenize a small sample with changing indentation and print each token.
// Relies on the globals `IndentedLexer` and `moo`.
var lex = new IndentedLexer(moo.compile({
  ws: /[ \t\u00A0\u1680\u2000-\u200a\u2028\u2029\u202f\u3000]+/,
  nl: { match: /\n/, lineBreaks: true },
  id: /\w+/
}));

// The sample is kept as one string literal per line and joined before lexing.
lex.reset([
  '',
  'This line has no indentation\n',
  ' This line is indented with two spaces\n',
  ' So is this one\n',
  ' \tThis one adds a tab\n',
  ' \t And then two more spaces\n',
  ' \t Another line with the same indentation\n',
  ' \t And another\n',
  ' This one goes back to the initial two spaces\n',
  'And back to no indentation\n'
].join(''));

// Tokens without a (truthy) value are shown by their type instead.
for(var token = lex.next(); token; token = lex.next()) {
  var shown = token.value ? token.value : '{{' + token.type + '}} ';
  console.log(shown);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment