Last active
September 16, 2017 21:08
-
-
Save JoshuaGrams/84acba3f58410f9cef2d496d85bfa173 to your computer and use it in GitHub Desktop.
moo + indentation tokens.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(function() { | |
'use strict'; | |
/**
 * Wraps a tokenizer and converts the whitespace at the start of each line
 * into `{type: 'indent'}` / `{type: 'dedent'}` tokens.
 *
 * The wrapped lexer must expose `next`, `save`, `reset`, `formatError` and
 * `has`, and must label leading whitespace tokens `ws` and line-break tokens
 * `nl`; those are the only token types this wrapper inspects.
 *
 * @param {Object} lexer - The underlying lexer that every call is delegated to.
 */
function IndentedLexer(lexer) {
  this.lexer = lexer;
  // Stack of the indentation strings currently open; '' is the permanent base level.
  this.indents = [''];
  // Tokens produced by indent() that still have to be handed out by next().
  this.tokens = [];
  // True while the next token read is the first token of a line.
  this.nextBlankIsIndent = true;
}

/**
 * Returns the next token: queued indent/dedent tokens first, then tokens from
 * the wrapped lexer.
 *
 * Leading `ws` tokens are consumed and replaced by the indent/dedent tokens
 * they imply. A line that starts directly with a non-whitespace token counts
 * as having the empty indentation, so the pending dedents are emitted before
 * that token. `nl` tokens at the start of a line (blank lines) leave the
 * indentation untouched.
 *
 * @returns {Object|undefined} The next token, or whatever falsy value the
 *   wrapped lexer returned when it ran out of input.
 * @throws {Error} If the new indentation cannot be compared with the current one.
 */
IndentedLexer.prototype.next = function() {
  if (this.tokens.length) {
    return this.tokens.shift();
  }

  // NOTE(review): the wrapper is passed as an argument, as in the original;
  // presumably the wrapped lexer ignores it - confirm.
  const token = this.lexer.next(this);
  if (!token) return token;

  if (this.nextBlankIsIndent && token.type !== 'nl') {
    this.nextBlankIsIndent = false;
    if (token.type === 'ws') {
      // The whitespace itself is swallowed; only indent/dedent tokens (if any) remain.
      this.indent(token.value);
      return this.next();
    }
    // First token of the line sits at column 0: close every open level, then
    // deliver the token after the queued dedents.
    this.indent('');
    this.tokens.push(token);
    return this.tokens.shift();
  }

  this.nextBlankIsIndent = (token.type === 'nl');
  return token;
};

/**
 * Compares `indent` with the top of the indentation stack and queues the
 * tokens needed to get there: one `indent` token when it extends the current
 * level, or one `dedent` token per level popped when it is a prefix of it.
 * If popping stops at a level that `indent` merely extends, a fresh `indent`
 * token is queued for the new level.
 *
 * @param {string} indent - The leading whitespace of the current line.
 * @throws {Error} If neither string is a prefix of the other (for example a
 *   tab versus a space).
 */
IndentedLexer.prototype.indent = function(indent) {
  while (indent !== this.indents[this.indents.length - 1]) {
    const prev = this.indents[this.indents.length - 1];
    if (startsWith(indent, prev)) { // more indentation than we had.
      this.tokens.push({type: 'indent'});
      this.indents.push(indent);
      return;
    }
    if (startsWith(prev, indent)) { // less indentation than we had.
      this.tokens.push({type: 'dedent'});
      this.indents.pop(); // check the previous one.
      continue;
    }
    // Throw a real Error (not a bare string) so callers get a stack trace.
    throw new Error('Indentations cannot be compared: '
      + unicodeDebugString(prev) + ' and ' + unicodeDebugString(indent) + '.');
  }
};

/**
 * Delegates to the wrapped lexer's `save`.
 * Only the wrapped lexer's state is returned; the indentation stack is not included.
 */
IndentedLexer.prototype.save = function() {
  return this.lexer.save();
};

/**
 * Delegates to the wrapped lexer's `reset`.
 * The indentation stack and queued tokens are left as they are.
 *
 * @param {string} data - Input handed to the wrapped lexer.
 * @param {Object} [info] - State handed to the wrapped lexer.
 */
IndentedLexer.prototype.reset = function(data, info) {
  this.lexer.reset(data, info);
};

/**
 * Delegates to the wrapped lexer's `formatError`.
 *
 * @param {Object} token - The token to describe.
 */
IndentedLexer.prototype.formatError = function(token) {
  return this.lexer.formatError(token);
};

/**
 * Delegates to the wrapped lexer's `has`.
 * The `indent` and `dedent` types added by this wrapper are not special-cased.
 *
 * @param {string} name - Token type name to look up.
 */
IndentedLexer.prototype.has = function(name) {
  return this.lexer.has(name);
};
// --------------------------------------------------------- | |
/**
 * Tells whether `str` begins with `prefix`.
 * An empty `prefix` matches every string.
 *
 * @param {string} str - String to inspect.
 * @param {string} prefix - Expected leading characters.
 * @returns {boolean} True when the first `prefix.length` characters of `str` equal `prefix`.
 */
function startsWith(str, prefix) {
  var head = str.slice(0, prefix.length);
  return head === prefix;
}
/**
 * Renders a string as a double-quoted run of `\uXXXX` escapes, one per UTF-16
 * code unit, so that otherwise invisible whitespace can be told apart in
 * error messages.
 *
 * @param {string} s - The string to render.
 * @returns {string} For example `"\u0020\u0009"` for a space followed by a tab.
 */
function unicodeDebugString(s) {
  // Declared locally: assigning to an undeclared name throws in strict mode.
  let escaped = '';
  for (let i = 0; i < s.length; ++i) {
    const hex = s.charCodeAt(i).toString(16);
    // Left-pad to exactly four lowercase hex digits.
    escaped += '\\u' + ('0000' + hex).slice(-4);
  }
  return '"' + escaped + '"';
}
// --------------------------------------------------------- | |
// Publish the constructor: as a property of `window` when no CommonJS-style
// `module.exports` object is available, otherwise as the module's export.
if (typeof module !== 'object' || !module.exports) {
  window.IndentedLexer = IndentedLexer;
} else {
  module.exports = IndentedLexer;
}
})(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Demo: wrap a moo lexer that knows three token types - runs of horizontal
// whitespace (`ws`), newlines (`nl`) and words (`id`).
var lex = new IndentedLexer(moo.compile({
  ws: /[ \t\u00A0\u1680\u2000-\u200a\u2028\u2029\u202f\u3000]+/,
  nl: { match: /\n/, lineBreaks: true },
  id: /\w+/
}));

// Sample input whose lines change indentation step by step.
lex.reset([
  'This line has no indentation\n',
  ' This line is indented with two spaces\n',
  ' So is this one\n',
  ' \tThis one adds a tab\n',
  ' \t And then two more spaces\n',
  ' \t Another line with the same indentation\n',
  ' \t And another\n',
  ' This one goes back to the initial two spaces\n',
  'And back to no indentation\n'
].join(''));

// Print every token's value; tokens without a truthy value (such as the
// value-less indent/dedent tokens) are printed as `{{type}} ` instead.
for (var token = lex.next(); token; token = lex.next()) {
  console.log(token.value || '{{' + token.type + '}} ');
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment