Created
August 8, 2012 09:33
Initial attempt at implementing the Go template lexer in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import namedtuple | |
# Token type tags. Each tag is a plain string equal to its own name, so a
# printed token is self-describing.
TOK_TEXT = "TOK_TEXT"              # literal text outside of any action
TOK_LEFT_META = "TOK_LEFT_META"    # the opening '{{' delimiter
TOK_RIGHT_META = "TOK_RIGHT_META"  # the closing '}}' delimiter
TOK_DUMMY = "TOK_DUMMY"            # placeholder for the raw body of an action
# Token is an immutable (type, value) pair:
#   type  -- one of the TOK_* constants
#   value -- string value, exactly as taken from the input
Token = namedtuple(typename='Token', field_names='type value')
class LexerError(Exception):
    """Error raised by the lexer when the input cannot be tokenized."""
class TemplateLexer(object):
    """A lexer for the template language.

    Initialize with the input string, then iterate over lex(), which
    generates Token objects. After the last token a single None is
    generated to signal EOF, and then the generator expires.

    Internally the lexer is a state machine. Each state is a generator
    method that yields zero or more tokens and *returns* the next state
    method (or None to stop); lex() drives the states with ``yield from``.

    Attributes:
        input: the string being tokenized.
        pos: index of the next unscanned character in ``input``.
        curstart: index where the token currently being built starts.
        state: the state method to run next, or None once lexing is done.
    """

    def __init__(self, input):
        self.input = input
        self.pos = 0
        self.curstart = 0
        self.state = self._lex_text

    def lex(self):
        """Generate the tokens of the input, followed by a final None.

        Raises:
            LexerError: if the input ends inside an unterminated action.
        """
        while self.state:
            # The value returned by a state generator is the next state.
            self.state = yield from self.state()

    #--------- Internal ---------#

    _LEFT_META = '{{'
    _RIGHT_META = '}}'

    def _eof(self):
        """Return True once the scan position has reached the end of input."""
        return self.pos >= len(self.input)

    def _emit(self, toktype):
        """Build a token of type `toktype` from input[curstart:pos].

        The pending span is consumed: curstart is moved up to pos.
        """
        tok = Token(toktype, self.input[self.curstart:self.pos])
        self.curstart = self.pos
        return tok

    def _lex_text(self):
        """State: plain text, up to the next '{{' or the end of input."""
        # Jump straight to the next left delimiter instead of stepping one
        # character at a time; the resulting position is the same.
        found = self.input.find(self._LEFT_META, self.pos)
        at_eof = found < 0
        self.pos = len(self.input) if at_eof else found

        # Emit the text seen so far, unless it is empty.
        if self.pos > self.curstart:
            yield self._emit(TOK_TEXT)

        if at_eof:
            # Yielding None signals EOF to the consumer; returning None
            # stops the main lexing loop.
            yield None
            return None
        return self._lex_left_meta

    def _lex_left_meta(self):
        """State: positioned at '{{'. Emit it and move inside the action."""
        self.pos += len(self._LEFT_META)
        yield self._emit(TOK_LEFT_META)
        return self._lex_inside_action

    def _lex_right_meta(self):
        """State: positioned at '}}'. Emit it and go back to plain text."""
        self.pos += len(self._RIGHT_META)
        yield self._emit(TOK_RIGHT_META)
        return self._lex_text

    def _lex_inside_action(self):
        """State: inside an action. Emit its raw body as one TOK_DUMMY token.

        The token is emitted even when the body is empty ('{{}}').

        Raises:
            LexerError: if no closing '}}' is found before the end of input.
        """
        found = self.input.find(self._RIGHT_META, self.pos)
        if found < 0:
            # Leave the position at EOF, as a character-by-character scan
            # would have, before reporting the error.
            self.pos = len(self.input)
            raise LexerError('Unterminated action')
        self.pos = found
        yield self._emit(TOK_DUMMY)
        return self._lex_right_meta
if __name__ == '__main__':
    # Demo: tokenize a sample template and print every generated item.
    # The first sample is immediately superseded by the second one below.
    text = r'''
Some text here {{range $s.Text}} and here {{1.2 "%g"}} too {{.}}
'''
    text = r'''
Some text here {{action}} and here {{action2}}'''
    lexer = TemplateLexer(text)
    # The last item printed is the None that the lexer generates at EOF.
    for token in lexer.lex():
        print(token)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment