public
Created

Initial attempt at implementing the Go template lexer in Python

  • Download Gist
templatelexer.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
from collections import namedtuple
 
 
# Token type tags. Each tag is a plain string equal to its own name, so a
# printed Token is self-describing.
TOK_TEXT = 'TOK_TEXT'                # text outside of any {{ ... }} action
TOK_LEFT_META = 'TOK_LEFT_META'      # the opening '{{' delimiter
TOK_RIGHT_META = 'TOK_RIGHT_META'    # the closing '}}' delimiter
TOK_DUMMY = 'TOK_DUMMY'              # placeholder for the body of an action


# A lexed token is an immutable (type, value) pair:
#   type  -- one of the TOK_* constants above
#   value -- the exact slice of the input string the token covers
Token = namedtuple('Token', ['type', 'value'])
 
 
class LexerError(Exception):
    """Error raised by the lexer for malformed template input."""
 
 
class TemplateLexer(object):
    """A lexer for the template language.

    Initialize with the input string, then iterate over lex(), which
    generates Token objects. A final None is generated at EOF, after which
    the generator expires.

    The lexer is a state machine. Every state is a generator method that
    yields zero or more tokens and returns the next state method, or None
    to stop lexing.

    Raises (while lex() is being iterated):
        LexerError: if an action opened with '{{' is never closed by '}}'.
    """

    def __init__(self, input):
        """Prepare to lex *input* (the template text) from its start."""
        self.input = input
        # Index of the next character to examine.
        self.pos = 0
        # Index at which the token currently being scanned begins.
        self.curstart = 0
        # Current state: a bound generator method, or None once finished.
        self.state = self._lex_text

    def lex(self):
        """Generate the tokens of the input, followed by a single None."""
        while self.state:
            # Each state yields its own tokens; its return value is the
            # state to run next.
            self.state = yield from self.state()

    #--------- Internal ---------#

    _LEFT_META = '{{'
    _RIGHT_META = '}}'

    def _eof(self):
        """Return True when the scan position has reached the end of input."""
        return self.pos >= len(self.input)

    def _emit(self, toktype):
        """Build a token of type *toktype* spanning curstart..pos.

        The start marker is then moved up to pos so the next token begins
        where this one ended.
        """
        tok = Token(toktype, self.input[self.curstart:self.pos])
        self.curstart = self.pos
        return tok

    def _lex_text(self):
        """Lex plain text up to the next '{{' or to the end of input.

        Yields a TOK_TEXT token if any text was scanned. At EOF it then
        yields None (the EOF signal for the consumer) and returns None to
        stop the main loop; otherwise it returns the left-delimiter state.
        """
        meta_at = self.input.find(self._LEFT_META, self.pos)
        at_eof = meta_at < 0
        # Jump straight to the delimiter (or to EOF) rather than stepping
        # through the input one character at a time.
        self.pos = len(self.input) if at_eof else meta_at
        if self.pos > self.curstart:
            yield self._emit(TOK_TEXT)
        if not at_eof:
            return self._lex_left_meta
        # Yielding None to signal EOF to the consumer.
        # Returning None to stop the main lexing loop.
        yield None
        return None

    def _lex_left_meta(self):
        """Consume '{{', yield TOK_LEFT_META, then lex the action body."""
        self.pos += len(self._LEFT_META)
        yield self._emit(TOK_LEFT_META)
        return self._lex_inside_action

    def _lex_right_meta(self):
        """Consume '}}', yield TOK_RIGHT_META, then go back to plain text."""
        self.pos += len(self._RIGHT_META)
        yield self._emit(TOK_RIGHT_META)
        return self._lex_text

    def _lex_inside_action(self):
        """Lex the body of an action, up to but excluding the closing '}}'.

        The whole body is yielded as one TOK_DUMMY token; the token is
        yielded even when the body is empty.

        Raises:
            LexerError: if the input ends before '}}' is found.
        """
        close_at = self.input.find(self._RIGHT_META, self.pos)
        if close_at < 0:
            # Consume the rest of the input before reporting the error.
            self.pos = len(self.input)
            raise LexerError('Unterminated action')
        self.pos = close_at
        yield self._emit(TOK_DUMMY)
        return self._lex_right_meta
 
 
if __name__ == '__main__':
    # Demo: lex a small sample and print every generated item, including
    # the final None that marks EOF.
    text = r'''
Some text here {{action}} and here {{action2}}'''
    tlex = TemplateLexer(text)

    for t in tlex.lex():
        print(t)

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.