-
-
Save PiDelport/3298365 to your computer and use it in GitHub Desktop.
Some takes on Eli Bendersky's implementation of Rob Pike's template lexer in Go.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import namedtuple | |
from multitask import Queue | |
# Tags identifying the kind of each token the lexer emits.
TOK_TEXT = 'TOK_TEXT'
TOK_LEFT_META = 'TOK_LEFT_META'
TOK_RIGHT_META = 'TOK_RIGHT_META'
TOK_DUMMY = 'TOK_DUMMY'

# A token pairs a TOK_* tag (`type`) with the exact substring of the
# input it was scanned from (`value`).
Token = namedtuple('Token', 'type value')
class LexerError(Exception): pass | |
class TemplateLexer(object):
    """ A lexer for the template language. Initialize with the input
        string, and then call lex() which generates tokens. None is
        generated at EOF (and the generator expires).

        NOTE(review): the state functions are multitask-style coroutines.
        Each one yields sub-generators (such as _emit) to the multitask
        scheduler, and hands back the *next* state function by writing
        ``raise StopIteration(next_state)`` -- the pre-PEP-479 spelling of
        a valued ``return`` inside a generator.  Under Python 3.7+ PEP 479
        turns such a raise into RuntimeError; this code targets older
        Pythons where multitask runs -- confirm before porting.
    """
    def __init__(self, input):
        # Channel of produced tokens; consumed concurrently by another
        # task (behaves like a Go channel under the multitask scheduler).
        self.items = Queue()
        self.input = input           # full template text being scanned
        self.pos = 0                 # current scan position in input
        self.curstart = 0            # start of the token being accumulated
        self.state = self._lex_text  # current state function of the FSM

    def lex(self):
        # Trampoline: drive the current state coroutine; its StopIteration
        # value is the next state function, or None to stop at EOF.
        while self.state:
            self.state = yield self.state()

    #--------- Internal ---------#

    # Action delimiters, as in Go's text/template.
    _LEFT_META = '{{'
    _RIGHT_META = '}}'

    def _eof(self):
        # True once the scan position has run past the end of the input.
        return self.pos >= len(self.input)

    def _emit(self, toktype):
        # Emit input[curstart:pos] as a token of the given type and mark
        # that span as consumed by advancing curstart.
        tok = Token(toktype, self.input[self.curstart:self.pos])
        self.curstart = self.pos
        yield self.items.put(tok)

    def _lex_text(self):
        # State: scanning plain text until the next '{{' or EOF.
        while not self._eof():
            if self.input.startswith(self._LEFT_META, self.pos):
                # {{ here. Emit the text we've seen so far.
                if self.pos > self.curstart:
                    yield self._emit(TOK_TEXT)
                raise StopIteration(self._lex_left_meta)
            self.pos += 1  # ZZZ: can't just find to next {{ here?
        # Reached EOF. Emit trailing text.
        if self.pos > self.curstart:
            yield self._emit(TOK_TEXT)

    def _lex_left_meta(self):
        # State: consume the '{{' delimiter, then lex the action body.
        self.pos += len(self._LEFT_META)
        yield self._emit(TOK_LEFT_META)
        raise StopIteration(self._lex_inside_action)

    def _lex_right_meta(self):
        # State: consume the '}}' delimiter, then return to plain text.
        self.pos += len(self._RIGHT_META)
        yield self._emit(TOK_RIGHT_META)
        raise StopIteration(self._lex_text)

    def _lex_inside_action(self):
        # State: scanning inside '{{ ... }}'.  The action contents are not
        # tokenized further here -- emitted as a single TOK_DUMMY.
        while not self._eof():
            if self.input.startswith(self._RIGHT_META, self.pos):
                yield self._emit(TOK_DUMMY)
                raise StopIteration(self._lex_right_meta)
            self.pos += 1
        # Reached EOF inside an action: the template is malformed.
        raise LexerError('Unterminated action')
if __name__ == '__main__':
    # NOTE(review): this first sample input is immediately shadowed by the
    # assignment below; it is kept only as an alternative test string.
    text = r'''
Some text here {{range $s.Text}} and here {{1.2 "%g"}} too {{.}}
'''
    text = r'''
Some text here {{action}} and here {{action2}}'''
    tlex = TemplateLexer(text)

    def print_tokens():
        # Consumer coroutine: drain queued tokens until the lexer has
        # exhausted its input and the queue is empty.
        while not (tlex._eof() and tlex.items.empty()):
            print((yield tlex.items.get()))

    # Run producer and consumer concurrently in a task scheduler.
    import multitask
    multitask.add(tlex.lex())
    multitask.add(print_tokens())
    multitask.run()
To see what actually changed between each variation, I'd recommend looking at them in a checkout, with a history browser.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Notes:
First variation:
This corresponds most closely to the first version of Rob Pike's lexer, as presented in his talk. This involved the following changes: the `items` channel becomes a multitask `Queue` (which behaves like a Go channel); `yield from` becomes a plain `yield` (multitask implements the delegation appropriately); and valued `return` statements have to be written out as `raise StopIteration(...)` instead.

Second variation:
This is the same as the first, but using concurrent OS threads instead of concurrent generators. The only significant code change is the removal of yields, and the actual thread pool initialization.
Third variation:
This corresponds to the second version of Rob Pike's Go lexer. Like the Go version, this uses no concurrency features: just the modified `nextItem()` runner. (I also modified this to use a plain list instead of a queue, but that's only for illustrative purposes.)