jagt/gist:5473794

## gistfile1.py
# Simple LL(1) lexer and parser for a random data structure
# Based on description on book Language Implementation Patterns
class Lexer(object):
    WS, NAME, STRING, NUM, LBRACK, RBRACK, LBRACE, RBRACE, COMMA, EQUAL, COMMENT,END = range(12);
    EOF = '' # StringIO gives '' when nothing to read
    def __init__(self, inio):
        self.inio = inio
        self.consume()

    def consume(self):
        self.char = self.inio.read(1)

    def match(self, c):
        if (self.char == c):
            self.consume()
        else:
            raise RuntimeError("Expecting %s, getting %s" % (c, self.char))

    def next_token(self):
        while (self.char != Lexer.EOF):
            while self.char != Lexer.EOF and self.char in ' \n\t\r': self.consume() # consume all white space
            if   self.char == '[': self.consume(); return (Lexer.LBRACK, '[')
            elif self.char == ']': self.consume(); return (Lexer.RBRACK, ']')
            elif self.char == '{': self.consume(); return (Lexer.LBRACE, '{')
            elif self.char == '}': self.consume(); return (Lexer.RBRACE, '}')
            elif self.char == '=': self.consume(); return (Lexer.EQUAL, '=')
            elif self.char == ',': self.consume(); return (Lexer.COMMA, ',')
            elif self.char.isalpha():
                namels = [self.char]; self.consume()
                while self.char.isalpha() or self.char.isdigit() or self.char == '_':
                    namels.append(self.char)
                    self.consume()
                return (Lexer.NAME, ''.join(namels))
            elif self.char.isdigit() or self.char == '.':
                numls = [self.char]; self.consume()
                while self.char.isdigit() or self.char == '.':
                    numls.append(self.char)
                    self.consume()
                return (Lexer.NUM, ''.join(numls))
            elif self.char in '"\'':
                strls = [self.char]; self.consume();
                while self.char != strls[0]:
                    strls.append(self.char)
                    self.consume()
                strls.append(self.char) # add closing char
                self.consume()
                return (Lexer.STRING, ''.join(strls))
            elif self.char == '/':
                self.consume()
                self.match('/')
                while self.char != '\n':
                    self.consume()
                return (Lexer.COMMENT, "#comment")
            else:
                raise RuntimeError("Illegal character %s" % self.char)
        return (Lexer.END, '')

class Parser(object):
    class ParseTreeNode(list):
        def __init__(self, name):
            list.__init__(self)
            self.name = name

        def __str__(self):
            return self.name + ':' + list.__str__(self)

        def __repr__(self):
            return self.name + ':' + list.__repr__(self)


    def __init__(self, lexer):
        self.lexer = lexer
        self.consume()
        self.parse_tree_root = None
        self.parse_tree_current = None

    def consume(self):
        self.tok = lexer.next_token()
        if self.test(Lexer.COMMENT): # ignore as whitespace
            self.consume()

    def match(self, tokid):
        if self.test(tokid):
            val = self.tok[1]
            self.consume();
            return val
        else:
            self.error(tokid)

    def test(self, tokid):
        return self.tok[0] == tokid

    def print_parse(func):
        def _wrapper(self):
            print '# parsing %s' % func.func_name
            val = func(self)
            print '# end parsing %s' % func.func_name
            return val
        _wrapper.func_name = func.func_name
        return _wrapper

    # build very basic parse tree through decorator
    def build_parse_tree(func):
        def _wrapper(self):
            node = Parser.ParseTreeNode(func.func_name)
            if self.parse_tree_root is None:
                self.parse_tree_current = self.parse_tree_root = node
            else:
                self.parse_tree_current.append(node)
            before = self.parse_tree_current
            self.parse_tree_current = node
            val = func(self)
            self.parse_tree_current = before
            return val
        _wrapper.func_name = func.func_name
        return _wrapper


    def error(self, tokid):
        raise RuntimeError("Expecting tokken %d, getting %d" % (tokid, self.tok[0]))

    @build_parse_tree
    @print_parse
    def record(self):
        name = self.match(Lexer.NAME)
        self.match(Lexer.EQUAL)
        value = self.value()
        return (name , value)

    @build_parse_tree
    @print_parse
    def literal(self):
        if self.test(Lexer.STRING):
            return self.match(Lexer.STRING)
        elif self.test(Lexer.NUM):
            num = self.match(Lexer.NUM)
            if '.' in num:
                return float(num)
            else:
                return int(num)
        else:
            self.error(-1)

    @build_parse_tree
    @print_parse
    def list(self):
        ls = []
        self.match(Lexer.LBRACK)
        if self.test(Lexer.RBRACK):
            self.match(Lexer.RBRACK)
        else:
            ls.append(self.literal())
            while self.test(Lexer.COMMA):
                self.match(Lexer.COMMA)
                ls.append(self.literal())
            self.match(Lexer.RBRACK)
        return ls;

    @build_parse_tree
    @print_parse
    def value(self):
        if self.test(Lexer.LBRACE):
            value = {}
            self.match(Lexer.LBRACE);
            while self.test(Lexer.NAME):
                record = self.record();
                value[record[0]] = record[1]
            self.match(Lexer.RBRACE);
        elif self.test(Lexer.LBRACK):
            value = self.list()
        elif self.test(Lexer.STRING) or self.test(Lexer.NUM):
            value = self.literal();
        else:
            self.error(-1)
        return value

    def start(self):
        self.parse_tree_root = self.parse_tree_current = None
        return self.record()

if __name__ == '__main__':
    s = '''
NAME = {
    KEY1 = "Value"
    KEY2 = ["These are my twisted words", 253, 20.0]
    NEST_NAME = {
        // single line comment
        KEY3 = "Value"
        KEY4 = []
    }
    KEY5 = [253, "Daily Days"]
    KEY8 = ["foo", 253]
}
'''

    from StringIO import StringIO
    from pprint import pprint
    lexer = Lexer(StringIO(s))
    parser = Parser(lexer)
    pprint(parser.start())
    pprint(parser.parse_tree_root)
	# Simple LL(1) lexer and parser for a random data structure
	# Based on description on book Language Implementation Patterns
	class Lexer(object):
	WS, NAME, STRING, NUM, LBRACK, RBRACK, LBRACE, RBRACE, COMMA, EQUAL, COMMENT,END = range(12);
	EOF = '' # StringIO gives '' when nothing to read
	def __init__(self, inio):
	self.inio = inio
	self.consume()

	def consume(self):
	self.char = self.inio.read(1)

	def match(self, c):
	if (self.char == c):
	self.consume()
	else:
	raise RuntimeError("Expecting %s, getting %s" % (c, self.char))

	def next_token(self):
	while (self.char != Lexer.EOF):
	while self.char != Lexer.EOF and self.char in ' \n\t\r': self.consume() # consume all white space
	if self.char == '[': self.consume(); return (Lexer.LBRACK, '[')
	elif self.char == ']': self.consume(); return (Lexer.RBRACK, ']')
	elif self.char == '{': self.consume(); return (Lexer.LBRACE, '{')
	elif self.char == '}': self.consume(); return (Lexer.RBRACE, '}')
	elif self.char == '=': self.consume(); return (Lexer.EQUAL, '=')
	elif self.char == ',': self.consume(); return (Lexer.COMMA, ',')
	elif self.char.isalpha():
	namels = [self.char]; self.consume()
	while self.char.isalpha() or self.char.isdigit() or self.char == '_':
	namels.append(self.char)
	self.consume()
	return (Lexer.NAME, ''.join(namels))
	elif self.char.isdigit() or self.char == '.':
	numls = [self.char]; self.consume()
	while self.char.isdigit() or self.char == '.':
	numls.append(self.char)
	self.consume()
	return (Lexer.NUM, ''.join(numls))
	elif self.char in '"\'':
	strls = [self.char]; self.consume();
	while self.char != strls[0]:
	strls.append(self.char)
	self.consume()
	strls.append(self.char) # add closing char
	self.consume()
	return (Lexer.STRING, ''.join(strls))
	elif self.char == '/':
	self.consume()
	self.match('/')
	while self.char != '\n':
	self.consume()
	return (Lexer.COMMENT, "#comment")
	else:
	raise RuntimeError("Illegal character %s" % self.char)
	return (Lexer.END, '')

	class Parser(object):
	class ParseTreeNode(list):
	def __init__(self, name):
	list.__init__(self)
	self.name = name

	def __str__(self):
	return self.name + ':' + list.__str__(self)

	def __repr__(self):
	return self.name + ':' + list.__repr__(self)


	def __init__(self, lexer):
	self.lexer = lexer
	self.consume()
	self.parse_tree_root = None
	self.parse_tree_current = None

	def consume(self):
	self.tok = lexer.next_token()
	if self.test(Lexer.COMMENT): # ignore as whitespace
	self.consume()

	def match(self, tokid):
	if self.test(tokid):
	val = self.tok[1]
	self.consume();
	return val
	else:
	self.error(tokid)

	def test(self, tokid):
	return self.tok[0] == tokid

	def print_parse(func):
	def _wrapper(self):
	print '# parsing %s' % func.func_name
	val = func(self)
	print '# end parsing %s' % func.func_name
	return val
	_wrapper.func_name = func.func_name
	return _wrapper

	# build very basic parse tree through decorator
	def build_parse_tree(func):
	def _wrapper(self):
	node = Parser.ParseTreeNode(func.func_name)
	if self.parse_tree_root is None:
	self.parse_tree_current = self.parse_tree_root = node
	else:
	self.parse_tree_current.append(node)
	before = self.parse_tree_current
	self.parse_tree_current = node
	val = func(self)
	self.parse_tree_current = before
	return val
	_wrapper.func_name = func.func_name
	return _wrapper


	def error(self, tokid):
	raise RuntimeError("Expecting tokken %d, getting %d" % (tokid, self.tok[0]))

	@build_parse_tree
	@print_parse
	def record(self):
	name = self.match(Lexer.NAME)
	self.match(Lexer.EQUAL)
	value = self.value()
	return (name , value)

	@build_parse_tree
	@print_parse
	def literal(self):
	if self.test(Lexer.STRING):
	return self.match(Lexer.STRING)
	elif self.test(Lexer.NUM):
	num = self.match(Lexer.NUM)
	if '.' in num:
	return float(num)
	else:
	return int(num)
	else:
	self.error(-1)

	@build_parse_tree
	@print_parse
	def list(self):
	ls = []
	self.match(Lexer.LBRACK)
	if self.test(Lexer.RBRACK):
	self.match(Lexer.RBRACK)
	else:
	ls.append(self.literal())
	while self.test(Lexer.COMMA):
	self.match(Lexer.COMMA)
	ls.append(self.literal())
	self.match(Lexer.RBRACK)
	return ls;

	@build_parse_tree
	@print_parse
	def value(self):
	if self.test(Lexer.LBRACE):
	value = {}
	self.match(Lexer.LBRACE);
	while self.test(Lexer.NAME):
	record = self.record();
	value[record[0]] = record[1]
	self.match(Lexer.RBRACE);
	elif self.test(Lexer.LBRACK):
	value = self.list()
	elif self.test(Lexer.STRING) or self.test(Lexer.NUM):
	value = self.literal();
	else:
	self.error(-1)
	return value

	def start(self):
	self.parse_tree_root = self.parse_tree_current = None
	return self.record()

	if __name__ == '__main__':
	s = '''
	NAME = {
	KEY1 = "Value"
	KEY2 = ["These are my twisted words", 253, 20.0]
	NEST_NAME = {
	// single line comment
	KEY3 = "Value"
	KEY4 = []
	}
	KEY5 = [253, "Daily Days"]
	KEY8 = ["foo", 253]
	}
	'''

	from StringIO import StringIO
	from pprint import pprint
	lexer = Lexer(StringIO(s))
	parser = Parser(lexer)
	pprint(parser.start())
	pprint(parser.parse_tree_root)