Skip to content

Instantly share code, notes, and snippets.

@fbwright
Created May 30, 2015 15:35
Show Gist options
  • Save fbwright/68a03126dd00740b7ae9 to your computer and use it in GitHub Desktop.
Save fbwright/68a03126dd00740b7ae9 to your computer and use it in GitHub Desktop.
Recursive descent parser and interpreter/(Python|Forth) compiler
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function, division
import sys, re
if sys.version_info.major < 3:
input = raw_input
#I wonder... how difficult would be writing a parser that takes well-formed
#BNF in input and outputs a recursive descent parser based on that grammar?
#I'd have to recognize, name and put into an enum tokens ('...'), create a
#dictionary of identifiers (<...>), and each identifier will be assigned
#some other identifiers/tokens, separared by spaces, perhaps inside () or []
#or followed by *
#The only prebuilt part would be the atom - for numbers, strings and ids
def tabify(lines, tabs='\t'):
s = ""
for line in lines.split("\n"):
s = "%s\n%s%s" % (s, tabs, line)
return s
def is_string(s):
return s.startswith("str(") or s.startswith('"') and s.endswith('"')
def to_number(number):
try:
return int(number)
except ValueError:
try:
return float(number)
except ValueError:
raise ValueError("Not a number")
def parse_number(number):
try:
return to_number(number)
except ValueError:
pass
if number.endswith(('i', 'I')):
suffix = number[-2]
number = to_number(number[:-2])
number *= {
'': 1,
'K': 2**10,
'k': 2**10,
'M': 2**20,
'G': 2**30,
'T': 2**40,
'P': 2**50,
'E': 2**60,
'Z': 2**70,
'Y': 2**80,
}[suffix]
return number
suffix = number[-1]
number = to_number(number[:-1])
number *= {
'y': 10**-24,
'z': 10**-21,
'a': 10**-18,
'f': 10**-15,
'p': 10**-12,
'n': 10**-9,
'u': 10**-6,
'm': 10**-3,
'c': 10**-2,
'd': 10**-1,
'': 1,
'da': 10,
'D': 10,
'h': 100,
'H': 100,
'k': 10**3,
'K': 10**3,
'M': 10**6,
'G': 10**9,
'T': 10**12,
'P': 10**15,
'E': 10**18,
'Z': 10**21,
'Y': 10**24
}[suffix]
return number
class Enum(tuple): __getattr__ = tuple.index
Tokens = Enum("NUMBER STRING IDENTIFIER ASSIGN SMART_ASSIGN COLON SEMICOLON COMMA DOT ADD SUB MUL DIV MOD POW LEFT_SHIFT RIGHT_SHIFT LEFT_ROUND_PAREN RIGHT_ROUND_PAREN NOT AND NAND OR NOR XOR XNOR BIT_NOT BIT_AND BIT_NAND BIT_OR BIT_NOR BIT_XOR BIT_XNOR BEGIN END WRITE READ IF THEN ELSE WHILE DO EQ NEQ GT GTE LT LTE QUESTION_MARK ELVIS TRUE FALSE NULL COMMENT NEWLINE".split())
class Token(object):
__slots__ = ["type", "value", "pos"]
def __init__(self, value, type, pos=0):
self.type = type
self.value = value
self.pos = pos
def __repr__(self):
return "(%s '%s' @ %s)" % (Tokens[self.type], self.value, self.pos)
class Node(object):
__slots__ = ["operation", "args", "pos"]
def __init__(self, operation, *args):
self.operation = operation
if type(args[0]) in (list, tuple):
args = args[0]
self.args = args
#self.pos = pos
def __repr__(self):
if self.operation is "NUM":
return "%s" % self.args[0]
return "(%s %s)" % (self.operation, " ".join(map(str,self.args)))
def eval(self, env, debug=False):
raise Exception("%s"%repr(self))
class Block(Node):
def eval(self, env, debug=False):
for statement in self.args:
statement.eval(env, debug)
def compile(self, debug=False):
return "\n".join(map(lambda i: i.compile(debug), self.args))
def compile_forth(self, debug=False):
return "\n".join(map(lambda i: i.compile_forth(debug), self.args))
class Comment(Node):
def eval(self, env, debug=False):
return
def compile(self, debug=False):
return self.args[0]
def compile_forth(self, debug=False):
return "( {0} )".format(self.args[0])
class Assignment(Node):
def eval(self, env, debug=False):
name, expression = self.args
value = expression.eval(env, debug)
env[name] = value
def compile(self, debug=False):
name, expression = self.args
return "%s = %s" % (name, expression.compile(debug))
def compile_forth(self, debug=False):
name, expression = self.args
return "%s\nPUSH %s\nSTORE" % (expression.compile_forth(debug), name)
class Number(Node):
def eval(self, env, debug=False):
return self.args[0]
def compile(self, debug=False):
return "%s" % self.args[0]
def compile_forth(self, debug=False):
return "PUSH %s" % self.args[0]
class String(Node):
def eval(self, env, debug=False):
return self.args[0].strip('"')
def compile(self, debug=False):
return "\"%s\"" % self.args[0].strip('"')
def compile_forth(self, debug=False):
return '"%s"' % self.args[0].strip('"')
class Variable(Node):
def eval(self, env, debug=False):
try:
return env[self.args[0]]
except KeyError:
raise RuntimeError("Variable %s doesn't exists" % self.args[0])
def compile(self, debug=False):
return "%s" % self.args[0]
def compile_forth(self, debug=False):
return "PUSH %s\nLOAD" % self.args[0]
class UnaryOperation(Node):
def _getop(self):
try:
return {
'+': '+{0}',
'-': '-{0}',
'~': '~{0}',
'NOT': 'not {0}',
}[self.operation.upper()]
except KeyError:
raise SyntaxError("Unknown operation %s" % self.operation)
def _getop_forth(self):
try:
return {
'+': '{0}',
'-': '{0}\nNEG',
'~': '{0}\nNOT',
'NOT': '{0} =0',
}[self.operation.upper()]
except KeyError:
raise SyntaxError("Unknown operation %s" % self.operation)
def eval(self, env, debug=False):
operation, value = self.operation, self.args[0]
value = value.eval(env, debug)
temp = "lambda a: %s" % self._getop().format('a')
value = eval(temp)(value)
return value
def compile(self, debug=False):
operation, value = self.operation, self.args[0]
value = value.compile(debug)
return self._getop().format(value)
def compile_forth(self, debug=False):
operation, value = self.operation, self.args[0]
value = value.compile_forth(debug)
return self._getop_forth().format(value)
class BinaryOperation(Node):
def _getop(self):
try:
return {
'+': '{0} + {1}',
'-': '{0} - {1}',
'*': '{0} * {1}',
'/': '{0} / {1}',
'%': '{0} % {1}',
'**': '{0} ** {1}',
'<<': '{0} << {1}',
'>>': '{0} >> {1}',
'&': '{0} & {1}',
'~&': '~({0} & {1})',
'|': '{0} | {1}',
'~|': '~({0} | {1})',
'^': '{0} ^ {1}',
'~^': '~({0} ^ {1})',
'AND': '{0} and {1}',
'NAND': 'not ({0} and {1})',
'OR': '{0} or {1}',
'NOR': 'not ({0} or {1})',
'XOR': '({0} or {1}) and not ({0} and {1})',
'XNOR': 'not (({0} or {1}) and not ({0} and {1}))',
'==': '{0} == {1}',
'<=': '{0} <= {1}',
'>=': '{0} >= {1}',
'<': '{0} < {1}',
'>': '{0} > {1}',
'!=': '{0} != {1}',
'><': '{0} != {1}',
'<>': '{0} != {1}',
'?:': '{0} if ( {0} is not None ) else {1}',
}[self.operation.upper()]
except KeyError:
raise SyntaxError("Unknown operation %s" % self.operation)
def _getop_forth(self):
try:
return {
'+': '{0}\n{1}\nADD',#
'-': '{0}\n{1}\nSUB',#
'*': '{0} {1} *',#
'/': '{0} {1} /',#
'%': '{0} {1} mod',#
'**': '{0} {1} f**',#
'<<': '{0} {1} 2*',#
'>>': '{0} {1} 2/',#
'&': '{0} {1} and',#
'~&': '{0} {1} and =0',#
'|': '{0} {1} or',#
'~|': '{0} {1} or =0',#
'^': '{0} {1} xor',#
'~^': '{0} {1} xor =0',#
'AND': '{0} {1} and',#
'NAND': '{0} {1} and =0',#
'OR': '{0} {1} or',#
'NOR': '{0} {1} or =0',#
'XOR': '{0} {1} xor',#
'XNOR': '{0} {1} xor =0',#
'==': '{0} {1} =',
'<=': '{0} {1} <=',
'>=': '{0} {1} >=',
'<': '{0} {1} <',
'>': '{0} {1} >',
'!=': '{0} {1} <>',
'><': '{0} {1} <>',
'<>': '{0} {1} <>',
'?:': '{0} if ( {0} is not None ) else {1}',#TODO
}[self.operation.upper()]
except KeyError:
raise SyntaxError("Unknown operation %s" % self.operation)
def eval(self, env, debug=False):
#DONE: this needs some work.
# It won't produce correct python code for some operators (namely,
# AND-OR-XOR [as they're uppercase - and besides, xor doesn't
# exist in python], <>->< and the elvis operator (?:)
#And, yes, I used some sorcery to get it right - but this works.
#Still needs work - won't work for python code
#This should work... mwha-ha-ha-ha!
operation, left, right = self.operation, self.args[0], self.args[1]
left = left.eval(env, debug)
right = right.eval(env, debug)
if operation == '+' and (type(left) is str or type(right) is str):
left = str(left)
right = str(right)
temp = "lambda a,b: %s" % self._getop().format('a', 'b')
value = eval(temp)(left, right)
return value
def compile(self, debug=False):
operation, left, right = self.operation, self.args[0], self.args[1]
left = left.compile(debug)
right = right.compile(debug)
if operation == '+' and ( is_string(left) or is_string(right) ):
left = "str(%s)" % left
right = "str(%s)" % right
return self._getop().format(left, right)
def compile_forth(self, debug=False):
operation, left, right = self.operation, self.args[0], self.args[1]
left = left.compile_forth(debug)
right = right.compile_forth(debug)
return self._getop_forth().format(left, right)
class TernaryOperation(Node):
def eval(self, env, debug=False):
operation, test, if_true, if_false = self.operation, self.args[0], self.args[1], self.args[2]
test = test.eval(env, debug)
if_true = if_true.eval(env, debug)
if_false = if_false.eval(env, debug)
try:
value = {
'?': lambda a,b,c: b if a else c,
}[operation](test, if_true, if_false)
except KeyError:
raise SyntaxError("Unknown operation %s" % operation)
return value
def compile(self, debug=False):
operation, test, if_true, if_false = self.operation, self.args[0], self.args[1], self.args[2]
test = test.compile(debug)
if_true = if_true.compile(debug)
if_false = if_false.compile(debug)
return "%s if %s else %s" % (if_true, test, if_false)
def compile_forth(self, debug=False):
operation, test, if_true, if_false = self.operation, self.args[0], self.args[1], self.args[2]
test = test.compile_forth(debug)
if_true = if_true.compile_forth(debug)
if_false = if_false.compile_forth(debug)
return "{0} if {1} else {2} then".format(test, if_true, if_false)
class Write(Node):
def eval(self, env, debug=False):
if debug:
return
print(" ".join(map(lambda i: str(i.eval(env)), self.args)))
def compile(self, debug=False):
if debug:
return "pass"
return "print(%s)" % ",".join(map(lambda i: i.compile(), self.args))
def compile_forth(self, debug=False):
if debug:
return "0 drop ( NOP in place of write )"
s = ""
for item in self.args:
if isinstance(item, String):
s = "{0} .\" {1}\"".format(s, item.compile_forth(debug).strip('"'))
elif isinstance(item, Variable) or isinstance(item, Number):
s = "{0} {1}\nCALL print".format(s, item.compile_forth(debug))
else:
s = "{0} {1}\nCALL print".format(s, item.compile_forth(debug))
return s
return '." %s" cr' % "".join(map(lambda i: str(i.compile_forth()).strip('"'), self.args))
class If(Node):
def eval(self, env, debug=False):
if len(self.args) == 2:
condition, if_true = self.args
else:
condition, if_true, if_false = self.args
if condition.eval(env, debug):
if_true.eval(env, debug)
elif if_false:
if_false.eval(env, debug)
def compile(self, debug=False):
if len(self.args) == 2:
condition, if_true = self.args
else:
condition, if_true, if_false = self.args
s = "if %s:" % condition.compile(debug)
s = s + tabify(if_true.compile(debug))
if if_false:
s = s + "\nelse:"
s = s + tabify(if_false.compile(debug))
return s
def compile_forth(self, debug=False):
if len(self.args) == 2:
condition, if_true = self.args
else:
condition, if_true, if_false = self.args
return "{0} if {1} {2} then".format(
condition.compile_forth(debug),
if_true.compile_forth(debug),
"else {0}".format(if_false.compile_forth(debug)) if if_false else "")
class While(Node):
def eval(self, env, debug=False):
condition, body = self.args
while condition.eval(env, debug):
body.eval(env, debug)
def compile(self, debug=False):
condition, body = self.args
s = "while %s:" % condition.compile(debug)
s = s + tabify(body.compile(debug))
return s
token_expressions = [
(r'[ \n\t]+', None), #Discard whitespaces
(r'//[^\n]*', None), #Line comments, C-style - discarded
(r'#[^\n]*', None), #Line comments, Python-style
(r'#\[.*#\]', Tokens.COMMENT), #New-style block comment
(r'(\+|-|\*|/|%|\*\*|&|\||^|<<|>>|\?)=', Tokens.SMART_ASSIGN),
(r'\?:', Tokens.ELVIS),
(r'\?', Tokens.QUESTION_MARK),
(r';', Tokens.SEMICOLON),
(r',', Tokens.COMMA),
(r'\.', Tokens.DOT),
(r'=', Tokens.ASSIGN),
(r':', Tokens.COLON),
(r'\(', Tokens.LEFT_ROUND_PAREN),
(r'\)', Tokens.RIGHT_ROUND_PAREN),
(r'\+', Tokens.ADD), #Mathematical operators
(r'-', Tokens.SUB),
(r'\*\*', Tokens.POW),
(r'\*', Tokens.MUL),
(r'/', Tokens.DIV),
(r'%', Tokens.MOD),
(r'<<', Tokens.LEFT_SHIFT),
(r'>>', Tokens.RIGHT_SHIFT),
(r'~&', Tokens.BIT_NAND), #Bitwise operators
(r'~\|', Tokens.BIT_NOR),
(r'~\^', Tokens.BIT_XNOR),
(r'~', Tokens.BIT_NOT),
(r'&', Tokens.BIT_AND),
(r'\|', Tokens.BIT_OR),
(r'\^', Tokens.BIT_XOR),
(r'not', Tokens.NOT), #Boolean operators
(r'nand', Tokens.NAND),
(r'and', Tokens.AND),
(r'or', Tokens.OR),
(r'nor', Tokens.NOR),
(r'xor', Tokens.XOR),
(r'xnor', Tokens.XNOR),
(r'==', Tokens.EQ), #Comparisons
(r'<>|><|!=', Tokens.NEQ),
(r'>=', Tokens.GTE),
(r'<=', Tokens.LTE),
(r'>', Tokens.GT),
(r'<', Tokens.LT),
(r'read', Tokens.READ), #Statements
(r'write', Tokens.WRITE),
(r'if(?![a-z_])', Tokens.IF),
(r'then(?![a-z_])', Tokens.THEN),
(r'else(?![a-z_])', Tokens.ELSE),
(r'while(?![a-z_])', Tokens.WHILE),
(r'do(?![a-z_])', Tokens.DO),
(r'begin', Tokens.BEGIN),
(r'end', Tokens.END),
(r'true(?![a-z_])', Tokens.TRUE),
(r'false(?![a-z_])', Tokens.FALSE),
(r'null(?![a-z_])', Tokens.NULL),
(r'[+-]?[0-9]+(\.[0-9]*)?(((da)|(d(?!a))|[cmunpfazyhDH])(?!i)|[kKMGTPEZY]i?|e[+-]?[0-9]+)?', Tokens.NUMBER),
(r'"[^"\n]*"', Tokens.STRING),
(r"'[^'\n]*'", Tokens.STRING),
(r"\n", Tokens.NEWLINE),
(r'[A-Za-z_][A-Za-z0-9_]*', Tokens.IDENTIFIER),
]
def tokenize(text):
position = 0
while position < len(text):
match = None
for token_expression in token_expressions:
pattern, tag = token_expression
regex = re.compile(pattern, re.I)
match = regex.match(text, position)
if match:
matched_text = match.group(0)
if tag is not None:
token = Token(matched_text, tag, pos=position)
yield token
break
if match is None:
raise SyntaxError("Illegal character %r @ %s" % (text[position], position))
else:
position = match.end(0)
#TODO: I could use function decorators to write shorter code...
# or perhaps function factories. For example, and_expr, xor_expr, or_expr
# are almost identical one to the other
class TreeBuilder(object):
def parse(self, text, _who_you_gonna_call = None):
self.tokens = tokenize(text)
self.token, self.next_token = None, None
self._advance()
if _who_you_gonna_call is None:
_who_you_gonna_call = self.block
return _who_you_gonna_call()
#self.statement()#self.expression()#self.block()
def _advance(self):
self.token, self.next_token = self.next_token, next(self.tokens, None)
def _accept(self, *token_types):
if self.next_token is not None and (self.next_token.type in token_types
or self.next_token.type is Tokens.COMMENT):
self._advance()
return True
return False
def _expect(self, *token_types):
if not self._accept(*token_types):
e=", ".join(map(lambda i:Tokens[i], token_types))
raise SyntaxError("{0}: expected {1}".format(self.token, e))#Tokens[token_types])
def atom(self):
"<atom> ::= <identifier> | <literal> | <enclosure>"
if self._accept(Tokens.IDENTIFIER):
value = self.token.value
return Variable("VAR", value)
elif self._accept(Tokens.NUMBER):
value = parse_number(self.token.value)
return Number("NUM", value)
elif self._accept(Tokens.STRING):
value = self.token.value.strip("'\"")
return String("STR", value)
elif self._accept(Tokens.TRUE, Tokens.FALSE, Tokens.NULL):
value = {
Tokens.TRUE: True,
Tokens.FALSE: False,
Tokens.NULL: None,
}[self.token.type]
return Number("NUM", value)
elif self._accept(Tokens.LEFT_ROUND_PAREN):
value = self.expression()
self._expect(Tokens.RIGHT_ROUND_PAREN)
return value
elif self._accept(Tokens.COMMENT):
return Comment("###", self.token.value)
raise SyntaxError("{0}: expected IDENTIFIER, NUMBER, STRING or LEFT_ROUND_PAREN".format(self.token))
def primary(self):
"<primary> ::= <atom>"
return self.atom()
def power(self):
"<power> ::= <primary> ['**' <unary_expr>]"
value = self.primary()
if self._accept(Tokens.POW):
right = self.unary_expr()
return BinaryOperation('**', value, right)
return value
def unary_expr(self):
"<unary_expr> ::= <power> | ('+'|'-'|'~') <unary_expr>"
if self._accept(Tokens.ADD, Tokens.SUB, Tokens.BIT_NOT):
operation = self.token.value
value = self.unary_expr()
return UnaryOperation(operation, value)
return self.power()
def mul_expr(self):
"<mul_expr> ::= <unary_expr> [ ('*'|'/'|'%') <mul_expr> ]"
value = self.unary_expr()
if self._accept(Tokens.MUL, Tokens.DIV, Tokens.MOD):
operation = self.token.value
right = self.mul_expr()
return BinaryOperation(operation, value, right)
return value
def add_expr(self):
"<add_expr> ::= <mul_expr> [ ('+'|'-') <add_expr> ]"
value = self.mul_expr()
if self._accept(Tokens.ADD, Tokens.SUB):
operation = self.token.value
right = self.add_expr()
return BinaryOperation(operation, value, right)
return value
def shift_expr(self):
"<shift_expr> ::= <add_expr> [ ('<<'|'>>') <shift_expr> ]"
value = self.add_expr()
if self._accept(Tokens.LEFT_SHIFT, Tokens.RIGHT_SHIFT):
operation = self.token.value
right = self.shift_expr()
return BinaryOperation(operation, value, right)
return value
def and_expr(self):
"<and_expr> ::= <shift_expr> [ '&' <and_expr> ]"
value = self.shift_expr()
if self._accept(Tokens.BIT_AND, Tokens.BIT_NAND):
operation = self.token.value
right = self.add_expr()
return BinaryOperation(operation, value, right)
return value
def xor_expr(self):
"<xor_expr> ::= <and_expr> [ '^' <xor_expr> ]"
value = self.and_expr()
if self._accept(Tokens.BIT_XOR, Tokens.BIT_XNOR):
operation = self.token.value
right = self.xor_expr()
return BinaryOperation(operation, value, right)
return value
def or_expr(self):
"<or_expr> ::= <xor_expr> [ '|' <or_expr> ]"
value = self.xor_expr()
if self._accept(Tokens.BIT_OR, Tokens.BIT_NOR, Tokens.ELVIS):
operation = self.token.value
right = self.or_expr()
return BinaryOperation(operation, value, right)
return value
def comparison(self):
"<comparison> ::= <or_expr> ( <comp_operator> <or_expr> )*"
"<comp_operator> ::= '<' | '>' | '<=' | '>=' | '==' | '<>' | '><'"
value = self.or_expr()
while self._accept(Tokens.EQ, Tokens.NEQ, Tokens.GT, Tokens.GTE, Tokens.LT, Tokens.LTE):
operation = self.token.value
right = self.or_expr()
value = BinaryOperation(operation, value, right)
return value
def or_test(self):
"<or_test> ::= <xor_test> [ 'or' | 'nor' <or_test> ]"
value = self.xor_test()
if self._accept(Tokens.OR, Tokens.NOR):
operation = self.token.value
right = self.or_test()
return BinaryOperation(operation, value, right)
return value
def xor_test(self):
"<xor_test> ::= <and_test> [ 'xor' | 'xnor' <xor_test> ]"
value = self.and_test()
if self._accept(Tokens.XOR, Tokens.XNOR):
operation = self.token.value
right = self.xor_test()
return BinaryOperation(operation, value, right)
return value
def and_test(self):
"<and_test> ::= <not_test> [ 'and' | 'nand' <and_test> ]"
value = self.not_test()
if self._accept(Tokens.AND, Tokens.NAND):
operation = self.token.value
right = self.and_test()
return BinaryOperation(operation, value, right)
return value
def not_test(self):
"<not_test> ::= <comparison> | 'not' <not_test>"
if self._accept(Tokens.NOT):
value = self.not_test()
return UnaryOperation("NOT", value)
return self.comparison()
def conditional_expression(self):
"<conditional_expression> ::= <or_test> ('?' <or_test> ':' <expression>)"
value = self.or_test()
if self._accept(Tokens.QUESTION_MARK):
test = self.or_test()
self._expect(Tokens.COLON)
if_false = self.expression()
return TernaryOperation("?", test, value, if_false)
return value
def expression(self):
"<expression> ::= <conditional_expression>"
return self.conditional_expression()
def expression_list(self):
"<expression_list> ::= <expression> ( ',' <expression> )*"
paren = self._accept(Tokens.LEFT_ROUND_PAREN)
value = [self.expression()]
while self._accept(Tokens.COMMA):
value.append(self.expression())
if paren:
self._expect(Tokens.RIGHT_ROUND_PAREN)
return value
def assignment(self):
"<assignment> ::= <identifier> <assign_operator> <expression>"
"<assign_operator> ::= ':=' | '+=' | '-=' | '*=' | '/=' | '%=' | '**=' | '&=' | '|=' | '^=' | '<<=' | '>>=' | '?='"
if self.token.type is not Tokens.IDENTIFIER \
and not self._accept(Tokens.IDENTIFIER):
raise SyntaxError("{0}: expected IDENTIFIER".format(self.token))
variable = self.token.value
self._expect(Tokens.ASSIGN, Tokens.SMART_ASSIGN)
assign_type, assign_literal = self.token.type, self.token.value
value = self.expression()
if assign_type is Tokens.SMART_ASSIGN:
var_ = Variable("VAR", variable)
op_type = assign_literal[:-1]
op_type = '?:' if op_type=='?' else op_type
value = BinaryOperation(op_type, var_, value)
return Assignment("SET", variable, value)
def statement(self):
if self._accept(Tokens.IDENTIFIER, Tokens.IF, Tokens.WHILE, Tokens.WRITE):
value = {
Tokens.IDENTIFIER: self.assignment,
Tokens.IF: self.if_block,
Tokens.WHILE: self.while_block,
Tokens.WRITE: self.write_statement,
}[self.token.type]()
return value
def expr_or_statement(self):
#I'm approaching this the wrong way...
#There's too much code here
if self._accept(Tokens.IF, Tokens.WHILE, Tokens.WRITE):
value = {
Tokens.IF: self.if_block,
Tokens.WHILE: self.while_block,
Tokens.WRITE: self.write_statement,
}[self.token.type]()
return value
elif self._accept(Tokens.IDENTIFIER, ):
variable = self.token.value
if self._accept(Tokens.ASSIGN, Tokens.SMART_ASSIGN):
assign_type, assign_literal = self.token.type, self.token.value
value = self.expression()
if assign_type is Tokens.SMART_ASSIGN:
var_ = Variable("VAR", variable)
op_type = assign_literal[:-1]
op_type = '?:' if op_type=='?' else op_type
value = BinaryOperation(op_type, var_, value)
return Assignment("SET", variable, value)
else:
value = Variable("VAR", self.token.value)
if self._accept(Tokens.QUESTION_MARK):
test = self.or_test()
self._expect(Tokens.COLON)
if_false = self.expression()
return TernaryOperation("?", test, value, if_false)
return value
return self.expression()
def if_block(self):
"<if> ::= 'if' <expression> 'then' <block> [ 'else' <block> ] 'end'"
condition = self.expression()
self._expect(Tokens.THEN)
if_true = self.block()
if_false = None
if self._accept(Tokens.ELSE):
if_false = self.block()
self._expect(Tokens.END)
return If("IF", condition, if_true, if_false)
def while_block(self):
"<while> ::= 'while' <expression> 'do' <block> 'end'"
condition = self.expression()
self._expect(Tokens.DO)
operations = self.block()
self._expect(Tokens.END)
return While("WHILE", condition, operations)
def write_statement(self):
"<write> ::= 'write' <expression-list> [ 'on' <identifier> ]"
value = self.expression_list() #TODO: complete write_statement
return Write("WRITE", value)
def line(self):
"<line> ::= <statement> ';'"
value = self.statement()
#if value:
self._accept(Tokens.SEMICOLON, Tokens.NEWLINE)
return value
def block(self):
"<block> ::= <line> <line>*"
lines = []
line = self.line()
while line is not None:
lines.append(line)
line = self.line()
return Block("BLOCK", lines)
if 0:
t = TreeBuilder()
env = {}
#for token in tokenize("title?='Untitled'"):
# print(token)
print(t.parse("a:=10k*1k/10**6;"))
print(t.parse("a:=10k*1k/10**6;").eval(env))
print(t.parse("b:='3'+('<'?3<2:'>=')+'2';").eval(env))
t.parse("title:=Null;").eval(env)
t.parse("title?='Untitled';").eval(env)
t.parse("i:=10;while i do write i; i-=1; end;").eval(env)
print(env)
if __name__ == "__main__":
t, env = TreeBuilder(), {}
line, debug = '', True
while line.lower() != 'quit':
line = input(">>> ").strip()
if line.lower() not in ('quit', 'debug'):
try:
parsed = t.parse(line, _who_you_gonna_call = t.block)#t.expr_or_statement)#t.expression) #t.expr_or_statement)
if debug:
print("AST:", parsed)
print("Forth Code:\n", parsed.compile_forth())
evaluated = parsed.eval(env)
if evaluated is not None:
print(evaluated)
exec('_ = %s'%evaluated)
env['_'] = evaluated
except SyntaxError as e:
print("Syntax Error:",e)
except RuntimeError as e:
print("Runtime Error:", e)
elif line.lower() == 'debug':
debug = not debug
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment