Skip to content

Instantly share code, notes, and snippets.

@seriyps
Created April 11, 2014 14:00
Show Gist options
  • Save seriyps/10471310 to your computer and use it in GitHub Desktop.
Save seriyps/10471310 to your computer and use it in GitHub Desktop.
Lisp subset interpreter
# -*- coding: utf-8 -*-
'''
Created on 2014-04-05
@author: Sergey Prokhorov <me@seriyps.ru>
'''
import re
import operator
import decimal
class LexerError(Exception):
def __init__(self, line, pos):
super(LexerError, self).__init__(line, pos)
self.line = line
self.pos = pos
def __unicode__(self):
l = max(self.pos - 5, 0)
return u"Scan error near: '{0}' position={1}".format(
self.line[l:self.pos + 5], self.pos)
class ParserError(Exception):
def __init__(self, msg, token_type, re_match, state):
super(ParserError, self).__init__(msg, token_type, re_match, state)
self.msg = msg
self.token_type = token_type
self.re_match = re_match
self.state = state
def __unicode__(self):
return u"Parser error: {0} position={1} state={2}".format(
self.msg, self.re_match.pos, self.state)
def Lexer(rules):
prepared = [(re.compile(regexp), token_type)
for regexp, token_type in rules]
def lex(line):
ll = len(line)
i = 0
while i < ll:
for pattern, token_type in prepared:
match = pattern.match(line, i)
if match is None:
continue
i = match.end()
yield (match, token_type)
break
if match is None:
raise LexerError(line, i)
return lex
(T_LP, T_RP, T_PLUS, T_MINUS, T_MUL, T_DIV, T_GT, T_LT, T_GTE, T_LTE, T_EQ,
T_IF, T_VAR, T_NUM, T_SP) = range(15)
RULES = [
(r"\s+", T_SP),
(r"\(", T_LP),
(r"\)", T_RP),
(r"\+", T_PLUS),
(r"\-", T_MINUS),
(r"\*", T_MUL),
(r"\/", T_DIV),
(r">=", T_GTE),
(r"<=", T_LTE),
(r">", T_GT),
(r"<", T_LT),
(r"==", T_EQ),
(r"if", T_IF),
(r"([a-z]+)", T_VAR),
(r"([0-9]+(\.[0-9]+)?)", T_NUM),
]
lexer = Lexer(RULES)
class Literal(object):
def __init__(self, val):
self.val = val
def eval(self, ctx):
return self.val
class SExp(object):
__slots__ = ('fun', 'args')
def __init__(self, fun, args):
self.fun = fun
self.args = args
def eval(self, ctx):
return self.fun(*(a.eval(ctx) for a in self.args))
class Variable(object):
def __init__(self, name):
self.name = name
def eval(self, ctx):
return ctx[self.name]
OP2FUN = {
T_PLUS: operator.add,
T_MINUS: operator.sub,
T_MUL: operator.mul,
T_DIV: operator.div,
T_GTE: operator.ge,
T_LTE: operator.le,
T_GT: operator.gt,
T_LT: operator.lt,
T_EQ: operator.eq,
T_IF: lambda cond, then, else_: then if cond else else_
}
S_LP, S_OP = range(2)
def parse(tokens):
state = S_OP
stack = [SExp(lambda a: a, [])]
for m, t in tokens:
if t == T_SP:
continue
if state is S_LP:
if t in OP2FUN:
stack[-1].fun = OP2FUN[t]
state = S_OP
continue
raise ParserError("Need operator after '('", t, m, state)
elif state is S_OP:
if t == T_NUM:
stack[-1].args.append(Literal(decimal.Decimal(m.group(1))))
continue
elif t == T_VAR:
stack[-1].args.append(Variable(m.group(1)))
continue
elif t == T_RP:
sexp = stack.pop()
stack[-1].args.append(sexp)
continue
elif t == T_LP:
stack.append(SExp(None, []))
state = S_LP
continue
raise ParserError("Invalid token", t, m, state)
if len(stack) != 1:
raise ParserError("Unbalanced parenthesis", t, m, state)
return stack[0]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment