Skip to content

Instantly share code, notes, and snippets.

@zhu327
Created November 16, 2017 07:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zhu327/4607b63150d8b61b289e906976fa675d to your computer and use it in GitHub Desktop.
Save zhu327/4607b63150d8b61b289e906976fa675d to your computer and use it in GitHub Desktop.
imp语言解释器实现 http://python.jobbole.com/82206/
# coding: utf-8
import sys
import re
def lex(characters, token_exprs):
pos = 0
tokens = []
while pos < len(characters):
match = None
for token_expr in token_exprs:
pattern, tag = token_expr
regex = re.compile(pattern)
match = regex.match(characters, pos) # pos 指定匹配的索引位置
if match:
text = match.group(0) # 获取整个匹配到的字符串
if tag:
token = (text, tag)
tokens.append(token)
break # 一旦匹配到了就跳出for, 配置下一个词
if not match:
sys.stderr.write('Illegal character: %sn' % characters[pos])
sys.exit(1)
else:
pos = match.end(0) # 匹配的结果位置
return tokens
RESERVED = 'RESERVED' # 操作符
INT = 'INT' # 整数
ID = 'ID' # 标识符
token_exprs = [
(r'[ \n\t]+', None),
(r'#[^\n]*', None),
(r'\:=', RESERVED),
(r'\(', RESERVED),
(r'\)', RESERVED),
(r';', RESERVED),
(r'\+', RESERVED),
(r'-', RESERVED),
(r'\*', RESERVED),
(r'/', RESERVED),
(r'<=', RESERVED),
(r'<', RESERVED),
(r'>=', RESERVED),
(r'>', RESERVED),
(r'!=', RESERVED),
(r'=', RESERVED),
(r'and', RESERVED),
(r'or', RESERVED),
(r'not', RESERVED),
(r'if', RESERVED),
(r'then', RESERVED),
(r'else', RESERVED),
(r'while', RESERVED),
(r'do', RESERVED),
(r'end', RESERVED),
(r'[0-9]+', INT),
(r'[A-Za-z][A-Za-z0-9_]*', ID),
]
def imp_lex(characters):
return lex(characters, token_exprs)
class Result:
def __init__(self, value, pos):
self.value = value # 值, 作为AST树的一部分
self.pos = pos # 位置
def __repr__(self):
return 'Result(%s, %d)' % (self.value, self.pos)
class Parser:
def __call__(self, tokens, pos):
return None # subclasses will override this
def __add__(self, other):
return Concat(self, other)
def __mul__(self, other):
return Exp(self, other)
def __or__(self, other):
return Alternate(self, other)
def __xor__(self, function):
return Process(self, function)
class Reserved(Parser):
def __init__(self, value, tag):
self.value = value
self.tag = tag
def __call__(self, tokens, pos):
if pos < len(tokens) and \
tokens[pos][0] == self.value and \
tokens[pos][1] is self.tag:
return Result(tokens[pos][0], pos + 1) # 找到的同时, 移动到下一个位置
else:
return None
class Tag(Parser):
def __init__(self, tag):
self.tag = tag
def __call__(self, tokens, pos):
if pos < len(tokens) and tokens[pos][1] is self.tag:
return Result(tokens[pos][0], pos + 1)
else:
return None
# parser = Concat(Concat(Tag(INT), Reserved('+', RESERVED)), Tag(INT)) 1 + 2
# parser = Tag(INT) + Reserved('+', RESERVED) + Tag(INT)
class Concat(Parser):
def __init__(self, left, right):
self.left = left
self.right = right
def __call__(self, tokens, pos):
left_result = self.left(tokens, pos)
if left_result:
right_result = self.right(tokens, left_result.pos)
if right_result:
combined_value = (left_result.value, right_result.value)
return Result(combined_value, right_result.pos)
return None
"""
parser = Reserved('+', RESERVED) |
Reserved('-', RESERVED) |
Reserved('*', RESERVED) |
Reserved('/', RESERVED)
"""
class Alternate(Parser):
def __init__(self, left, right):
self.left = left
self.right = right
def __call__(self, tokens, pos):
left_result = self.left(tokens, pos)
if left_result:
return left_result
else:
right_result = self.right(tokens, pos)
return right_result
class Opt(Parser):
def __init__(self, parser):
self.parser = parser
def __call__(self, tokens, pos):
result = self.parser(tokens, pos)
if result:
return result
else:
return Result(None, pos) # 返回一个空的结果
class Rep(Parser):
def __init__(self, parser):
self.parser = parser
def __call__(self, tokens, pos):
results = []
result = self.parser(tokens, pos)
while result:
results.append(result.value)
pos = result.pos
result = self.parser(tokens, pos)
return Result(results, pos) # 重复解析, 直到没有结果
class Lazy(Parser):
def __init__(self, parser_func):
self.parser = None
self.parser_func = parser_func
def __call__(self, tokens, pos):
if not self.parser:
self.parser = self.parser_func()
return self.parser(tokens, pos) # 延迟解析, 只有被调用时, 才生成解析器
class Phrase(Parser): # 输入的解析器必须解析到结尾, 否则返回None
def __init__(self, parser):
self.parser = parser
def __call__(self, tokens, pos):
result = self.parser(tokens, pos)
if result and result.pos == len(tokens):
return result
else:
return None
class Exp(Parser): # 组合, 解析器, 分隔符解析器
def __init__(self, parser, separator):
self.parser = parser
self.separator = separator # 会返回一个函数
def __call__(self, tokens, pos):
result = self.parser(tokens, pos) # 正常解析
def process_next(parsed):
(sepfunc, right) = parsed
return sepfunc(result.value, right) # 使用separator返回的函数处理, 前后2个结果value
next_parser = self.separator + self.parser ^ process_next # + 会返回(函数, 解析结果), ^ 会调用process_next
next_result = result
while next_result: # 迭代, 直到产生最后的结果, 类似reduce
next_result = next_parser(tokens, result.pos)
if next_result:
result = next_result
return result
class Process(Parser): # 异或处理, 产生结果的同时调用函数, 处理结果
def __init__(self, parser, function):
self.parser = parser
self.function = function
def __call__(self, tokens, pos):
result = self.parser(tokens, pos)
if result:
result.value = self.function(result.value)
return result
class Equality: # 用于判断是否相等
def __eq__(self, other):
return isinstance(other, self.__class__) and \
self.__dict__ == other.__dict__
def __ne__(self, other):
return not self.__eq__(other)
class Aexp(Equality): # 算术表达式
pass
class IntAexp(Aexp): # int值
def __init__(self, i):
self.i = i
def __repr__(self):
return 'IntAexp(%d)' % self.i
def eval(self, env):
return self.i
class VarAexp(Aexp): # 变量
def __init__(self, name):
self.name = name
def __repr__(self):
return 'VarAexp(%s)' % self.name
def eval(self, env):
if self.name in env:
return env[self.name]
else:
return 0
class BinopAexp(Aexp): # 算术运算
def __init__(self, op, left, right):
self.op = op # 操作符
self.left = left # 左边的Aexp
self.right = right # 右边的Aexp
def __repr__(self):
return 'BinopAexp(%s, %s, %s)' % (self.op, self.left, self.right)
def eval(self, env):
left_value = self.left.eval(env)
right_value = self.right.eval(env)
if self.op == '+':
value = left_value + right_value
elif self.op == '-':
value = left_value - right_value
elif self.op == '*':
value = left_value * right_value
elif self.op == '/':
value = left_value / right_value
else:
raise RuntimeError('unknown operator: ' + self.op)
return value
class Bexp(Equality): # 布尔表达式
pass
class RelopBexp(Bexp): # 布尔运算
def __init__(self, op, left, right):
self.op = op
self.left = left
self.right = right
def __repr__(self):
return 'RelopBexp(%s, %s, %s)' % (self.op, self.left, self.right)
def eval(self, env):
left_value = self.left.eval(env)
right_value = self.right.eval(env)
if self.op == '<':
value = left_value < right_value
elif self.op == '<=':
value = left_value <= right_value
elif self.op == '>':
value = left_value > right_value
elif self.op == '>=':
value = left_value >= right_value
elif self.op == '=':
value = left_value == right_value
elif self.op == '!=':
value = left_value != right_value
else:
raise RuntimeError('unknown operator: ' + self.op)
return value
class AndBexp(Bexp): # and
def __init__(self, left, right):
self.left = left
self.right = right
def __repr__(self):
return 'AndBexp(%s, %s)' % (self.left, self.right)
def eval(self, env):
left_value = self.left.eval(env)
right_value = self.right.eval(env)
return left_value and right_value
class OrBexp(Bexp): # or
def __init__(self, left, right):
self.left = left
self.right = right
def __repr__(self):
return 'OrBexp(%s, %s)' % (self.left, self.right)
def eval(self, env):
left_value = self.left.eval(env)
right_value = self.right.eval(env)
return left_value or right_value
class NotBexp(Bexp): # not
def __init__(self, exp):
self.exp = exp
def __repr__(self):
return 'NotBexp(%s)' % self.exp
def eval(self, env):
value = self.exp.eval(env)
return not value
class Statement(Equality): # 关系表达式
pass
class AssignStatement(Statement): # 赋值语句
def __init__(self, name, aexp):
self.name = name # 变量
self.aexp = aexp # 算术表达式
def __repr__(self):
return 'AssignStatement(%s, %s)' % (self.name, self.aexp)
def eval(self, env):
value = self.aexp.eval(env)
env[self.name] = value
class CompoundStatement(Statement): # 复合语句
def __init__(self, first, second):
self.first = first
self.second = second
def __repr__(self):
return 'CompoundStatement(%s, %s)' % (self.first, self.second)
def eval(self, env):
self.first.eval(env)
self.second.eval(env)
class IfStatement(Statement): # 条件语句
def __init__(self, condition, true_stmt, false_stmt):
self.condition = condition # 条件
self.true_stmt = true_stmt # 真块
self.false_stmt = false_stmt # 假块
def __repr__(self):
return 'IfStatement(%s, %s, %s)' % (self.condition, self.true_stmt, self.false_stmt)
def eval(self, env):
condition_value = self.condition.eval(env)
if condition_value:
self.true_stmt.eval(env)
else:
if self.false_stmt:
self.false_stmt.eval(env)
class WhileStatement(Statement): # 循环语句
def __init__(self, condition, body):
self.condition = condition # 条件表达式
self.body = body # 循环体
def __repr__(self):
return 'WhileStatement(%s, %s)' % (self.condition, self.body)
def eval(self, env):
condition_value = self.condition.eval(env)
while condition_value:
self.body.eval(env)
condition_value = self.condition.eval(env)
def keyword(kw):
return Reserved(kw, RESERVED)
id = Tag(ID)
num = Tag(INT) ^ (lambda i: int(i)) # 转int类型
def aexp_value(): # 算术值 转成IntAexp或VarAexp对象
return (num ^ (lambda i: IntAexp(i))) | \
(id ^ (lambda v: VarAexp(v)))
def process_group(parsed): # 在 1 + 2 只取中间的结果
((_, p), _) = parsed
return p
aexp_precedence_levels = [ # 运算优先级
['*', '/'],
['+', '-'],
]
def precedence(value_parser, precedence_levels, combine):
def op_parser(precedence_level): # 最终返回的是combine 生成的函数
return any_operator_in_list(precedence_level) ^ combine # 一个或的paser完成后调用后面的function
parser = value_parser * op_parser(precedence_levels[0])
for precedence_level in precedence_levels[1:]:
parser = parser * op_parser(precedence_level) # 下一个优先级的分割符号
return parser # 实际上这里会先匹配 * / ,然后再在内部匹配+ 1
def any_operator_in_list(ops): # 输入运算符列表, 组合成|的解析器
op_parsers = [keyword(op) for op in ops]
parser = reduce(lambda l, r: l | r, op_parsers)
return parser
def process_binop(op): # 生成算术运算表达式工厂函数
return lambda l, r: BinopAexp(op, l, r)
def aexp(): # 最中能匹配所有表达式的paser
return precedence(aexp_term(),
aexp_precedence_levels,
process_binop)
def aexp_group(): # 组表达式, 用括号包住
return keyword('(') + Lazy(aexp) + keyword(')') ^ process_group # 匹配括号中间的内容, 并返回括号中间的表达式
def aexp_term(): # 要么是算术值(值或者变量), 要么用算术运算群
return aexp_value() | aexp_group()
"""
E0 = value_parser = aexp_term()
E1 = E0 * op_parser(precedence_levels[0])
E2 = E1 * op_parser(precedence_levels[1])
4 * a + b / 2 - (6 + c)
E0(4) * E0(a) + E0(b) / E0(2) - E0(6+c)
E1(4*a) + E1(b/2) - E1(6+c)
E2((4*a)+(b/2)-(6+c))
"""
def process_relop(parsed):
((left, op), right) = parsed
return RelopBexp(op, left, right)
def bexp_relop(): # bool运算表达式
relops = ['<', '<=', '>', '>=', '=', '!=']
return aexp() + any_operator_in_list(relops) + aexp() ^ process_relop
def bexp_not(): # not 表达式
return keyword('not') + Lazy(bexp_term) ^ (lambda parsed: NotBexp(parsed[1]))
def bexp_group(): # 括号
return keyword('(') + Lazy(bexp) + keyword(')') ^ process_group
def bexp_term():
return bexp_not() | \
bexp_relop() | \
bexp_group()
bexp_precedence_levels = [
['and'],
['or'],
]
def process_logic(op):
if op == 'and':
return lambda l, r: AndBexp(l, r)
elif op == 'or':
return lambda l, r: OrBexp(l, r)
else:
raise RuntimeError('unknown logic operator: ' + op)
def bexp():
return precedence(bexp_term(),
bexp_precedence_levels,
process_logic)
def assign_stmt(): # 赋值语句
def process(parsed):
((name, _), exp) = parsed
return AssignStatement(name, exp)
return id + keyword(':=') + aexp() ^ process
def stmt_list():
separator = keyword(';') ^ (lambda x: lambda l, r: CompoundStatement(l, r))
return Exp(stmt(), separator) # 复合语句
def if_stmt(): # if 语句
def process(parsed):
(((((_, condition), _), true_stmt), false_parsed), _) = parsed
if false_parsed:
(_, false_stmt) = false_parsed
else:
false_stmt = None
return IfStatement(condition, true_stmt, false_stmt)
return keyword('if') + bexp() + \
keyword('then') + Lazy(stmt_list) + \
Opt(keyword('else') + Lazy(stmt_list)) + \
keyword('end') ^ process # bexp 条件语句
def while_stmt(): # 循环语句
def process(parsed):
((((_, condition), _), body), _) = parsed
return WhileStatement(condition, body)
return keyword('while') + bexp() + \
keyword('do') + Lazy(stmt_list) + \
keyword('end') ^ process
def stmt():
return assign_stmt() | \
if_stmt() | \
while_stmt()
def parser():
return Phrase(stmt_list())
def imp_parse(tokens):
ast = parser()(tokens, 0)
return ast
"""
if __name__ == '__main__':
if len(sys.argv) != 3:
sys.stderr.write('usage: %s filename parsername' % sys.argv[0])
sys.exit(1)
filename = sys.argv[1]
file = open(filename)
characters = file.read()
file.close()
tokens = imp_lex(characters)
parser = globals()[sys.argv[2]]()
result = parser(tokens, 0)
print result
"""
def usage():
sys.stderr.write('Usage: imp filenamen')
sys.exit(1)
if __name__ == '__main__':
if len(sys.argv) != 2:
usage()
filename = sys.argv[1]
text = open(filename).read()
tokens = imp_lex(text)
parse_result = imp_parse(tokens)
if not parse_result:
sys.stderr.write('Parse error!n')
sys.exit(1)
ast = parse_result.value
env = {}
ast.eval(env)
sys.stdout.write('Final variable values:\n')
for name in env:
sys.stdout.write('%s: %s\n' % (name, env[name]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment