Skip to content

Instantly share code, notes, and snippets.

@knoguchi
Last active August 29, 2015 14:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save knoguchi/c407fb186550aef6d22a to your computer and use it in GitHub Desktop.
Save knoguchi/c407fb186550aef6d22a to your computer and use it in GitHub Desktop.
Pythonでコンパイラ: PL/0構文木 ref: http://qiita.com/knoguchi/items/6f9b7383b7252a9ebdad
['VAR', 'x', ',', 'squ', ';', 'PROCEDURE', 'square', ';', 'BEGIN', 'squ', ':=', 'x', '*', 'x', 'END', ';', 'BEGIN', 'x', ':=', '1', ';', 'WHILE', 'x', '<=', '10', 'DO', 'BEGIN', 'CALL', 'square', ';', 'x', ':=', 'x', '+', '1', ';', 'END', 'END', '.']
# 先頭に追加.
LPAR, RPAR, COMMA, SEMICOLON, DOT = map(Suppress, "(),;.")
# 文字列を定数で置き換える
# before
# factor << (ident | number | "(" + expression + ")")
# after
factor << (ident | number | LPAR + expression + RPAR)
# 同様に, ; . も定数で置き換える
# 4. condition
#condition = ODD + expression | expression + oneOf('= # < <= > >=') + expression
condition = infixNotation(
expression,
[
(ODD, UNARY, opAssoc.RIGHT),
(oneOf("< <= > >="), BINARY, opAssoc.LEFT),
(oneOf("= #"), BINARY, opAssoc.LEFT),
]
)
['VAR', ['x', 'squ'], 'PROCEDURE', 'square', 'BEGIN', 'squ', ':=', ['x', '*', 'x'], 'END', 'BEGIN', 'x', ':=', '1', 'WHILE', ['x', '<=', '10'], 'DO', 'BEGIN', 'CALL', 'square', 'x', ':=', ['x', '+', '1'], 'END', 'END']
>>> print expression.parseString('1 + 2 * 3 + 4')
[['1', '+', ['2', '*', '3'], '+', '4']]
>>> print expression.parseString('1 + 2 / 3 * 4 - -5')
[['1', '+', ['2', '/', '3', '*', '4'], '-', ['-', '5']]]
$ python pl0_parser.py ex1.pl0
['VAR', 'x', 'squ', 'PROCEDURE', 'square', 'BEGIN', 'squ', ':=', 'x', '*', 'x', 'END', 'BEGIN', 'x', ':=', '1', 'WHILE', 'x', '<=', '10', 'DO', 'BEGIN', 'CALL', 'square', 'x', ':=', 'x', '+', '1', 'END', 'END']
# 11. var
var = VAR + Group(ident + ZeroOrMore(COMMA + ident)) + SEMICOLON
['VAR', ['x', 'squ'], 'PROCEDURE', 'square', 'BEGIN', 'squ', ':=', 'x', '*', 'x', 'END', 'BEGIN', 'x', ':=', '1', 'WHILE', 'x', '<=', '10', 'DO', 'BEGIN', 'CALL', 'square', 'x', ':=', 'x', '+', '1', 'END', 'END']
# 11. var
def var_list(tokens):
tokens = tokens.asList()
return [tokens[0], tokens[1:]]
var = VAR + ident + ZeroOrMore(COMMA + ident) + SEMICOLON
var.setParseAction(var_list)
['VAR', ['x', 'squ'], 'PROCEDURE', 'square', 'BEGIN', 'squ', ':=', 'x', '*', 'x', 'END', 'BEGIN', 'x', ':=', '1', 'WHILE', 'x', '<=', '10', 'DO', 'BEGIN', 'CALL', 'square', 'x', ':=', 'x', '+', '1', 'END', 'END']
# 11. var
class Var(object):
def __init__(self, tokens):
tokens = tokens.asList()
self.variables = tokens[1]
var = VAR + Group(ident + ZeroOrMore(COMMA + ident)) + SEMICOLON
var.setParseAction(Var)
[<__main__.Var object at 0x10d418710>, 'PROCEDURE', 'square', 'BEGIN', 'squ', ':=', 'x', '*', 'x', 'END', 'BEGIN', 'x', ':=', '1', 'WHILE', 'x', '<=', '10', 'DO', 'BEGIN', 'CALL', 'square', 'x', ':=', 'x', '+', '1', 'END', 'END']
# term = Forward()
# factor = Forward()
# expression = Optional(oneOf("+ -")) + term + ZeroOrMore(oneOf("+ -") + term)
# term << (factor + ZeroOrMore(oneOf("* /") + factor))
# factor << (ident | number | LPAR + expression + RPAR)
# infixNotationは演算子の優先順位を定義する。
# 同位の演算子は1行で書くこと。
UNARY, BINARY, TERNARY = 1, 2, 3
factor = ident | number
expression = infixNotation(
factor,
[
(oneOf("+ -"), UNARY, opAssoc.RIGHT), # 符号は最優先。
(oneOf("* /"), BINARY, opAssoc.LEFT), # 掛け算割り算は足し算引き算より優先
(oneOf("+ -"), BINARY, opAssoc.LEFT),
]
)
# -*- coding: utf-8 -*-
from pyparsing import *
LPAR, RPAR, COMMA, SEMICOLON, DOT = map(Suppress, "(),;.")
# 1. reserved keyword
(CONST, VAR, PROCEDURE, CALL, BEGIN, END, IF, THEN, WHILE, DO, ODD) = map(CaselessKeyword,
"CONST, VAR, PROCEDURE, CALL, BEGIN, END, IF, THEN, WHILE, DO, ODD".replace(",", "").split())
keyword = MatchFirst((CONST, VAR, PROCEDURE, CALL, BEGIN, END, IF, THEN, WHILE, DO, ODD))
# 2. identifier
ident = ~keyword + Word(alphas, alphanums + "_")
# 3. expression
number = Regex(r"\d+(\.\d*)?([eE][+-]?\d+)?")
UNARY, BINARY, TERNARY = 1, 2, 3
factor = ident | number
expression = infixNotation(
factor,
[
(oneOf("+ -"), UNARY, opAssoc.RIGHT), # 符号は最優先
(oneOf("* /"), BINARY, opAssoc.LEFT), # 掛け算割り算は足し算引き算より優先
(oneOf("+ -"), BINARY, opAssoc.LEFT),
]
)
# 4. condition
#condition = ODD + expression | expression + oneOf('= # < <= > >=') + expression
condition = infixNotation(
expression,
[
(ODD, UNARY, opAssoc.RIGHT),
(oneOf("< <= > >="), BINARY, opAssoc.LEFT),
(oneOf("= #"), BINARY, opAssoc.LEFT),
]
)
# 5. assignment
assign_statement = ident + ":=" + expression
# 6. call
call_statement = CALL + ident
# 7. if-then
statement = Forward()
if_statement = IF + condition + THEN + statement
# 8. while-do
while_statement = WHILE + condition + DO + statement
# 9. statement
statement << Optional(assign_statement
| call_statement
| BEGIN + statement + ZeroOrMore(SEMICOLON + statement) + END
| if_statement
| while_statement
)
# 10. const
const = CONST + Group(Group(ident + "=" + number) + ZeroOrMore(COMMA + ident + "=" + number)) + SEMICOLON
# 11. var
var = VAR + Group(ident + ZeroOrMore(COMMA + ident)) + SEMICOLON
# 12. procedure
block = Forward()
procedure = PROCEDURE + ident + SEMICOLON + block + SEMICOLON
# 13. block
block << Optional(const) + Optional(var) + ZeroOrMore(procedure) + statement
# 14. program
program = block + DOT
if __name__ == '__main__':
import sys
with open(sys.argv[1], 'r') as fp:
txt = fp.read()
print program.parseString(txt)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment