Last active
May 16, 2022 03:38
-
-
Save g101418/1a7a398542af155c0f3408e3351af2c2 to your computer and use it in GitHub Desktop.
Python技巧
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class NodeVisitor(object):
    """Dispatch an AST node to its visit_<ClassName> handler (visitor pattern)."""

    def visit(self, node: AST) -> float:
        # Handler is named after the node's concrete class, e.g. a BinOp
        # node dispatches to visit_BinOp; unknown nodes hit generic_visit.
        handler = getattr(self, 'visit_' + type(node).__name__, self.generic_visit)
        return handler(node)

    def generic_visit(self, node: AST) -> None:
        # Reached only when no visit_<ClassName> method exists on self.
        raise Exception('No visit_{} method'.format(type(node).__name__))
class Interpreter(NodeVisitor):
    """Evaluate the AST produced by the parser.

    Each node class handled here gets a visit_<ClassName> method; dispatch
    is inherited from NodeVisitor.
    """

    def __init__(self, parser: Parser):
        self.parser = parser

    def visit_BinOp(self, node: AST) -> float:
        """Evaluate a binary operation node (left <op> right)."""
        if node.op.type == PLUS:
            return self.visit(node.left) + self.visit(node.right)
        elif node.op.type == MINUS:
            return self.visit(node.left) - self.visit(node.right)
        elif node.op.type == MUL:
            return self.visit(node.left) * self.visit(node.right)
        elif node.op.type == DIV:
            return self.visit(node.left) / self.visit(node.right)
        # Previously fell through and silently returned None for an unknown
        # operator; fail loudly so parser/lexer bugs surface at the source.
        raise Exception('Unknown binary operator: {}'.format(node.op.type))

    def visit_Num(self, node: AST) -> float:
        # Leaf node: the literal value stored by the parser.
        return node.value

    def visit_UnaryOp(self, node: AST) -> float:
        """Evaluate a unary +/- node."""
        op = node.op.type
        if op == PLUS:
            return +self.visit(node.expr)
        elif op == MINUS:
            return -self.visit(node.expr)
        # Same fix as visit_BinOp: never return None silently.
        raise Exception('Unknown unary operator: {}'.format(op))

    def interpret(self) -> float:
        """Parse the input and evaluate the resulting tree."""
        tree = self.parser.parse()
        return self.visit(tree)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Python技巧合集
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Symbol(object):
    """Base symbol-table record: a name plus an optional type.

    `type` is itself expected to be a Symbol (e.g. a BuiltinTypeSymbol)
    or None — TODO confirm against callers outside this excerpt.
    """

    def __init__(self, name, type=None):
        self.name = name
        self.type = type
class VarSymbol(Symbol):
    """Symbol-table entry for a declared variable (name + type)."""

    def __init__(self, name, type):
        super().__init__(name, type)

    def __str__(self):
        # Renders as e.g. <x:INTEGER>
        return '<{0.name}:{0.type}>'.format(self)

    __repr__ = __str__
class BuiltinTypeSymbol(Symbol):
    """Symbol-table entry for a built-in type (e.g. INTEGER, REAL)."""

    def __init__(self, name):
        # Built-in types have no type of their own.
        super().__init__(name)

    def __str__(self):
        # A built-in type prints as its bare name.
        return self.name

    __repr__ = __str__
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Error(Exception):
    """Base class for interpreter errors.

    Stores the offending token and an error code, and tags the message
    with the concrete exception class name (e.g. "LexerError: ...").
    """

    def __init__(self, error_code=None, token=None, message=None):
        self.error_code = error_code
        self.token = token
        # add exception class name before the message
        self.message = f'{type(self).__name__}: {message}'
class LexerError(Error):
    """Raised when the lexer meets a character it cannot tokenize."""
# NOTE(review): the base class was garbled to "??" in the scraped source —
# `object` restores valid syntax. This block looks like a duplicated excerpt
# of Lexer.error (it reads self.current_char/lineno/column); confirm the
# intended class name and base against the original gist.
class Another(object):
    def error(self):
        """Raise a LexerError pinpointing the current character/position."""
        s = "Lexer error on '{lexeme}' line: {lineno} column: {column}".format(
            lexeme=self.current_char,
            lineno=self.lineno,
            column=self.column,
        )
        raise LexerError(message=s)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class TokenType(Enum):
    """Every token kind the lexer can emit.

    Each member's value is the token's lexeme (for punctuation and
    reserved words) or a descriptive tag (for constants, ID, EOF).
    """
    # single-character token types; TokenType(ch) is used by the lexer
    # to map a character straight to its member
    PLUS = '+'
    MINUS = '-'
    MUL = '*'
    FLOAT_DIV = '/'
    LPAREN = '('
    RPAREN = ')'
    SEMI = ';'
    DOT = '.'
    COLON = ':'
    COMMA = ','
    # block of reserved words — _build_reserved_keywords() slices this
    # contiguous PROGRAM..END span in definition order; do not reorder
    PROGRAM = 'PROGRAM'  # marks the beginning of the block
    INTEGER = 'INTEGER'
    REAL = 'REAL'
    INTEGER_DIV = 'DIV'
    VAR = 'VAR'
    PROCEDURE = 'PROCEDURE'
    BEGIN = 'BEGIN'
    END = 'END'  # marks the end of the block
    # misc
    ID = 'ID'
    INTEGER_CONST = 'INTEGER_CONST'
    REAL_CONST = 'REAL_CONST'
    ASSIGN = ':='
    EOF = 'EOF'
class Token(object):
    """A single lexical token: type, value, and source position."""

    def __init__(self, type, value, lineno=None, column=None):
        self.type = type
        self.value = value
        self.lineno = lineno  # 1-based source line, when known
        self.column = column  # 1-based source column, when known

    def __str__(self):
        """String representation of the class instance.

        Example:
            >>> Token(TokenType.INTEGER, 7, lineno=5, column=10)
            Token(TokenType.INTEGER, 7, position=5:10)
        """
        return 'Token({}, {}, position={}:{})'.format(
            self.type, repr(self.value), self.lineno, self.column
        )

    __repr__ = __str__
def _build_reserved_keywords():
    """Build a dictionary of reserved keywords.

    Relies on the TokenType enumeration listing its reserved-word members
    contiguously and in definition order, from PROGRAM (start of the
    block) through END (end of the block).

    Result:
        {'PROGRAM': <TokenType.PROGRAM: 'PROGRAM'>,
         'INTEGER': <TokenType.INTEGER: 'INTEGER'>,
         'REAL': <TokenType.REAL: 'REAL'>,
         'DIV': <TokenType.INTEGER_DIV: 'DIV'>,
         'VAR': <TokenType.VAR: 'VAR'>,
         'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>,
         'BEGIN': <TokenType.BEGIN: 'BEGIN'>,
         'END': <TokenType.END: 'END'>}
    """
    # Enumerations iterate in definition order, so slicing between the
    # PROGRAM and END sentinels yields exactly the reserved-word members.
    members = list(TokenType)
    first = members.index(TokenType.PROGRAM)
    last = members.index(TokenType.END)
    return {member.value: member for member in members[first:last + 1]}


RESERVED_KEYWORDS = _build_reserved_keywords()
class Lexer(object):
    """Turn a source string into a stream of Tokens via get_next_token()."""

    def __init__(self, text):
        # client string input, e.g. "4 + 2 * 3 - 6 / 2"
        self.text = text
        # self.pos is an index into self.text
        self.pos = 0
        # Fix: the original indexed text[0] unconditionally and raised
        # IndexError on empty input; start at EOF (None) instead.
        self.current_char = self.text[self.pos] if self.text else None
        # token line number and column number
        self.lineno = 1
        self.column = 1

    def error(self):
        """Raise a LexerError pinpointing the current character/position."""
        s = "Lexer error on '{lexeme}' line: {lineno} column: {column}".format(
            lexeme=self.current_char,
            lineno=self.lineno,
            column=self.column,
        )
        raise LexerError(message=s)

    def advance(self):
        """Advance the `pos` pointer and set the `current_char` variable."""
        if self.current_char == '\n':
            self.lineno += 1
            # Column resets to 0 so the += 1 below lands on column 1.
            self.column = 0

        self.pos += 1
        if self.pos > len(self.text) - 1:
            self.current_char = None  # Indicates end of input
        else:
            self.current_char = self.text[self.pos]
            self.column += 1

    def peek(self):
        """Return the next character without consuming it (None at end)."""
        peek_pos = self.pos + 1
        if peek_pos > len(self.text) - 1:
            return None
        return self.text[peek_pos]

    def skip_whitespace(self):
        """Consume a run of whitespace characters."""
        while self.current_char is not None and self.current_char.isspace():
            self.advance()

    def skip_comment(self):
        """Consume a '{ ... }' comment (opening brace already consumed).

        Fix: the original looped `while self.current_char != '}'`, which
        spins forever on an unterminated comment because None != '}'.
        """
        while self.current_char is not None and self.current_char != '}':
            self.advance()
        if self.current_char is None:
            self.error()  # unterminated comment
        self.advance()  # the closing curly brace

    def number(self):
        """Return a (multidigit) integer or float consumed from the input."""
        # Create a new token with current line and column number
        token = Token(type=None, value=None, lineno=self.lineno, column=self.column)

        result = ''
        while self.current_char is not None and self.current_char.isdigit():
            result += self.current_char
            self.advance()

        if self.current_char == '.':
            # Fractional part: '.' followed by more digits makes a REAL.
            result += self.current_char
            self.advance()
            while self.current_char is not None and self.current_char.isdigit():
                result += self.current_char
                self.advance()
            token.type = TokenType.REAL_CONST
            token.value = float(result)
        else:
            token.type = TokenType.INTEGER_CONST
            token.value = int(result)

        return token

    def _id(self):
        """Handle identifiers and reserved keywords"""
        # Create a new token with current line and column number
        token = Token(type=None, value=None, lineno=self.lineno, column=self.column)

        value = ''
        while self.current_char is not None and self.current_char.isalnum():
            value += self.current_char
            self.advance()

        # Keywords are case-insensitive: lookup uses the uppercased lexeme.
        token_type = RESERVED_KEYWORDS.get(value.upper())
        if token_type is None:
            token.type = TokenType.ID
            token.value = value
        else:
            # reserved keyword
            token.type = token_type
            token.value = value.upper()

        return token

    def get_next_token(self):
        """Lexical analyzer (also known as scanner or tokenizer)

        This method is responsible for breaking a sentence
        apart into tokens. One token at a time.
        """
        while self.current_char is not None:
            if self.current_char.isspace():
                self.skip_whitespace()
                continue

            if self.current_char == '{':
                self.advance()
                self.skip_comment()
                continue

            if self.current_char.isalpha():
                return self._id()

            if self.current_char.isdigit():
                return self.number()

            # two-character ':=' must be checked before the single ':'
            if self.current_char == ':' and self.peek() == '=':
                token = Token(
                    type=TokenType.ASSIGN,
                    value=TokenType.ASSIGN.value,  # ':='
                    lineno=self.lineno,
                    column=self.column,
                )
                self.advance()
                self.advance()
                return token

            # single-character token
            try:
                # get enum member by value, e.g.
                # TokenType(';') --> TokenType.SEMI
                token_type = TokenType(self.current_char)
            except ValueError:
                # no enum member with value equal to self.current_char
                self.error()
            else:
                # create a token with a single-character lexeme as its value
                token = Token(
                    type=token_type,
                    value=token_type.value,  # e.g. ';', '.', etc
                    lineno=self.lineno,
                    column=self.column,
                )
                self.advance()
                return token

        # EOF (end-of-file) token indicates that there is no more input
        # left for lexical analysis. Consistency fix: carry the final
        # position like every other token instead of position=None:None.
        return Token(
            type=TokenType.EOF,
            value=None,
            lineno=self.lineno,
            column=self.column,
        )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment