Skip to content

Instantly share code, notes, and snippets.

@g101418
Last active May 16, 2022 03:38
Show Gist options
  • Save g101418/1a7a398542af155c0f3408e3351af2c2 to your computer and use it in GitHub Desktop.
Python技巧 (Python tips)
class NodeVisitor(object):
    """Base visitor that dispatches on the node's concrete class name."""

    def visit(self, node: AST) -> float:
        # Look up a handler named after the node's class, e.g. visit_BinOp
        # for a BinOp node; fall back to generic_visit when none exists.
        handler = getattr(self, 'visit_' + type(node).__name__, self.generic_visit)
        return handler(node)

    def generic_visit(self, node: AST) -> None:
        # Reached only when no visit_<ClassName> handler is defined.
        raise Exception('No visit_{} method'.format(type(node).__name__))
class Interpreter(NodeVisitor):
    """Evaluates a parsed arithmetic AST to a numeric result."""

    def __init__(self, parser: Parser):
        self.parser = parser

    def visit_BinOp(self, node: AST) -> float:
        # Operands are evaluated inside each branch so that an unknown
        # operator type falls through without visiting the children.
        op_type = node.op.type
        if op_type == PLUS:
            return self.visit(node.left) + self.visit(node.right)
        if op_type == MINUS:
            return self.visit(node.left) - self.visit(node.right)
        if op_type == MUL:
            return self.visit(node.left) * self.visit(node.right)
        if op_type == DIV:
            return self.visit(node.left) / self.visit(node.right)

    def visit_Num(self, node: AST) -> float:
        # Leaf node: the numeric literal itself.
        return node.value

    def visit_UnaryOp(self, node: AST) -> float:
        op = node.op.type
        if op == PLUS:
            return +self.visit(node.expr)
        if op == MINUS:
            return -self.visit(node.expr)

    def interpret(self) -> float:
        # Parse the whole input, then walk the resulting tree.
        return self.visit(self.parser.parse())
# Collection of Python tips (Python技巧合集)
class Symbol(object):
    """A named symbol-table entry with an optional type."""

    def __init__(self, name, type=None):
        # `type` deliberately mirrors the callers' keyword and shadows the
        # builtin only inside this initializer.
        self.name = name
        self.type = type
class VarSymbol(Symbol):
    """A symbol representing a declared variable and its type."""

    def __init__(self, name, type):
        super().__init__(name, type)

    def __str__(self):
        # e.g. <x:INTEGER>
        return f'<{self.name}:{self.type}>'

    __repr__ = __str__
class BuiltinTypeSymbol(Symbol):
    """A symbol for a built-in type name (no `type` of its own)."""

    def __init__(self, name):
        super().__init__(name)

    def __str__(self):
        # The bare type name is the whole representation.
        return self.name

    __repr__ = __str__
class Error(Exception):
    """Root of this interpreter's exception hierarchy."""

    def __init__(self, error_code=None, token=None, message=None):
        self.error_code = error_code
        self.token = token
        # Prefix the concrete subclass name so the message reads e.g.
        # "LexerError: ...".
        self.message = f'{type(self).__name__}: {message}'
class LexerError(Error):
    """Raised when the lexer meets input it cannot turn into a token."""
    pass
class Another(object):
    """Holder for an ``error`` method identical to ``Lexer.error``.

    NOTE(review): the source line read ``class Another(??):``, which is not
    valid Python — the base class was garbled. ``object`` restores a loadable
    definition without guessing further; confirm the intended base. The
    method expects the instance to provide ``current_char``, ``lineno`` and
    ``column`` attributes (mixin-style usage).
    """

    def error(self):
        """Raise a LexerError describing the current character and position."""
        s = "Lexer error on '{lexeme}' line: {lineno} column: {column}".format(
            lexeme=self.current_char,
            lineno=self.lineno,
            column=self.column,
        )
        raise LexerError(message=s)
class TokenType(Enum):
    """All token categories the lexer can emit.

    NOTE: member definition order is load-bearing — _build_reserved_keywords()
    slices the members from PROGRAM through END (inclusive) to build the
    reserved-keyword table, so keep that block contiguous.
    """
    # single-character token types
    PLUS = '+'
    MINUS = '-'
    MUL = '*'
    FLOAT_DIV = '/'
    LPAREN = '('
    RPAREN = ')'
    SEMI = ';'
    DOT = '.'
    COLON = ':'
    COMMA = ','
    # block of reserved words
    PROGRAM = 'PROGRAM'  # marks the beginning of the block
    INTEGER = 'INTEGER'
    REAL = 'REAL'
    INTEGER_DIV = 'DIV'
    VAR = 'VAR'
    PROCEDURE = 'PROCEDURE'
    BEGIN = 'BEGIN'
    END = 'END'  # marks the end of the block
    # misc
    ID = 'ID'
    INTEGER_CONST = 'INTEGER_CONST'
    REAL_CONST = 'REAL_CONST'
    ASSIGN = ':='
    EOF = 'EOF'
class Token(object):
    """A single lexeme together with its type and source position."""

    def __init__(self, type, value, lineno=None, column=None):
        self.type = type      # TokenType member (None while the lexer builds it)
        self.value = value    # lexeme value, e.g. 7 or 'BEGIN'
        self.lineno = lineno  # source line the token starts on, when known
        self.column = column  # source column the token starts at, when known

    def __str__(self):
        """Human-readable form.

        Example:
            >>> Token(TokenType.INTEGER, 7, lineno=5, column=10)
            Token(TokenType.INTEGER, 7, position=5:10)
        """
        return f'Token({self.type}, {self.value!r}, position={self.lineno}:{self.column})'

    __repr__ = __str__
def _build_reserved_keywords():
    """Build a dictionary of reserved keywords.

    Relies on TokenType's definition order: the reserved-keyword block
    starts at PROGRAM and ends at END, inclusive.

    Result:
        {'PROGRAM': <TokenType.PROGRAM: 'PROGRAM'>,
         'INTEGER': <TokenType.INTEGER: 'INTEGER'>,
         'REAL': <TokenType.REAL: 'REAL'>,
         'DIV': <TokenType.INTEGER_DIV: 'DIV'>,
         'VAR': <TokenType.VAR: 'VAR'>,
         'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>,
         'BEGIN': <TokenType.BEGIN: 'BEGIN'>,
         'END': <TokenType.END: 'END'>}
    """
    # Enumerations iterate in definition order, so slicing the member list
    # between the two sentinels yields exactly the reserved words.
    members = list(TokenType)
    first = members.index(TokenType.PROGRAM)
    last = members.index(TokenType.END)

    keywords = {}
    for member in members[first:last + 1]:
        keywords[member.value] = member
    return keywords


RESERVED_KEYWORDS = _build_reserved_keywords()
class Lexer(object):
    """Breaks the client's source text into a stream of Token objects.

    Tracks 1-based line/column positions so tokens and error messages can
    point back into the source. Callers pull tokens one at a time via
    get_next_token() until a TokenType.EOF token is returned.
    """

    def __init__(self, text):
        # client string input, e.g. "4 + 2 * 3 - 6 / 2"
        self.text = text
        # self.pos is an index into self.text
        self.pos = 0
        # FIX: the original read self.text[self.pos] unconditionally and
        # raised IndexError on empty input; treat '' as immediate EOF.
        self.current_char = self.text[self.pos] if self.text else None
        # token line number and column number (1-based)
        self.lineno = 1
        self.column = 1

    def error(self):
        """Raise a LexerError describing the current character and position."""
        s = "Lexer error on '{lexeme}' line: {lineno} column: {column}".format(
            lexeme=self.current_char,
            lineno=self.lineno,
            column=self.column,
        )
        raise LexerError(message=s)

    def advance(self):
        """Advance the `pos` pointer and set the `current_char` variable."""
        if self.current_char == '\n':
            self.lineno += 1
            self.column = 0  # bumped back to 1 by the increment below

        self.pos += 1
        if self.pos > len(self.text) - 1:
            self.current_char = None  # Indicates end of input
        else:
            self.current_char = self.text[self.pos]
            self.column += 1

    def peek(self):
        """Return the character after the current one without consuming it.

        Returns None when the lookahead position is past the end of input.
        """
        peek_pos = self.pos + 1
        if peek_pos > len(self.text) - 1:
            return None
        else:
            return self.text[peek_pos]

    def skip_whitespace(self):
        """Consume a run of whitespace characters."""
        while self.current_char is not None and self.current_char.isspace():
            self.advance()

    def skip_comment(self):
        """Consume a { ... } comment body up to and including the closing brace.

        FIX: the original looped `while self.current_char != '}'`, which spins
        forever on an unterminated comment (current_char becomes None and never
        equals '}'). An unterminated comment now raises a LexerError instead.
        """
        while self.current_char is not None and self.current_char != '}':
            self.advance()
        if self.current_char is None:
            self.error()  # unterminated comment
        self.advance()  # the closing curly brace

    def number(self):
        """Return a (multidigit) integer or float consumed from the input."""
        # Create a new token with current line and column number
        token = Token(type=None, value=None, lineno=self.lineno, column=self.column)

        result = ''
        while self.current_char is not None and self.current_char.isdigit():
            result += self.current_char
            self.advance()

        if self.current_char == '.':
            # A fractional part makes this a REAL_CONST.
            result += self.current_char
            self.advance()

            while self.current_char is not None and self.current_char.isdigit():
                result += self.current_char
                self.advance()

            token.type = TokenType.REAL_CONST
            token.value = float(result)
        else:
            token.type = TokenType.INTEGER_CONST
            token.value = int(result)

        return token

    def _id(self):
        """Handle identifiers and reserved keywords (keywords are case-insensitive)."""
        # Create a new token with current line and column number
        token = Token(type=None, value=None, lineno=self.lineno, column=self.column)

        value = ''
        while self.current_char is not None and self.current_char.isalnum():
            value += self.current_char
            self.advance()

        token_type = RESERVED_KEYWORDS.get(value.upper())
        if token_type is None:
            token.type = TokenType.ID
            token.value = value
        else:
            # reserved keyword: normalize the lexeme to upper case
            token.type = token_type
            token.value = value.upper()

        return token

    def get_next_token(self):
        """Lexical analyzer (also known as scanner or tokenizer).

        This method is responsible for breaking a sentence
        apart into tokens. One token at a time.
        """
        while self.current_char is not None:
            if self.current_char.isspace():
                self.skip_whitespace()
                continue

            if self.current_char == '{':
                self.advance()
                self.skip_comment()
                continue

            if self.current_char.isalpha():
                return self._id()

            if self.current_char.isdigit():
                return self.number()

            # the two-character ':=' must be checked before the
            # single-character ':' fallback below
            if self.current_char == ':' and self.peek() == '=':
                token = Token(
                    type=TokenType.ASSIGN,
                    value=TokenType.ASSIGN.value,  # ':='
                    lineno=self.lineno,
                    column=self.column,
                )
                self.advance()
                self.advance()
                return token

            # single-character token
            try:
                # get enum member by value, e.g.
                # TokenType(';') --> TokenType.SEMI
                token_type = TokenType(self.current_char)
            except ValueError:
                # no enum member with value equal to self.current_char
                self.error()
            else:
                # create a token with a single-character lexeme as its value
                token = Token(
                    type=token_type,
                    value=token_type.value,  # e.g. ';', '.', etc
                    lineno=self.lineno,
                    column=self.column,
                )
                self.advance()
                return token

        # EOF (end-of-file) token indicates that there is no more
        # input left for lexical analysis.
        # FIX: also record the position, for consistency with other tokens.
        return Token(type=TokenType.EOF, value=None, lineno=self.lineno, column=self.column)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment