@costrouc
Created January 28, 2021 18:58
import re
import dataclasses

@dataclasses.dataclass
class Token:
    type: str
    value: str

def tokenize(tokens, text):
    # Build a single alternation with one named group per token type;
    # earlier entries in `tokens` win when two patterns match at the same position.
    regex = '|'.join([f'(?P<{name}>{pattern})' for name, pattern in tokens.items()])
    for match in re.finditer(regex, text):
        yield Token(type=match.lastgroup, value=match.group(0))
    # Sentinel marking end of input.
    yield Token(type=None, value=None)
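
# A quick illustration (an added sketch, not part of the original gist): with a tiny
# token table, tokenize() yields one Token per match plus the end-of-input sentinel.
# The '_demo_tokens' name is hypothetical and exists only for this example.
_demo_tokens = {'integer': r'\d+', 'addition': r'\+'}
assert [t.type for t in tokenize(_demo_tokens, '1 + 2')] == [
    'integer', 'addition', 'integer', None]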

import typing

@dataclasses.dataclass
class Context:
    grammar: dict
    token_iterator: typing.Iterator
    right_token: Token = None

def advance(context, token_type):
    # Consume the current token, which must have the expected type
    # (used for closing delimiters such as ')' and the trailing '|').
    if context.right_token.type != token_type:
        raise RuntimeError(f'expected token of type {token_type}, got {context.right_token.type}')
    left_token = context.right_token
    context.right_token = next(context.token_iterator)
    return left_token

def parse(context, right_binding_power=0):
    # Pratt parsing: the first token's null denotation produces the initial
    # left-hand AST, then any following token whose left binding power exceeds
    # the caller's right binding power extends it via its left denotation.
    left_token = context.right_token or next(context.token_iterator)
    context.right_token = next(context.token_iterator)
    left_ast = context.grammar[left_token.type]['null_denotation'](context, left_token)
    while right_binding_power < context.grammar[context.right_token.type].get('left_binding_power', 0):
        left_token = context.right_token
        context.right_token = next(context.token_iterator)
        left_ast = context.grammar[left_token.type]['left_denotation'](left_token, context, left_ast)
    return left_ast
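
# A minimal end-to-end sketch (illustrative, not from the original gist): a grammar
# written out by hand, with one literal and a left-associative '+', evaluated
# directly while parsing. The '_demo_*' names are hypothetical.
_demo_tokens = {'integer': r'\d+', 'addition': r'\+'}
_demo_grammar = {
    None: {'left_binding_power': 0},  # end-of-input sentinel stops the loop
    'integer': {'left_binding_power': 0,
                'null_denotation': lambda context, token: int(token.value)},
    'addition': {'left_binding_power': 10,
                 'left_denotation': lambda token, context, left_ast:
                     left_ast + parse(context, right_binding_power=10)},
}
assert parse(Context(_demo_grammar, tokenize(_demo_tokens, '1 + 2 + 3'))) == 6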

def grammar_update(grammar, token_type, left_binding_power=0, null_denotation=None, left_denotation=None):
    if token_type in grammar:
        # Merge with the existing entry: keep the strongest binding power and
        # refuse to overwrite a denotation that is already set.
        grammar[token_type]['left_binding_power'] = max(
            grammar[token_type].get('left_binding_power', 0),
            left_binding_power)
        if null_denotation is not None:
            if grammar[token_type].get('null_denotation') is not None:
                raise RuntimeError(f'null denotation already set for {token_type}')
            grammar[token_type]['null_denotation'] = null_denotation
        if left_denotation is not None:
            if grammar[token_type].get('left_denotation') is not None:
                raise RuntimeError(f'left denotation already set for {token_type}')
            grammar[token_type]['left_denotation'] = left_denotation
    else:
        grammar[token_type] = {
            'left_binding_power': left_binding_power,
            'null_denotation': null_denotation,
            'left_denotation': left_denotation,
        }
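
# Illustrative check (added, not from the original gist): registering the same
# token type twice merges the entries, keeping the larger binding power and
# filling in whichever denotation was still unset. This is what later lets '-'
# act as both an infix and a prefix operator. '_demo_grammar' is hypothetical.
_demo_grammar = {}
grammar_update(_demo_grammar, 'minus', left_binding_power=10,
               left_denotation=lambda token, context, left_ast: left_ast)
grammar_update(_demo_grammar, 'minus', left_binding_power=10,
               null_denotation=lambda context, token: 0)
assert _demo_grammar['minus']['left_binding_power'] == 10
assert _demo_grammar['minus']['null_denotation'] is not None
assert _demo_grammar['minus']['left_denotation'] is not None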

def grammar_end(grammar):
    # Register the end-of-input sentinel with binding power 0 so parse() stops on it.
    grammar_update(grammar, token_type=None)

def grammar_symbol(grammar, token_type):
    grammar_update(grammar, token_type)

def grammar_literal(grammar, token_type, function):
    def nud_function(context, token):
        return function(token)
    grammar_update(grammar, token_type,
                   left_binding_power=0,
                   null_denotation=nud_function)

def grammar_prefix(grammar, token_type, left_binding_power, function):
    def nud_function(context, token):
        operand = parse(context, right_binding_power=left_binding_power)
        return function(token, operand)
    grammar_update(grammar, token_type,
                   left_binding_power=left_binding_power,
                   null_denotation=nud_function)

def grammar_infix(grammar, token_type, left_binding_power, function):
    def led_function(token, context, left_ast):
        right_ast = parse(context, right_binding_power=left_binding_power)
        return function(token, left_ast, right_ast)
    grammar_update(grammar, token_type,
                   left_binding_power=left_binding_power,
                   left_denotation=led_function)

def grammar_infix_r(grammar, token_type, left_binding_power, function):
    # Right associative: parse the right operand with a binding power one lower
    # so an operator of the same precedence binds to the right.
    def led_function(token, context, left_ast):
        right_ast = parse(context, right_binding_power=left_binding_power - 1)
        return function(token, left_ast, right_ast)
    grammar_update(grammar, token_type,
                   left_binding_power=left_binding_power,
                   left_denotation=led_function)

tokens = {
    # Raw strings avoid invalid-escape warnings; 'float' must precede 'integer'
    # so '1.5' is not matched as the integer '1'.
    'float': r'\d+\.\d+',
    'integer': r'\d+',
    'subtraction': r'\-',
    'addition': r'\+',
    'multiplication': r'\*',
    'power': r'\^',
}

grammar = {}

def integer_literal(token):
    return int(token.value)

def float_literal(token):
    return float(token.value)

def subtraction_infix(token, left_ast, right_ast):
    return left_ast - right_ast

def subtraction_prefix(token, right_ast):
    return -right_ast

def addition_infix(token, left_ast, right_ast):
    return left_ast + right_ast

def multiplication_infix(token, left_ast, right_ast):
    return left_ast * right_ast

def power_infix_r(token, left_ast, right_ast):
    return pow(left_ast, right_ast)

grammar_end(grammar)
grammar_literal(grammar, 'integer', integer_literal)
grammar_literal(grammar, 'float', float_literal)
grammar_infix(grammar, 'subtraction', 10, subtraction_infix)
grammar_prefix(grammar, 'subtraction', 10, subtraction_prefix)
grammar_infix(grammar, 'addition', 10, addition_infix)
grammar_infix(grammar, 'multiplication', 20, multiplication_infix)
grammar_infix_r(grammar, 'power', 30, power_infix_r)
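
# A few illustrative checks (expected values are my own working, not from the
# original gist): '*' binds tighter than '+', '^' is right associative because
# grammar_infix_r parses its right operand at binding power 30 - 1, and the
# prefix '-' comes from the second registration of 'subtraction'.
assert parse(Context(grammar, tokenize(tokens, '1 + 2 * 3'))) == 7
assert parse(Context(grammar, tokenize(tokens, '2 ^ 3 ^ 2'))) == 512  # 2 ^ (3 ^ 2)
assert parse(Context(grammar, tokenize(tokens, '-4 + 10'))) == 6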

def grammar_enclosed(grammar, begin_token_type, end_token_type, left_binding_power, function):
    def nud_function(context, left_token):
        body = parse(context)
        right_token = advance(context, end_token_type)
        return function(left_token, right_token, body)
    grammar_update(grammar, begin_token_type,
                   left_binding_power=left_binding_power,
                   null_denotation=nud_function)
    # The closing token only terminates parse(); it needs no denotation of its own.
    grammar_update(grammar, end_token_type)

tokens.update({
    'left_paren': r'\(',
    'right_paren': r'\)',
    'bar': r'\|',
})

def parenthesis_enclosed(left_token, right_token, body):
    return body

def absolute_enclosed(left_token, right_token, body):
    return abs(body)

grammar_enclosed(grammar, 'left_paren', 'right_paren', 0, parenthesis_enclosed)
grammar_enclosed(grammar, 'bar', 'bar', 0, absolute_enclosed)
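
# Illustrative checks for the enclosed forms (added, expected values are my own
# working): parentheses only group, while '|...|' applies abs() to its body.
assert parse(Context(grammar, tokenize(tokens, '(1 + 2) * 3'))) == 9
assert parse(Context(grammar, tokenize(tokens, '|2 - 5|'))) == 3
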
text = '1 + 1 * (2 - -----4) + |-7| + (2 ^ 3) ^ 2'
context = Context(grammar, tokenize(tokens, text))
print(parse(context))
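# Expected output (my own working through the grammar above, not recorded in the
# original gist): 1 + 1*6 + 7 + 64 = 78.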