Last active
January 20, 2023 09:58
-
-
Save Paulomart/e4bc68f893cd077f40428c3ca847166b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tabulate import tabulate | |
from typing import List, Tuple | |
import re | |
Rule = Tuple[str, List[str]] | |
def rule_to_string(rule: Rule) -> str: | |
""" | |
Converts a rule to a string. | |
""" | |
left, right = rule | |
if right: | |
return f'{left} -> {" ".join(right)}' | |
else: | |
return f'{left} ->' | |
def terminal_set_to_string(terminal_set: set[str]) -> str: | |
""" | |
Converts a set of terminals to a string. | |
""" | |
return '{' + ', '.join(sorted(terminal_set)) + '}' | |
def convert_string_notation_to_list_notation(string_notation: str) -> list[Rule]: | |
""" | |
Converts this form of notation: | |
E → T R | |
R → + T R | |
R → - T R | |
R → | |
T → <NUM> | |
T → ( E ) | |
Into this form of notation: | |
[ | |
('E', ['T', 'R']), | |
('R', ['+', 'T', 'R']), | |
('R', ['-', 'T', 'R']), | |
('R', []), | |
('T', ['<NUM>']), | |
('T', ['(', 'E', ')']), | |
] | |
""" | |
rules = [] | |
for line in string_notation.splitlines(): | |
if not line: | |
continue | |
left, right = line.split('→') if line.count('→') == 1 else line.split('->') | |
left = left.strip() | |
right = right.strip() | |
if right: | |
right = right.split() | |
else: | |
right = [] | |
rules.append((left, right)) | |
return rules | |
def extract_nonterminals(rules: list[Rule]) -> set[str]: | |
""" | |
Extracts nonterminals from a list of rules. | |
""" | |
nonterminals = set() | |
for left, _ in rules: | |
nonterminals.add(left) | |
return nonterminals | |
def extract_terminals(rules: list[Rule]) -> set[str]: | |
""" | |
Extracts terminals from a list of rules. | |
""" | |
nonterminals = extract_nonterminals(rules) | |
terminals = set() | |
for _, right in rules: | |
for symbol in right: | |
if symbol not in nonterminals: | |
terminals.add(symbol) | |
return terminals | |
def calulate_nullable_first_symbols(rules: list[Rule]) -> dict: | |
""" | |
Calculates the basic sets for a list of rules. | |
""" | |
nonterminals = extract_nonterminals(rules) | |
terminals = extract_terminals(rules) | |
nullable_symbols = dict() | |
first_symbols = dict() | |
nullable_rules = dict() | |
first_rules = dict() | |
for nonterminal in nonterminals: | |
nullable_symbols[nonterminal] = False | |
first_symbols[nonterminal] = set() | |
for terminal in terminals: | |
nullable_symbols[terminal] = False | |
first_symbols[terminal] = {terminal} | |
for left, right in rules: | |
rule_key = rule_to_string((left, right)) | |
nullable_rules[rule_key] = False | |
first_rules[rule_key] = set() | |
while True: | |
changed = False | |
for left, right in rules: | |
rule_key = rule_to_string((left, right)) | |
if all(nullable_symbols[symbol] for symbol in right): | |
if not nullable_symbols[left]: | |
nullable_symbols[left] = True | |
changed = True | |
if not nullable_rules[rule_key]: | |
nullable_rules[rule_key] = True | |
changed = True | |
for i, symbol in enumerate(right): | |
if all(nullable_symbols[a] for a in right[:i]): | |
to_add = first_symbols[symbol] | |
if not to_add.issubset(first_symbols[left]): | |
first_symbols[left] |= to_add | |
changed = True | |
if not to_add.issubset(first_rules[rule_key]): | |
first_rules[rule_key] |= to_add | |
changed = True | |
if not changed: | |
break | |
return { | |
'nullable_symbols': nullable_symbols, | |
'first_symbols': first_symbols, | |
'nullable_rules': nullable_rules, | |
'first_rules': first_rules, | |
} | |
def calulate_follow_symbols(rules: list[Rule]) -> dict: | |
symbols = extract_nonterminals(rules) | extract_terminals(rules) | |
nullable_and_first_symbols = calulate_nullable_first_symbols(rules) | |
nullable_symbols = nullable_and_first_symbols['nullable_symbols'] | |
first_symbols = nullable_and_first_symbols['first_symbols'] | |
follow_symbols = dict() | |
for symbols in symbols: | |
follow_symbols[symbols] = set() | |
start_variable = rules[0][0] | |
follow_symbols[start_variable] |= {'$'} | |
while True: | |
changed = False | |
for left, right in rules: | |
for i, symbol in enumerate(right): | |
for k in range(i + 1, len(right)): | |
if all(nullable_symbols[a] for a in right[i + 1:k]): | |
to_add = first_symbols[right[k]] | |
if not to_add.issubset(follow_symbols[symbol]): | |
follow_symbols[symbol] |= to_add | |
changed = True | |
if all(nullable_symbols[a] for a in right[i + 1:]): | |
to_add = follow_symbols[left] | |
if not to_add.issubset(follow_symbols[symbol]): | |
follow_symbols[symbol] |= to_add | |
changed = True | |
if not changed: | |
break | |
return follow_symbols | |
def print_symbols_nullable_first(rules: list[Rule]): | |
symbols = sorted(extract_nonterminals(rules) | extract_terminals(rules)) | |
result = calulate_nullable_first_symbols(rules) | |
nullable = result['nullable_symbols'] | |
first = result['first_symbols'] | |
table_data = [ | |
[symbol, nullable[symbol], terminal_set_to_string(first[symbol])] | |
for symbol in symbols | |
] | |
print(tabulate(table_data, headers=['Symbol', 'Nullable', 'First'])) | |
def print_rules_nullable_first(rules: list[Rule]): | |
result = calulate_nullable_first_symbols(rules) | |
nullable = result['nullable_rules'] | |
first = result['first_rules'] | |
table_data = [ | |
[rule, nullable[rule], terminal_set_to_string(first[rule])] | |
for rule in [ | |
rule_to_string(rule) | |
for rule in rules | |
] | |
] | |
print(tabulate(table_data, headers=['Rule', 'Nullable', 'First'])) | |
def print_follow_symbols(rules: list[Rule]): | |
follow = calulate_follow_symbols(rules) | |
table_data = [ | |
[symbol, terminal_set_to_string(follow[symbol])] | |
for symbol in sorted(extract_nonterminals(rules)) | |
] | |
print(tabulate(table_data, headers=['Symbol', 'Follow'])) | |
def print_grammar(rules: list[Rule]): | |
print ('Rules: ') | |
for rule in rules: | |
print(' ' * 4 + rule_to_string(rule)) | |
print('Variablen: ' + terminal_set_to_string(extract_nonterminals(rules))) | |
print('Terminale: ' + terminal_set_to_string(extract_terminals(rules))) | |
print() | |
print_symbols_nullable_first(rules) | |
print() | |
print_rules_nullable_first(rules) | |
print() | |
print_follow_symbols(rules) | |
def sanitize_input(string: str) -> str: | |
number_prefix_removed = re.sub(r'\d+\.\s*', '', string) | |
epsilon_replaced = number_prefix_removed.replace('ε', '') | |
return epsilon_replaced | |
rules = convert_string_notation_to_list_notation(sanitize_input(""" | |
E → T R | |
R → + T R | |
R → - T R | |
R → | |
T → <NUM> | |
T → ( E ) | |
""")) | |
print_grammar(rules) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment