Skip to content

Instantly share code, notes, and snippets.

@Paulomart
Last active January 20, 2023 09:58
Show Gist options
  • Save Paulomart/e4bc68f893cd077f40428c3ca847166b to your computer and use it in GitHub Desktop.
Save Paulomart/e4bc68f893cd077f40428c3ca847166b to your computer and use it in GitHub Desktop.
from tabulate import tabulate
from typing import List, Tuple
import re
Rule = Tuple[str, List[str]]
def rule_to_string(rule: Rule) -> str:
"""
Converts a rule to a string.
"""
left, right = rule
if right:
return f'{left} -> {" ".join(right)}'
else:
return f'{left} ->'
def terminal_set_to_string(terminal_set: set[str]) -> str:
"""
Converts a set of terminals to a string.
"""
return '{' + ', '.join(sorted(terminal_set)) + '}'
def convert_string_notation_to_list_notation(string_notation: str) -> list[Rule]:
"""
Converts this form of notation:
E → T R
R → + T R
R → - T R
R →
T → <NUM>
T → ( E )
Into this form of notation:
[
('E', ['T', 'R']),
('R', ['+', 'T', 'R']),
('R', ['-', 'T', 'R']),
('R', []),
('T', ['<NUM>']),
('T', ['(', 'E', ')']),
]
"""
rules = []
for line in string_notation.splitlines():
if not line:
continue
left, right = line.split('→') if line.count('→') == 1 else line.split('->')
left = left.strip()
right = right.strip()
if right:
right = right.split()
else:
right = []
rules.append((left, right))
return rules
def extract_nonterminals(rules: list[Rule]) -> set[str]:
"""
Extracts nonterminals from a list of rules.
"""
nonterminals = set()
for left, _ in rules:
nonterminals.add(left)
return nonterminals
def extract_terminals(rules: list[Rule]) -> set[str]:
"""
Extracts terminals from a list of rules.
"""
nonterminals = extract_nonterminals(rules)
terminals = set()
for _, right in rules:
for symbol in right:
if symbol not in nonterminals:
terminals.add(symbol)
return terminals
def calulate_nullable_first_symbols(rules: list[Rule]) -> dict:
"""
Calculates the basic sets for a list of rules.
"""
nonterminals = extract_nonterminals(rules)
terminals = extract_terminals(rules)
nullable_symbols = dict()
first_symbols = dict()
nullable_rules = dict()
first_rules = dict()
for nonterminal in nonterminals:
nullable_symbols[nonterminal] = False
first_symbols[nonterminal] = set()
for terminal in terminals:
nullable_symbols[terminal] = False
first_symbols[terminal] = {terminal}
for left, right in rules:
rule_key = rule_to_string((left, right))
nullable_rules[rule_key] = False
first_rules[rule_key] = set()
while True:
changed = False
for left, right in rules:
rule_key = rule_to_string((left, right))
if all(nullable_symbols[symbol] for symbol in right):
if not nullable_symbols[left]:
nullable_symbols[left] = True
changed = True
if not nullable_rules[rule_key]:
nullable_rules[rule_key] = True
changed = True
for i, symbol in enumerate(right):
if all(nullable_symbols[a] for a in right[:i]):
to_add = first_symbols[symbol]
if not to_add.issubset(first_symbols[left]):
first_symbols[left] |= to_add
changed = True
if not to_add.issubset(first_rules[rule_key]):
first_rules[rule_key] |= to_add
changed = True
if not changed:
break
return {
'nullable_symbols': nullable_symbols,
'first_symbols': first_symbols,
'nullable_rules': nullable_rules,
'first_rules': first_rules,
}
def calulate_follow_symbols(rules: list[Rule]) -> dict:
symbols = extract_nonterminals(rules) | extract_terminals(rules)
nullable_and_first_symbols = calulate_nullable_first_symbols(rules)
nullable_symbols = nullable_and_first_symbols['nullable_symbols']
first_symbols = nullable_and_first_symbols['first_symbols']
follow_symbols = dict()
for symbols in symbols:
follow_symbols[symbols] = set()
start_variable = rules[0][0]
follow_symbols[start_variable] |= {'$'}
while True:
changed = False
for left, right in rules:
for i, symbol in enumerate(right):
for k in range(i + 1, len(right)):
if all(nullable_symbols[a] for a in right[i + 1:k]):
to_add = first_symbols[right[k]]
if not to_add.issubset(follow_symbols[symbol]):
follow_symbols[symbol] |= to_add
changed = True
if all(nullable_symbols[a] for a in right[i + 1:]):
to_add = follow_symbols[left]
if not to_add.issubset(follow_symbols[symbol]):
follow_symbols[symbol] |= to_add
changed = True
if not changed:
break
return follow_symbols
def print_symbols_nullable_first(rules: list[Rule]):
symbols = sorted(extract_nonterminals(rules) | extract_terminals(rules))
result = calulate_nullable_first_symbols(rules)
nullable = result['nullable_symbols']
first = result['first_symbols']
table_data = [
[symbol, nullable[symbol], terminal_set_to_string(first[symbol])]
for symbol in symbols
]
print(tabulate(table_data, headers=['Symbol', 'Nullable', 'First']))
def print_rules_nullable_first(rules: list[Rule]):
result = calulate_nullable_first_symbols(rules)
nullable = result['nullable_rules']
first = result['first_rules']
table_data = [
[rule, nullable[rule], terminal_set_to_string(first[rule])]
for rule in [
rule_to_string(rule)
for rule in rules
]
]
print(tabulate(table_data, headers=['Rule', 'Nullable', 'First']))
def print_follow_symbols(rules: list[Rule]):
follow = calulate_follow_symbols(rules)
table_data = [
[symbol, terminal_set_to_string(follow[symbol])]
for symbol in sorted(extract_nonterminals(rules))
]
print(tabulate(table_data, headers=['Symbol', 'Follow']))
def print_grammar(rules: list[Rule]):
print ('Rules: ')
for rule in rules:
print(' ' * 4 + rule_to_string(rule))
print('Variablen: ' + terminal_set_to_string(extract_nonterminals(rules)))
print('Terminale: ' + terminal_set_to_string(extract_terminals(rules)))
print()
print_symbols_nullable_first(rules)
print()
print_rules_nullable_first(rules)
print()
print_follow_symbols(rules)
def sanitize_input(string: str) -> str:
number_prefix_removed = re.sub(r'\d+\.\s*', '', string)
epsilon_replaced = number_prefix_removed.replace('ε', '')
return epsilon_replaced
rules = convert_string_notation_to_list_notation(sanitize_input("""
E → T R
R → + T R
R → - T R
R →
T → <NUM>
T → ( E )
"""))
print_grammar(rules)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment