Last active
January 27, 2024 15:15
-
-
Save Kreijstal/bc594816f2a1048cf046bd5566444444 to your computer and use it in GitHub Desktop.
State Machine and automata, enjoy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
# Redefining the CYK algorithm functions with concise and meaningful names suitable for a library | |
def preprocess(grammar): | |
# Placeholder for any preprocessing steps on the grammar, if needed. | |
return grammar | |
def initialize_table(string_length): | |
# Initialize the CYK table with empty lists | |
return [[[] for _ in range(string_length)] for _ in range(string_length)] | |
def fill_table_with_unit_productions_refined(table, grammar, string): | |
# Fills the first row of the CYK table with unit productions | |
for s, char in enumerate(string): | |
table[0][s] = [non_terminal for non_terminal in grammar | |
for production in grammar[non_terminal] | |
if len(production) == 1 and production[0] == char] | |
def perform_dynamic_filling_refined(table, grammar, string): | |
# Fills the rest of the CYK table using dynamic programming | |
n = len(string) | |
# Filtering productions with exactly two elements | |
filtered_productions = [ | |
(non_terminal, production) | |
for non_terminal in grammar | |
for production in grammar[non_terminal] | |
if len(production) == 2 | |
] | |
# Generating cross product with filtered productions | |
for length, start, partition, (non_terminal, production) in itertools.product( | |
range(2, n + 1), | |
range(n - length + 1), | |
range(1, length), | |
filtered_productions | |
): | |
b, c = production | |
if b in table[partition - 1][start] and c in table[length - partition - 1][start + partition]: | |
table[length - 1][start].append(non_terminal) | |
def cyk_parse(grammar, string): | |
# Main function to run the CYK algorithm | |
grammar = preprocess(grammar) | |
table = initialize_table(len(string)) | |
fill_table_with_unit_productions(table, grammar, string) | |
perform_dynamic_filling(table, grammar, string) | |
return table | |
def format_cyk_output(table): | |
# Format the CYK table for better readability | |
formatted_output = [] | |
for i, row in enumerate(table): | |
formatted_row = [cell if cell else [''] for j, cell in enumerate(row) if j < len(row) - i] | |
formatted_output.append(formatted_row) | |
return formatted_output | |
# Example usage with a defined grammar and string | |
example_grammar = { | |
'S': [['R', 'T'], ['A', 'V'], ['S', 'Z'], ['Z', 'A'], ['b']], | |
'R': [['A', 'Z'], ['A', 'V'], ['V', 'V'], ['a']], | |
'T': [['B', 'R'], ['A', 'Z'], ['A', 'W'], ['B', 'B']], | |
'Z': [['R', 'R'], ['C', 'B'], ['c']], | |
'A': [['a']], | |
'B': [['b']], | |
'C': [['c']], | |
'V': [['R', 'T']], | |
'W': [['Z', 'C']] | |
} | |
example_string = "abaaabc" | |
cyk_table = cyk_parse(example_grammar, example_string) | |
formatted_cyk_table = format_cyk_output(cyk_table) | |
# Displaying the formatted CYK table | |
formatted_cyk_table | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
transitions = { | |
('z0', 'a'): 'z3', ('z0', 'b'): 'z1', | |
('z1', 'a'): 'z5', ('z1', 'b'): 'z2', | |
('z2', 'a'): 'z2', ('z2', 'b'): 'z8', | |
('z3', 'a'): 'z4', ('z3', 'b'): 'z7', | |
('z4', 'a'): 'z7', ('z4', 'b'): 'z5', | |
('z5', 'a'): 'z1', ('z5', 'b'): 'z2', | |
('z7', 'a'): 'z8', ('z7', 'b'): 'z3', | |
('z8', 'a'): 'z7', ('z8', 'b'): 'z5' | |
} | |
# Function to update the equivalence matrix based on DFA transitions | |
def update_equivalence(R, transitions): | |
changed = False | |
for s1 in R.index: | |
for s2 in R.columns: | |
if R.at[s1, s2] == 1: # Only consider pairs currently marked as equivalent | |
for symbol in ['a', 'b']: | |
# Get the states they transition to | |
next_state1 = transitions.get((s1, symbol), None) | |
next_state2 = transitions.get((s2, symbol), None) | |
# Check if the next states are not equivalent | |
if next_state1 is not None and next_state2 is not None and R.at[next_state1, next_state2] == 0: | |
R.at[s1, s2] = 0 # Update the equivalence to 0 (not equivalent) | |
changed = True | |
return R, changed | |
# Iteratively update the matrix R until no more changes are made | |
while True: | |
R, changed = update_equivalence(R, transitions) | |
if not changed: | |
break | |
R # Display the final equivalence matrix R | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class NDTreeNode: | |
def __init__(self, state, tapes, head_positions, parent=None): | |
self.state = state | |
self.tapes = tapes | |
self.head_positions = head_positions | |
self.parent = parent | |
self.children = [] | |
class NDTapeTuringMachine: | |
def __init__(self, states, transitions, start_state, accept_state, blank_symbol, num_tapes): | |
self.states = states | |
self.transitions = transitions | |
self.start_state = start_state | |
self.accept_state = accept_state | |
self.blank_symbol = blank_symbol | |
self.num_tapes = num_tapes | |
def run(self, input_strings, find_all_paths=False): | |
initial_tapes = [list(input_string) + [self.blank_symbol] for input_string in input_strings] | |
initial_heads = [0 for _ in range(self.num_tapes)] | |
self.root = NDTreeNode(self.start_state, initial_tapes, initial_heads) # Store the root node | |
self.find_all_paths = find_all_paths | |
return self._explore(self.root) | |
def _explore(self, node): | |
#print(f"Exploring Node: State={node.state}, Tapes={node.tapes}, Heads={node.head_positions}") # Added print statement | |
if node.state == self.accept_state: | |
return True | |
current_symbols = [tape[head] for tape, head in zip(node.tapes, node.head_positions)] | |
actions = self.transitions.get((node.state, tuple(current_symbols))) | |
if actions is None: | |
return False | |
found_accepting_path = False | |
for action in actions: # Non-deterministic branching | |
#print(f"Considering Action: {action}") # Added print statement | |
new_state, *tape_actions = action | |
new_tapes = [list(tape) for tape in node.tapes] | |
new_heads = list(node.head_positions) | |
# Apply actions for each tape | |
for i in range(self.num_tapes): | |
new_symbol, move = tape_actions[i*2], tape_actions[i*2 + 1] | |
new_tapes[i][new_heads[i]] = new_symbol | |
if move == "R": | |
new_heads[i] += 1 | |
if new_heads[i] == len(new_tapes[i]): | |
new_tapes[i].append(self.blank_symbol) | |
elif move == "L": | |
new_heads[i] -= 1 | |
if new_heads[i] < 0: | |
new_heads[i] = 0 | |
new_tapes[i].insert(0, self.blank_symbol) | |
child_node = NDTreeNode(new_state, new_tapes, new_heads, node) | |
node.children.append(child_node) | |
if self._explore(child_node): | |
found_accepting_path = True | |
if not self.find_all_paths: | |
return True | |
return found_accepting_path | |
def get_all_paths(self, node, path=None, all_paths=None): | |
if all_paths is None: | |
all_paths = [] | |
if path is None: | |
path = [] | |
path.append((node.state, [''.join(tape) for tape in node.tapes], node.head_positions)) | |
if not node.children: # If leaf node | |
all_paths.append(path.copy()) | |
else: | |
for child in node.children: | |
self.get_all_paths(child, path, all_paths) | |
path.pop() # Backtrack | |
return all_paths | |
def get_accepting_paths(self, node, path=None, paths=None): | |
if paths is None: | |
paths = [] | |
if path is None: | |
path = [] | |
path.append((node.state, [''.join(tape) for tape in node.tapes], node.head_positions)) | |
if node.state == self.accept_state: | |
paths.append(path.copy()) | |
else: | |
for child in node.children: | |
self.get_accepting_paths(child, path, paths) | |
path.pop() # Backtrack | |
return paths | |
def print_path(self, path): | |
for state, tapes, heads in path: | |
print(f"State: {state}") | |
for i, tape in enumerate(tapes): | |
head_marker = ' '.join(['^' if j == heads[i] else ' ' for j in range(len(tape))]) | |
print(f"Tape {i + 1}: {tape}\n {head_marker}") | |
print() | |
def get_tree_representation(self, node): | |
# Function to represent the node and its children in a nested list format | |
def represent_node(n): | |
node_representation = (n.state, [''.join(tape).strip(self.blank_symbol) for tape in n.tapes], n.head_positions) | |
children_representations = [represent_node(child) for child in n.children] | |
return [node_representation] + children_representations | |
return represent_node(node) | |
# Example usage for the non-deterministic Turing machine | |
# Define states, transitions, start_state, accept_state, blank_symbol, num_tapes, and input_strings | |
num_tapes = 2 | |
states = {"q0", "q1", "q2", "q3", "q4", "q5"} | |
transitions = { | |
('q0', ('a', '□')): [('q0', '□', 'R', '□', 'N'), ('q1', '□', 'R', '□', 'N')], | |
('q0', ('b', '□')): [('q0', '□', 'R', '□', 'R')], | |
('q1', ('b', '□')): [('q1', '□', 'R', '□', 'L')], | |
('q1', ('c', '□')): [('q2', '□', 'R', '□', 'R'),('q3', '□', 'R', '□', 'L')], | |
('q2', ('c', '□')): [('q4', '□', 'R', '□', 'N')], | |
('q4', ('c', '□')): [('q2', '□', 'R', '□', 'N')], | |
('q4', ('□', '□')): [('q5', '□', 'R', '□', 'N')], | |
('q3', ('c', '□')): [('q3', '□', 'R', '□', 'L')], | |
('q3', ('□', '□')): [('q5', '□', 'N', '□', 'N')], | |
} | |
start_state = "q0" | |
accept_state = "q5" | |
blank_symbol = "□" | |
# Example input | |
input_strings = ["ac", ""] | |
ndtm = NDTapeTuringMachine(states, transitions, start_state, accept_state, blank_symbol, num_tapes) | |
#bbbb | |
#oder | |
#bbbbc | |
# Run the machine with the given input | |
ndtm.run(input_strings,find_all_paths=False) | |
# Retrieve and print the paths leading to acceptance | |
tree_representation = ndtm.get_accepting_paths(ndtm.root) | |
def configuration_to_latex(state, tapes, head_positions): | |
""" | |
Convert a Turing Machine configuration tuple to a LaTeX formatted string. | |
Adjusting the state to move along with the tape head. | |
""" | |
state_corrected = state.replace("q", "q_") # Correcting the state format for LaTeX | |
tape_strs = [] | |
for tape, head_position in zip(tapes, head_positions): | |
tape_list = list(tape) | |
if head_position < len(tape_list): | |
tape_list[head_position] = f"{state_corrected}{tape_list[head_position]}" | |
tape_str = ''.join(tape_list).replace("□", "\\Box ").replace("•", "\\bullet ") | |
tape_strs.append(tape_str) | |
return ' ;& '.join(tape_strs) | |
def generate_latex_output(all_paths): | |
""" | |
Generate LaTeX output for the given paths of a Turing Machine. | |
Abbreviate consecutive identical transitions. | |
""" | |
latex_output = "\\begin{align*}\n" | |
for path in all_paths: | |
prev_latex_str = "" | |
repeat_count = 1 | |
for config in path: | |
state, tapes, head_positions = config | |
latex_str = configuration_to_latex(state, tapes, head_positions) | |
if latex_str == prev_latex_str: | |
repeat_count += 1 | |
else: | |
if repeat_count > 1: | |
latex_output += f"\\vdash^{repeat_count} &" + prev_latex_str + " \\\\\n" | |
else: | |
if prev_latex_str: | |
latex_output += "\\vdash &" + prev_latex_str + " \\\\\n" | |
repeat_count = 1 | |
prev_latex_str = latex_str | |
# Add the last configuration | |
if repeat_count > 1: | |
latex_output += f"\\vdash^{repeat_count} &" + prev_latex_str + " \\\\\n" | |
else: | |
if prev_latex_str: | |
latex_output += "\\vdash &" + prev_latex_str + " \\\\\n" | |
latex_output += "\\end{align*}" | |
return latex_output | |
# Generate LaTeX output from the tree representation | |
latex_output = generate_latex_output(tree_representation) | |
print(latex_output) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class NFA: | |
def __init__(self, states, alphabet, start_states, final_states, transition_function): | |
self.states = states | |
self.alphabet = alphabet | |
self.start_states = start_states | |
self.final_states = final_states | |
self.transition_function = transition_function | |
def get_transitions(self, state, symbol): | |
return self.transition_function.get((state, symbol), set()) | |
class DFA: | |
def __init__(self): | |
self.states = set() | |
self.alphabet = set() | |
self.start_state = None | |
self.final_states = set() | |
self.transition_function = {} | |
def add_state(self, state, is_final=False): | |
self.states.add(state) | |
if is_final: | |
self.final_states.add(state) | |
def set_start_state(self, state): | |
self.start_state = state | |
def add_transition(self, from_state, symbol, to_state): | |
self.transition_function[(from_state, symbol)] = to_state | |
def nfa_to_dfa(nfa): | |
dfa = DFA() | |
dfa.alphabet = nfa.alphabet | |
start_state = frozenset(nfa.start_states) | |
dfa.set_start_state(start_state) | |
unprocessed_states = [start_state] | |
dfa.add_state(start_state, start_state & nfa.final_states != set()) | |
while unprocessed_states: | |
current_state = unprocessed_states.pop() | |
for symbol in nfa.alphabet: | |
next_state = frozenset( | |
sum([list(nfa.get_transitions(state, symbol)) for state in current_state], []) | |
) | |
if next_state not in dfa.states: | |
unprocessed_states.append(next_state) | |
dfa.add_state(next_state, next_state & nfa.final_states != set()) | |
dfa.add_transition(current_state, symbol, next_state) | |
return dfa | |
# Example of usage | |
nfa_example = NFA( | |
states={'s0', 's1', 's2', 's3', 's4', 's5'}, | |
alphabet={'a', 'b'}, | |
start_states={'s0', 's3'}, | |
final_states={'s3'}, | |
transition_function={ | |
('s0', 'a'): {'s1'}, | |
('s1', 'a'): {'s1'}, | |
('s2', 'a'): {'s1'}, | |
('s3', 'a'): {'s1'}, | |
('s3', 'b'): {'s4'}, | |
('s4', 'b'): {'s3', 's5'}, | |
('s5', 'a'): {'s1'}, | |
('s5', 'b'): {'s2', 's4'} | |
} | |
) | |
dfa = nfa_to_dfa(nfa_example) | |
dfa.states, dfa.final_states, dfa.transition_function |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Original grammar | |
original_grammar = { | |
'S': ['aSc', 'UW', 'ab', 'Ubc'], | |
'U': ['aUb', 'ab'], | |
'W': ['bWc', 'bc'] | |
} | |
# Terminals replacement mapping | |
terminals = {'a': 'A', 'b': 'B', 'c': 'C'} | |
# Function to create a new nonterminal | |
def create_new_nonterminal(existing_nonterminals, prefix="NT"): | |
count = len(existing_nonterminals) + 1 | |
new_nonterminal = f"{prefix}{count}" | |
while new_nonterminal in existing_nonterminals: | |
count += 1 | |
new_nonterminal = f"{prefix}{count}" | |
return new_nonterminal | |
# Adjusted function to binarize a single rule | |
def binarize_rule_adjusted(rule, grammar, existing_nonterminals): | |
# Convert rule to a list of symbols if it's a string | |
rule = list(rule) | |
while len(rule) > 2: | |
first_two_symbols = rule[:2] | |
# Check if a nonterminal already exists for these two symbols | |
existing_nt = [nt for nt, r in grammar.items() if ''.join(r[0]) == ''.join(first_two_symbols)] | |
if existing_nt: | |
new_nonterminal = existing_nt[0] | |
else: | |
# Create a new nonterminal for the first two symbols | |
new_nonterminal = create_new_nonterminal(existing_nonterminals) | |
existing_nonterminals.add(new_nonterminal) | |
grammar[new_nonterminal] = [first_two_symbols] | |
# Replace the first two symbols in the rule with the new nonterminal | |
rule = [new_nonterminal] + rule[2:] | |
# Join the symbols in the rule for consistency with the rest of the grammar | |
return ''.join(rule) | |
# Adjusted function to binarize all rules in the grammar | |
def binarize_rules_adjusted(grammar): | |
new_grammar = {} | |
existing_nonterminals = set(grammar.keys()) | |
for nonterminal, rules in grammar.items(): | |
new_rules = [] | |
for rule in rules: | |
new_rule = binarize_rule_adjusted(rule, new_grammar, existing_nonterminals) | |
new_rules.append(new_rule) | |
new_grammar[nonterminal] = new_rules | |
return new_grammar | |
# Apply the TERM step | |
grammar = {'S0': ['S']} | |
grammar.update(original_grammar) | |
for nonterminal, rules in grammar.items(): | |
new_rules = [] | |
for rule in rules: | |
if len(rule) > 1: | |
new_rule = ''.join(terminals.get(symbol, symbol) for symbol in rule) | |
new_rules.append(new_rule) | |
else: | |
new_rules.append(rule) | |
grammar[nonterminal] = new_rules | |
for terminal, nonterminal in terminals.items(): | |
grammar[nonterminal] = [terminal] | |
# Apply the BIN step with the adjusted functions | |
adjusted_binarized_grammar = binarize_rules_adjusted(grammar) | |
print(adjusted_binarized_grammar) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class TuringMachine: | |
def __init__(self, states, transitions, start_state, accept_state, reject_state, blank_symbol): | |
self.states = states | |
self.transitions = transitions | |
self.start_state = start_state | |
self.accept_state = accept_state | |
self.reject_state = reject_state | |
self.blank_symbol = blank_symbol | |
self.reset() | |
def reset(self): | |
self.tape = [self.blank_symbol] | |
self.current_state = self.start_state | |
self.head_position = 0 | |
def step(self): | |
current_symbol = self.tape[self.head_position] | |
action = self.transitions.get((self.current_state, current_symbol)) | |
if action is None: | |
return False | |
new_state, new_symbol, direction = action | |
self.tape[self.head_position] = new_symbol | |
self.current_state = new_state | |
if direction == "R": | |
self.head_position += 1 | |
if self.head_position == len(self.tape): | |
self.tape.append(self.blank_symbol) | |
elif direction == "L": | |
self.head_position -= 1 | |
if self.head_position < 0: | |
self.head_position = 0 | |
self.tape.insert(0, self.blank_symbol) | |
return True | |
def run(self, input_string): | |
self.reset() | |
self.tape = list(input_string) + [self.blank_symbol] | |
while self.step(): | |
if self.current_state == self.accept_state: | |
return True | |
elif self.current_state == self.reject_state: | |
return False | |
return self.current_state == self.accept_state | |
def get_tape(self): | |
return ''.join(self.tape).strip(self.blank_symbol) | |
# Defining the Turing Machine | |
states = {"z0", "z1", "z2", "z3", "ze"} | |
transitions = { | |
("z0", "b"): ("z1", "•", "R"), | |
("z1", "b"): ("z1", "•", "R"), | |
("z1", "a"): ("z2", "□", "L"), | |
("z2", "a"): ("z2", "a", "L"), | |
("z2", "b"): ("z2", "b", "L"), | |
("z2", "•"): ("z2", "•", "L"), | |
("z2", "□"): ("z0", "□", "R"), | |
("z0", "•"): ("z3", "□", "R"), | |
("z3", "•"): ("z3", "□", "R"), | |
("z3", "□"): ("ze", "□", "N") | |
} | |
start_state = "z0" | |
accept_state = "ze" | |
reject_state = None | |
blank_symbol = "□" | |
class TuringMachineVerbose(TuringMachine): | |
def run_verbose(self, input_string): | |
self.reset() | |
self.tape = list(input_string) + [self.blank_symbol] | |
configurations = [] | |
while self.step(): | |
configurations.append((self.current_state, ''.join(self.tape), self.head_position)) | |
if self.current_state == self.accept_state: | |
break | |
elif self.current_state == self.reject_state: | |
break | |
return configurations | |
# Creating and running the machine | |
tm = TuringMachineVerbose(states, transitions, start_state, accept_state, reject_state, blank_symbol) | |
input_string = "bbabaa" | |
result = tm.run_verbose(input_string) | |
# Displaying the results | |
(input_string, result, tm.get_tape()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Based on Theoretische Grundlagen der Informatik: Formelsammlung TU-Berlin