Skip to content

Instantly share code, notes, and snippets.

@Kreijstal
Last active January 27, 2024 15:15
Show Gist options
  • Save Kreijstal/bc594816f2a1048cf046bd5566444444 to your computer and use it in GitHub Desktop.
Save Kreijstal/bc594816f2a1048cf046bd5566444444 to your computer and use it in GitHub Desktop.
State Machine and automata, enjoy
import itertools
# Redefining the CYK algorithm functions with concise and meaningful names suitable for a library
def preprocess(grammar):
# Placeholder for any preprocessing steps on the grammar, if needed.
return grammar
def initialize_table(string_length):
# Initialize the CYK table with empty lists
return [[[] for _ in range(string_length)] for _ in range(string_length)]
def fill_table_with_unit_productions_refined(table, grammar, string):
# Fills the first row of the CYK table with unit productions
for s, char in enumerate(string):
table[0][s] = [non_terminal for non_terminal in grammar
for production in grammar[non_terminal]
if len(production) == 1 and production[0] == char]
def perform_dynamic_filling_refined(table, grammar, string):
# Fills the rest of the CYK table using dynamic programming
n = len(string)
# Filtering productions with exactly two elements
filtered_productions = [
(non_terminal, production)
for non_terminal in grammar
for production in grammar[non_terminal]
if len(production) == 2
]
# Generating cross product with filtered productions
for length, start, partition, (non_terminal, production) in itertools.product(
range(2, n + 1),
range(n - length + 1),
range(1, length),
filtered_productions
):
b, c = production
if b in table[partition - 1][start] and c in table[length - partition - 1][start + partition]:
table[length - 1][start].append(non_terminal)
def cyk_parse(grammar, string):
# Main function to run the CYK algorithm
grammar = preprocess(grammar)
table = initialize_table(len(string))
fill_table_with_unit_productions(table, grammar, string)
perform_dynamic_filling(table, grammar, string)
return table
def format_cyk_output(table):
# Format the CYK table for better readability
formatted_output = []
for i, row in enumerate(table):
formatted_row = [cell if cell else [''] for j, cell in enumerate(row) if j < len(row) - i]
formatted_output.append(formatted_row)
return formatted_output
# Example usage with a defined grammar and string
example_grammar = {
'S': [['R', 'T'], ['A', 'V'], ['S', 'Z'], ['Z', 'A'], ['b']],
'R': [['A', 'Z'], ['A', 'V'], ['V', 'V'], ['a']],
'T': [['B', 'R'], ['A', 'Z'], ['A', 'W'], ['B', 'B']],
'Z': [['R', 'R'], ['C', 'B'], ['c']],
'A': [['a']],
'B': [['b']],
'C': [['c']],
'V': [['R', 'T']],
'W': [['Z', 'C']]
}
example_string = "abaaabc"
cyk_table = cyk_parse(example_grammar, example_string)
formatted_cyk_table = format_cyk_output(cyk_table)
# Displaying the formatted CYK table
formatted_cyk_table
transitions = {
('z0', 'a'): 'z3', ('z0', 'b'): 'z1',
('z1', 'a'): 'z5', ('z1', 'b'): 'z2',
('z2', 'a'): 'z2', ('z2', 'b'): 'z8',
('z3', 'a'): 'z4', ('z3', 'b'): 'z7',
('z4', 'a'): 'z7', ('z4', 'b'): 'z5',
('z5', 'a'): 'z1', ('z5', 'b'): 'z2',
('z7', 'a'): 'z8', ('z7', 'b'): 'z3',
('z8', 'a'): 'z7', ('z8', 'b'): 'z5'
}
# Function to update the equivalence matrix based on DFA transitions
def update_equivalence(R, transitions):
changed = False
for s1 in R.index:
for s2 in R.columns:
if R.at[s1, s2] == 1: # Only consider pairs currently marked as equivalent
for symbol in ['a', 'b']:
# Get the states they transition to
next_state1 = transitions.get((s1, symbol), None)
next_state2 = transitions.get((s2, symbol), None)
# Check if the next states are not equivalent
if next_state1 is not None and next_state2 is not None and R.at[next_state1, next_state2] == 0:
R.at[s1, s2] = 0 # Update the equivalence to 0 (not equivalent)
changed = True
return R, changed
# Iteratively update the matrix R until no more changes are made
while True:
R, changed = update_equivalence(R, transitions)
if not changed:
break
R # Display the final equivalence matrix R
class NDTreeNode:
def __init__(self, state, tapes, head_positions, parent=None):
self.state = state
self.tapes = tapes
self.head_positions = head_positions
self.parent = parent
self.children = []
class NDTapeTuringMachine:
def __init__(self, states, transitions, start_state, accept_state, blank_symbol, num_tapes):
self.states = states
self.transitions = transitions
self.start_state = start_state
self.accept_state = accept_state
self.blank_symbol = blank_symbol
self.num_tapes = num_tapes
def run(self, input_strings, find_all_paths=False):
initial_tapes = [list(input_string) + [self.blank_symbol] for input_string in input_strings]
initial_heads = [0 for _ in range(self.num_tapes)]
self.root = NDTreeNode(self.start_state, initial_tapes, initial_heads) # Store the root node
self.find_all_paths = find_all_paths
return self._explore(self.root)
def _explore(self, node):
#print(f"Exploring Node: State={node.state}, Tapes={node.tapes}, Heads={node.head_positions}") # Added print statement
if node.state == self.accept_state:
return True
current_symbols = [tape[head] for tape, head in zip(node.tapes, node.head_positions)]
actions = self.transitions.get((node.state, tuple(current_symbols)))
if actions is None:
return False
found_accepting_path = False
for action in actions: # Non-deterministic branching
#print(f"Considering Action: {action}") # Added print statement
new_state, *tape_actions = action
new_tapes = [list(tape) for tape in node.tapes]
new_heads = list(node.head_positions)
# Apply actions for each tape
for i in range(self.num_tapes):
new_symbol, move = tape_actions[i*2], tape_actions[i*2 + 1]
new_tapes[i][new_heads[i]] = new_symbol
if move == "R":
new_heads[i] += 1
if new_heads[i] == len(new_tapes[i]):
new_tapes[i].append(self.blank_symbol)
elif move == "L":
new_heads[i] -= 1
if new_heads[i] < 0:
new_heads[i] = 0
new_tapes[i].insert(0, self.blank_symbol)
child_node = NDTreeNode(new_state, new_tapes, new_heads, node)
node.children.append(child_node)
if self._explore(child_node):
found_accepting_path = True
if not self.find_all_paths:
return True
return found_accepting_path
def get_all_paths(self, node, path=None, all_paths=None):
if all_paths is None:
all_paths = []
if path is None:
path = []
path.append((node.state, [''.join(tape) for tape in node.tapes], node.head_positions))
if not node.children: # If leaf node
all_paths.append(path.copy())
else:
for child in node.children:
self.get_all_paths(child, path, all_paths)
path.pop() # Backtrack
return all_paths
def get_accepting_paths(self, node, path=None, paths=None):
if paths is None:
paths = []
if path is None:
path = []
path.append((node.state, [''.join(tape) for tape in node.tapes], node.head_positions))
if node.state == self.accept_state:
paths.append(path.copy())
else:
for child in node.children:
self.get_accepting_paths(child, path, paths)
path.pop() # Backtrack
return paths
def print_path(self, path):
for state, tapes, heads in path:
print(f"State: {state}")
for i, tape in enumerate(tapes):
head_marker = ' '.join(['^' if j == heads[i] else ' ' for j in range(len(tape))])
print(f"Tape {i + 1}: {tape}\n {head_marker}")
print()
def get_tree_representation(self, node):
# Function to represent the node and its children in a nested list format
def represent_node(n):
node_representation = (n.state, [''.join(tape).strip(self.blank_symbol) for tape in n.tapes], n.head_positions)
children_representations = [represent_node(child) for child in n.children]
return [node_representation] + children_representations
return represent_node(node)
# Example usage for the non-deterministic Turing machine
# Define states, transitions, start_state, accept_state, blank_symbol, num_tapes, and input_strings
num_tapes = 2
states = {"q0", "q1", "q2", "q3", "q4", "q5"}
transitions = {
('q0', ('a', '□')): [('q0', '□', 'R', '□', 'N'), ('q1', '□', 'R', '□', 'N')],
('q0', ('b', '□')): [('q0', '□', 'R', '□', 'R')],
('q1', ('b', '□')): [('q1', '□', 'R', '□', 'L')],
('q1', ('c', '□')): [('q2', '□', 'R', '□', 'R'),('q3', '□', 'R', '□', 'L')],
('q2', ('c', '□')): [('q4', '□', 'R', '□', 'N')],
('q4', ('c', '□')): [('q2', '□', 'R', '□', 'N')],
('q4', ('□', '□')): [('q5', '□', 'R', '□', 'N')],
('q3', ('c', '□')): [('q3', '□', 'R', '□', 'L')],
('q3', ('□', '□')): [('q5', '□', 'N', '□', 'N')],
}
start_state = "q0"
accept_state = "q5"
blank_symbol = "□"
# Example input
input_strings = ["ac", ""]
ndtm = NDTapeTuringMachine(states, transitions, start_state, accept_state, blank_symbol, num_tapes)
#bbbb
#oder
#bbbbc
# Run the machine with the given input
ndtm.run(input_strings,find_all_paths=False)
# Retrieve and print the paths leading to acceptance
tree_representation = ndtm.get_accepting_paths(ndtm.root)
def configuration_to_latex(state, tapes, head_positions):
"""
Convert a Turing Machine configuration tuple to a LaTeX formatted string.
Adjusting the state to move along with the tape head.
"""
state_corrected = state.replace("q", "q_") # Correcting the state format for LaTeX
tape_strs = []
for tape, head_position in zip(tapes, head_positions):
tape_list = list(tape)
if head_position < len(tape_list):
tape_list[head_position] = f"{state_corrected}{tape_list[head_position]}"
tape_str = ''.join(tape_list).replace("□", "\\Box ").replace("•", "\\bullet ")
tape_strs.append(tape_str)
return ' ;& '.join(tape_strs)
def generate_latex_output(all_paths):
"""
Generate LaTeX output for the given paths of a Turing Machine.
Abbreviate consecutive identical transitions.
"""
latex_output = "\\begin{align*}\n"
for path in all_paths:
prev_latex_str = ""
repeat_count = 1
for config in path:
state, tapes, head_positions = config
latex_str = configuration_to_latex(state, tapes, head_positions)
if latex_str == prev_latex_str:
repeat_count += 1
else:
if repeat_count > 1:
latex_output += f"\\vdash^{repeat_count} &" + prev_latex_str + " \\\\\n"
else:
if prev_latex_str:
latex_output += "\\vdash &" + prev_latex_str + " \\\\\n"
repeat_count = 1
prev_latex_str = latex_str
# Add the last configuration
if repeat_count > 1:
latex_output += f"\\vdash^{repeat_count} &" + prev_latex_str + " \\\\\n"
else:
if prev_latex_str:
latex_output += "\\vdash &" + prev_latex_str + " \\\\\n"
latex_output += "\\end{align*}"
return latex_output
# Generate LaTeX output from the tree representation
latex_output = generate_latex_output(tree_representation)
print(latex_output)
class NFA:
def __init__(self, states, alphabet, start_states, final_states, transition_function):
self.states = states
self.alphabet = alphabet
self.start_states = start_states
self.final_states = final_states
self.transition_function = transition_function
def get_transitions(self, state, symbol):
return self.transition_function.get((state, symbol), set())
class DFA:
def __init__(self):
self.states = set()
self.alphabet = set()
self.start_state = None
self.final_states = set()
self.transition_function = {}
def add_state(self, state, is_final=False):
self.states.add(state)
if is_final:
self.final_states.add(state)
def set_start_state(self, state):
self.start_state = state
def add_transition(self, from_state, symbol, to_state):
self.transition_function[(from_state, symbol)] = to_state
def nfa_to_dfa(nfa):
dfa = DFA()
dfa.alphabet = nfa.alphabet
start_state = frozenset(nfa.start_states)
dfa.set_start_state(start_state)
unprocessed_states = [start_state]
dfa.add_state(start_state, start_state & nfa.final_states != set())
while unprocessed_states:
current_state = unprocessed_states.pop()
for symbol in nfa.alphabet:
next_state = frozenset(
sum([list(nfa.get_transitions(state, symbol)) for state in current_state], [])
)
if next_state not in dfa.states:
unprocessed_states.append(next_state)
dfa.add_state(next_state, next_state & nfa.final_states != set())
dfa.add_transition(current_state, symbol, next_state)
return dfa
# Example of usage
nfa_example = NFA(
states={'s0', 's1', 's2', 's3', 's4', 's5'},
alphabet={'a', 'b'},
start_states={'s0', 's3'},
final_states={'s3'},
transition_function={
('s0', 'a'): {'s1'},
('s1', 'a'): {'s1'},
('s2', 'a'): {'s1'},
('s3', 'a'): {'s1'},
('s3', 'b'): {'s4'},
('s4', 'b'): {'s3', 's5'},
('s5', 'a'): {'s1'},
('s5', 'b'): {'s2', 's4'}
}
)
dfa = nfa_to_dfa(nfa_example)
dfa.states, dfa.final_states, dfa.transition_function
# Original grammar
original_grammar = {
'S': ['aSc', 'UW', 'ab', 'Ubc'],
'U': ['aUb', 'ab'],
'W': ['bWc', 'bc']
}
# Terminals replacement mapping
terminals = {'a': 'A', 'b': 'B', 'c': 'C'}
# Function to create a new nonterminal
def create_new_nonterminal(existing_nonterminals, prefix="NT"):
count = len(existing_nonterminals) + 1
new_nonterminal = f"{prefix}{count}"
while new_nonterminal in existing_nonterminals:
count += 1
new_nonterminal = f"{prefix}{count}"
return new_nonterminal
# Adjusted function to binarize a single rule
def binarize_rule_adjusted(rule, grammar, existing_nonterminals):
# Convert rule to a list of symbols if it's a string
rule = list(rule)
while len(rule) > 2:
first_two_symbols = rule[:2]
# Check if a nonterminal already exists for these two symbols
existing_nt = [nt for nt, r in grammar.items() if ''.join(r[0]) == ''.join(first_two_symbols)]
if existing_nt:
new_nonterminal = existing_nt[0]
else:
# Create a new nonterminal for the first two symbols
new_nonterminal = create_new_nonterminal(existing_nonterminals)
existing_nonterminals.add(new_nonterminal)
grammar[new_nonterminal] = [first_two_symbols]
# Replace the first two symbols in the rule with the new nonterminal
rule = [new_nonterminal] + rule[2:]
# Join the symbols in the rule for consistency with the rest of the grammar
return ''.join(rule)
# Adjusted function to binarize all rules in the grammar
def binarize_rules_adjusted(grammar):
new_grammar = {}
existing_nonterminals = set(grammar.keys())
for nonterminal, rules in grammar.items():
new_rules = []
for rule in rules:
new_rule = binarize_rule_adjusted(rule, new_grammar, existing_nonterminals)
new_rules.append(new_rule)
new_grammar[nonterminal] = new_rules
return new_grammar
# Apply the TERM step
grammar = {'S0': ['S']}
grammar.update(original_grammar)
for nonterminal, rules in grammar.items():
new_rules = []
for rule in rules:
if len(rule) > 1:
new_rule = ''.join(terminals.get(symbol, symbol) for symbol in rule)
new_rules.append(new_rule)
else:
new_rules.append(rule)
grammar[nonterminal] = new_rules
for terminal, nonterminal in terminals.items():
grammar[nonterminal] = [terminal]
# Apply the BIN step with the adjusted functions
adjusted_binarized_grammar = binarize_rules_adjusted(grammar)
print(adjusted_binarized_grammar)
class TuringMachine:
def __init__(self, states, transitions, start_state, accept_state, reject_state, blank_symbol):
self.states = states
self.transitions = transitions
self.start_state = start_state
self.accept_state = accept_state
self.reject_state = reject_state
self.blank_symbol = blank_symbol
self.reset()
def reset(self):
self.tape = [self.blank_symbol]
self.current_state = self.start_state
self.head_position = 0
def step(self):
current_symbol = self.tape[self.head_position]
action = self.transitions.get((self.current_state, current_symbol))
if action is None:
return False
new_state, new_symbol, direction = action
self.tape[self.head_position] = new_symbol
self.current_state = new_state
if direction == "R":
self.head_position += 1
if self.head_position == len(self.tape):
self.tape.append(self.blank_symbol)
elif direction == "L":
self.head_position -= 1
if self.head_position < 0:
self.head_position = 0
self.tape.insert(0, self.blank_symbol)
return True
def run(self, input_string):
self.reset()
self.tape = list(input_string) + [self.blank_symbol]
while self.step():
if self.current_state == self.accept_state:
return True
elif self.current_state == self.reject_state:
return False
return self.current_state == self.accept_state
def get_tape(self):
return ''.join(self.tape).strip(self.blank_symbol)
# Defining the Turing Machine
states = {"z0", "z1", "z2", "z3", "ze"}
transitions = {
("z0", "b"): ("z1", "•", "R"),
("z1", "b"): ("z1", "•", "R"),
("z1", "a"): ("z2", "□", "L"),
("z2", "a"): ("z2", "a", "L"),
("z2", "b"): ("z2", "b", "L"),
("z2", "•"): ("z2", "•", "L"),
("z2", "□"): ("z0", "□", "R"),
("z0", "•"): ("z3", "□", "R"),
("z3", "•"): ("z3", "□", "R"),
("z3", "□"): ("ze", "□", "N")
}
start_state = "z0"
accept_state = "ze"
reject_state = None
blank_symbol = "□"
class TuringMachineVerbose(TuringMachine):
def run_verbose(self, input_string):
self.reset()
self.tape = list(input_string) + [self.blank_symbol]
configurations = []
while self.step():
configurations.append((self.current_state, ''.join(self.tape), self.head_position))
if self.current_state == self.accept_state:
break
elif self.current_state == self.reject_state:
break
return configurations
# Creating and running the machine
tm = TuringMachineVerbose(states, transitions, start_state, accept_state, reject_state, blank_symbol)
input_string = "bbabaa"
result = tm.run_verbose(input_string)
# Displaying the results
(input_string, result, tm.get_tape())
@Kreijstal
Copy link
Author

Based on Theoretische Grundlagen der Informatik: Formelsammlung TU-Berlin

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment