Skip to content

Instantly share code, notes, and snippets.

@theSage21
Created May 14, 2016 07:24
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save theSage21/c2da376a26c0000774a257e32a51ce34 to your computer and use it in GitHub Desktop.
Save theSage21/c2da376a26c0000774a257e32a51ce34 to your computer and use it in GitHub Desktop.
A very simple regular expression engine for learning purposes
# Define some special things
SPECIAL = '*|'
ALPHABET = '10'
EPSILON = None
# functions
def automaton_print(automaton):
"Neatly prints the automaton"
states, alphabet, start, final, transfer = automaton
print('STATES : ', states)
print('ALPHABET: ', alphabet)
print('START : ', start)
print('FINAL : ', final)
print('TABLE : ', '(state, symbol)--->{states}')
print('--------------------------------------')
for key, value in sorted(transfer.items()):
print(' ', key, ' ---- > ', value)
def merge_tables(table1, table2):
"""
Merge the transition tables of two NFAs while renaming the states
by appending _nfa1 and _nfa2 to the state names
"""
table = {(key[0]+'_nfa1', key[1]): {i+'_nfa1' for i in value} for key, value in table1.items()}
table.update({(key[0]+'_nfa2', key[1]): {i+'_nfa2' for i in value} for key, value in table2.items()})
return table
def relabel_nfa(automaton):
"""
Relabels automaton states and transitions with
new numbers
"""
states, alphabet, start, final, table = automaton
state_relabels = {state: str(index + 1) for index, state in enumerate(states)}
new_states = {state_relabels[i] for i in states}
new_start = state_relabels[start]
new_final = {state_relabels[i] for i in final}
new_table = {(state_relabels[key[0]], key[1]): {state_relabels[i] for i in value}
for key, value in table.items()}
new_automaton = (new_states, alphabet, new_start, new_final, new_table)
return new_automaton
def concat(nfa1, nfa2):
"Concatenate the two nfas"
states1, _alphabet, start1, final1, table1 = nfa1
states2, _alphabet, start2, final2, table2 = nfa2
table = merge_tables(table1, table2)
states = {i+'_nfa1' for i in states1}.union({i+'_nfa2' for i in states2})
new_state_name_map = {name: str(1 + index) for index, name in enumerate(states)}
new_state_name_map['end_nfa1'] = new_state_name_map['start_nfa2'] # These are the same states
new_state_name_map['start_nfa1'] = 'start'
new_state_name_map['end_nfa2'] = 'end'
new_states = {new_state_name_map[i] for i in states}
# Replace in table
new_table = {}
for key, value in table.items():
new_key = (new_state_name_map[key[0]], key[1])
if new_key not in new_table:
new_table[new_key] = {new_state_name_map[i] for i in value}
# Make the NFA
NFA = (new_states, ALPHABET, 'start', {'end'}, new_table)
return NFA
def union(nfa1, nfa2):
"""
Make union of the two nfa
"""
# Expand the nfas
states1, _alphabet, start1, final1, table1 = nfa1
states2, _alphabet, start2, final2, table2 = nfa2
table = merge_tables(table1, table2)
table.update({('start', EPSILON): {'start_nfa1', 'start_nfa2'},
('end_nfa1', EPSILON): {'end'},
('end_nfa2', EPSILON): {'end'},
})
states = {i+'_nfa1' for i in states1}.union({i+'_nfa2' for i in states2})
new_state_name_map = {name: str(1 + index) for index, name in enumerate(states)}
new_state_name_map['start'] = 'start'
new_state_name_map['end'] = 'end'
states = states.union({'start', 'end'})
new_states = {new_state_name_map[i] for i in states}
# Replace in table
new_table = {}
for key, value in table.items():
new_key = (new_state_name_map[key[0]], key[1])
if new_key not in new_table:
new_table[new_key] = {new_state_name_map[i] for i in value}
# Make the NFA
NFA = (new_states, ALPHABET, 'start', {'end'}, new_table)
return NFA
def kleen(nfa):
"""
Create kleen closure of this nfa
"""
states, _alphabet, start, final, table = nfa
new_state_name_map = {name: str(1 + index) for index, name in enumerate(states)}
new_states = {new_state_name_map[i] for i in states}
new_table = {(new_state_name_map[key[0]], key[1]): {new_state_name_map[i] for i in value}
for key, value in table.items()}
new_states = new_states.union({'start', 'end'})
new_table.update({('start', EPSILON):{'end', new_state_name_map['start']},
(new_state_name_map['end'], EPSILON):{'end', new_state_name_map['start']},
})
NFA = (new_states, ALPHABET, 'start', {'end'}, new_table)
return NFA
def make_nfa(pattern):
"""
Make an NFA from a pattern
"""
if isinstance(pattern, list):
pattern = ''.join(pattern)
assert all([i in ALPHABET + SPECIAL for i in pattern]) # everything is in ALPHABET + SPECIAL
if len(pattern) > 1:
head, *tail = pattern
if tail[0] not in SPECIAL:
NFA = concat(make_nfa(head), make_nfa(tail))
else:
symbol, *tail = tail
if symbol == '|':
other, *tail = tail
union_nfa = union(make_nfa(head), make_nfa(other))
NFA = concat(union_nfa, make_nfa(tail))
if symbol == '*':
NFA = concat(kleen(make_nfa(head)),
make_nfa(tail))
elif len(pattern) == 1: # single match
states = {'start', 'end'}
start = 'start'
final = 'end'
table = {('start', pattern): {'end'}}
NFA = (states, ALPHABET, start, final, table)
else: # empty transition
states = {'start', 'end'}
start = 'start'
final = 'end'
table = {('start', EPSILON): {'end'}}
NFA = (states, ALPHABET, start, final, table)
return NFA
def epsilon_closure(state, table):
"""
find epsilon closure of a given state
using the given transition table
"""
done, not_done = [], [state]
# Build a tree with this state as root
while not_done:
to_do = []
for s in not_done:
epsilon_from_s = {tuple(v) for k, v in table.items() if ((k[0] == s) and (k[1] is None))}
[to_do.append(i) for j in epsilon_from_s for i in j if i not in done]
done.append(s)
not_done = to_do
return set(done)
def to_nfa_without_epsilon_transitions(nfa):
"""Removes epsilon transition from nfa"""
states, alphabet, start, final, table = nfa
closure_map = {i: frozenset(epsilon_closure(i, table)) for i in states}
new_states = {closure_map[i] for i in states}
new_start = closure_map[start]
new_final = {i for i in closure_map.values() if any(((f in i) for f in final))}
new_table = {}
for new_state in new_states:
for symbol in alphabet:
key = (new_state, symbol)
target = set()
for member in new_state:
if (member, symbol) in table.keys():
member_targets = table[(member, symbol)]
new_targets = {i for i in new_states for mt in member_targets if mt in i}
target = target.union(new_targets)
if len(target) > 0:
new_table[key] = target
# Relabel to avoid frozenset floods
NFA = (new_states, alphabet, new_start, new_final, new_table)
NFA = relabel_nfa(NFA)
return NFA
def remove_unreachable(nfa):
"""
Removes unreachable states and transitions
from given nfa
"""
states, alphabet, start, final, table = nfa
new_states, new_final, new_table = set(), set(), dict()
done, to_do = set(), set([start])
while to_do:
targets = set()
for state in to_do - done:
for symbol in alphabet:
key = (state, symbol)
if key in table.keys():
new_table[key] = table[key]
targets = targets.union(table[key])
done.add(state)
to_do = targets
new_states = states.intersection(done)
new_final = {i for i in final if i in done}
NFA = (new_states, alphabet, start, new_final, new_table)
return NFA
def to_dfa(nfa):
"""
Convert given NFA to DFA
NFA must be non epsilon move
"""
states, alphabet, start, final, table = nfa
new_table = dict()
# We create states starting from the start state
states_done, states_to_do = set(), [{start}]
while states_to_do:
target_list = []
for current_states in states_to_do:
for symbol in alphabet:
target_states = set()
for state in current_states:
key = (state, symbol)
if key in table:
targets = table[key]
target_states = target_states.union(targets)
if (target_states not in states_done) or\
((frozenset(current_states), symbol) not in new_table.keys()):
if len(target_states) > 0:
new_table[(frozenset(current_states), symbol)] = target_states
states_done.add(frozenset(current_states))
if len(target_states) > 0:
target_list.append(frozenset(target_states))
states_to_do = set(target_list) - set(states_done)
# start and end states
new_start = frozenset({start})
new_states = states_done
new_final = {i for i in new_states if any(((f in i) for f in final))}
state_relabels = {frozenset(state): str(index + 1) for index, state in enumerate(new_states)}
new_states = {state_relabels[i] for i in new_states}
new_start = state_relabels[new_start]
new_final = {state_relabels[i] for i in new_final}
new_table = {(state_relabels[key[0]], key[1]): state_relabels[frozenset(value)]
for key, value in new_table.items()}
DFA = (new_states, alphabet, new_start, new_final, new_table)
return DFA
def run_dfa(dfa, string):
"""
Runs the given DFA on the given input string
"""
states, alphabet, start, final, table = dfa
current_state = start
for symbol in string:
if symbol not in alphabet:
return False
key = (current_state, symbol)
try:
current_state = table[key]
except KeyError:
return False
return current_state in final
if __name__ == '__main__':
pattern = '0|10*'
print(pattern)
automaton = make_nfa(pattern)
automaton = to_nfa_without_epsilon_transitions(automaton)
automaton = remove_unreachable(automaton)
automaton = to_dfa(automaton)
automaton_print(automaton)
strings_to_run_on = ['1',
'0',
'01',
'100',
'11111'
]
for string in strings_to_run_on:
print(string, run_dfa(automaton, string))
def test():
"Test stuff"
def test_epsilon_closure():
state = '1'
table = {('1', None): {'2', '3', '4'},
('2', '1'): {'5'},
('3', '1'): {'4'},
('4', None): {'6', '7'},
('4', '1'): {'9'},
('6', None): {'2', '8'},
('7', None): {'1'},
('8', None): {'3'}
}
x = (epsilon_closure(state, table))
assert set(x) == {'1', '2', '3', '4', '6', '7', '8'}
print('Epsilon closure working ok')
# Run the tests
test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment