Skip to content

Instantly share code, notes, and snippets.

@HParker
Created April 14, 2023 22:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save HParker/7b541a360c2e475f2f83c2208bad854b to your computer and use it in GitHub Desktop.
Save HParker/7b541a360c2e475f2f83c2208bad854b to your computer and use it in GitHub Desktop.
# coding: utf-8
# Steps to convert a regular expression into a NFA
#
# 1. create NFAs for each trivial part of the regular expression.
# 2. join them together replacing the transition part with one of the trivial parts created above.
# 3. mark the final state a success
require 'set'
class State
attr_reader :id
@@id = 0
def initialize
@id = @@id
@@id += 1
end
def self.reset!
@@id = 0
end
end
class Transition
attr_reader :from
attr_reader :to
attr_reader :char
def initialize(from, to, char)
if from.nil? || to.nil? || char.nil?
raise "WOW NO NILS! #{[from, to, char]}"
end
@from = from
@to = to
@char = char
end
end
class FA
def dot
output = "digraph G {\n"
output.concat " rankdir=LR;\n"
output.concat " size=\"8,5\";\n"
@states.each do |s|
if @finish == s
output.concat " \"#{s.id}\" [shape=\"doublecircle\"]\n"
else
output.concat " \"#{s.id}\" [shape=\"circle\"]\n"
end
end
@transitions.each do |t|
if t.char == "epsilon"
output.concat " \"#{t.from.id}\" -> \"#{t.to.id}\" [label=\"ε\"];\n"
else
output.concat " \"#{t.from.id}\" -> \"#{t.to.id}\" [label=\"#{t.char}\"];\n"
end
end
output.concat "}\n"
puts output
end
def ascii
@transitions.each do |t|
puts "(#{t.from.id}) -#{t.char}-> (#{t.to.id})";
end
end
def merge(nfa)
@transitions.concat(nfa.transitions)
@states.concat(nfa.states)
end
def new_transition(start, finish, char)
transition = Transition.new(start, finish, char)
@transitions << transition
transition
end
def new_state
state = State.new
@states << state
state
end
end
class DFA < FA
attr_reader :transitions, :states, :start, :finish
attr_writer :start, :finish
def initialize
@start = nil
@finish = nil
@transitions = []
@states = []
@end_states = []
end
# TODO: hopcroft DFA minimization
def self.from_nfa(nfa)
dfa = self.new
dfa.start = dfa.new_state
cur_dfa_state = dfa.start
states_to_resolve = [[nfa.start.id]]
nfa_state_ids_to_dfa_states = {}
nfa_state_ids_to_dfa_states[[nfa.start.id]] = dfa.start
while !states_to_resolve.empty?
cur_nfa_states = states_to_resolve.pop
cur_dfa_state = nfa_state_ids_to_dfa_states[cur_nfa_states]
nfa.uniq_transition_keys.each do |transition_key|
state_ids = []
ids = nfa.epsilon_closure(cur_nfa_states, char: transition_key)
state_ids.concat(ids)
state_ids.uniq!
state_ids.sort!
if !state_ids.empty?
if nfa_state_ids_to_dfa_states[state_ids].nil?
new_state = dfa.new_state
nfa_state_ids_to_dfa_states[state_ids] = new_state
dfa.new_transition(cur_dfa_state, new_state, transition_key)
if !state_ids.empty?
states_to_resolve << state_ids
end
else
dfa.new_transition(cur_dfa_state, nfa_state_ids_to_dfa_states[state_ids], transition_key)
end
end
state_ids = []
end
end
dfa
end
end
class NFA < FA
attr_reader :transitions, :states, :start, :finish
attr_writer :start, :finish
def initialize
@start = nil
@finish = nil
@transitions = []
@states = []
@end_states = []
end
def uniq_transition_keys
s = Set.new
@transitions.each do |t|
if t.char != "epsilon"
s.add(t.char)
end
end
s.to_a
end
def to_dfa
DFA.from_nfa(self)
end
def epsilon_closure(state_ids, char: nil)
states = []
@transitions.each do |t|
if state_ids.include?(t.from.id) && t.char == char
states << t.to.id
states.concat(epsilon_closure([t.to.id], char: "epsilon"))
end
end
states
end
def self.from_string(string)
nfa = nil
last_nfa = nil
i = 0
chars = string.chars
while i < chars.size
char = chars[i]
puts "PROCESSING #{char}"
if char == "*"
if !last_nfa.nil?
nfa = NFA.concatination(nfa, NFA.closure(last_nfa))
last_nfa = nil
else
nfa = NFA.closure(nfa)
end
elsif char == "|"
if !last_nfa.nil?
nfa = NFA.concatination(nfa, last_nfa)
last_nfa = nil
end
alternative_path, new_index = from_string(chars.last(chars.size - (i + 1)).join)
nfa = NFA.alternation(nfa, alternative_path)
return [nfa, new_index + i + 1]
elsif char == "("
if !last_nfa.nil?
nfa = NFA.concatination(nfa, last_nfa)
last_nfa = nil
end
parenthetical, new_index = from_string(chars.last(chars.size - (i + 1)).join)
i = new_index + i
last_nfa = parenthetical
elsif char == ")"
if !last_nfa.nil?
nfa = NFA.concatination(nfa, last_nfa)
last_nfa = nil
end
return [nfa, i + 1]
else
if !last_nfa.nil?
nfa = NFA.concatination(nfa, last_nfa)
end
if nfa.nil?
nfa = NFA.simple(char)
else
last_nfa = NFA.simple(char)
end
#
end
i += 1
end
[nfa, i]
end
def self.simple(char)
nfa = self.new
nfa.start = nfa.new_state
nfa.finish = nfa.new_state
nfa.new_transition(nfa.start, nfa.finish, char)
nfa
end
def self.concatination(left_nfa, right_nfa)
nfa = self.new
nfa.start = left_nfa.start
nfa.finish = right_nfa.finish
nfa.merge(left_nfa)
nfa.merge(right_nfa)
nfa.new_transition(left_nfa.finish, right_nfa.start, "epsilon")
nfa
end
def self.alternation(left_nfa, right_nfa)
nfa = self.new
nfa.start = nfa.new_state
nfa.finish = nfa.new_state
nfa.merge(left_nfa)
nfa.merge(right_nfa)
nfa.new_transition(nfa.start, left_nfa.start, "epsilon")
nfa.new_transition(nfa.start, right_nfa.start, "epsilon")
nfa.new_transition(left_nfa.finish, nfa.finish, "epsilon")
nfa.new_transition(right_nfa.finish, nfa.finish,"epsilon")
nfa
end
def self.closure(inner_nfa)
nfa = self.new
nfa.merge(inner_nfa)
nfa.start = nfa.new_state
nfa.finish = nfa.new_state
nfa.new_transition(nfa.start, inner_nfa.start, "epsilon")
nfa.new_transition(inner_nfa.finish, nfa.finish, "epsilon")
nfa.new_transition(nfa.start, nfa.finish, "epsilon")
nfa.new_transition(inner_nfa.finish, inner_nfa.start, "epsilon")
nfa
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment