Created
March 5, 2017 18:50
-
-
Save hclarke/21fcbe24515301dc7e343697e142b006 to your computer and use it in GitHub Desktop.
nondeterministic finite automata implementation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![allow(dead_code)] | |
use std::collections::{HashMap,HashSet}; | |
struct NFA { | |
state_count : usize, //0 is start state, -1 is accept | |
alphabet : HashMap<char, HashMap<i64, HashSet<i64>>>, | |
empty_transitions : HashMap<i64, HashSet<i64>>, | |
} | |
fn offset_state(state : i64, offset : usize) -> i64 { | |
if state > 0 { | |
state + ( offset as i64 ) | |
} | |
else { | |
state | |
} | |
} | |
impl NFA { | |
fn state_size(&self) -> usize { | |
(self.state_count+7)/8 | |
} | |
fn new() -> NFA { | |
NFA { | |
state_count : 0, | |
alphabet : HashMap::new(), | |
empty_transitions : HashMap::new(), | |
} | |
} | |
fn is_accepted(&self, states : &HashSet<i64>) -> bool { | |
states.contains(&-1) | |
} | |
fn is_match(&self, s : &str) -> bool { | |
let mut states = HashSet::new(); | |
states.insert(0); | |
self.apply_empty_transitions(&mut states); | |
for c in s.chars() { | |
states = self.apply_transitions(states, c); | |
self.apply_empty_transitions(&mut states); | |
} | |
return self.is_accepted(&states); | |
} | |
fn apply_transitions(&self, states : HashSet<i64>, c : char) -> HashSet<i64> { | |
let mut r = HashSet::new(); | |
if let Some(ts) = self.alphabet.get(&c) { | |
for state in states { | |
if let Some(nexts) = ts.get(&state) { | |
for next in nexts { | |
r.insert(*next); | |
} | |
} | |
} | |
} | |
r | |
} | |
fn apply_empty_transitions(&self, states : &mut HashSet<i64>) { | |
let mut sv : Vec<_> = states.iter().map(|x| *x).collect(); | |
let mut i = 0; | |
while i < sv.len() { | |
let state = sv[i]; | |
i += 1; | |
if let Some(nexts) = self.empty_transitions.get(&state) { | |
for next in nexts { | |
if states.insert(*next) { | |
sv.push(*next); | |
} | |
} | |
} | |
} | |
} | |
fn accept_char(c : char) -> NFA { | |
let mut nfa = NFA::new(); | |
nfa.add_transition(c,0,-1); | |
nfa | |
} | |
fn and(mut self, mut rhs : NFA) -> NFA { | |
//this is hacky and depends on or impl | |
let end = (self.state_count + 1) as i64; | |
self.retarget(-1,end); | |
rhs.retarget(0,end); | |
rhs.state_count += 1; //super hax | |
return self.or(rhs); | |
} | |
fn or(mut self, rhs : NFA) -> NFA { | |
let offset = self.state_count; | |
for (s, ns) in rhs.empty_transitions { | |
let os = offset_state(s, offset); | |
for n in ns { | |
let on = offset_state(n,offset); | |
self.add_empty_transition(os,on); | |
} | |
} | |
for (c, transitions) in rhs.alphabet { | |
for (s, ns) in transitions { | |
let os = offset_state(s, offset); | |
for n in ns { | |
let on = offset_state(n,offset); | |
self.add_transition(c,os,on); | |
} | |
} | |
} | |
self | |
} | |
fn many(mut self) -> NFA { | |
self.add_empty_transition(-1,0); | |
self.add_empty_transition(0,-1); | |
self | |
} | |
fn add_empty_transition(&mut self, from : i64, to : i64) { | |
if from > (self.state_count as i64) { | |
self.state_count = from as usize; | |
} | |
if to > (self.state_count as i64) { | |
self.state_count = to as usize; | |
} | |
if !self.empty_transitions.contains_key(&from) { | |
self.empty_transitions.insert(from,HashSet::new()); | |
} | |
let transitions = self.empty_transitions.get_mut(&from).unwrap(); | |
transitions.insert(to); | |
} | |
fn retarget(&mut self, from : i64, to : i64) { | |
if let Some(from_transitions) = self.empty_transitions.remove(&from) { | |
if self.empty_transitions.contains_key(&to) { | |
self.empty_transitions.get_mut(&to).unwrap().extend(from_transitions); | |
} | |
else { | |
self.empty_transitions.insert(to,from_transitions); | |
} | |
} | |
for ns in self.empty_transitions.values_mut() { | |
if ns.remove(&from) { | |
ns.insert(to); | |
} | |
} | |
for ts in self.alphabet.values_mut() { | |
if let Some(from_transitions) = ts.remove(&from) { | |
if ts.contains_key(&to) { | |
ts.get_mut(&to).unwrap().extend(from_transitions); | |
} | |
else { | |
ts.insert(to,from_transitions); | |
} | |
} | |
for ns in ts.values_mut() { | |
if ns.remove(&from) { | |
ns.insert(to); | |
} | |
} | |
} | |
} | |
fn add_transition(&mut self, c : char, from : i64, to : i64) { | |
if from > (self.state_count as i64) { | |
self.state_count = from as usize; | |
} | |
if to > (self.state_count as i64) { | |
self.state_count = to as usize; | |
} | |
if !self.alphabet.contains_key(&c) { | |
self.alphabet.insert(c, HashMap::new()); | |
} | |
let transition_map = self.alphabet.get_mut(&c).unwrap(); | |
if !transition_map.contains_key(&from) { | |
transition_map.insert(from,HashSet::new()); | |
} | |
let transitions = transition_map.get_mut(&from).unwrap(); | |
transitions.insert(to); | |
} | |
} | |
fn main() { | |
let a = NFA::accept_char('a'); | |
let b = NFA::accept_char('b'); | |
assert!(a.is_match("a")); | |
assert!(b.is_match("b")); | |
assert!(!a.is_match("b")); | |
assert!(!a.is_match("aa")); | |
let and_ab = NFA::accept_char('a').and(NFA::accept_char('b')); | |
assert!(and_ab.is_match("ab")); | |
assert!(!and_ab.is_match("ba")); | |
assert!(!and_ab.is_match("a")); | |
assert!(!and_ab.is_match("b")); | |
assert!(!and_ab.is_match("abb")); | |
assert!(!and_ab.is_match("aba")); | |
let or_ab = NFA::accept_char('a').or(NFA::accept_char('b')); | |
assert!(or_ab.is_match("a")); | |
assert!(or_ab.is_match("b")); | |
assert!(!or_ab.is_match("c")); | |
let many_or_ab = NFA::accept_char('a').or(NFA::accept_char('b')).many(); | |
assert!(many_or_ab.is_match("")); | |
assert!(many_or_ab.is_match("a")); | |
assert!(many_or_ab.is_match("b")); | |
assert!(many_or_ab.is_match("aa")); | |
assert!(many_or_ab.is_match("ab")); | |
assert!(many_or_ab.is_match("ba")); | |
assert!(many_or_ab.is_match("bb")); | |
assert!(!many_or_ab.is_match("bacb")); | |
let many_and_ab = NFA::accept_char('a').and(NFA::accept_char('b')).many(); | |
assert!(many_and_ab.is_match("")); | |
assert!(many_and_ab.is_match("ab")); | |
assert!(many_and_ab.is_match("abab")); | |
assert!(!many_and_ab.is_match("a")); | |
assert!(!many_and_ab.is_match("b")); | |
assert!(!many_and_ab.is_match("aba")); | |
assert!(!many_and_ab.is_match("ac")); | |
assert!(!many_and_ab.is_match("abb")); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment