Skip to content

Instantly share code, notes, and snippets.

@talyian
Created September 29, 2018 19:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save talyian/b0c3fa9f84c6b2d6e83a69bc2fbed54c to your computer and use it in GitHub Desktop.
Save talyian/b0c3fa9f84c6b2d6e83a69bc2fbed54c to your computer and use it in GitHub Desktop.
Convert a Regex to NFA to DFA
// An exercise in converting a regex to an NFA to a DFA
type RegexNode =
| Or of RegexNode list
| Seq of RegexNode List
| Star of RegexNode
| T of string
| Eta (* todo: does this value really belong here? *)
let regex_pattern = "x(x|y)*|z"
let regex = Or [ Seq [ T "x"; Star (Or [T "x"; T"y"])]; T "z"]
type Node = { n: int32 }
type 't Graph = {
nodes: ResizeArray<int32>
names: ResizeArray<string>
edges: ResizeArray<(int32 * int32 * 't)>
}
with
override __.ToString() =
let out = new System.IO.StringWriter()
let edges_by_first = __.edges |> Seq.groupBy (fun (a, b, c) -> a)
fprintf out "--- Terms: "
for x in __.names do fprintf out "[%O], " x
fprintfn out ""
for a, group in edges_by_first do
fprintfn out "[%s]" (__.names.[a])
for (a, b, c) in group do
fprintfn out " [%O] : %O" (__.names.[b]) c
out.ToString()
let NfaFromRegex (regex:RegexNode) =
let nodes = new ResizeArray<int32>()
let names = new ResizeArray<string>()
let edges = new ResizeArray<(int32 * int32 * RegexNode)>()
let newstate (name:string) =
let state = nodes.Count
names.Add name
nodes.Add state; {n=state}
let remove_edge_at i =
edges.[i] <- edges.[edges.Count - 1]
edges.RemoveAt(edges.Count - 1)
let a = newstate "start"
let b = newstate "end"
edges.Add(a.n, b.n, regex)
let mutable i = 0
while i < edges.Count do
match edges.[i] with
| (a, b, Or items) ->
remove_edge_at i
for x in items do
edges.Add(a, b, x)
| (a, b, Seq []) -> remove_edge_at i
| (a, b, Seq [x]) -> edges.[i] <- (a, b, x)
| (a, b, Seq items) ->
remove_edge_at i
for j in 1..items.Length - 1 do
let midstate = newstate(string nodes.Count)
edges.Add(a, midstate.n, items.[j-1])
edges.Add(midstate.n, b, items.[j])
| (a, b, Star inner) ->
remove_edge_at i
let start_inner = newstate(string nodes.Count)
let stop_inner = newstate(string nodes.Count)
edges.Add(a, start_inner.n, Eta)
edges.Add(a, b, Eta)
edges.Add(start_inner.n, stop_inner.n, inner)
edges.Add(stop_inner.n, b, Eta)
edges.Add(stop_inner.n, start_inner.n, Eta)
| _ -> i <- i + 1
{edges=edges;nodes=nodes;names=names}
let rec closure func starting_set =
let result_set = Seq.fold (fun s item -> Set.union (func item) s) Set.empty starting_set
if result_set =
starting_set then result_set
else
closure func result_set
let DfaFromNfa (nfa:'t Graph) (eta: 't) =
let eta_transitions = nfa.edges |> Seq.filter (fun (_from, _to, _val) -> _val = eta)
let self_transitions = nfa.nodes |> Seq.map (fun i -> (i, i, eta))
let transitions = Seq.concat [eta_transitions; self_transitions] |> Seq.toList
let etas_by_source =
transitions
|> Seq.groupBy (fun (a,b,c) -> a)
|> Seq.map (fun (src, items) -> src, Seq.map (fun (a,b,c) -> b) items |> Set.ofSeq)
|> List.ofSeq
let eta_closure (src, dests) =
let links d = etas_by_source |> List.find (fun (a, b) -> a = src) |> snd
src, closure links dests
let etas = List.map eta_closure etas_by_source
let transitions =
nfa.edges
|> Seq.filter (fun (_from, _to, _val) -> _val <> eta)
|> Seq.groupBy (fun (_from, _to, _val) -> _from)
|> Seq.map (fun (a, vals) -> a, (vals |> Seq.map (fun (a, b, c) -> c, b) |> List.ofSeq))
|> List.ofSeq
let get_transitions nfa_state =
transitions |> List.collect (fun (a, rest) -> if Set.contains a nfa_state then rest else [])
let dfaStates = ResizeArray<Set<int32>>()
let dfaEdges = ResizeArray<(int32 * int32 * 't)>()
let get_dfa_state item =
match dfaStates.FindIndex (fun a -> a = item) with
| -1 ->
dfaStates.Add item
dfaStates.Count - 1
| n -> n
dfaStates.Add(set [0])
let mutable i = 0
while i < dfaStates.Count do
for sym, nfa_state in get_transitions dfaStates.[i] do
let eta_closure =
List.pick (fun (b, e) -> if b = nfa_state then Some e else None) etas
let dfa_b = get_dfa_state eta_closure
dfaEdges.Add(i, dfa_b, sym)
i <- i + 1
{ Graph.edges=dfaEdges
Graph.nodes=ResizeArray([0..dfaStates.Count-1])
Graph.names= dfaStates.ConvertAll(fun x -> (string x).[4..].Trim('[', ']'))}
let nfa = NfaFromRegex regex
let dfa = DfaFromNfa nfa Eta
printfn "%O" nfa
printfn "%O" dfa
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment