Skip to content

Instantly share code, notes, and snippets.

@lascar-pacagi lascar-pacagi/dfa2.ml
Last active Feb 3, 2020

Embed
What would you like to do?
module DFA2 : Matching = struct
type t1 = (int * state) ref
and state =
| Split of t1 * t1
| CharSet of CSet.t * t1
| Match
module S =
Set.Make(
struct
type t = t1
let compare { contents = (i1, _) } { contents = (i2, _) } =
compare i1 i2
end)
(* [set] represents a state of the DFA (a set of NFA states).
[transitions] are the memoized transitions from this state. *)
type t2 = { transitions : (char, t2) Hashtbl.t; set : S.t }
module Memo = Map.Make(S)
(* The first element of the pair [t] remembers the states of the DFA that we have already met so that
we can get the associated t2 value when we get this state from another transition in the automaton.
The second element of the pair [t] is the starting state of the DFA with its memoized transitions. *)
type t = t2 Memo.t ref * t2
let make_t1 =
let counter = ref 1 in
(fun s ->
if s = Match then
ref (0, s)
else
begin
let i = !counter in
incr counter;
ref (i, s)
end)
let rec follow ({ contents = (_, s) } as t1) next =
if S.mem t1 next then
next
else
let next = S.add t1 next in
match s with
| Split (t1a, t1b) ->
follow t1a next
|> follow t1b
| _ ->
next
let new_hashtbl () = Hashtbl.create 11
let init re =
let rec init re next =
match re with
| RE.CharSet set ->
make_t1 (CharSet (set, next))
| RE.Concatenation (re1, re2) ->
init re2 next
|> init re1
| RE.Union (re1, re2) ->
make_t1 (Split (init re1 next, init re2 next))
| RE.ZeroOrOne re ->
make_t1 (Split (init re next, next))
| RE.ZeroOrMore re ->
let t1 = make_t1 Match in
let res = make_t1 (Split (init re t1, next)) in
t1 := !res;
res
| RE.OneOrMore re ->
let t1 = make_t1 Match in
let res = init re t1 in
t1 := !(make_t1 (Split (res, next)));
res
in
(* [set] is the starting state of the DFA. *)
let set =
let nfa_start = init re (make_t1 Match) in
follow nfa_start S.empty
in
(* [t2] puts together the starting state with the memoized transitions from this state (initially empty). *)
let t2 = { transitions = new_hashtbl (); set } in
(* We memoize the association [set] --> [t2] to be able to get this [t2] if we reach [set] from a transition
during the construction. *)
ref (Memo.singleton set t2), t2
let full_match (memo, t2) s =
let hit = ref 0 in
let miss = ref 0 in
let step { transitions; set } c =
try
(* We first test if this transition has already been seen. *)
let res = Hashtbl.find transitions c in
incr hit;
res
with Not_found ->
incr miss;
(* If this is not the case, we have to build the new state. *)
let set' =
S.fold
(fun { contents = (_, s) } next ->
match s with
| CharSet (chars, t1) when CSet.mem c chars ->
follow t1 next
| _ ->
next
) set S.empty
in
let res =
try
(* We try to find [set'] in [memo] to get the associated [t2] if we already found this state from another
transition. *)
Memo.find set' !memo
with Not_found ->
(* If not, we need to build a new [t2]. *)
let t2 = { transitions = new_hashtbl (); set = set' } in
(* We then remember the association [set'] --> [t2] in [memo] in case we get to [set'] afterward from another
transition. *)
memo := Memo.add set' t2 !memo;
t2
in
(* We remember the new transition [set] --[c]--> [set'] in the DFA. *)
Hashtbl.add transitions c res;
res
in
let rec full_match l t2 =
match l with
| [] ->
S.mem (make_t1 Match) t2.set
| c :: r ->
step t2 c
|> full_match r
in
let res = full_match (explode s) t2 in
Printf.printf "states: %d, hits: %d, misses: %d\n" (Memo.cardinal !memo) !hit !miss;
res
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.