Skip to content

Instantly share code, notes, and snippets.

Last active February 3, 2020 13:36
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
What would you like to do?
module DFA2 : Matching = struct
type t1 = (int * state) ref
and state =
| Split of t1 * t1
| CharSet of CSet.t * t1
| Match
module S =
type t = t1
let compare { contents = (i1, _) } { contents = (i2, _) } =
compare i1 i2
(* [set] represents a state of the DFA (a set of NFA states).
[transitions] are the memoized transitions from this state. *)
type t2 = { transitions : (char, t2) Hashtbl.t; set : S.t }
module Memo = Map.Make(S)
(* The first element of the pair [t] remembers the states of the DFA that we have already met so that
we can get the associated t2 value when we get this state from another transition in the automaton.
The second element of the pair [t] is the starting state of the DFA with its memoized transitions. *)
type t = t2 Memo.t ref * t2
let make_t1 =
let counter = ref 1 in
(fun s ->
if s = Match then
ref (0, s)
let i = !counter in
incr counter;
ref (i, s)
let rec follow ({ contents = (_, s) } as t1) next =
if S.mem t1 next then
let next = S.add t1 next in
match s with
| Split (t1a, t1b) ->
follow t1a next
|> follow t1b
| _ ->
let new_hashtbl () = Hashtbl.create 11
let init re =
let rec init re next =
match re with
| RE.CharSet set ->
make_t1 (CharSet (set, next))
| RE.Concatenation (re1, re2) ->
init re2 next
|> init re1
| RE.Union (re1, re2) ->
make_t1 (Split (init re1 next, init re2 next))
| RE.ZeroOrOne re ->
make_t1 (Split (init re next, next))
| RE.ZeroOrMore re ->
let t1 = make_t1 Match in
let res = make_t1 (Split (init re t1, next)) in
t1 := !res;
| RE.OneOrMore re ->
let t1 = make_t1 Match in
let res = init re t1 in
t1 := !(make_t1 (Split (res, next)));
(* [set] is the starting state of the DFA. *)
let set =
let nfa_start = init re (make_t1 Match) in
follow nfa_start S.empty
(* [t2] puts together the starting state with the memoized transitions from this state (initially empty). *)
let t2 = { transitions = new_hashtbl (); set } in
(* We memoize the association [set] --> [t2] to be able to get this [t2] if we reach [set] from a transition
during the construction. *)
ref (Memo.singleton set t2), t2
let full_match (memo, t2) s =
let hit = ref 0 in
let miss = ref 0 in
let step { transitions; set } c =
(* We first test if this transition has already been seen. *)
let res = Hashtbl.find transitions c in
incr hit;
with Not_found ->
incr miss;
(* If this is not the case, we have to build the new state. *)
let set' =
(fun { contents = (_, s) } next ->
match s with
| CharSet (chars, t1) when CSet.mem c chars ->
follow t1 next
| _ ->
) set S.empty
let res =
(* We try to find [set'] in [memo] to get the associated [t2] if we already found this state from another
transition. *)
Memo.find set' !memo
with Not_found ->
(* If not, we need to build a new [t2]. *)
let t2 = { transitions = new_hashtbl (); set = set' } in
(* We then remember the association [set'] --> [t2] in [memo] in case we get to [set'] afterward from another
transition. *)
memo := Memo.add set' t2 !memo;
(* We remember the new transition [set] --[c]--> [set'] in the DFA. *)
Hashtbl.add transitions c res;
let rec full_match l t2 =
match l with
| [] ->
S.mem (make_t1 Match) t2.set
| c :: r ->
step t2 c
|> full_match r
let res = full_match (explode s) t2 in
Printf.printf "states: %d, hits: %d, misses: %d\n" (Memo.cardinal !memo) !hit !miss;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment