This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module DFA2 : Matching = struct | |
type t1 = (int * state) ref | |
and state = | |
| Split of t1 * t1 | |
| CharSet of CSet.t * t1 | |
| Match | |
module S = | |
Set.Make( | |
struct | |
type t = t1 | |
let compare { contents = (i1, _) } { contents = (i2, _) } = | |
compare i1 i2 | |
end) | |
(* [set] represents a state of the DFA (a set of NFA states). | |
[transitions] are the memoized transitions from this state. *) | |
type t2 = { transitions : (char, t2) Hashtbl.t; set : S.t } | |
module Memo = Map.Make(S) | |
(* The first element of the pair [t] remembers the states of the DFA that we have already met so that | |
we can get the associated t2 value when we get this state from another transition in the automaton. | |
The second element of the pair [t] is the starting state of the DFA with its memoized transitions. *) | |
type t = t2 Memo.t ref * t2 | |
let make_t1 = | |
let counter = ref 1 in | |
(fun s -> | |
if s = Match then | |
ref (0, s) | |
else | |
begin | |
let i = !counter in | |
incr counter; | |
ref (i, s) | |
end) | |
let rec follow ({ contents = (_, s) } as t1) next = | |
if S.mem t1 next then | |
next | |
else | |
let next = S.add t1 next in | |
match s with | |
| Split (t1a, t1b) -> | |
follow t1a next | |
|> follow t1b | |
| _ -> | |
next | |
let new_hashtbl () = Hashtbl.create 11 | |
let init re = | |
let rec init re next = | |
match re with | |
| RE.CharSet set -> | |
make_t1 (CharSet (set, next)) | |
| RE.Concatenation (re1, re2) -> | |
init re2 next | |
|> init re1 | |
| RE.Union (re1, re2) -> | |
make_t1 (Split (init re1 next, init re2 next)) | |
| RE.ZeroOrOne re -> | |
make_t1 (Split (init re next, next)) | |
| RE.ZeroOrMore re -> | |
let t1 = make_t1 Match in | |
let res = make_t1 (Split (init re t1, next)) in | |
t1 := !res; | |
res | |
| RE.OneOrMore re -> | |
let t1 = make_t1 Match in | |
let res = init re t1 in | |
t1 := !(make_t1 (Split (res, next))); | |
res | |
in | |
(* [set] is the starting state of the DFA. *) | |
let set = | |
let nfa_start = init re (make_t1 Match) in | |
follow nfa_start S.empty | |
in | |
(* [t2] puts together the starting state with the memoized transitions from this state (initially empty). *) | |
let t2 = { transitions = new_hashtbl (); set } in | |
(* We memoize the association [set] --> [t2] to be able to get this [t2] if we reach [set] from a transition | |
during the construction. *) | |
ref (Memo.singleton set t2), t2 | |
let full_match (memo, t2) s = | |
let hit = ref 0 in | |
let miss = ref 0 in | |
let step { transitions; set } c = | |
try | |
(* We first test if this transition has already been seen. *) | |
let res = Hashtbl.find transitions c in | |
incr hit; | |
res | |
with Not_found -> | |
incr miss; | |
(* If this is not the case, we have to build the new state. *) | |
let set' = | |
S.fold | |
(fun { contents = (_, s) } next -> | |
match s with | |
| CharSet (chars, t1) when CSet.mem c chars -> | |
follow t1 next | |
| _ -> | |
next | |
) set S.empty | |
in | |
let res = | |
try | |
(* We try to find [set'] in [memo] to get the associated [t2] if we already found this state from another | |
transition. *) | |
Memo.find set' !memo | |
with Not_found -> | |
(* If not, we need to build a new [t2]. *) | |
let t2 = { transitions = new_hashtbl (); set = set' } in | |
(* We then remember the association [set'] --> [t2] in [memo] in case we get to [set'] afterward from another | |
transition. *) | |
memo := Memo.add set' t2 !memo; | |
t2 | |
in | |
(* We remember the new transition [set] --[c]--> [set'] in the DFA. *) | |
Hashtbl.add transitions c res; | |
res | |
in | |
let rec full_match l t2 = | |
match l with | |
| [] -> | |
S.mem (make_t1 Match) t2.set | |
| c :: r -> | |
step t2 c | |
|> full_match r | |
in | |
let res = full_match (explode s) t2 in | |
Printf.printf "states: %d, hits: %d, misses: %d\n" (Memo.cardinal !memo) !hit !miss; | |
res | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment