Skip to content

Instantly share code, notes, and snippets.

@kandu
Created October 18, 2014 17:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kandu/6fb5777c92bc0369076c to your computer and use it in GitHub Desktop.
Save kandu/6fb5777c92bc0369076c to your computer and use it in GitHub Desktop.
str2regexp
let epsilon= "ϵ"
type buffer= {
str: string;
len: int;
pos: int;
}
let initBuffer str=
{ str;
len= String.length str;
pos= 0;
}
type token=
| OpC | OpO
| TkE | TkC of char | TkS of string
type regexp=
| Ce
| C of char (* char *)
| A of regexp * regexp (* concatenation *)
| O of regexp * regexp (* alternation *)
| Cl of regexp (* closure aka Kleene Star *)
let getToken buf=
let rec paired str count pos=
if count > 0 then
match str.[pos] with
| '(' -> paired str (count+1) (pos+1)
| ')' -> paired str (count-1) (pos+1)
| _ -> paired str count (pos+1)
else pos-1
in
match buf.str.[buf.pos] with
| '('->
let tail= paired buf.str 1 (buf.pos+1) in
let paren= String.sub buf.str (buf.pos+1) (tail-buf.pos-1)
in (TkS paren, {buf with pos= tail+1})
| '*'-> (OpC, {buf with pos= buf.pos+1})
| '|'-> (OpO, {buf with pos= buf.pos+1})
| '#'-> (TkE, {buf with pos= buf.pos+1})
| c -> (TkC c, {buf with pos= buf.pos+1}) (* TODO: escape character *)
let rec str2regexp buf=
let getItem= function
| TkE-> Ce
| TkC c-> C c
| TkS s -> str2regexp @@ initBuffer s
| other -> raise @@ Failure
(Printf.sprintf "match %s"
(match other with OpC -> "OpC" | OpO -> "OpO" | _ -> "Tk"))
in
let rec parse curr nextbuf=
if nextbuf.pos >= nextbuf.len then
curr
else
let nextTk, lastbuf= getToken nextbuf in
match nextTk with
| OpO -> O (curr, str2regexp lastbuf)
| OpC-> let curr= match curr with Cl r-> curr | _ -> Cl curr in
parse curr lastbuf
| _ ->
let next= getItem nextTk in
if lastbuf.pos >= lastbuf.len then
A (curr, next)
else
let lastTk, tmpbuf= getToken lastbuf in
match lastTk with
| OpC-> parse (A (curr, Cl next)) tmpbuf
| OpO->
O (A (curr, next), str2regexp tmpbuf)
| _ -> parse (A (curr, next)) lastbuf
in
let curr, buf= getToken buf in
parse (getItem curr) buf
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment