Created
July 4, 2009 22:31
-
-
Save mzp/140754 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(* | |
e = . | | |
e* | | |
e e | | |
( e ) | | |
c | |
*) | |
#load "dynlink.cma";; | |
#load "camlp4o.cma";; | |
type 'a regexp = | |
Any | |
| Many of 'a regexp | |
| Char of 'a | |
| Concat of 'a regexp list | |
| Select of 'a regexp list | |
let (@@) f g = f g | |
let explode str = | |
let n = | |
String.length str in | |
let rec iter i = | |
if i = n then | |
[] | |
else | |
str.[i]::iter (i+1) in | |
iter 0 | |
let implode xs = | |
let n = | |
List.length xs in | |
let s = | |
String.make n ' ' in | |
let rec iter i = | |
function | |
[] -> () | |
| x::xs -> s.[i] <- x; iter (i+1) xs in | |
iter 0 xs; | |
s | |
(* parsec *) | |
let char x stream = | |
match Stream.peek stream with | |
Some y -> | |
if x = y then begin | |
Stream.junk stream; | |
x | |
end else | |
raise Stream.Failure | |
| _ -> | |
raise Stream.Failure | |
let rec many p = | |
parser | |
[< x = p; xs = many p >] -> | |
x::xs | |
| [<>] -> | |
[] | |
let rec sep_by p c stream = | |
match stream with parser | |
[< x = p >] -> | |
begin match Stream.peek stream with | |
Some sep -> | |
if sep = c then | |
x::sep_by p c stream | |
else | |
[x] | |
| _ -> | |
[x] | |
end | |
| [<>] -> | |
[] | |
let letter stream = | |
match Stream.peek stream with | |
Some ('a'..'z') | Some ('A'..'Z') | Some ('0'..'9') -> | |
Stream.next stream | |
| Some _ | None -> | |
raise Stream.Failure | |
(* parser *) | |
let rec pAtom = parser | |
[< _ = char '.'>] -> | |
Any | |
| [< _ = char '('; e = pRegexp; _ = char ')' >] -> | |
e | |
| [< c = letter >] -> | |
Char c | |
and pMany stream = match stream with parser | |
[< e = pAtom>] -> | |
begin match stream with parser | |
[< _ = char '*' >] -> Many e | |
| [<>] -> e | |
end | |
and pConcat = | |
parser [< es = many pMany>] -> | |
match es with | |
[e] -> e | |
| _ -> Concat es | |
and pSelect = | |
parser [< es = sep_by pConcat '|'>] -> | |
match es with | |
[e] -> e | |
| _ -> Select es | |
and pRegexp s = | |
pSelect s | |
let regexp s = | |
pRegexp @@ Stream.of_string s | |
let rec break f = | |
function | |
[] -> | |
[],[] | |
| x::xs as xss-> | |
if f x then | |
let (ys,zs) = | |
break f xs in | |
(x::ys,zs) | |
else | |
[],xss | |
let rec find f = | |
function | |
[] -> None | |
| x::xs -> | |
match f x with | |
Some _ as s -> | |
s | |
| None -> | |
find f xs | |
(* compile *) | |
let rec compile = | |
function | |
Any -> | |
begin function | |
[] -> None | |
| x::xs -> Some ([x],xs) | |
end | |
| Char c -> | |
begin function | |
x::xs when x = c-> | |
Some ([x],xs) | |
| _ -> | |
None | |
end | |
| Many e -> | |
let rec compose f g xs = | |
match f xs with | |
Some (ys,zs) -> | |
begin | |
match g zs with | |
Some (ws,vs) -> | |
Some (ys @ ws,vs) | |
| None -> | |
Some (ys,zs) | |
end | |
| None -> | |
Some ([],xs) in | |
let f = | |
compile e in | |
let rec loop xs = | |
compose f loop xs in | |
loop | |
| Concat es -> | |
let rec compose f g xs = | |
match f xs with | |
Some (ys,zs) -> | |
begin | |
match g zs with | |
Some (ws,vs) -> | |
Some (ys @ ws,vs) | |
| None -> | |
None | |
end | |
| None -> | |
None in | |
let fs = | |
List.map compile es in | |
List.fold_left compose (List.hd fs) (List.tl fs) | |
| Select es -> | |
let fs = | |
List.map compile es in | |
begin fun xs -> | |
find (fun f -> f xs) fs | |
end | |
(* example *) | |
let f () = | |
let r = | |
compile @@ regexp "(foo)*" in | |
let s = | |
explode "foofoo" in | |
match r s with | |
Some (matched,rest) -> | |
Printf.printf "matched = %s¥n" @@ implode matched; | |
Printf.printf "rest = %s¥n" @@ implode rest | |
| None -> | |
print_endline "no match" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment