Skip to content

Instantly share code, notes, and snippets.

@jneen
Created April 25, 2016 06:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jneen/93eb107ef5ca3293a90f2d2b7077754d to your computer and use it in GitHub Desktop.
Save jneen/93eb107ef5ca3293a90f2d2b7077754d to your computer and use it in GitHub Desktop.
open Batteries.Printf
open Batteries
type 't tokstate = Start | Tok of 't | Err of string | Eof
type 't state = {
mutable index : int ;
mutable head : 't tokstate ;
mutable behind : 't tokstate list ;
mutable ahead : 't tokstate list ;
mutable bt : int ;
read : unit -> 't tokstate ;
formatter : 't -> string ;
}
let new_state read formatter = {
index = 0 ;
head = Start ;
behind = [] ;
ahead = [] ;
bt = 0 ;
read = read ;
formatter = formatter ;
}
let format_tok st = function
| Start -> "\\a"
| Tok t -> st.formatter t
| Err s -> "!" ^ s
| Eof -> "\\z"
let format_toks st toks = toks |> List.map (format_tok st) |> String.join ""
let string_of_state st =
let head = format_tok st st.head
in let ahead = format_toks st st.ahead
in let behind = format_toks st (List.rev st.behind)
in sprintf "%i [%s](%s)[%s]" st.index behind head ahead
type parse_error = int * string list
exception ParseError of parse_error
let empty_error = (-1, [])
let merge_errors = function
| (e, (_, [])) -> e
| ((_, []), e) -> e
| ((p, m), (p', m')) when p < p' -> (p', m')
| ((p, m), (p', m')) when p > p' -> (p, m)
| ((p, m), (p', m')) when p = p' -> (p, List.sort_uniq compare (m @ m'))
| _ -> empty_error (* unreachable *)
type consumed = Consumed | Empty
let merge_consumed = function
| (Empty, Empty) -> Empty
| _ -> Consumed
type 'a result =
| Ok of 'a * parse_error
| Err of parse_error
let empty_result = Err empty_error
let error_of = function Ok (_, e) -> e | Err e -> e
let merge_results = function
| (Ok (a, e), r) -> Ok (a, merge_errors (e, error_of r))
| (Err e, r) -> Err (merge_errors (e, error_of r))
type 'a reply = consumed * 'a result
let ok (c:consumed) (v:'a) = ((c, Ok (v, empty_error)) : 'a reply)
let okc v = ok Consumed v
let oke v = ok Empty v
let nokc st msg = (Consumed, Err (st.index, msg))
let noke st msg = (Empty, Err (st.index, msg))
let empty_reply = (Empty, Err empty_error)
let map_reply f = function
| (c, Ok (v, e)) -> (c, Ok (f v, e))
| (c, Err e) -> (c, Err e)
let merge_replies ((c, r), (c', r')) = (merge_consumed (c, c'), merge_results (r, r'))
let read st = st.read ()
let advance1 st =
(* string_of_state st |> printf "> %s\n"; *)
st.index <- st.index + 1;
if st.bt > 0 then st.behind <- st.head :: st.behind;
(match st.ahead with
| (x :: xs) -> st.ahead <- xs; st.head <- x
| [] -> st.head <- read st)
let rewind1 st =
(* st |> string_of_state |> printf "< %s\n"; *)
st.index <- st.index - 1;
st.ahead <- st.head :: st.ahead;
match st.behind with
| (x :: xs) -> st.behind <- xs; st.head <- x
| [] -> raise (Failure "can't rewind any more!")
let rec advance n st = match n with 0 -> () | _ -> advance1 st; advance (n-1) st
let rec rewind n st = match n with 0 -> () | _ -> rewind1 st; rewind (n-1) st
let lookahead n st =
advance n st; rewind n st; (* populate the lookahead buffer *)
List.take n st.ahead
let rewind_to idx st = rewind (st.index - idx) st
let incr_bt st = st.bt <- st.bt + 1
let decr_bt st = st.bt <- st.bt - 1 ; if st.bt = 0 then st.behind <- []
type ('a, 't) parser = { mutable exec : 't state -> 'a reply }
let pure x = let rep = oke x in { exec = fun _ -> rep }
let fail m = { exec = fun st -> noke st m }
let backtracking p = { exec = fun st ->
let backtrack_index = st.index
in incr_bt st;
let reply = p.exec st
in match reply with
| (_, Err e) -> rewind_to backtrack_index st; decr_bt st; (Empty, Err e)
| _ -> decr_bt st; reply;
}
let satisfy p =
{ exec = fun st -> match st.head with
| Tok t when p t -> advance1 st; okc t
| _ -> noke st [] }
let exact toks = { exec = fun st ->
let bt_idx = st.index
in let consumed = match toks with [] -> Empty | _ -> Consumed
in let err_msg = toks |> List.map (fun t -> Tok t) |> format_toks st
in let rec check_for toks = match toks with
| [] -> ok consumed toks
| (t::ts) -> match st.head with
| Tok h when t = h -> advance1 st; check_for ts
| _ -> rewind_to bt_idx st; noke st [err_msg]
in check_for toks
}
let eq c =
{ exec = fun st -> match st.head with
| Tok t when c = t -> advance1 st; okc t
| _ -> noke st [] }
let oneOf l = { exec = fun st -> match st.head with
| Tok t when List.mem t l -> advance1 st; okc t
| _ -> noke st [] }
let eof () = { exec = fun st -> match st.head with
| Eof -> oke ()
| _ -> noke st ["eof"] }
let start () = { exec = fun st -> match st.head with
| Start -> advance1 st; okc ()
| _ -> noke st ["start of stream"] }
let alt parsers =
let exec st =
let rec loop = function
| (last, (p :: ps)) ->
(match p.exec st with
| (Empty, Err e) -> loop (merge_results (Err e, last), ps)
| (consumed, other) -> (consumed, merge_results (other, last)))
| (last, []) -> (Empty, last)
in loop (empty_result, parsers)
in { exec = exec }
(* this can't contain a reply because polymorphic exceptions are illegal *)
exception SeqBreak of consumed * parse_error
let seq p' p =
let exec st =
let r = p.exec st
in match r with
| (c, Ok (v, e)) ->
merge_replies (p'.exec st, r) |> map_reply (fun v' -> (v, v'))
| (c, Err e) -> (c, Err e)
in { exec = exec }
let map f p = { exec = fun st -> map_reply f (p.exec st) }
let result r p = map (fun _ -> r) p
let default x p = alt [ p; pure x ]
let opt p = p |> map (fun x -> Some x) |> default None
let ignore p = p |> result ()
let follow p' p = p |> seq p' |> map (fun (_, x) -> x)
let skip p' p = p |> seq p' |> map (fun (x, _) -> x)
let many p =
let exec st =
let rec loop last accum =
let reply = merge_replies (p.exec st, last)
in match reply with
| (_, Ok (v, _)) -> loop reply (v :: accum)
| _ -> merge_replies (oke (List.rev accum), reply)
in loop empty_reply []
in { exec = exec }
let many1 p = p |> seq (many p) |> map (fun (x, xs) -> (x :: xs))
let sep_by sep p =
let p_sep = sep |> follow p
in p |> seq (many p_sep) |> map (fun (x, xs) -> (x :: xs))
let defer f = { exec = fun st -> (Lazy.force f).exec st }
let desc s p = { exec = fun st -> match p.exec st with
(* replace all messages generated by p *)
| (c, Err (i, _)) -> (c, Err (i, [s]))
| reply -> reply }
exception NotImplemented
let declare () = { exec = fun _ -> raise NotImplemented }
let implement p p' = p.exec <- p'.exec
let parse p ?(formatter = (fun _ -> "")) stream =
let with_eof = start () |> follow p |> skip (eof ())
in let st = new_state stream formatter
in match with_eof.exec st with
| (_, Ok (v, _)) -> v
| (_, Err e) -> raise (ParseError e)
module Unicode = struct
type uchar_token = UChar.t tokstate
type 'a uchar_parser = ('a, UChar.t) parser
let format c = c |> UTF8.of_char |> UTF8.escaped
let string_of_uchars uch = uch |> List.map UChar.char_of
|> List.map String.of_char
|> String.join ""
let str s = exact (String.to_list s |> List.map UChar.of_char) |> result s
|> desc (sprintf "\"%s\"" s)
let ch c =
let uc = UChar.of_char c
in let test x = UChar.eq uc x
in satisfy test |> result uc |> desc (sprintf "'%s'" (format uc))
let range low high =
let ulow = UChar.of_char low
in let uhigh = UChar.of_char high
in let description = sprintf "'%s'..'%s'" (format ulow) (format uhigh)
in let test c = let open UChar in compare c ulow >= 0 && compare c uhigh <= 0
in satisfy test |> desc description
let nch c =
let uc = UChar.of_char c
in satisfy (fun c' -> uc != c') |> desc (sprintf "[^%s]" (format uc))
let parse p stream = parse p ~formatter:format stream
let noneOf str =
let test c = not (UTF8.contains str c)
in satisfy test |> desc (sprintf "none of [%s]" (UTF8.escaped str))
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment