-
-
Save jobjo/13376aaea1151100dd7915dedb35d9d7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(* Helpers *) | |
let id x = x | |
let const x _ = x | |
let (>>) f g x = g (f x) | |
let string_of_list = List.to_seq >> String.of_seq | |
let list_of_string = String.to_seq >> List.of_seq | |
module CS = Set.Make (Char) | |
module Option = struct | |
let map f = function | |
| None -> None | |
| Some x -> Some (f x) | |
let bind o f = | |
match o with | |
| Some x -> f x | |
| None -> None | |
let product o1 o2 = | |
match o1, o2 with | |
| Some x, Some y -> Some (x,y) | |
| _ -> None | |
module Syntax = struct | |
let (let+) x f = map f x | |
let (and+) o1 o2 = product o1 o2 | |
let (let*) x f = bind x f | |
end | |
end | |
module Parser = struct | |
type 'a t = | |
| Fail : string -> 'a t | |
| Empty : unit t | |
| Return : 'a -> 'a t | |
| One_of : CS.t -> char t | |
| Forget : 'a t -> 'a t | |
| Map : ('a -> 'b) * 'a t -> 'b t | |
| Product : 'a t * 'b t -> ('a * 'b) t | |
| Either : 'a t * 'a t -> 'a t | |
| Fix : ('a t -> 'a t) -> 'a t | |
| Raw : (char list -> ('a * char list) option) -> 'a t | |
let rec show : type a. a t -> string = function | |
| Fail msg -> | |
msg | |
| Empty -> | |
"empty" | |
| One_of _ -> | |
Printf.sprintf "one_of []" | |
| Forget p -> | |
Printf.sprintf "forget (%s)" (show p) | |
| Return x -> | |
"return ?" | |
| Map (f, p) -> | |
Printf.sprintf "map ? (%s)" (show p) | |
| Product (p,q) -> | |
Printf.sprintf "product (%s) (%s)" (show p) (show q) | |
| Either (p,q) -> | |
Printf.sprintf "either (%s) (%s)" (show p) (show q) | |
| Fix f -> | |
Printf.sprintf "let rec f () = %s) in f ()" @@ show (f (Fail "f ()")) | |
| Raw _ -> | |
failwith "Cannot show internal node" | |
let rec eval : type a. a t -> char list -> (a * char list) option = fun p -> | |
let open Option.Syntax in | |
match p with | |
| Fail _ -> | |
const None | |
| Empty -> | |
fun cs -> | |
( match cs with | |
| [] -> Some ((), []) | |
| _ -> None | |
) | |
| Return x -> | |
fun cs -> Some (x, cs) | |
| One_of s -> | |
fun cs -> | |
( match cs with | |
| c :: cs when CS.mem c s -> Some (c, cs) | |
| _ -> None | |
) | |
| Forget p -> | |
let ep = eval p in | |
fun cs -> | |
( match ep cs with | |
| Some (x, _) -> Some (x, cs) | |
| None -> None | |
) | |
| Map (f, p) -> | |
let ep = eval p in | |
fun cs -> | |
let+ (x, cs) = ep cs in | |
(f x, cs) | |
| Product (p, q) -> | |
let ep = eval p in | |
let eq = eval q in | |
fun cs -> | |
let* (x, cs) = ep cs in | |
let* (y, cs) = eq cs in | |
Some ((x,y), cs) | |
| Either (p, q) -> | |
let ep = eval p in | |
let eq = eval q in | |
fun cs -> | |
( match ep cs with | |
| Some r -> Some r | |
| None -> eq cs | |
) | |
| Fix f -> | |
let rec k = lazy (eval @@ f (Raw (fun cs -> Lazy.force k cs))) in | |
Lazy.force k | |
| Raw f -> | |
f | |
let eval' p = String.to_seq >> List.of_seq >> eval p | |
let eval p = String.to_seq >> List.of_seq >> eval p | |
let symbols p = | |
let rec aux : type a. a t -> CS.t = function | |
| Fail _ -> CS.empty | |
| Empty -> CS.empty | |
| Return _ -> CS.empty | |
| Map (_, p) -> aux p | |
| Product (p,q) -> CS.union (aux p) (aux q) | |
| Either (p,q) -> CS.union (aux p) (aux q) | |
| Fix f -> aux @@ f @@ Fix (const (Fail "")) | |
| Forget p -> aux p | |
| One_of s -> s | |
| Raw _ -> failwith "Internal function" | |
in | |
aux p |> CS.to_seq |> List.of_seq | |
let empty = Empty | |
let fix f = Fix f | |
let fail m = Fail m | |
let return x = Return x | |
let symbol t = One_of (CS.singleton t) | |
let forget p = Forget p | |
let map f x = Map (f, x) | |
let product p q = Product (p,q) | |
let either p q = Either (p,q) | |
let choice ps = List.fold_left either (fail "") ps | |
(* Syntax *) | |
module Syntax = struct | |
let (let+) p f = map f p | |
let (and+) p q = product p q | |
end | |
(* Infix operators *) | |
module Ops = struct | |
open Syntax | |
let ( <$> ) f p = map f p | |
let ( <|> ) p q = either p q | |
let ( <*> ) pf px = let+ f = pf and+ x = px in f x | |
let ( *> ) p q = (fun _ x -> x) <$> p <*> q | |
let ( <* ) p q = const <$> p <*> q | |
end | |
let one_of s = One_of ( CS.of_seq @@ String.to_seq s) | |
let many p = | |
let open Ops in | |
fix @@ fun many -> | |
either (List.cons <$> p <*> many) (return []) | |
let many_one p = Ops.(List.cons <$> p <*> many p) | |
let between ps pe p = Ops.(ps *> p <* pe) | |
end | |
module Parsers = struct | |
open Parser | |
open Parser.Ops | |
open Parser.Syntax | |
let string s = | |
let accum c p = | |
let+ x = symbol c | |
and+ xs = p in | |
Printf.sprintf "%c%s" x xs | |
in | |
List.fold_right accum (list_of_string s) (return "") | |
let digit = one_of "0123456789" | |
let int = | |
let+ ds = many_one digit in | |
int_of_string @@ string_of_list ds | |
let float = | |
let aux m1 s m2 = | |
let+ ds1 = m1 digit | |
and+ s = symbol s | |
and+ ds2 = m2 digit in | |
float_of_string @@ | |
Printf.sprintf "%s%c%s" (string_of_list ds1) s (string_of_list ds2) | |
in | |
choice | |
[ aux many_one 'e' many_one | |
; aux many_one '.' many | |
; aux many '.' many_one | |
] | |
let float = | |
(* Ex: 123. *) | |
let p1 = | |
let+ ds = many_one digit | |
and+ d = symbol '.' in | |
ds @ [d] | |
in | |
(* Ex: 123.45 *) | |
let p2 = | |
let+ ds1 = p1 | |
and+ ds2 = many_one digit in | |
ds1 @ ds2 | |
in | |
(* Ex: 12.34e56 or 12e34 *) | |
let p3 = | |
let+ ds1 = p2 <|> many_one digit | |
and+ e = symbol 'e' | |
and+ ds2 = many_one digit in | |
ds1 @ [e] @ ds2 | |
in | |
let fol cs = float_of_string @@ string_of_list cs in | |
choice [ fol <$> p3 ; fol <$> p2 ; fol <$> p1 ] | |
end | |
module SExp = struct | |
open Parser | |
open Parser.Ops | |
open Parsers | |
type atom = | |
| Int of int | |
| Float of float | |
| String of string | |
| Symbol of string | |
type sexp = Atom of atom | List of sexp list | |
let space = one_of "\n\t\r " | |
let left_paren = symbol '(' | |
let right_paren = symbol ')' | |
let regular_char = | |
let special_char = list_of_string "()\n\r\t\" " in | |
let is_regular c = not @@ List.mem c special_char in | |
List.init 256 Char.chr |> List.filter is_regular |> string_of_list |> one_of | |
let regular_string = string_of_list <$> many_one regular_char | |
let quoted_string = | |
let quote = symbol '\"' in | |
let string = many (regular_char <|> space <|> left_paren <|> right_paren) in | |
string_of_list <$> between quote quote string | |
let atom = | |
let end_num = | |
forget (empty <|> map ignore space <|> map ignore right_paren) | |
in | |
choice | |
[ map (fun i -> Int i ) int <* end_num | |
; map (fun f -> Float f) float <* end_num | |
; map (fun s -> String s) quoted_string | |
; map (fun s -> Symbol s) regular_string | |
] | |
let sexpr = | |
fix @@ fun sexpr -> | |
let expr = | |
either | |
((fun es -> List es) <$> between left_paren right_paren (many sexpr)) | |
((fun a -> Atom a ) <$> atom) | |
in | |
between (many space) (many space) expr | |
let parse s = Option.map fst @@ eval sexpr s | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment