Created
March 4, 2018 16:33
-
-
Save jdh30/41103ac7700fccc51636b7384c98c28f to your computer and use it in GitHub Desktop.
Port of an camlp4-based inline parser to vanilla ML
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// See: https://gist.github.com/jdh30/6130c615b5945fd57fc0ea74fcb87e05 | |
open System.Text.RegularExpressions | |
type BinOp = Add | Sub | Le | |
type expr = | |
| Int of int | |
| Var of string | |
| BinOp of expr * BinOp * expr | |
| If of expr * expr * expr | |
| Apply of expr * expr | |
type defn = | |
| LetRec of string * string * expr | |
type Elt<'c> = | |
| S of string | |
| R of Regex | |
| C of 'c | |
| List0 of Elt<'c> list | |
type Result = | |
| L of string | |
| E of expr | |
| D of defn | |
| Rs of Result list list | |
| P of defn list * expr | |
type Grammar<'c when 'c: comparison> = Map<'c, (Elt<'c> list * (Result list -> Result)) list> | |
type Cs = Expr | Defn | Program | |
let reint = R(Regex "[0-9]+") | |
let relident = R(Regex "[a-z][a-zA-Z0-9]*") | |
let grammar : Grammar<Cs> = | |
[ | |
Expr, | |
[ [ S"if"; C Expr; S"then"; C Expr; S"else"; C Expr ], fun [E p; E t; E f] -> E(If(p, t, f)) | |
[ C Expr; S"<="; C Expr ], fun [E e1; E e2] -> E(BinOp(e1, Le, e2)) | |
[ C Expr; S"+"; C Expr ], fun [E e1; E e2] -> E(BinOp(e1, Add, e2)) | |
[ C Expr; S"-"; C Expr ], fun [E e1; E e2] -> E(BinOp(e1, Sub, e2)) | |
[ C Expr; C Expr ], fun [E f; E x] -> E(Apply(f, x)) | |
[ relident ], fun [L v] -> E(Var v) | |
[ reint ], fun [L n] -> E(Int(int n)) | |
[ S"("; C Expr; S")" ], fun [e] -> e ] | |
Defn, | |
[ [ S"let"; S"rec"; relident; relident; S"="; C Expr ], fun [L f; L x; E body] -> | |
D(LetRec(f, x, body)) ] | |
Program, | |
[ [ List0[C Defn]; S"do"; C Expr ], fun [Rs defns; E run] -> P([for [D defn] in defns -> defn], run) ] | |
] | |
|> Map.ofSeq | |
let keywords = set["let"; "rec"; "if"; "then"; "else"; "do"] | |
let rec pmatches grammar c toks = | |
pmatchesAux grammar c (Map.find c grammar) toks | |
and pmatchesAux grammar c precs toks = | |
match precs with | |
| [] -> None | |
| ([], _)::precs -> pmatchesAux grammar c precs toks | |
| (elt::elts, action)::precs -> | |
let result = | |
match pmatch (Map.add c precs grammar) [] [elt] toks with | |
| None -> None | |
| Some(rs, toks) -> | |
match pmatch grammar rs elts toks with | |
| None -> None | |
| Some(rs, toks) -> Some(action rs, toks) | |
match result with | |
| None -> pmatchesAux grammar c precs toks | |
| Some(rs, toks) -> Some(rs, toks) | |
and pmatch grammar results patt toks = | |
match patt, toks with | |
| [], toks -> Some(List.rev results, toks) | |
| S p::patt, s::toks -> | |
if p <> s then None else | |
pmatch grammar results patt toks | |
| R p::patt, s::toks -> | |
if not(keywords.Contains s) && p.IsMatch s then | |
pmatch grammar (L s::results) patt toks | |
else None | |
| C c::patt, toks -> | |
match pmatches grammar c toks with | |
| None -> None | |
| Some(v, toks) -> pmatch grammar (v::results) patt toks | |
| List0 patt::patts, toks -> | |
let rec loop rss toks = | |
match pmatch grammar [] patt toks with | |
| None -> pmatch grammar (Rs(List.rev rss)::results) patts toks | |
| Some(rs, toks) -> loop (rs::rss) toks | |
loop [] toks | |
| _, [] -> None | |
let tokens = | |
"let rec fib n = | |
if n <= 2 then 1 else | |
fib ( n - 1 ) + fib ( n - 2 ) | |
do fib 40" | |
.Replace("\n", " ") | |
.Split([|' '|], System.StringSplitOptions.RemoveEmptyEntries) | |
|> List.ofSeq | |
pmatches grammar Program tokens |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment