Last active
March 18, 2021 09:40
-
-
Save mrange/0d02928784d60d6384fbb39c55578605 to your computer and use it in GitHub Desktop.
F# simple and slow parser combinators
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Inspired by: http://www.cs.nott.ac.uk/~pszgmh/monparsing.pdf | |
// A Parser is a function that given a string and an index into that string | |
// produces a parsed value and the index of the first unparsed character in the string | |
type Parser<'T> = P of (string -> int -> ('T*int) option) | |
module Parser = | |
open System | |
// Runs the parser on a string | |
let parse (P t) s = t s 0 | |
// A parser that always fails | |
let fail () = P <| fun s i -> | |
None | |
// A parser that always succeeds with v | |
let value v = P <| fun s i -> | |
Some (v, i) | |
// Some grammars are recursive which requires us to be able to | |
// create a "placeholder" parser that we use when building our parsers. | |
// When we are ready we replace the placeholder parser using the setter function | |
let placeholder<'T> () : Parser<'T>*(Parser<'T> -> unit) = | |
let mutable gp = fail () | |
let pre = | |
P <| fun s i -> | |
let (P p) = gp | |
p s i | |
let upd p = gp <- p | |
pre, upd | |
// The monadic bind combinator | |
// A powerful way to bind parsers together | |
let (>>=) (P t) uf = P <| fun s i -> | |
match t s i with | |
| None -> None | |
| Some (tv, ti) -> | |
let (P u) = uf tv | |
u s ti | |
// Either-or combinator | |
// If t succeeds return that result otherwise try u | |
let (<|>) (P t) (P u) = P <| fun s i -> | |
match t s i with | |
| None -> u s i | |
| o -> o | |
// Applicative functor apply | |
let (<*>) f t = f >>= (fun fv -> t >>= (fun tv -> value (fv tv))) | |
// Map operation | |
let (|>>) t f = t >>= (fun tv -> value (f tv)) | |
// If t succeeds return v | |
let (|>!) t v = t >>= (fun tv -> value v) | |
// Combines result of two parsers using m | |
let combine m f s = value m <*> f <*> s | |
// Combines two parsers and return result as pair | |
let (.>>.) f s = combine (fun x y -> (x, y)) f s | |
// Combines two parsers but only keep result of first | |
let (.>>) f s = combine (fun x _ -> x) f s | |
// Combines two parsers but only keep result of second | |
let (>>.) f s = combine (fun _ x -> x) f s | |
// Repeats t until it fails and returns all successfully parsed values as list | |
let many t = | |
let rec loop a = | |
let next = t >>= (fun tv -> loop (tv::a)) | |
next <|> value a | |
loop [] |>> List.rev | |
// As many but requires at least 1 successful value | |
let many1 t = | |
let c f r = f::r | |
value c <*> t <*> many t | |
// Chains parsers separated by an operator parser | |
// Left associative | |
let chainl t o = | |
let rec loop a = | |
let next = o >>= (fun ov -> t >>= (fun tv -> loop (ov a tv))) | |
next <|> value a | |
t >>= loop | |
// Succeeds if EOF | |
let eof = P <| fun s i -> | |
if i < s.Length then None else Some ((), i) | |
// Extracts a single char unless at EOF | |
let char = P <| fun s i -> | |
if i < s.Length then Some (s.[i], i + 1) else None | |
// Checks if the extracted char satisfies t | |
let satisfy t = char >>= (fun ch -> if t ch then value ch else fail ()) | |
// Skips a specific character | |
let skipChar c = char >>= (fun ch -> if ch = c then value () else fail ()) | |
// Ensures runs o before e and c after e. Only keeps result of e | |
let between o e c = o >>. e .>> c | |
let digit = satisfy Char.IsDigit | |
let letter = satisfy Char.IsLetter | |
let whitespace = satisfy Char.IsWhiteSpace | |
let whitespaces = many whitespace | |
module Calculator = | |
open Parser | |
// The expression tree | |
type AST = | |
| Number of int | |
| Identifier of string | |
| Operation of char*(int -> int -> int)*AST*AST | |
// For the group parser we the inner expression which almost the full | |
// expression, therefore it's a recursive grammar and we predefine the inner | |
// parser and sets it later | |
let inner, setInner = placeholder<AST> () | |
// Parses a group term | |
let group = between (skipChar '(' .>> whitespaces) inner (skipChar ')' .>> whitespaces) | |
// Parses an identifier term | |
let identifier = | |
many1 letter | |
|>> (fun cs -> cs |> List.toArray |> System.String |> Identifier) | |
// Parses an number term | |
let number = | |
let folder s v = 10*s + (int v - int '0') | |
many1 digit | |
|>> (fun cs -> cs |> List.fold folder 0 |> Number) | |
// Combines group, identifer and number in a term parser | |
let term = | |
group <|> identifier <|> number | |
.>> whitespaces | |
let op c f = skipChar c |>! (fun l r -> Operation (c, f, l, r)) | |
// Defines the different operator parsers | |
let op_plus = op '+' ( + ) | |
let op_minus = op '-' ( - ) | |
let op_multiply = op '*' ( * ) | |
let op_divide = op '/' ( / ) | |
// Chains terms with mulitply and divide | |
let op0 = chainl term (op_multiply <|> op_divide .>> whitespaces) | |
// Chains previous with plus and minus | |
// by splitting in different steps we get different precedence | |
let op1 = chainl op0 (op_plus <|> op_minus .>> whitespaces) | |
// We can now set the inner express to op1 | |
do setInner op1 | |
// The full parser | |
let expr = between whitespaces op1 eof | |
// Evals an expression tree given an environment map | |
let eval env e = | |
let rec loop t = | |
match t with | |
| Number n -> n | |
| Identifier i -> env |> Map.find i | |
| Operation (_,f, l, r) -> f (loop l) (loop r) | |
loop e | |
open Parser | |
open Calculator | |
[<EntryPoint>] | |
let main argv = | |
let env = | |
[| | |
"x" , 3 | |
"y" , 4 | |
"xyz" , 5 | |
|] | |
|> Map.ofArray | |
let tests = [| | |
"1" | |
"123" | |
"x" | |
"xyz" | |
"x+2" | |
"x + 2*y" | |
"(x + 2)*y" | |
|] | |
for test in tests do | |
match parse expr test with | |
| None -> printfn "%s: Failed to parse" test | |
| Some (e, _) -> printfn "%s: eval: %d, expr: %A" test (eval env e) e | |
0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment