Skip to content

Instantly share code, notes, and snippets.

@JasonKleban
Created November 5, 2019 02:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JasonKleban/c5d610c8aafd9b9ea2efff63a2e0b9b4 to your computer and use it in GitHub Desktop.
Save JasonKleban/c5d610c8aafd9b9ea2efff63a2e0b9b4 to your computer and use it in GitHub Desktop.
Minimal monadic parsing combinator, example grammar, and test cases
#nowarn "40"
open System
// Complete Monadic Parsing "Library"
type Parser<'r> = Parser of (char list -> ('r*char list) list)
let parse (Parser p) = p
let (>>=) p f = Parser(fun cs ->
List.concat [for (r,cs') in parse p cs -> parse (f r) cs'])
let (>>) p q = p >>= fun _ -> q
let (<|>) p q = Parser(fun cs ->
match parse p cs with
| [] -> parse q cs
| rs -> rs)
let mreturn r = Parser(fun cs -> [(r,cs)])
let lambda = Parser(fun _ -> [])
let item = Parser(fun cs ->
match cs with [] -> [] | c::cs' -> [(c,cs')])
let sat cond =
item >>= fun c -> if cond c then mreturn c else lambda
let char c = sat ((=)c)
let rec many0 p = many1 p <|> mreturn []
and many1 p = p >>= fun r -> many0 p >>= fun rs -> mreturn (r::rs)
// Command-line style Argument tokenizer (sort of)
let rec parseLine s = // parse a `line` from a sequence of chars that make up the string
fst <| Seq.head (parse line <| Seq.toList s) // parse returns the parsed construction (multiple, if ambiguous, so we take the `Seq.head`) AND the remaining unmatched characters
// in a tuple, but we just want the former of those two components (`fst`)
and line =
many0 (char ' ') // eat any leading space characters
>> many0 value // expect zero or more `value`s
>>= fun values -> mreturn values
and value =
(quotedValue <|> unquotedValue) // a value can be a quoted or unquoted value
>>= fun v -> many0 (char ' ') >> mreturn v
and quotedValue =
char '\"' // starts with a "
>> many0 ((char '\\' >> char '\"') <|> sat (fun c -> c <> '\"')) // anything until a ", unless it is escaped
>>= fun v -> char '\"' >> mreturn (String.Concat(v)) // ends with a "
and unquotedValue =
many1 ((char '\\' >> char '\"') <|> sat (fun c -> c <> '\"' && not <| Char.IsWhiteSpace(c))) // anything until whitespace or " unless it is escaped
>>= fun v -> mreturn (String.Concat(v))
// Try: (parse line <| Seq.toList "\"One A\" \"Two B\" \"???") |> printfn "%A" // Incomplete match example
parseLine "" |> printfn "%A" // []
parseLine " " |> printfn "%A" // []
parseLine "One" |> printfn "%A" // ["One"]
parseLine "One Two" |> printfn "%A" // ["One"; "Two"]
parseLine "\"One A\"" |> printfn "%A" // ["One A"]
parseLine "\"One A\" \"Two B\"" |> printfn "%A" // ["One A"; "Two B"]
parseLine " Zero \"One A\" Two\"Thre\\\"e C\"Four " |> printfn "%A" // ["Zero"; "One A"; "Two"; "Thre\"e C"; "Four"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment