Skip to content

Instantly share code, notes, and snippets.

@leetschau
Created July 14, 2021 09:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save leetschau/2bbfaf6f2c5175a4526a004d06fadcbe to your computer and use it in GitHub Desktop.
Save leetschau/2bbfaf6f2c5175a4526a004d06fadcbe to your computer and use it in GitHub Desktop.
Understanding Parser Combinators Series
/// This is the implementation of the first 2 parts of the wonderful 4-parts
/// posts "Understanding Parser Combinators" of
/// [F# for fun and profit](https://fsharpforfunandprofit.com/):
module ParserLib1 =
open System
/// Type that represents Success/Failure in parsing
type ParseResult<'a> =
| Success of 'a
| Failure of string
/// Type that wraps a parsing function
type Parser<'T> = Parser of (string -> ParseResult<'T * string>)
/// Parse a single character
let pchar charToMatch =
// define a nested inner function
let innerFn str =
if String.IsNullOrEmpty(str) then
Failure "No more input"
else
let first = str.[0]
if first = charToMatch then
let remaining = str.[1..]
Success (charToMatch,remaining)
else
let msg = sprintf "Expecting '%c'. Got '%c'" charToMatch first
Failure msg
// return the "wrapped" inner function
Parser innerFn
/// Run a parser with some input
let run parser input =
// unwrap parser to get inner function
let (Parser innerFn) = parser
// call inner function with input
innerFn input
/// Combine two parsers as "A andThen B"
let andThen parser1 parser2 =
let innerFn input =
// run parser1 with the input
let result1 = run parser1 input
// test the result for Failure/Success
match result1 with
| Failure err ->
// return error from parser1
Failure err
| Success (value1, remaining1) ->
// run parser2 with the remaining input
let result2 = run parser2 remaining1
// test the result for Failure/Success
match result2 with
| Failure err ->
// return error from parser2
Failure err
| Success (value2, remaining2) ->
// combine both values as a pair
let newValue = (value1, value2)
// return remaining input after parser2
Success (newValue, remaining2)
// return the inner function
Parser innerFn
/// Infix version of andThen
let ( .>>. ) = andThen
/// Combine two parsers as "A orElse B"
let orElse parser1 parser2 =
let innerFn input =
// run parser1 with the input
let result1 = run parser1 input
// test the result for Failure/Success
match result1 with
| Success result ->
// if success, return the original result
result1
| Failure err ->
// if failed, run parser2 with the input
let result2 = run parser2 input
// return parser2's result
result2
// return the inner function
Parser innerFn
/// Infix version of orElse
let ( <|> ) = orElse
/// Choose any of a list of parsers
let choice listOfParsers =
List.reduce ( <|> ) listOfParsers
/// Choose any of a list of characters
let anyOf listOfChars =
listOfChars
|> List.map pchar // convert into parsers
|> choice
open ParserLib1
module ParserLib2 =
let mapP f parser =
let innerFn input =
// run parser with the input
let result = run parser input
// test the result for Failure/Success
match result with
| Success (value, remaining) ->
// if success, return the value transformed by f
let newValue = f value
Success (newValue, remaining)
| Failure err ->
// if failed, return the error
Failure err
// return the inner function
Parser innerFn
let ( <!> ) = mapP
let ( |>> ) x f = mapP f x
let parseDigit = anyOf ['0'..'9']
// 注意此函数中 .>>. 与 |>> 的对应关系:
// 每次 .>>. 都会生成一个 (x, y),(见 L61)
let parseThreeDigitsAsStr =
(parseDigit .>>. parseDigit .>>. parseDigit) |>>
fun ((c1, c2), c3) -> System.String [| c1; c2; c3 |]
let returnP x =
let innerFn input =
// ignore the input and return x
Success (x, input)
// return the inner function
Parser innerFn
let applyP fP xP =
// create a Parser containing a pair (f, x)
// 这里的 fP 是一个 Parser<'a -> 'b>,就是说当这个 Parser 解析成功后生成一个函数,
// 比如普通解析器遇到 'A` 返回 Success("A") 之类的具体值,
// 这个解析器遇到 'A' 返回 Success(fun x -> x + 1, remaining-string)
// 之类的函数作为解析结果
(fP .>>. xP)
// map the pair by applying f to x,
// 即将 fP 里包裹的函数取出来 apply 到 xP 里包裹的值上
|> mapP (fun (f, x) -> f x)
let ( <*> ) = applyP
// returnP f 将普通函数 f 转换为解析器包裹的函数,然后再 applyP 到后面的数值上
let lift2 f xP yP = returnP f <*> xP <*> yP
// 用这种方法可以将任何 OOP 式的方法转换为 FP 式的函数调用
let startsWith (str: string) (prefix: string) = str.StartsWith(prefix)
let startsWithP = lift2 startsWith
let rec sequence parserList =
// define the "cons" function, which is a two parameter function
// 用这种方法可以将任何不方便用管道符的方法(比如 ::、 .[0])转换为 FP 式的函数调用
let cons head tail = head::tail
// lift it to Parser World
let consP = lift2 cons
// process the list of parsers recursively
match parserList with
| [] ->
returnP []
| head::tail ->
consP head (sequence tail)
let parsers = [ pchar 'A'; pchar 'B'; pchar 'C' ]
let combined = sequence parsers
let charListToStr charList = charList |> List.toArray |> System.String
// match a specific string
let pstring str =
str
// convert to list of char
|> List.ofSeq
// map each char to a pchar
|> List.map pchar
// convert to Parser<char list>
|> sequence
// convert Parser<char list> to Parser<string>
|> mapP charListToStr
let parseABC = pstring "ABC"
let rec parseZeroOrMore parser input =
// run parser with the input
let firstResult = run parser input
// test the result for Failure/Success
match firstResult with
| Failure err -> ([], input)
// if parse fails, return empty list
| Success (firstValue, inputAfterFirstParse) ->
// if parse succeeds, call recursively
// to get the subsequent values
let (subsequentValues, remainingInput) =
parseZeroOrMore parser inputAfterFirstParse
let values = firstValue :: subsequentValues
(values, remainingInput)
/// match zero or more occurrences of the specified parser
let many parser =
let innerFn input =
// parse the input -- wrap in Success as it always succeeds
Success (parseZeroOrMore parser input)
Parser innerFn
let manyA = many (pchar 'A')
/// match one or more occurrences of the specified parser
let many1 parser =
let innerFn input =
// run parser with the input
let firstResult = run parser input
// test the result for Failure/Success
match firstResult with
| Failure err ->
Failure err // failed
| Success (firstValue, inputAfterFirstParse) ->
// if first found, look for zeroOrMore now
let (subsequentValues, remainingInput) =
parseZeroOrMore parser inputAfterFirstParse
let values = firstValue :: subsequentValues
Success (values, remainingInput)
Parser innerFn
let pint =
let resultToInt digitList =
// ignore int overflow for now
digitList |> List.toArray |> System.String |> int
// define parser for one digit
let digit = anyOf ['0'..'9']
// define parser for one or more digits
let digits = many1 digit
// map the digits to an int
digits |> mapP resultToInt
let opt p =
let some = p |>> Some
let none = returnP None
some <|> none
let pint2 =
let resultToInt (sign,charList) =
let i = charList |> List.toArray |> System.String |> int
match sign with
| Some ch -> -i // negate the int
| None -> i
// define parser for one digit
let digit = anyOf ['0'..'9']
// define parser for one or more digits
let digits = many1 digit
// parse and convert
opt (pchar '-') .>>. digits |>> resultToInt
/// Keep only the result of the left side parser
let (.>>) p1 p2 =
// create a pair
p1 .>>. p2
// then only keep the first value
|> mapP (fun (a,b) -> a)
/// Keep only the result of the right side parser
let (>>.) p1 p2 =
// create a pair
p1 .>>. p2
// then only keep the second value
|> mapP (fun (a,b) -> b)
let between p1 p2 p3 =
p1 >>. p2 .>> p3
let sepBy1 p sep =
let sepThenP = sep >>. p
p .>>. many sepThenP
|>> fun (p,pList) -> p::pList
/// Parses zero or more occurrences of p separated by sep
let sepBy p sep =
sepBy1 p sep <|> returnP []
/// "bindP" takes a parser-producing function f, and a parser p
/// and passes the output of p into f, to create a new parser
let bindP f p =
let innerFn input =
let result1 = run p input
match result1 with
| Failure err ->
// return error from parser1
Failure err
| Success (value1, remainingInput) ->
// apply f to get a new parser
let p2 = f value1
// run parser with remaining input
run p2 remainingInput
Parser innerFn
let (>>=) p f = bindP f p
(*
To run in fsi, you need to run:
#load "myParser.fsx";;
open MyParser;;
open MyParser.ParserLib1;;
open MyParser.ParserLib2;;
*)
open ParserLib1
open ParserLib2
System.String [| '1'; '3'; '5' |] // output: System.String = "135"
run parseThreeDigitsAsStr "123A"
let addP = lift2 (+)
run combined "ABCD"
run parseABC "ABCDE"
run parseABC "A|CDE"
run parseABC "AB|CDE"
// test some success cases
run manyA "ABCD" // Success (['A'], "BCD")
run manyA "AACD" // Success (['A'; 'A'], "CD")
run manyA "AAAD" // Success (['A'; 'A'; 'A'], "D")
// test a case with no matches
run manyA "|BCD" // Success ([], "|BCD")
let manyAB = many (pstring "AB")
run manyAB "ABCD" // Success (["AB"], "CD")
run manyAB "ABABCD" // Success (["AB"; "AB"], "CD")
run manyAB "ZCD" // Success ([], "ZCD")
run manyAB "AZCD" // Success ([], "AZCD")
let whitespaceChar = anyOf [' '; '\t'; '\n']
let whitespace = many whitespaceChar
run whitespace "ABC" // Success ([], "ABC")
run whitespace " ABC" // Success ([' '], "ABC")
run whitespace "\tABC" // Success (['\t'], "ABC")
// define parser for one digit
let digit = anyOf ['0'..'9']
// define parser for one or more digits
let digits = many1 digit
run digits "1ABC" // Success (['1'], "ABC")
run digits "12BC" // Success (['1'; '2'], "BC")
run digits "123C" // Success (['1'; '2'; '3'], "C")
run digits "1234" // Success (['1'; '2'; '3'; '4'], "")
run digits "ABC" // Failure "Expecting '9'. Got 'A'"
run pint "1ABC" // Success (1, "ABC")
run pint "12BC" // Success (12, "BC")
run pint "123C" // Success (123, "C")
run pint "1234" // Success (1234, "")
run pint "ABC" // Failure "Expecting '9'. Got 'A'"
let digitThenSemicolon = digit .>>. opt (pchar ';')
run digitThenSemicolon "1;" // Success (('1', Some ';'), "")
run digitThenSemicolon "1" // Success (('1', None), "")
run pint2 "123C"
run pint2 "-123C"
// use .>> below
let digitThenSemicolon2 = digit .>> opt (pchar ';')
run digitThenSemicolon2 "1;" // Success ('1', "")
run digitThenSemicolon2 "1" // Success ('1', "")
let ab = pstring "AB"
let cd = pstring "CD"
let ab_cd = (ab .>> whitespace) .>>. cd
run ab_cd "AB \t\nCD" // Success (("AB", "CD"), "")
let pdoublequote = pchar '"'
let quotedInteger = between pdoublequote pint pdoublequote
run quotedInteger "\"1234\"" // Success (1234, "")
run quotedInteger "1234" // Failure "Expecting '"'. Got '1'"
let comma = pchar ','
let zeroOrMoreDigitList = sepBy digit comma
let oneOrMoreDigitList = sepBy1 digit comma
run oneOrMoreDigitList "1;" // Success (['1'], ";")
run oneOrMoreDigitList "1,2;" // Success (['1'; '2'], ";")
run oneOrMoreDigitList "1,2,3;" // Success (['1'; '2'; '3'], ";")
run oneOrMoreDigitList "Z;" // Failure "Expecting '9'. Got 'Z'"
run zeroOrMoreDigitList "1;" // Success (['1'], ";")
run zeroOrMoreDigitList "1,2;" // Success (['1'; '2'], ";")
run zeroOrMoreDigitList "1,2,3;" // Success (['1'; '2'; '3'], ";")
run zeroOrMoreDigitList "Z;" // Success ([], "Z;")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment