Skip to content

Instantly share code, notes, and snippets.

@joshcough
Created October 31, 2011 16:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joshcough/1327907 to your computer and use it in GitHub Desktop.
Save joshcough/1327907 to your computer and use it in GitHub Desktop.
Parser Combinator in F#
type ParseResult<'a> =
| Success of 'a * string
| Failure of string
type Parser<'a> =
abstract Parse: string -> ParseResult<'a>
type MappedParser<'a, 'b>(f: 'a -> 'b, m:Parser<'a>) =
interface Parser<'b> with
member p.Parse(s) = match m.Parse(s) with
| Success (a, rest) -> Success(f(a), rest)
| Failure (message) -> Failure(message)
type DiscardingParser<'a, 'b>(o:Parser<'a>, v:Lazy<'b>) =
interface Parser<'b> with
member p.Parse(s) = match o.Parse(s) with
| Success (_, rest) -> Success(v.Force(), rest)
| Failure (message) -> Failure(message)
type OptionalParser<'a>(o:Parser<'a>) =
interface Parser<Option<'a>> with
member p.Parse(s) = match o.Parse(s) with
| Success (v, rest) -> Success(Some(v), rest)
| Failure (message) -> Success(None, s)
type WordParser(findMe:string) =
interface Parser<string> with
member p.Parse(s) =
if findMe.Length <= s.Length && s.Substring(0, findMe.Length).Equals(findMe)
then Success(findMe, s.Substring(findMe.Length))
else Failure("didn't find: " + findMe)
type AndParser<'a, 'b>(left:Lazy<Parser<'a>>, right:Lazy<Parser<'b>>) =
interface Parser<'a * 'b> with
member p.Parse(s) = match left.Force().Parse(s) with
| Success (al, rest) -> match right.Force().Parse(rest) with
| Success (ar, rest) -> Success ((al, ar), rest)
| Failure (message) -> Failure (message)
| Failure (message) -> Failure (message)
type OrParser<'a>(left:Parser<'a>, right:Parser<'a>) =
interface Parser<'a> with
member p.Parse(s) = match left.Parse(s) with
| Success (al, rest) -> Success(al, rest)
| Failure (leftMessage) -> match right.Parse(s) with
| Success (ar, rest) -> Success (ar, rest)
| Failure (rightMessage) -> Failure (leftMessage + " and " + rightMessage)
type NeverMatch<'a>() =
interface Parser<'a> with
member p.Parse(s) = Failure("never")
let inline (^^) (p:Parser<'a>)(f: 'a -> 'b) = MappedParser<'a, 'b>(f, p) :> Parser<'b>
let inline (^^^) (p:Parser<'a>)(v:Lazy<'b>) = DiscardingParser<'a, 'b>(p, v) :> Parser<'b>
let inline (|||) (l:Parser<'a>)(r:Parser<'b>) = OrParser<'a>(l, r) :> Parser<'a> /// hmm, b is weird here
let inline (++) (l:Parser<'a>)(r:Parser<'b>) = AndParser(lazy(l), lazy(r))
let inline (+++) (l:Parser<'a>)(r:Lazy<Parser<'b>>) = AndParser(lazy(l),r)
let opt<'a>(p:Parser<'a>) = OptionalParser(p) :> Parser<Option<'a>>
let never<'a> () = NeverMatch<'a>() :> Parser<'a>
let rec oneOf<'a> (parsers: List<Parser<'a>>) : Parser<'a> =
match parsers with
| p :: ps -> p ||| oneOf<'a>(ps)
| _ -> never()
let cons (x, xs) = x :: xs
let matchChar (c:char) = WordParser(string c) ^^ (fun s -> s.[0])
let emptyString = WordParser("") :> Parser<string>
let rec zeroOrMore<'a> (p:Parser<'a>): Parser<List<'a>> =
(p +++ lazy(zeroOrMore<'a>(p)) ^^ cons) ||| (emptyString ^^^ lazy([]))
let oneOrMore<'a> (p:Parser<'a>): Parser<List<'a>> = p ++ zeroOrMore<'a>(p) ^^ cons
let repsep<'a, 'b> (pa:Parser<'a>, pb:Parser<'b>) : Parser<List<'a>> =
let absParser = pa ++ pb
let asParser = absParser ^^ fst
let manyAsParser = zeroOrMore(asParser)
let optAParser = opt(pa) ^^ Option.toList
manyAsParser ++ optAParser ^^ (fun (l, r) -> l @ r)
let oneOfChars (cs:List<char>) : Parser<char> = oneOf (cs |> List.map matchChar)
let oneToNine = oneOfChars ['1'..'9']
let one = matchChar '1'
let zeroToNine = oneOfChars ['0'..'9']
let digit = zeroToNine
let charListToString (cs: List<char>) : string = cs |> List.map string |> List.reduce (+)
let charListToInt (cs: List<char>) : int = cs |> charListToString |> int
let number: Parser<int> = oneOrMore(digit) ^^ charListToInt
let space = oneOfChars [' '; '\n'; '\t']
let spaces = zeroOrMore space
let numbers = repsep(number, spaces)
let letter = (oneOfChars ['a'..'z']) ||| (oneOfChars ['a'..'z'])
let underscore = matchChar '_'
let idBody = zeroOrMore(oneOf([letter; digit; underscore]))
let id = letter ++ idBody ^^ (fun (x, xs) -> x :: xs |> charListToString)
type SExpr =
| Number of int
| Atom of string
| SList of List<SExpr>
let rec sexpr: Parser<SExpr> = oneOf [number ^^ Number; id ^^ Atom; list ^^ SList]
and listStart<'a> = matchChar('(') ^^^ lazy([])
and listEnd<'a> = matchChar(')') ^^^ lazy([])
and list = listStart +++ lazy(repsep(sexpr, spaces)) +++ lazy(listEnd) ^^ (fun ((_, l), _) -> l)
printfn "x in xxx: %A" (matchChar('x').Parse("xxx"))
printfn "x or y in yaa: %A" ((oneOf [(matchChar 'x'); (matchChar 'y')]).Parse("yaa"))
printfn "x in xxx: %A" (matchChar('x').Parse("xxx"))
printfn "digit? 234: %A" (digit.Parse("234"))
printfn "zero or more 1's in 1222: %A" ((zeroOrMore(one)).Parse("1222"))
printfn "zero or more 1's in 111222: %A" ((zeroOrMore(one)).Parse("111222"))
printfn "zero or more 1's in 222: %A" ((zeroOrMore(one)).Parse("222"))
printfn "zero or more 1's in 111: %A" ((zeroOrMore(one)).Parse("111"))
printfn "zero or more 1's in empty string: %A" ((zeroOrMore(one)).Parse(""))
printfn "one or more 1's in 1222: %A" ((oneOrMore(one)).Parse("1222"))
printfn "one or more 1's in 111222: %A" ((oneOrMore(one)).Parse("111222"))
printfn "one or more 1's in 222: %A" ((oneOrMore(one)).Parse("222"))
printfn "one or more 1's in 111: %A" ((oneOrMore(one)).Parse("111"))
printfn "one or more 1's in empty string: %A" ((oneOrMore(one)).Parse(""))
printfn "a or b in accc: %A" ((oneOfChars ['a';'b']).Parse("accc"))
printfn "number in 1234: %A" (number.Parse("1234"))
printfn "spaces in ' ' : %A" (spaces.Parse(" "))
printfn "numbers in '1 2 3 4 5' : %A" (numbers.Parse("1 2 3 4 5"))
printfn "numbers in '123 2 3673 4 5' : %A" (numbers.Parse("123 2 3673 4 5"))
printfn "id in 'x5' : %A" (id.Parse("x5"))
printfn "id in 'x_123' : %A" (id.Parse("x_123"))
printfn "id in 'x__1__z' : %A" (id.Parse("x__1__z"))
printfn "id in 'x__' : %A" (id.Parse("x__"))
printfn "sexpr in 'x__' : %A" (sexpr.Parse("x__"))
printfn "sexpr in 'x5' : %A" (sexpr.Parse("x5"))
printfn "sexpr in '5' : %A" (sexpr.Parse("5"))
printfn "sexpr in '(5)' : %A" (sexpr.Parse("(5)"))
printfn "sexpr in '(5 x)' : %A" (sexpr.Parse("(5 x)"))
printfn "sexpr in '(5 x x 5)' : %A" (sexpr.Parse("(5 x x 5)"))
printfn "sexpr in '(5 (x x) 5)' : %A" (sexpr.Parse("(5 (x x) 5)"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment