Created
November 28, 2012 19:55
-
-
Save muratg/4163717 to your computer and use it in GitHub Desktop.
Monadic parsing in F# - translated from "Monadic parsing in Haskell" paper by Graham Hutton & Erik Meijer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// =================== | |
// Monadic parsing in F# | |
// by Murat Girgin | |
// Translated from "Monadic parsing in Haskell" paper by Graham Hutton & Erik Meijer | |
// Source: http://www.cs.nott.ac.uk/~gmh/pearl.pdf | |
// =================== | |
#nowarn "40" | |
// --------------------------------------------------------------------------------- | |
// newtype Parser a = Parser (String -> [(a,String)]) | |
// | |
type 'a Parser = Parser of (char list -> ('a * char list) list) | |
// --------------------------------------------------------------------------------- | |
// item :: Parser Char | |
// item = Parser (\cs -> case cs of | |
// "" -> [] | |
// (c:cs) -> [(c,cs)]) | |
// | |
let item = Parser (fun cs -> match cs with | |
| [] -> [] | |
| c::cs -> [c, cs]) | |
// --------------------------------------------------------------------------------- | |
// parse (Parser p) = p | |
// | |
let parse (Parser p) = p | |
// --------------------------------------------------------------------------------- | |
// instance Monad Parser where | |
// return a = Parser (\cs -> [(a,cs)]) | |
// p >>= f = Parser (\cs -> concat [parse (f a) cs' | (a,cs') <- parse p cs]) | |
// -- and... | |
// instance MonadZero Parser where | |
// zero = Parser (\cs -> []) | |
// instance MonadPlus Parser where | |
// p ++ q = Parser (\cs -> parse p cs ++ parse q cs) | |
// | |
type ParserBuilder () = | |
member x.Return a = Parser (fun cs -> [a, cs]) | |
member x.ReturnFrom a = a | |
member x.Bind (p, f) = Parser (fun cs -> List.concat [for (a, cs') in parse p cs -> parse (f a) cs']) | |
// and... | |
member x.Zero () = Parser (fun cs -> []) | |
static member (++) (p, q) = Parser (fun cs -> (parse p cs) @ (parse q cs)) | |
let (++) p q = ParserBuilder.(++) (p, q) | |
// --------------------------------------------------------------------------------- | |
// (+++) :: Parser a -> Parser a -> Parser a | |
// p +++ q = Parser (\cs -> case parse (p ++ q) cs of | |
// [] -> [] | |
// (x:xs) -> [x]) | |
// | |
let (+++) p q = Parser (fun cs -> match (parse (p ++ q) cs) with | |
| [] -> [] | |
| x::xs -> [x]) | |
let parser = new ParserBuilder() | |
// --------------------------------------------------------------------------------- | |
// sat :: (Char -> Bool) -> Parser Char | |
// sat p = do {c <- item; if p c then return c else zero} | |
// | |
let sat p = parser {let! c = item | |
if p c then return c} // no need for "else Zero" in F# -- see x.Zero definition above | |
// --------------------------------------------------------------------------------- | |
// -- Example: a parser for special characters can be defined as follows: | |
// char :: Char -> Parser Char | |
// char c = sat (c ==) | |
// | |
let char' c = sat ((=) c) | |
// --------------------------------------------------------------------------------- | |
// string :: String -> Parser String | |
// string "" = return "" | |
// string (c:cs) = do {char c; string cs; return (c:cs)} | |
// | |
let rec string' = function | |
| [] -> parser {return []} | |
| c::cs -> parser {let! _ = char' c | |
let! _ = string' cs | |
return c::cs} | |
// --------------------------------------------------------------------------------- | |
// many :: Parser a -> Parser [a] | |
// many p = many1 p +++ return [] | |
// | |
// many1 :: Parser a -> Parser [a] | |
// many1 p = do {a <- p; as <- many p; return (a:as)} | |
// | |
let rec many p = (many1 p) +++ parser {return []} | |
and many1 p = parser {let! a = p | |
let! as' = many p | |
return a::as'} | |
// --------------------------------------------------------------------------------- | |
// sepby :: Parser a -> Parser b -> Parser [a] | |
// p `sepby` sep = (p `sepby1` sep) +++ return [] | |
// | |
// sepby1 :: Parser a -> Parser b -> Parser [a] | |
// p `sepby1` sep = do a <- p | |
// as <- many (do {sep; p}) | |
// return (a:as) | |
// | |
let rec sepby p sep = (sepby1 p sep) +++ parser {return []} | |
and sepby1 p sep = parser {let! a = p; | |
let! as' = many (parser {let! _ = sep | |
let! _ = p | |
return! p}) | |
return a::as'} | |
// --------------------------------------------------------------------------------- | |
// chainl :: Parser a -> Parser (a -> a -> a) -> a -> Parser a | |
// chainl p op a = (p `chainl1` op) +++ return a | |
// | |
// chainl1 :: Parser a -> Parser (a -> a -> a) -> Parser a | |
// p `chainl1` op = do {a <- p; rest a} | |
// where | |
// rest a = (do f <- op | |
// b <- p | |
// rest (f a b)) | |
// +++ return a | |
// | |
let rec chainl p op a = (chainl1 p op) +++ parser {return a;} | |
and chainl1 p op = let rec rest a = parser {let! f = op; | |
let! b = p; | |
return! rest (f a b)} +++ parser {return a} | |
parser {let! a = p | |
return! rest a} | |
// --------------------------------------------------------------------------------- | |
// space :: Parser String | |
// space = many (sat isSpace) | |
// | |
let space = | |
let isSpace c = List.exists ((=) c) [' '; '\t'; '\n'; '\r'] | |
many (sat isSpace) | |
// --------------------------------------------------------------------------------- | |
// token :: Parser a -> Parser a | |
// token p = do {a <- p; space; return a} | |
// | |
let token p = parser {let! a = p | |
let! _ = space | |
return a} | |
// --------------------------------------------------------------------------------- | |
// symb :: String -> Parser String | |
// symb cs = token (string cs) | |
// | |
let symb cs = token (string' cs) | |
// --------------------------------------------------------------------------------- | |
// apply :: Parser a -> String -> [(a,String)] | |
// apply p = parse (do {space; p}) | |
// | |
let apply p = parse (parser {let! _ = space | |
let! ret = p | |
return ret}) | |
// =================== | |
// Demo | |
// =================== | |
// --------------------------------------------------------------------------------- | |
// We illustrate the combinators defined in this article with a simple example. Consider | |
// the standard grammar for arithmetic expressions built up from single digits using | |
// the operators +, -, * and /, together with parentheses (Aho et al., 1986): | |
// | |
// expr ::= expr addop term j term | |
// term ::= term mulop factor j factor | |
// factor ::= digit j ( expr ) | |
// digit ::= 0 j 1 j : : : j 9 | |
// addop ::= + j - | |
// mulop ::= * j / | |
// | |
// Using the chainl1 combinator to implement the left-recursive production rules for | |
// expr and term, this grammar can be directly translated into a Haskell program | |
// that parses expressions and evaluates them to their integer value: | |
// | |
// expr :: Parser Int | |
// addop :: Parser (Int -> Int -> Int) | |
// mulop :: Parser (Int -> Int -> Int) | |
// | |
// expr = term `chainl1` addop | |
// term = factor `chainl1` mulop | |
// factor = digit +++ do {symb "("; n <- expr; symb ")"; return n} | |
// digit = do {x <- token (sat isDigit); return (ord x - ord '0')} | |
// | |
// addop = do {symb "+"; return (+)} +++ do {symb "-"; return (-)} | |
// mulop = do {symb "*"; return (*)} +++ do {symb "/"; return (div)} | |
// | |
let addOp = parser {let! _ = symb ['+'] | |
return (+)} +++ parser {let! _ = symb ['-'] | |
return (-)} | |
let mulOp = parser {let! _ = symb ['*'] | |
return (*)} +++ parser {let! _ = symb ['/'] | |
return (/)} | |
let digit = | |
let isDigit c = c >= '0' && c <= '9' | |
let c2f (c:char) = System.Double.Parse(c.ToString()) | |
parser {let! x = token (sat isDigit) | |
return c2f x} | |
let rec expr = chainl1 term addOp | |
and term = chainl1 factor mulOp | |
and factor = digit +++ parser {let! _ = symb ['('] | |
let! n = expr | |
let! _ = symb [')'] | |
return n} | |
let (* string to char list *) s2cl (x:string) = x.ToCharArray() |> List.ofArray | |
// --------------------------------------------------------------------------------- | |
// For example, evaluating apply expr " 1 - 2 * 3 + 4 " gives the singleton list | |
// of results [(-1,"")], which is the desired behaviour. | |
// | |
apply expr (s2cl " 1 - 2 * 3 + 4 ") | |
|> printfn "evaluating << apply expr \" 1 - 2 * 3 + 4 \" >> returns << %A >>" | |
// basic evaluation test | |
let evalTest s = | |
match apply expr s with | |
| [] -> failwith "failed to parse" | |
| (ret, _)::xs -> ret | |
evalTest (s2cl "2*4+(5-3)") | |
|> printfn "%f should be equal to 10.0" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment