Created
October 27, 2012 21:21
-
-
Save ion1/3966338 to your computer and use it in GitHub Desktop.
Toy parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Examples where | |
import FooParser | |
import Control.Applicative | |
import Control.Arrow | |
import Control.Monad | |
import Data.Char | |
-- | Parse a word with zero or more pairs of parentheses surrounding it. The | |
-- result contains the number of paren pairs and the word itself. | |
-- | |
-- > runParser (example0 <* end) "(((foo)))" == [((3,"foo"),"")] | |
example0 :: Parser Char (Integer, String) | |
example0 = token '(' *> (first (+1) <$> example0) <* token ')' | |
<|> (,) 0 <$> word | |
-- | Parse a number, some whitespace and then as many characters as the number | |
-- indicated. The result contains the characters. | |
-- | |
-- > runParser example1 "6 foobarbaz" == [("foobar","baz")] | |
example1 :: Parser Char String | |
example1 = do | |
n <- fromInteger <$> number | |
_ <- spaces | |
replicateM n anyToken | |
-- An s-expr subset parser. | |
-- http://www.r6rs.org/final/html/r6rs/r6rs-Z-H-7.html | |
data SExpr = Identifier String | |
| Number Integer | |
| List (SList SExpr) | |
deriving (Eq, Show, Read) | |
data SList a = SList [a] | |
| SDotted [a] a | |
deriving (Eq, Show, Read, Functor) | |
sexpr :: Parser Char SExpr | |
sexpr = identifier <|> sNumber <|> list | |
identifier :: Parser Char SExpr | |
identifier = Identifier <$> liftA2 (:) initial (many subsequent) <* spaces | |
where | |
initial = letter <|> symbol | |
subsequent = letter <|> digit <|> symbol | |
-- 4.2.4 Identifiers | |
letter = satisfy isAlpha | |
digit = satisfy isDigit | |
symbol = satisfy (`elem` "!$%&*+-./:<=>?@^_~") | |
sNumber :: Parser Char SExpr | |
sNumber = Number <$> number <* spaces | |
list :: Parser Char SExpr | |
list = bra *> (List <$> entry) | |
where | |
bra = token '(' <* spaces | |
ket = token ')' <* spaces | |
dot = token '.' <* spaces | |
-- Right after ( or after a previous item in the list. | |
entry = nil <|> notNil | |
nil = SList [] <$ ket | |
notNil = liftA2 cons sexpr (dotted <|> entry) | |
where | |
cons x (SList xs) = SList (x:xs) | |
cons x (SDotted xs y) = SDotted (x:xs) y | |
dotted = dot *> (dottedList <|> dottedSExpr) <* ket | |
-- An inner list follows the dot: just a cons. | |
dottedList = bra *> entry | |
-- Some other sexpr follows the dot. | |
dottedSExpr = SDotted [] <$> sexpr | |
word :: Parser Char String | |
word = some (satisfy isAlpha) | |
number :: Parser Char Integer | |
number = read <$> some (satisfy isDigit) | |
spaces :: Parser Char String | |
spaces = many (satisfy isSpace) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module FooParser | |
( Parser (..) | |
, failure | |
, satisfy | |
, end | |
, anyToken | |
, token | |
) where | |
import Control.Applicative | |
import Control.Arrow | |
import Control.Monad | |
-- [i]: input tokens. | |
-- (o, [i]): parsed output and remaining input. | |
newtype Parser i o = Parser { runParser :: [i] -> [(o, [i])] } | |
-- | A parse failure. | |
failure :: Parser i o | |
failure = Parser (const []) | |
-- Result in the input value without consuming any input. | |
retP :: o -> Parser i o | |
retP o = Parser (\i -> [(o, i)]) | |
-- Map a function to the result values of a parser. | |
mapP :: (a -> b) -> Parser i a -> Parser i b | |
mapP f p = Parser (map (first f) . runParser p) | |
-- For the Monad instance. | |
joinP :: Parser i (Parser i o) -> Parser i o | |
joinP pOuter = Parser $ \i0 -> do | |
(pInner, i1) <- runParser pOuter i0 | |
runParser pInner i1 | |
-- If the first parser fails try the second one. | |
orP :: Parser i o -> Parser i o -> Parser i o | |
orP xp yp = Parser $ \i -> | |
case runParser xp i of | |
[] -> runParser yp i | |
res -> res | |
-- | Match a token if the predicate returns True. | |
satisfy :: (i -> Bool) -> Parser i i | |
satisfy f = Parser $ \i -> | |
case i of | |
(x:xs) | f x -> [(x, xs)] | |
_ -> [] | |
-- | Match the end of input. | |
end :: Parser i () | |
end = Parser $ \i -> | |
case i of | |
[] -> [((), i)] | |
_ -> [] | |
-- | Match any token. | |
anyToken :: Parser i i | |
anyToken = satisfy (const True) | |
-- | Match a token that is equal to the parameter. | |
token :: Eq i => i -> Parser i i | |
token t = satisfy (==t) | |
instance Functor (Parser i) where | |
fmap = mapP | |
instance Applicative (Parser i) where | |
pure = retP | |
(<*>) = ap | |
instance Alternative (Parser i) where | |
empty = failure | |
(<|>) = orP | |
instance Monad (Parser i) where | |
return = retP | |
p >>= f = joinP (mapP f p) | |
fail _ = failure | |
instance MonadPlus (Parser i) where | |
mzero = failure | |
mplus = orP |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment