Last active
March 21, 2021 18:20
-
-
Save gabrielelana/24acb76ce56a1d252dc0c50d604f077d to your computer and use it in GitHub Desktop.
Parser Combinators from zero in Haskell
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Parser.Combinators where | |
import Test.Hspec | |
import Data.Either | |
import Data.Char | |
import Control.Applicative | |
import Data.Foldable | |
newtype Parser a = Parser (String -> (Either String a, String)) | |
-- TODO | |
-- Exercise: Complete chemical formulas (see commented tests) | |
-- Challenge: track position | |
-- Challenge: better errors through composition | |
--------------------------------------------------------------------------------- | |
-- Typeclass Instances -- | |
--------------------------------------------------------------------------------- | |
instance Semigroup a => Semigroup (Parser a) where | |
(<>) = liftA2 (<>) | |
instance Monoid a => Monoid (Parser a) where | |
mempty = pure mempty | |
instance Functor Parser where | |
fmap f (Parser pa) = Parser p | |
where p s = case pa s of | |
(Right a, s') -> | |
(Right $ f a, s') | |
(Left e, _) -> | |
(Left e, s) | |
instance Applicative Parser where | |
pure = Parser . (,) . Right | |
(Parser pf) <*> (Parser pa) = Parser p | |
where p s = case pf s of | |
(Right f, s') -> | |
case pa s' of | |
(Right a, s'') -> | |
(Right $ f a, s'') | |
(Left e, _) -> | |
(Left e, s) | |
(Left e, _) -> | |
(Left e, s) | |
instance Alternative Parser where | |
empty = neverP "unexpected error" | |
(Parser px) <|> (Parser py) = Parser p | |
where p s = case px s of | |
(Right x, s') -> | |
(Right x, s') | |
(Left ex, _) -> | |
case py s of | |
(Right y, s') -> | |
(Right y, s') | |
(Left ey, _) -> | |
(Left $ ex <> ey, s) | |
instance Monad Parser where | |
(Parser pa) >>= f = Parser p | |
where p s = case pa s of | |
(Right a, s') -> | |
runParser (f a) s' | |
(Left e, _) -> | |
(Left e, s) | |
--------------------------------------------------------------------------------- | |
-- Terminal Parsers -- | |
--------------------------------------------------------------------------------- | |
whileP :: (Char -> Bool) -> Parser String | |
whileP f = Parser p | |
where p s = let (ls, rs) = span f s | |
in (Right ls, rs) | |
charP :: Char -> Parser String | |
charP c = Parser p | |
where p "" = (Left "unexpected end of input", "") | |
p s@(x:xs) | |
| x == c = (Right [x], xs) | |
| otherwise = (Left $ "expected `" ++ [c] ++ "` given `" ++ [x] ++ "`", s) | |
neverP :: String -> Parser a | |
neverP = Parser . (,) . Left | |
alwaysP :: a -> Parser a | |
alwaysP = pure | |
--------------------------------------------------------------------------------- | |
-- Combinators -- | |
--------------------------------------------------------------------------------- | |
litP :: String -> Parser String | |
litP = foldMap charP | |
lexP :: String -> Parser String | |
lexP s = litP s <* spacesP | |
intP :: Parser Int | |
intP = read <$> whileP isDigit | |
spacesP :: Parser String | |
spacesP = whileP (`elem` " \n\t") | |
while1P :: (Char -> Bool) -> Parser String | |
while1P f = whenP (whileP f) (not . null) "not empty" | |
whenP :: Parser a -> (a -> Bool) -> String -> Parser a | |
whenP p f e = p >>= (\a -> if f a then alwaysP a else neverP e) | |
manyP :: Parser a -> Parser [a] | |
manyP = many | |
notP :: Parser a -> Parser String | |
notP (Parser pa) = Parser p | |
where p s@(x:xs) = case pa s of | |
(Right _, _) -> | |
(Left $ "unexpected `" ++ [x] ++ "`", s) | |
(Left _, _) -> | |
(Right [x], xs) | |
betweenP' :: Parser a -> Parser String -> Parser String | |
betweenP' l = betweenP l (neverP "no escape") | |
betweenP :: Parser a -> Parser String -> Parser String -> Parser String | |
betweenP l e r = mconcat <$> (l *> manyP ((e *> r) <|> notP r) <* r) | |
--------------------------------------------------------------------------------- | |
-- Chemical Formulas Example -- | |
--------------------------------------------------------------------------------- | |
referenceP :: Parser (String, Int) | |
referenceP = let elementP = whileP isUpper | |
quantityP = intP | |
in (,) <$> elementP <*> quantityP | |
-- See exercises and commented tests | |
--------------------------------------------------------------------------------- | |
-- XML Example -- | |
--------------------------------------------------------------------------------- | |
type Name = String | |
type Value = String | |
type XMLAttribute = (Name, Value) | |
data XML = XMLElement String [XMLAttribute] [XML] | |
| XMLComment String | |
| XMLText String | |
deriving (Eq, Show) | |
xmlP :: Parser XML | |
xmlP = xmlElementP <|> xmlCommentP <|> xmlTextP | |
xmlCommentP :: Parser XML | |
xmlCommentP = XMLComment <$> betweenP' (litP "<!--") (lexP "-->") | |
xmlTextP :: Parser XML | |
xmlTextP = XMLText <$> while1P (/= '<') | |
xmlElementP :: Parser XML | |
xmlElementP = do | |
(n, a) <- openP | |
c <- manyP xmlP | |
_ <- closeP n | |
return $ XMLElement n a c | |
where | |
openP = do | |
n <- lexP "<" *> whileP isLetter <* spacesP | |
a <- manyP xmlAttributeP | |
lexP ">" | |
return (n, a) | |
closeP n = lexP "</" *> litP n <* spacesP <* lexP ">" | |
xmlAttributeP :: Parser XMLAttribute | |
xmlAttributeP = do | |
n <- nameP <* spacesP | |
lexP "=" | |
v <- valueP <* spacesP | |
return (n, v) | |
where nameP = while1P isLetter | |
valueP = betweenP (litP "\"") (litP "\\") (litP "\"") | |
--------------------------------------------------------------------------------- | |
-- Run Parser -- | |
--------------------------------------------------------------------------------- | |
runParser :: Parser a -> String -> (Either String a, String) | |
runParser (Parser f) = f | |
execParser :: Parser a -> String -> Either String a | |
execParser p = fst . runParser p | |
parse :: Parser a -> String -> Either String a | |
parse p = execParser (spacesP *> p) | |
--------------------------------------------------------------------------------- | |
-- Tests -- | |
--------------------------------------------------------------------------------- | |
tests :: IO () | |
tests = hspec $ | |
describe "Parser Combinators" $ do | |
it "shall pass" $ 1 + 2 `shouldBe` 3 | |
it "will parse a char" $ do | |
runParser (charP 'a') "a" `shouldBe` (Right "a", "") | |
runParser (charP 'a') "aaa" `shouldBe` (Right "a", "aa") | |
runParser (charP 'a') "bbb" `shouldBe` (Left "expected `a` given `b`", "bbb") | |
it "will parse a string" $ do | |
runParser (litP "aaa") "aaa" `shouldBe` (Right "aaa", "") | |
execParser (litP "aaa") "aab" `shouldSatisfy` isLeft | |
it "will parser an integer" $ runParser intP "123aaa" `shouldBe` (Right 123, "aaa") | |
it "will parse a chemical reference" $ do | |
execParser referenceP "H2" `shouldBe` Right ("H", 2) | |
-- execParser referenceP "O" `shouldBe` Right ("O", 1) | |
-- execParser referenceP "Cl4" `shouldBe` Right ("Cl", 4) | |
-- execParser formulaP "H2O" `shouldBe` Right [("H", 2), ("O", 1)] | |
-- execParser referenceP "NaCl4" `shouldBe` Right [("Na", 1), ("Cl", 4)] | |
it "will parse an XML" $ do | |
runParser xmlP "<foo></foo>" `shouldBe` (Right (XMLElement "foo" [] []), "") | |
execParser xmlP "<foo>bar</foo>" `shouldBe` Right (XMLElement "foo" [] [XMLText "bar"]) | |
parse xmlP " <foo > bar bar </ foo>" `shouldBe` Right (XMLElement "foo" [] [XMLText "bar bar "]) | |
parse xmlP "<foo><bar></bar></foo>" `shouldBe` Right (XMLElement "foo" [] [XMLElement "bar" [] []]) | |
parse xmlP "<foo><bar>\n</bar></foo>" `shouldBe` Right (XMLElement "foo" [] [XMLElement "bar" [] []]) | |
parse xmlP "<foo><bar><!-- woot --></bar></foo>" `shouldBe` Right (XMLElement "foo" [] [XMLElement "bar" [] [XMLComment " woot "]]) | |
parse xmlP "<foo foo=\"bar\" foz=\"baz\"></foo>" `shouldBe` Right (XMLElement "foo" [("foo", "bar"), ("foz", "baz")] []) |
La gist è stata aggiornata con la seconda parte che trovate qui. Potete ancora accedere alla versione precedente relativa al primo video con il link "Revisions" che trovate in alto
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
🇮🇹 Per chi arrivasse qui per caso in qualche modo, c'è un video dell'implementazione di questo codice qui.
https://www.youtube.com/watch?v=V8rXQhYwXJc&t=1427s