Skip to content

Instantly share code, notes, and snippets.

@gabrielelana
Last active March 21, 2021 18:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gabrielelana/24acb76ce56a1d252dc0c50d604f077d to your computer and use it in GitHub Desktop.
Save gabrielelana/24acb76ce56a1d252dc0c50d604f077d to your computer and use it in GitHub Desktop.
Parser Combinators from zero in Haskell
module Parser.Combinators where
import Test.Hspec
import Data.Either
import Data.Char
import Control.Applicative
import Data.Foldable
newtype Parser a = Parser (String -> (Either String a, String))
-- TODO
-- Exercise: Complete chemical formulas (see commented tests)
-- Challenge: track position
-- Challenge: better errors through composition
---------------------------------------------------------------------------------
-- Typeclass Instances --
---------------------------------------------------------------------------------
instance Semigroup a => Semigroup (Parser a) where
(<>) = liftA2 (<>)
instance Monoid a => Monoid (Parser a) where
mempty = pure mempty
instance Functor Parser where
fmap f (Parser pa) = Parser p
where p s = case pa s of
(Right a, s') ->
(Right $ f a, s')
(Left e, _) ->
(Left e, s)
instance Applicative Parser where
pure = Parser . (,) . Right
(Parser pf) <*> (Parser pa) = Parser p
where p s = case pf s of
(Right f, s') ->
case pa s' of
(Right a, s'') ->
(Right $ f a, s'')
(Left e, _) ->
(Left e, s)
(Left e, _) ->
(Left e, s)
instance Alternative Parser where
empty = neverP "unexpected error"
(Parser px) <|> (Parser py) = Parser p
where p s = case px s of
(Right x, s') ->
(Right x, s')
(Left ex, _) ->
case py s of
(Right y, s') ->
(Right y, s')
(Left ey, _) ->
(Left $ ex <> ey, s)
instance Monad Parser where
(Parser pa) >>= f = Parser p
where p s = case pa s of
(Right a, s') ->
runParser (f a) s'
(Left e, _) ->
(Left e, s)
---------------------------------------------------------------------------------
-- Terminal Parsers --
---------------------------------------------------------------------------------
whileP :: (Char -> Bool) -> Parser String
whileP f = Parser p
where p s = let (ls, rs) = span f s
in (Right ls, rs)
charP :: Char -> Parser String
charP c = Parser p
where p "" = (Left "unexpected end of input", "")
p s@(x:xs)
| x == c = (Right [x], xs)
| otherwise = (Left $ "expected `" ++ [c] ++ "` given `" ++ [x] ++ "`", s)
neverP :: String -> Parser a
neverP = Parser . (,) . Left
alwaysP :: a -> Parser a
alwaysP = pure
---------------------------------------------------------------------------------
-- Combinators --
---------------------------------------------------------------------------------
litP :: String -> Parser String
litP = foldMap charP
lexP :: String -> Parser String
lexP s = litP s <* spacesP
intP :: Parser Int
intP = read <$> whileP isDigit
spacesP :: Parser String
spacesP = whileP (`elem` " \n\t")
while1P :: (Char -> Bool) -> Parser String
while1P f = whenP (whileP f) (not . null) "not empty"
whenP :: Parser a -> (a -> Bool) -> String -> Parser a
whenP p f e = p >>= (\a -> if f a then alwaysP a else neverP e)
manyP :: Parser a -> Parser [a]
manyP = many
notP :: Parser a -> Parser String
notP (Parser pa) = Parser p
where p s@(x:xs) = case pa s of
(Right _, _) ->
(Left $ "unexpected `" ++ [x] ++ "`", s)
(Left _, _) ->
(Right [x], xs)
betweenP' :: Parser a -> Parser String -> Parser String
betweenP' l = betweenP l (neverP "no escape")
betweenP :: Parser a -> Parser String -> Parser String -> Parser String
betweenP l e r = mconcat <$> (l *> manyP ((e *> r) <|> notP r) <* r)
---------------------------------------------------------------------------------
-- Chemical Formulas Example --
---------------------------------------------------------------------------------
referenceP :: Parser (String, Int)
referenceP = let elementP = whileP isUpper
quantityP = intP
in (,) <$> elementP <*> quantityP
-- See exercises and commented tests
---------------------------------------------------------------------------------
-- XML Example --
---------------------------------------------------------------------------------
type Name = String
type Value = String
type XMLAttribute = (Name, Value)
data XML = XMLElement String [XMLAttribute] [XML]
| XMLComment String
| XMLText String
deriving (Eq, Show)
xmlP :: Parser XML
xmlP = xmlElementP <|> xmlCommentP <|> xmlTextP
xmlCommentP :: Parser XML
xmlCommentP = XMLComment <$> betweenP' (litP "<!--") (lexP "-->")
xmlTextP :: Parser XML
xmlTextP = XMLText <$> while1P (/= '<')
xmlElementP :: Parser XML
xmlElementP = do
(n, a) <- openP
c <- manyP xmlP
_ <- closeP n
return $ XMLElement n a c
where
openP = do
n <- lexP "<" *> whileP isLetter <* spacesP
a <- manyP xmlAttributeP
lexP ">"
return (n, a)
closeP n = lexP "</" *> litP n <* spacesP <* lexP ">"
xmlAttributeP :: Parser XMLAttribute
xmlAttributeP = do
n <- nameP <* spacesP
lexP "="
v <- valueP <* spacesP
return (n, v)
where nameP = while1P isLetter
valueP = betweenP (litP "\"") (litP "\\") (litP "\"")
---------------------------------------------------------------------------------
-- Run Parser --
---------------------------------------------------------------------------------
runParser :: Parser a -> String -> (Either String a, String)
runParser (Parser f) = f
execParser :: Parser a -> String -> Either String a
execParser p = fst . runParser p
parse :: Parser a -> String -> Either String a
parse p = execParser (spacesP *> p)
---------------------------------------------------------------------------------
-- Tests --
---------------------------------------------------------------------------------
tests :: IO ()
tests = hspec $
describe "Parser Combinators" $ do
it "shall pass" $ 1 + 2 `shouldBe` 3
it "will parse a char" $ do
runParser (charP 'a') "a" `shouldBe` (Right "a", "")
runParser (charP 'a') "aaa" `shouldBe` (Right "a", "aa")
runParser (charP 'a') "bbb" `shouldBe` (Left "expected `a` given `b`", "bbb")
it "will parse a string" $ do
runParser (litP "aaa") "aaa" `shouldBe` (Right "aaa", "")
execParser (litP "aaa") "aab" `shouldSatisfy` isLeft
it "will parser an integer" $ runParser intP "123aaa" `shouldBe` (Right 123, "aaa")
it "will parse a chemical reference" $ do
execParser referenceP "H2" `shouldBe` Right ("H", 2)
-- execParser referenceP "O" `shouldBe` Right ("O", 1)
-- execParser referenceP "Cl4" `shouldBe` Right ("Cl", 4)
-- execParser formulaP "H2O" `shouldBe` Right [("H", 2), ("O", 1)]
-- execParser referenceP "NaCl4" `shouldBe` Right [("Na", 1), ("Cl", 4)]
it "will parse an XML" $ do
runParser xmlP "<foo></foo>" `shouldBe` (Right (XMLElement "foo" [] []), "")
execParser xmlP "<foo>bar</foo>" `shouldBe` Right (XMLElement "foo" [] [XMLText "bar"])
parse xmlP " <foo > bar bar </ foo>" `shouldBe` Right (XMLElement "foo" [] [XMLText "bar bar "])
parse xmlP "<foo><bar></bar></foo>" `shouldBe` Right (XMLElement "foo" [] [XMLElement "bar" [] []])
parse xmlP "<foo><bar>\n</bar></foo>" `shouldBe` Right (XMLElement "foo" [] [XMLElement "bar" [] []])
parse xmlP "<foo><bar><!-- woot --></bar></foo>" `shouldBe` Right (XMLElement "foo" [] [XMLElement "bar" [] [XMLComment " woot "]])
parse xmlP "<foo foo=\"bar\" foz=\"baz\"></foo>" `shouldBe` Right (XMLElement "foo" [("foo", "bar"), ("foz", "baz")] [])
@kandros
Copy link

kandros commented Mar 21, 2021

🇮🇹 Per chi arrivasse qui per caso in qualche modo, c'è un video dell'implementazione di questo codice qui.
https://www.youtube.com/watch?v=V8rXQhYwXJc&t=1427s

@gabrielelana
Copy link
Author

gabrielelana commented Mar 21, 2021

La gist è stata aggiornata con la seconda parte che trovate qui. Potete ancora accedere alla versione precedente relativa al primo video con il link "Revisions" che trovate in alto

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment