Created
November 13, 2012 13:47
-
-
Save fedelebron/4065841 to your computer and use it in GitHub Desktop.
Simple LL(1) parser for a sums-and-products language in Haskell.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Map (Map, lookup, fromList) | |
import Data.Maybe | |
import Prelude hiding (lookup) | |
type ParserState s = [s] -- Stack | |
type ParserStateTransformer s = ParserState s -> ParserState s | |
type ParsingTable s = Map (s, s) (ParserStateTransformer s) | |
type ParsingError = String | |
data Parser s = MakeParser (ParsingTable s) (ParserState s) | |
getState :: Parser s -> ParserState s | |
getState (MakeParser _ x) = x | |
parse :: (Ord s, Show s) => ParsingTable s -> [s] -> [s] -> Either ParsingError (Parser s) | |
parse table initial = foldl changeState newParser | |
where | |
changeState mp ptr = do | |
p <- mp | |
let stack = getState p | |
let top = head stack | |
if ptr == top then do | |
makeParser (tail stack) | |
else do | |
case lookup (top, ptr) table of | |
Nothing -> Left ("Expected " ++ (show top) ++ ", found " ++ (show ptr)) | |
Just f -> changeState (makeParser (f stack)) ptr | |
makeParser = return . MakeParser table | |
newParser = makeParser initial | |
data Symbol = T_PLUS | | |
T_MULT | | |
T_VAR | | |
T_OPEN_PAREN | | |
T_CLOSE_PAREN | | |
T_EOF | | |
NT_EXPR | | |
NT_PEXPR | | |
NT_TERM | | |
NT_FACTOR | | |
NT_PTERM deriving (Show, Eq, Ord) | |
toSymbol :: Char -> Symbol | |
toSymbol x = case x of | |
'(' -> T_OPEN_PAREN | |
')' -> T_CLOSE_PAREN | |
'+' -> T_PLUS | |
'*' -> T_MULT | |
_ -> T_VAR | |
toSymbolList :: String -> [Symbol] | |
toSymbolList = (++ [T_EOF]) . (map toSymbol) | |
table :: ParsingTable Symbol | |
table = fromList [((NT_EXPR, T_OPEN_PAREN), f1), | |
((NT_EXPR, T_VAR), f1), | |
((NT_PEXPR, T_PLUS), f2), | |
((NT_PEXPR, T_CLOSE_PAREN), epsilon), | |
((NT_PEXPR, T_EOF), epsilon), | |
((NT_TERM, T_OPEN_PAREN), f3), | |
((NT_TERM, T_VAR), f3), | |
((NT_PTERM, T_PLUS), epsilon), | |
((NT_PTERM, T_MULT), f4), | |
((NT_PTERM, T_CLOSE_PAREN), epsilon), | |
((NT_PTERM, T_EOF), epsilon), | |
((NT_FACTOR, T_OPEN_PAREN), f5), | |
((NT_FACTOR, T_VAR), f6)] | |
where | |
f1 (x:xs) = [NT_TERM, NT_PEXPR] ++ xs | |
f2 (x:xs) = [T_PLUS, NT_TERM, NT_PEXPR] ++ xs | |
f3 (x:xs) = [NT_FACTOR, NT_PTERM] ++ xs | |
f4 (x:xs) = [T_MULT, NT_FACTOR, NT_PTERM] ++ xs | |
f5 (x:xs) = [T_OPEN_PAREN, NT_EXPR, T_CLOSE_PAREN] ++ xs | |
f6 (x:xs) = [T_VAR] ++ xs | |
epsilon = tail | |
syntaxChecker = ((either failure success .) .) . parse | |
where | |
failure = (\x -> "Syntax error: " ++ x) | |
success = (\_ -> "String belongs to the language.") | |
str = toSymbolList "(a+(b*c))" | |
main = putStrLn $ syntaxChecker table [NT_EXPR, T_EOF] str |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment