Created
May 6, 2018 08:19
-
-
Save CMCDragonkai/1d46c0860d6bce857d516a35fab23d52 to your computer and use it in GitHub Desktop.
Using Megaparsec on the WHILE Language #haskell
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module While where | |
import Data.Void (Void) | |
import Text.Megaparsec ((<|>)) | |
import qualified Text.Megaparsec as Parsec | |
import qualified Text.Megaparsec.Char as CharParser | |
import qualified Text.Megaparsec.Char.Lexer as Lexer | |
import qualified Text.Megaparsec.Expr as ExprParser | |
{- | |
WHILE Language Grammar | |
a ::= x | n | - a | a opa a | |
b ::= true | false | not b | b opb b | a opr a | |
opa ::= + | - | * | / | |
opb ::= and | or | |
opr ::= > | < | |
S ::= x := a | skip | S1; S2 | ( S ) | if b then S1 else S2 | while b do S | |
-} | |
-- in haskell we represent the abstract syntax tree | |
-- or BNF grammar as algebraic data types | |
data ArithExpr = Var String | |
| IntLit Integer | |
| Neg ArithExpr | |
| ArithBinary ArithOp ArithExpr ArithExpr | |
deriving (Show) | |
data BoolExpr = BoolLit Bool | |
| Not BoolExpr | |
| BoolBinary BoolOp BoolExpr BoolExpr | |
| RelBinary RelOp ArithExpr ArithExpr | |
deriving (Show) | |
data ArithOp = Add | |
| Subtract | |
| Multiply | |
| Divide | |
deriving (Show) | |
data BoolOp = And | |
| Or | |
deriving (Show) | |
data RelOp = Greater | |
| Less | |
deriving (Show) | |
data Stmt = Seq [Stmt] | |
| Assign String ArithExpr | |
| If BoolExpr Stmt | |
Stmt | |
| While BoolExpr Stmt | |
| Skip | |
deriving (Show) | |
-- there is little separation between lexer and parser here | |
-- the operations of lexer and parser is interleaved | |
-- a parsing function at the very end calls lexing functions | |
-- you can see it through the usage of the `lexeme` function | |
-- by the `reserved` and `identifier` parser | |
-- the parser can always do the job of the lexer | |
-- however usually, the 2 are separated | |
-- so that the lexer can reduce the amount of information noise | |
-- the parser has to deal with | |
-- using Haskell megaparsec means you get flexibility | |
-- and with functional abstraction, the parsers | |
-- are not dealing with the low level noise | |
-- the "lexer" are just lower level parsers | |
-- so this is parser abstraction! | |
-- reduces the amount of things you need to deal with | |
-- also this can do single pass parsing | |
-- in terms of building up a symbol table | |
-- this can also be done by layering the monad | |
-- instead of just maintaining the state of the input stream | |
-- we can also maintain the state of a symbol table | |
type Parser = Parsec.Parsec Void String | |
-- here we have the "low level" lexical parsers | |
space :: Parser () | |
space = Lexer.space CharParser.space1 lineComment blockComment | |
where | |
lineComment = Lexer.skipLineComment "//" | |
blockComment = Lexer.skipBlockComment "/*" "*/" | |
symbol :: String -> Parser String | |
symbol = Lexer.symbol space | |
lexeme :: Parser a -> Parser a | |
lexeme = Lexer.lexeme space | |
parens :: Parser a -> Parser a | |
parens = Parsec.between (symbol "(") (symbol ")") | |
integer :: Parser Integer | |
integer = lexeme Lexer.decimal | |
semicolon :: Parser String | |
semicolon = symbol ";" | |
-- here we have the "mid level" parsers | |
-- dealing with more "composite" syntax | |
reserved :: String -> Parser () | |
reserved w = | |
(lexeme . Parsec.try) | |
(CharParser.string w *> | |
Parsec.notFollowedBy CharParser.alphaNumChar) | |
identifier :: Parser String | |
identifier = (lexeme . Parsec.try) (p >>= check) | |
where | |
p = (:) <$> CharParser.letterChar <*> Parsec.many CharParser.alphaNumChar | |
check x = if x `elem` reservedWords | |
then fail $ "keyword " ++ show x ++ " cannot be an identifier" | |
else return x | |
reservedWords = | |
[ | |
"if", | |
"then", | |
"else", | |
"while", | |
"do", | |
"skip", | |
"true", | |
"false", | |
"not", | |
"and", | |
"or" | |
] | |
-- here we have the "high level" parsers | |
-- doing double duty in doing "code generation" | |
-- they are not just parsing strings/integers | |
-- they are not parsing __into__ the abstract syntax tree | |
stmt :: Parser Stmt | |
stmt = f <$> Parsec.sepBy1 stmt' semicolon | |
where | |
f l = if length l == 1 then head l else Seq l | |
stmt' = ifStmt <|> | |
whileStmt <|> | |
skipStmt <|> | |
assignStmt <|> | |
parens stmt | |
ifStmt :: Parser Stmt | |
ifStmt = do | |
reserved "if" | |
condition <- boolExpr | |
reserved "then" | |
stmt1 <- stmt | |
reserved "else" | |
stmt2 <- stmt | |
return (If condition stmt1 stmt2) | |
whileStmt :: Parser Stmt | |
whileStmt = do | |
reserved "while" | |
cond <- boolExpr | |
reserved "do" | |
stmt1 <- stmt | |
return (While cond stmt1) | |
assignStmt :: Parser Stmt | |
assignStmt = do | |
var <- identifier | |
symbol ":=" | |
expr <- arithExpr | |
return (Assign var expr) | |
skipStmt :: Parser Stmt | |
skipStmt = Skip <$ reserved "skip" | |
boolExpr :: Parser BoolExpr | |
boolExpr = ExprParser.makeExprParser boolTerm boolOperators | |
boolTerm :: Parser BoolExpr | |
boolTerm = parens boolExpr <|> | |
(BoolLit True <$ reserved "true") <|> | |
(BoolLit False <$ reserved "false") <|> | |
relExpr | |
boolOperators :: [[ExprParser.Operator Parser BoolExpr]] | |
boolOperators = | |
[ | |
[ExprParser.Prefix (Not <$ reserved "not")], | |
[ | |
ExprParser.InfixL (BoolBinary And <$ reserved "and"), | |
ExprParser.InfixL (BoolBinary Or <$ reserved "or") | |
] | |
] | |
arithExpr :: Parser ArithExpr | |
arithExpr = ExprParser.makeExprParser arithTerm arithOperators | |
arithTerm :: Parser ArithExpr | |
arithTerm = parens arithExpr <|> | |
Var <$> identifier <|> | |
IntLit <$> integer | |
arithOperators :: [[ExprParser.Operator Parser ArithExpr]] | |
arithOperators = | |
[ | |
[ExprParser.Prefix (Neg <$ symbol "-")], | |
[ | |
ExprParser.InfixL (ArithBinary Multiply <$ symbol "*"), | |
ExprParser.InfixL (ArithBinary Divide <$ symbol "/") | |
], | |
[ | |
ExprParser.InfixL (ArithBinary Add <$ symbol "+"), | |
ExprParser.InfixL (ArithBinary Subtract <$ symbol "-") | |
] | |
] | |
relExpr :: Parser BoolExpr | |
relExpr = do | |
a1 <- arithExpr | |
op <- relOperators | |
a2 <- arithExpr | |
return (RelBinary op a1 a2) | |
relOperators :: Parser RelOp | |
relOperators = (symbol ">" *> pure Greater) <|> | |
(symbol "<" *> pure Less) | |
-- here we have the parser entry point | |
-- this will parse into our WHILE language AST | |
whileParser :: Parser Stmt | |
whileParser = Parsec.between space Parsec.eof stmt | |
-- some tests of our parser | |
main :: IO () | |
main = do | |
ast1 <- Parsec.parseTest whileParser | |
"a := 3" | |
ast2 <- Parsec.parseTest whileParser | |
"\ | |
\a:= 1;\ | |
\while a < 4 do\ | |
\ a := 5;\ | |
\ if 4 < 3 then\ | |
\ skip\ | |
\ else\ | |
\ (b := 4)\ | |
\" | |
print ast1 | |
print ast2 | |
{- | |
Assign "a" (IntLit 3) | |
Seq [Assign "a" (IntLit 1),While (RelBinary Less (Var "a") (IntLit 4)) (Seq [Assign "a" (IntLit 5),If (RelBinary Less (IntLit 4) (IntLit 3)) Skip (Assign "b" (IntLit 4))])] | |
-} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Perhaps you can answer this question?