-
-
Save kamil-adam/bc9f146d73fd16d9b87ee55660c0ad32 to your computer and use it in GitHub Desktop.
C-like language parser in Haskell.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Parser(parseProgram) where | |
import Text.Parsec.Expr | |
import Text.ParserCombinators.Parsec | |
import Text.ParserCombinators.Parsec.Error | |
import qualified Text.ParserCombinators.Parsec.Token as P | |
import Text.ParserCombinators.Parsec.Language | |
import Control.Monad | |
import Tokens | |
def = emptyDef { | |
commentStart = "/*", commentEnd = "*/", commentLine = "//", | |
identStart = lower, | |
identLetter = alphaNum <|> (char '_'), | |
reservedOpNames = [ | |
"++", "--", | |
"+", "-", "*", "/", "%", | |
"<=", "<", ">=", ">", "==", "!=", | |
"&&", "||", "!", | |
"=", "?", ":", ",", ".", "->" ], | |
reservedNames = [ | |
"true", "false", | |
"if", "then", "else", "while", "for", "return", "sizeof", "def" ], | |
caseSensitive = True | |
} | |
isReserved name | |
= scan (reservedNames def) | |
where | |
scan [] = False | |
scan (r:rs) = case (compare r name) of | |
LT -> scan rs | |
EQ -> True | |
GT -> False | |
lexer = P.makeTokenParser def | |
parens = P.parens lexer | |
braces = P.braces lexer | |
brackets = P.brackets lexer | |
identifier = P.identifier lexer | |
parseType = do { | |
x <- try ( do{ c <- upper; cs <- many (P.identLetter def); return (c:cs) } <?> "identifier" ); | |
whiteSpace; | |
case x of | |
"Int" -> return TInt | |
"Float" -> return TFloat | |
"Bool" -> return TBool | |
"String"-> return TString | |
"Auto" -> return TAuto | |
"Void" -> return TVoid | |
name -> return $ TUser name } | |
--reservedOp = P.reservedOp lexer | |
reservedOp name = do { try (string name); whiteSpace; } | |
reserved = P.reserved lexer | |
semiSep1 = P.semiSep1 lexer | |
semi = P.semi lexer | |
integer = P.integer lexer | |
naturalOrFloat = P.naturalOrFloat lexer | |
stringLiteral = P.stringLiteral lexer | |
whiteSpace = P.whiteSpace lexer | |
parseCondition ex = do | |
reservedOp "?" | |
a <- parseExpr | |
reservedOp ":" | |
b <- parseExpr | |
return $ ECondition ex a b | |
parseExpr :: Parser Expr | |
parseExpr = do | |
ex <- buildExpressionParser exprOps exprTerms <?> "expression" | |
try (parseCondition ex) <|> return ex | |
parseExprList = sepBy parseExpr (reservedOp ",") | |
exprOps = [ | |
[Postfix ( reservedOp "++" >> return (EUnaryOp "_++")) ], | |
[Postfix ( reservedOp "--" >> return (EUnaryOp "_--")) ], | |
[Postfix ( brackets (do idx <- parseExpr; return (\e -> EAccess e idx)) )], | |
[Postfix ( do | |
reservedOp "."; | |
name <- identifier; | |
( -- TODO parsec ma problemy ze zlozeniem operatorow tego samego typu | |
try ( do -- a.b.c | |
reservedOp "."; | |
name1 <- identifier; | |
( | |
try ( do --a.b.c.d | |
reservedOp "."; | |
name2 <- identifier; | |
return (\e -> EMember name2 $ EMember name1 $ EMember name e) ) | |
<|> ( return (\e -> EMember name1 $ EMember name e) ) ) ) | |
<|> ( return $ EMember name) ) | |
) ], | |
[Prefix ( reservedOp "++" >> return (EUnaryOp "++_")) ], | |
[Prefix ( reservedOp "-" >> return (EUnaryOp "-" ))], | |
[Prefix ( reservedOp "--" >> return (EUnaryOp "--_")) ], | |
[Prefix (do ttype <- try (parens parseComplexType); return $ EConvert ttype)], | |
[Prefix (reservedOp "!" >> return (EUnaryOp "!"))], | |
[Prefix (reservedOp "*" >> return (EUnaryOp "*"))], | |
[Prefix (reservedOp "&" >> return (EUnaryOp "&"))], | |
[Infix (reservedOp "*" >> return (EBinaryOp "*")) AssocLeft], | |
[Infix (reservedOp "/" >> return (EBinaryOp "/")) AssocLeft], | |
[Infix (reservedOp "%" >> return (EBinaryOp "%")) AssocLeft], | |
[Infix (reservedOp "+" >> return (EBinaryOp "+")) AssocLeft], | |
[Infix (reservedOp "-" >> return (EBinaryOp "-")) AssocLeft], | |
[Infix (reservedOp ">=" >> return (EBinaryOp ">=")) AssocLeft], | |
[Infix (reservedOp "<=" >> return (EBinaryOp "<=")) AssocLeft], | |
[Infix (reservedOp ">" >> return (EBinaryOp ">" )) AssocLeft], | |
[Infix (reservedOp "<" >> return (EBinaryOp "<" )) AssocLeft], | |
[Infix (reservedOp "==" >> return (EBinaryOp "==")) AssocLeft], | |
[Infix (reservedOp "!=" >> return (EBinaryOp "!=")) AssocLeft], | |
[Infix (reservedOp "||" >> return (EBinaryOp "||")) AssocLeft], | |
[Infix (reservedOp "&&" >> return (EBinaryOp "&&")) AssocLeft] ] | |
exprTerms = parens parseExpr | |
<|> ( do | |
id <- identifier | |
try ( do | |
list <- (parens $ sepBy parseExpr $ reservedOp ",") | |
return $ EFuncCall id list ) | |
<|> return (EIdent id) ) | |
<|> ( do | |
x <- naturalOrFloat | |
case x of | |
Left v -> return $ EInt v | |
Right v -> return $ EFloat v ) | |
<|> liftM EString stringLiteral | |
<|> (reserved "true" >> return (EBool True)) | |
<|> (reserved "false" >> return (EBool False)) | |
parseStmt :: Parser Stmt | |
parseStmt = parseIf <|> parseWhile <|> parseReturn <|> parseSeq <|> parseDecl <|> parseSExpression <|> parseNop | |
parsePointer_ [] base = base | |
parsePointer_ (h : t) base = parsePointer_ t (TPointer base) | |
parsePointer base = do | |
list <- many $ reservedOp "*" | |
return $ parsePointer_ list base | |
parseTypeWithPointer = do | |
ttype0 <- parseType | |
parsePointer ttype0 | |
parseComplexType = do | |
ttype0 <- parseType | |
ttype1 <- parseArray ttype0 | |
parsePointer ttype1 | |
parseArray base = do | |
try ( do size <- brackets integer; parseArray $ TArray base size ) <|> return base | |
parseDecl :: Parser Stmt | |
parseDecl = do | |
dtype <- parseType | |
list <- sepBy ( | |
try ( do | |
ttype0 <- parsePointer dtype | |
name <- identifier | |
ttype <- parseArray ttype0 | |
reservedOp "=" | |
expr <- parseExpr | |
return (ttype, name, Just expr) ) | |
<|> ( do | |
ttype0 <- parsePointer dtype | |
name <- identifier | |
ttype <- parseArray ttype0 | |
return (ttype, name, Nothing) ) ) | |
(reservedOp ",") | |
semi | |
return $ SDecl list | |
parseSExpression :: Parser Stmt | |
parseSExpression = do | |
expr <- parseExpr | |
( do | |
try ( reservedOp "=" ) | |
expr2 <- parseExpr | |
semi | |
return $ SAssign expr expr2 ) <|> ( semi >> return (SExpression expr) ) | |
parseReturn :: Parser Stmt | |
parseReturn = do | |
reserved "return" | |
expr <- parseExpr | |
semi | |
return $ SReturn expr | |
parseIf :: Parser Stmt | |
parseIf = do | |
reserved "if" | |
cond <- (parens parseExpr) | |
s1 <- parseStmt | |
s2 <- ( try (reserved "else" >> parseStmt) <|> (do return SNop) ) | |
return $ SIf cond s1 s2 | |
parseWhile :: Parser Stmt | |
parseWhile = do | |
reserved "while" | |
cond <- (parens parseExpr) | |
st <- parseStmt | |
return $ SWhile cond st | |
parseSeq :: Parser Stmt | |
parseSeq = braces $ do | |
list <- (many parseStmt) | |
return $ if length list == 0 then SNop else SSeq list | |
parseNop :: Parser Stmt | |
parseNop = semi >> return SNop | |
parseFuncArgs :: Parser [ (Type, String) ] | |
parseFuncArgs = do | |
sepBy ( do | |
try ( do | |
ttype0 <- parseTypeWithPointer | |
name <- identifier | |
ttype <- parseArray ttype0 | |
return (ttype, name) ) | |
<|> ( do | |
name <- identifier | |
return (TAuto, name) ) ) | |
(reservedOp ",") | |
parseStructMembers :: Parser [ (Type, String) ] | |
parseStructMembers = do | |
lists <- many ( do | |
dtype <- parseType | |
list <- sepBy ( do { | |
ttype0 <- parsePointer dtype; | |
name <- identifier; | |
ttype <- parseArray ttype0; | |
return (ttype, name) } ) | |
(reservedOp ",") | |
semi | |
return list ) | |
return $ foldl (++) [] lists | |
parseFunc :: Parser Definition | |
parseFunc = do | |
reserved "def" | |
name <- identifier | |
args <- (parens parseFuncArgs) | |
(rtype, stmt) <- ( do | |
try ( do | |
(reservedOp "->"); | |
rtype0 <- parseTypeWithPointer; | |
rtype <- parseArray rtype0; | |
stmt <- parseSeq; | |
return (rtype, stmt) ) | |
<|> ( do stmt <- parseSeq; return (TAuto, stmt) ) ) | |
case stmt of | |
SSeq _ -> return $ DFunc name rtype args stmt | |
_ -> return $ DFunc name rtype args (SSeq [stmt]) | |
parseStruct :: Parser Definition | |
parseStruct = do | |
reserved "struct" | |
tname <- parseType | |
members <- (braces parseStructMembers) | |
semi | |
case tname of | |
TUser name -> return $ DStruct name members | |
_ -> unexpected $ show tname | |
parseDefinition :: Parser Definition | |
parseDefinition = parseFunc <|> parseStruct | |
parseProgram input = parse (whiteSpace >> many parseDefinition) "myparser" input | |
--TODO sprawdzanie wielkosci tablicy |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment