Created
October 21, 2014 19:51
-
-
Save anonymous/fc23b362754031c1fdad to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{- | |
This code parses a subset of JavaScript and detects usages of undeclared variables. | |
It's an example of a Parsec parser; it's by no means complete. | |
This source code primarily uses the do-syntax for writing the parser, | |
but that's not the only way to write code with Parsec. | |
This code was written in a hurry by Joakim Ahnfelt-Rønne during the meetup at 2014-21-10. | |
USAGE: | |
runhaskell Main.hs example.js | |
example.js: | |
function fib(n) { | |
var fib1 = fib(n - 1); | |
var fib2 = fib(n - 2); | |
if(n <= 1) return 1 | |
else return fib1 + fib3 | |
} | |
-} | |
import Text.ParserCombinators.Parsec | |
import Control.Monad | |
import Control.Monad.Error | |
import Control.Monad.Identity | |
import System.IO (readFile) | |
import System.Environment (getArgs) | |
import Data.Set (Set) | |
import qualified Data.Set as Set | |
-- Abstract syntax tree | |
data Term | |
= Function String [String] Term | |
| Call Term [Term] | |
| Return Term | |
| Minus Term Term | |
| Plus Term Term | |
| LessThanOrEqualTo Term Term | |
| If Term Term (Maybe Term) | |
| Number Double | |
| Var String Term | |
| Variable String | |
| Sequence Term Term | |
| Assign Term Term | |
| Undefined | |
deriving Show | |
-- Utility functions | |
keywords :: Set String | |
keywords = Set.fromList ["function", "var", "for", "while", "return", "break", "continue", "if", "else", "switch", "case", "try", "catch", "finally", "throw", "in", "with", "delete", "eval", "instanceof", "void", "new", "do", "null", "true", "false"] | |
skipWhitespace :: Parser () | |
skipWhitespace = many space >> return () | |
-- Terminals (consume whitespace after each token) | |
tokenIdentifier :: Parser String | |
tokenIdentifier = try $ do | |
c <- letter | |
cs <- many alphaNum | |
let x = c:cs | |
when (Set.member x keywords) (unexpected ("keyword '" ++ x ++ "'")) | |
skipWhitespace | |
return x | |
tokenKeyword :: String -> Parser () | |
tokenKeyword keyword = do | |
try $ string keyword | |
skipWhitespace | |
return () | |
tokenNumber :: Parser Term | |
tokenNumber = do | |
ds <- try $ many1 digit | |
skipWhitespace | |
return (Number (read ds)) | |
-- Non-terminals (doesn't mention whitespace) | |
parseCurly :: Parser Term | |
parseCurly = do | |
tokenKeyword "{" | |
statements <- sepBy parseTerm (tokenKeyword ";") | |
tokenKeyword "}" | |
case statements of | |
[] -> return Undefined | |
ss -> return $ | |
let sequence ss = case ss of | |
[s'] -> s' | |
(s':ss') -> Sequence s' (sequence ss') | |
in sequence ss | |
parseVariable :: Parser Term | |
parseVariable = do | |
identifier <- tokenIdentifier | |
return (Variable identifier) | |
parseFunction :: Parser Term | |
parseFunction = do | |
tokenKeyword "function" | |
name <- tokenIdentifier | |
tokenKeyword "(" | |
arguments <- sepBy tokenIdentifier (tokenKeyword ",") | |
tokenKeyword ")" | |
body <- parseCurly | |
return (Function name arguments body) | |
parseIf :: Parser Term | |
parseIf = do | |
tokenKeyword "if" | |
tokenKeyword "(" | |
condition <- parseTerm | |
tokenKeyword ")" | |
thenBranch <- parseCurly <|> parseTerm | |
elseBranchMaybe <- optionMaybe $ do | |
tokenKeyword "else" | |
parseCurly <|> parseTerm | |
return (If condition thenBranch elseBranchMaybe) | |
parseVar :: Parser Term | |
parseVar = do | |
tokenKeyword "var" | |
x <- tokenIdentifier | |
v <- (tokenKeyword "=" >> parseTerm) <|> (return Undefined) | |
return (Var x v) | |
parseReturn :: Parser Term | |
parseReturn = do | |
tokenKeyword "return" | |
body <- parseTerm | |
return (Return body) | |
parseOperator :: [(String, (Term -> Term -> Term))] -> Parser Term -> Parser Term | |
parseOperator keywordAndConstructors inner = do | |
let operators = map (\(k, c) -> tokenKeyword k >> return c) keywordAndConstructors | |
a <- inner | |
constructorsAndRightValues <- many $ do | |
constructor <- choice operators | |
b <- inner | |
return (constructor, b) | |
return (foldl (\x (o, y) -> o x y) a constructorsAndRightValues) | |
parseAtom = parseFunction <|> parseVar <|> parseIf <|> parseReturn <|> parseVariable <|> tokenNumber | |
parseCall = do | |
f <- parseAtom | |
argumentsMaybe <- optionMaybe $ do | |
tokenKeyword "(" | |
arguments <- sepBy1 parseTerm (tokenKeyword ",") | |
tokenKeyword ")" | |
return arguments | |
case argumentsMaybe of | |
Just arguments -> return (Call f arguments) | |
Nothing -> return f | |
parseAdditionOperator = parseOperator [("+", Plus), ("-", Minus)] parseCall | |
parseComparisonOperator = parseOperator [("<=", LessThanOrEqualTo)] parseAdditionOperator | |
parseAssignmentOperator = parseOperator [("=", Assign)] parseComparisonOperator | |
parseTerm = parseAssignmentOperator | |
parseProgram :: Parser Term | |
parseProgram = do | |
skipWhitespace | |
term <- parseTerm | |
eof | |
return term | |
-- Lint: detect undeclared variables | |
type Problem a = ErrorT String Identity a | |
lint :: Set String -> Term -> Problem () | |
lint declared (Function name arguments e1) = do | |
let declared' = Set.union (Set.union (Set.singleton name) (Set.fromList arguments)) declared | |
lint declared' e1 | |
lint declared (Call e1 es) = do | |
lint declared e1 | |
mapM_ (lint declared) es | |
lint declared (Return e1) = lint declared e1 | |
lint declared (Minus e1 e2) = do | |
lint declared e1 | |
lint declared e2 | |
lint declared (Plus e1 e2) = do | |
lint declared e1 | |
lint declared e2 | |
lint declared (LessThanOrEqualTo e1 e2) = do | |
lint declared e1 | |
lint declared e2 | |
lint declared (If e1 e2 e3) = do | |
lint declared e1 | |
lint declared e2 | |
case e3 of | |
Just e -> lint declared e | |
Nothing -> return () | |
lint _ (Number _) = return () | |
lint declared (Var x e1) = do | |
lint declared e1 | |
lint declared (Variable x) = do | |
when (not (Set.member x declared)) $ fail ("Not declared: " ++ x) | |
lint declared (Sequence (Var x e1) e2) = do | |
lint declared e1 | |
let declared' = Set.union (Set.singleton x) declared | |
lint declared' e2 | |
lint declared (Sequence e1 e2) = do | |
lint declared e1 | |
lint declared e2 | |
lint declared (Assign e1 e2) = do | |
lint declared e1 | |
lint declared e2 | |
lint _ Undefined = return () | |
-- Main function that parses a file, prints the syntax tree and detects undeclared variables | |
main = do | |
[fileName] <- getArgs | |
input <- readFile fileName | |
case parse parseProgram fileName input of | |
Left e -> putStrLn ("Error: " ++ show e) | |
Right e -> do | |
putStrLn (show e) | |
let problem = runIdentity (runErrorT (lint Set.empty e)) | |
case problem of | |
Left p -> putStrLn p | |
Right () -> putStrLn "No problems!" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment