Skip to content

Instantly share code, notes, and snippets.

@kputnam
Created May 22, 2013 07:30
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kputnam/5625856 to your computer and use it in GitHub Desktop.
Save kputnam/5625856 to your computer and use it in GitHub Desktop.
AST, parser, and pretty printer for JS-like language
{-# LANGUAGE OverloadedStrings #-}
module Example
( Term(..)
, pretty
, parse
, testReparse
, normalize
) where
import Prelude hiding (takeWhile)
import Data.Text (Text, pack)
import Data.Monoid ((<>))
-- Used for parsing
import Data.Attoparsec.Expr
import Control.Applicative ((<$>), (<|>), (*>), (<*), (<*>))
import Data.Attoparsec.Text (char, decimal, inClass, string, takeWhile,
takeWhile1, parseOnly, skipSpace)
-- Used for testing
import Test.QuickCheck
data Term
= Var Text
| Num Int
| Str Text -- "..."
| Grp Term -- (a)
| Dot Term Term -- a.b
| Get Term Term -- a[b]
| New Term -- new a
| App Term Term -- f(x)
| Inc Term -- a ++
| Dec Term -- a --
| Not Term -- !a
| Pos Term -- +a
| Neg Term -- -a
| Exp Term Term -- a ** b
| Mul Term Term -- a * b
| Div Term Term -- a / b
| Mod Term Term -- a % b
| Add Term Term -- a + b
| Sub Term Term -- a - b
| Shl Term Term -- a << b
| Shr Term Term -- a >> b
| Gt Term Term -- a > b
| Lt Term Term -- a < b
| Gte Term Term -- a >= b
| Lte Term Term -- a <= b
| Ne Term Term -- a != b
| Eq Term Term -- a == b
| Bnd Term Term -- a & b
| Bxo Term Term -- a ^ b
| Bor Term Term -- a | b
| Lnd Term Term -- a && b
| Lor Term Term -- a || b
| Lxo Term Term -- a ^^ b
| Set Term Term -- a := b
deriving (Show, Eq)
-- Printing
--------------------------------------------------------------------------------
pretty :: Term -> Text
pretty = walk 100
where
walk :: Int -> Term -> Text
walk _ (Var x) = x
walk _ (Num n) = pack (show n)
walk _ (Str s) = pack (show s)
walk p (Grp e) = paren p e
walk p e@(Dot a b) = lefty p e a "." b
walk p e@(Get a b)
| p < q = paren q e
| otherwise = walk q a <> bracket 100 b
where q = prec e
walk p e@(New a) = prefix p e "new " a
walk p e@(App a b) = lefty p e a "@" b
walk p e@(Inc a) = postfix p e a " ++"
walk p e@(Dec a) = postfix p e a " --"
walk p e@(Not a) = prefix p e "!" a
walk p e@(Pos a) = prefix p e "+" a
walk p e@(Neg a) = prefix p e "-" a
walk p e@(Exp a b) = righty p e a " ** " b
walk p e@(Mul a b) = lefty p e a " * " b
walk p e@(Div a b) = lefty p e a " / " b
walk p e@(Mod a b) = lefty p e a " % " b
walk p (Add a b@(Pos _))
= walk p (Add a (Grp b))
walk p e@(Add a b) = lefty p e a " + " b
walk p (Sub a b@(Neg _))
= walk p (Sub a (Grp b))
walk p e@(Sub a b) = lefty p e a " - " b
walk p e@(Shl a b) = lefty p e a " << " b
walk p e@(Shr a b) = lefty p e a " >> " b
walk p e@(Ne a b) = lefty p e a " != " b
walk p e@(Eq a b) = lefty p e a " == " b
walk p e@(Gt a b) = lefty p e a " > " b
walk p e@(Lt a b) = lefty p e a " < " b
walk p e@(Gte a b) = lefty p e a " >= " b
walk p e@(Lte a b) = lefty p e a " <= " b
walk p e@(Bnd a b) = lefty p e a " & " b
walk p e@(Bxo a b) = lefty p e a " ^ " b
walk p e@(Bor a b) = lefty p e a " | " b
walk p e@(Lnd a b) = lefty p e a " && " b
walk p e@(Lxo a b) = lefty p e a " ^^ " b
walk p e@(Lor a b) = lefty p e a " || " b
walk p e@(Set a b) = righty p e a " := " b
paren :: Int -> Term -> Text
paren p e = "(" <> walk p e <> ")"
bracket :: Int -> Term -> Text
bracket p e = "[" <> walk p e <> "]"
prefix :: Int -> Term -> Text -> Term -> Text
prefix p e op a
| p < q = paren q e
| otherwise = op <> special q a
where q = prec e
postfix :: Int -> Term -> Term -> Text -> Text
postfix p e a op
| p < q = paren q e
| otherwise = special q a <> op
where q = prec e
lefty :: Int -> Term -> Term -> Text -> Term -> Text
lefty p e a op b
| p < q = paren q e
| otherwise = walk q a <> op <> special q b
where q = prec e
righty :: Int -> Term -> Term -> Text -> Term -> Text
righty p e a op b
| p < q = paren q e
| otherwise = special q a <> op <> walk q b
where q = prec e
special :: Int -> Term -> Text
special p e
| p == prec e = paren p e
| otherwise = walk p e
prec :: Term -> Int
prec (Var _) = 0
prec (Num _) = 0
prec (Str _) = 0
prec (Grp _) = 0
prec (Dot _ _) = 1
prec (Get _ _) = 1
prec (New _) = 2
prec (App _ _) = 3
prec (Inc _) = 4
prec (Dec _) = 4
prec (Exp _ _) = 5
prec (Not _) = 6
prec (Pos _) = 6
prec (Neg _) = 6
prec (Mul _ _) = 7
prec (Div _ _) = 7
prec (Mod _ _) = 7
prec (Add _ _) = 8
prec (Sub _ _) = 8
prec (Shl _ _) = 9
prec (Shr _ _) = 9
prec (Gt _ _) = 10
prec (Lt _ _) = 10
prec (Gte _ _) = 10
prec (Lte _ _) = 10
prec (Ne _ _) = 11
prec (Eq _ _) = 11
prec (Bnd _ _) = 12
prec (Bxo _ _) = 13
prec (Bor _ _) = 14
prec (Lnd _ _) = 15
prec (Lxo _ _) = 16
prec (Lor _ _) = 17
prec (Set _ _) = 18
-- Parsing
--------------------------------------------------------------------------------
parse :: Text -> Term
parse t = either error id (parseOnly expr t)
where
expr = buildExpressionParser table term
term = next =<< trim (paren expr <|> num <|> var <|> str)
next e = (next =<< Get e <$> bracket expr) <|> return e
num = Num <$> decimal
var = Var <$> takeWhile1 (inClass "a-zA-Z")
str = Str <$> (char '"' *> takeWhile (inClass "a-zA-Z0-9 ") <* char '"')
table = [[ Infix (op "." Dot) AssocLeft ]
,[ Prefix (op "new " New) ]
,[ Infix (op "@" App) AssocLeft ]
,[ Postfix (op "++" Inc)
, Postfix (op "--" Dec) ]
,[ Infix (op "**" Exp) AssocRight ]
,[ Prefix (op "!" Not)
, Prefix (op "+" Pos)
, Prefix (op "-" Neg) ]
,[ Infix (op "*" Mul) AssocLeft
, Infix (op "/" Div) AssocLeft
, Infix (op "%" Mod) AssocLeft ]
,[ Infix (op "+" Add) AssocLeft
, Infix (op "-" Sub) AssocLeft ]
,[ Infix (op "<<" Shl) AssocLeft
, Infix (op ">>" Shr) AssocLeft ]
,[ Infix (op ">=" Gte) AssocLeft
, Infix (op "<=" Lte) AssocLeft
, Infix (op ">" Gt) AssocLeft
, Infix (op "<" Lt) AssocLeft ]
,[ Infix (op "!=" Ne) AssocLeft
, Infix (op "==" Eq) AssocLeft ]
,[ Infix (op "&" Bnd) AssocLeft ]
,[ Infix (op "^" Bxo) AssocLeft ]
,[ Infix (op "|" Bor) AssocLeft ]
,[ Infix (op "&&" Lnd) AssocLeft ]
,[ Infix (op "^^" Lxo) AssocLeft ]
,[ Infix (op "||" Lor) AssocLeft ]
,[ Infix (op ":=" Set) AssocRight ]]
op s ctor = trim (string s) *> return ctor
trim p = skipSpace *> p <* skipSpace
paren p = char '(' *> p <* char ')'
bracket p = char '[' *> p <* char ']'
-- Testing
--------------------------------------------------------------------------------
normalize :: Term -> Term
normalize = walk
where
walk (Var x) = Var x
walk (Num n) = Num n
walk (Str s) = Str s
walk (Grp e) = e
walk (Dot a b) = Dot (walk a) (walk b)
walk (Get a b) = Get (walk a) (walk b)
walk (New a) = New (walk a)
walk (App a b) = App (walk a) (walk b)
walk (Inc a) = Inc (walk a)
walk (Dec a) = Dec (walk a)
walk (Not a) = Not (walk a)
walk (Pos a) = Pos (walk a)
walk (Neg a) = Neg (walk a)
walk (Exp a b) = Exp (walk a) (walk b)
walk (Mul a (Mul b c))
= Mul (Mul (walk a) (walk b)) (walk c)
walk (Mul a b) = Mul (walk a) (walk b)
walk (Div a b) = Div (walk a) (walk b)
walk (Mod a b) = Mod (walk a) (walk b)
walk (Add a (Add b c))
= Add (Add (walk a) (walk b)) (walk c)
walk (Add a b) = Add (walk a) (walk b)
walk (Sub a b) = Sub (walk a) (walk b)
walk (Shl a b) = Shl (walk a) (walk b)
walk (Shr a b) = Shr (walk a) (walk b)
walk (Ne a b) = Ne (walk a) (walk b)
walk (Eq a b) = Eq (walk a) (walk b)
walk (Gt a b) = Gt (walk a) (walk b)
walk (Lt a b) = Lt (walk a) (walk b)
walk (Gte a b) = Gte (walk a) (walk b)
walk (Lte a b) = Lte (walk a) (walk b)
walk (Bnd a (Bnd b c))
= Add (Add (walk a) (walk b)) (walk c)
walk (Bnd a b) = Bnd (walk a) (walk b)
walk (Bxo a (Bxo b c))
= Bxo (Bxo (walk a) (walk b)) (walk c)
walk (Bxo a b) = Bxo (walk a) (walk b)
walk (Bor a (Bor b c))
= Bor (Bor (walk a) (walk b)) (walk c)
walk (Bor a b) = Bor (walk a) (walk b)
walk (Lnd a (Lnd b c))
= Lnd (Lnd (walk a) (walk b)) (walk c)
walk (Lnd a b) = Lnd (walk a) (walk b)
walk (Lxo a (Lxo b c))
= Lxo (Lxo (walk a) (walk b)) (walk c)
walk (Lxo a b) = Lxo (walk a) (walk b)
walk (Lor a (Lor b c))
= Lor (Lor (walk a) (walk b)) (walk c)
walk (Lor a b) = Lor (walk a) (walk b)
walk (Set a b) = Set (walk a) (walk b)
newtype ChrName = ChrName { getChrName :: Char }
newtype VarName = VarName { getVarName :: Text }
newtype StrText = StrText { getStrText :: Text }
instance Arbitrary ChrName where
arbitrary = ChrName <$> oneof [choose ('A', 'Z'), choose ('a', 'z')]
instance Arbitrary VarName where
arbitrary = VarName <$> pack <$> map getChrName <$> arbitrary `suchThat` (not . null)
instance Arbitrary StrText where
arbitrary = StrText <$> pack <$> map getChrName <$> arbitrary
instance Arbitrary Term where
arbitrary = frequency [(15, Var <$> getVarName <$> arbitrary)
,(15, Num <$> arbitrary `suchThat` (>= 0))
,( 2, Str <$> getStrText <$> arbitrary)
,( 1, New <$> arbitrary)
,( 4, Get <$> arbitrary <*> arbitrary)
,( 3, App <$> arbitrary <*> arbitrary)
,( 3, Inc <$> arbitrary)
,( 3, Dec <$> arbitrary)
,( 3, Not <$> arbitrary)
,( 3, Pos <$> arbitrary)
,( 3, Neg <$> arbitrary)
,( 1, Exp <$> arbitrary <*> arbitrary)
,( 1, Mul <$> arbitrary <*> arbitrary)
,( 1, Div <$> arbitrary <*> arbitrary)
,( 1, Mod <$> arbitrary <*> arbitrary)
,( 1, Add <$> arbitrary <*> arbitrary)
,( 1, Sub <$> arbitrary <*> arbitrary)
,( 1, Shl <$> arbitrary <*> arbitrary)
,( 1, Shr <$> arbitrary <*> arbitrary)
,( 1, Ne <$> arbitrary <*> arbitrary)
,( 1, Eq <$> arbitrary <*> arbitrary)
,( 1, Gt <$> arbitrary <*> arbitrary)
,( 1, Lt <$> arbitrary <*> arbitrary)
,( 1, Gte <$> arbitrary <*> arbitrary)
,( 1, Lte <$> arbitrary <*> arbitrary)
,( 1, Bnd <$> arbitrary <*> arbitrary)
,( 1, Bxo <$> arbitrary <*> arbitrary)
,( 1, Bor <$> arbitrary <*> arbitrary)
,( 1, Lnd <$> arbitrary <*> arbitrary)
,( 1, Lxo <$> arbitrary <*> arbitrary)
,( 1, Lor <$> arbitrary <*> arbitrary)
,( 1, Set <$> arbitrary <*> arbitrary)]
-- parse . pretty == id
testReparse :: Term -> Bool
testReparse ast = ast == parse (pretty ast)
-- Run the test: quickCheck testReparse
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment