Skip to content

Instantly share code, notes, and snippets.

@ab9rf
Created March 20, 2013 04:48
Show Gist options
  • Save ab9rf/5202374 to your computer and use it in GitHub Desktop.
Save ab9rf/5202374 to your computer and use it in GitHub Desktop.
Grammar mangler. This code is a program I'm working on that I'm using to take the benighted grammar that PHP uses, simplify it, and infer types for nonterminals. I wrote it because I realized that the work I was doing to adapt the grammar was repetitive and tedious, so of course the only logical thing to do was write code to do it. At the moment…
module Main where
import qualified Data.Text as T
import qualified Text.Regex.PCRE.Light as R
import qualified Data.ByteString.Char8 as B
import qualified Data.Map as Map
import Control.Monad.State
import Control.Monad (liftM)
import Data.Ord (comparing)
import Data.List (sortBy, mapAccumL)
import Data.Maybe
mangle t = "PT" ++ (T.unpack (T.concat (map ucfirst pieces)))
where pieces = T.split (=='_') (T.pack t)
ucfirst s = T.append (T.toUpper f) (T.toLower b)
where (f,b) = T.splitAt 1 s
notype "LT_HEREDOC_START" = True
notype "LT_HEREDOC_END" = True
notype _ = False
lextype "LT_VARNAME" = Just "PVVariableName"
lextype "LT_IDENT" = Just "PVIdent"
lextype "LT_VARNAME_IMBED" = Just "PVVariableNameImbed"
lextype "LT_INTEGER" = Just "PVInteger"
lextype "LT_DOUBLE" = Just "PVDouble"
lextype "LT_STRING" = Just "PVString"
lextype "INLINE_HTML" = Just "PVInline"
lextype _ = Nothing
lextypes = ["PVVariableName", "PVIdent", "PVVariableNameImbed",
"PVInteger", "PVDouble", "PVString", "PVInline"]
mangleline curr line = mangleline' curr m1 m2
where r1 = R.compile (B.pack "^(\\w+)\\s*::\\s*\\{.*\\}") []
r2 = R.compile (B.pack "^\\s*[:|]\\s*(.*)") []
m1 = liftM (map $ B.unpack) (R.match r1 line [])
m2 = liftM (map $ B.unpack) (R.match r2 line [])
mangleline' st (Just [_,nonterm]) _ =
(nonterm,[]):st
mangleline' ((nt,l):rst) Nothing (Just [_,prod]) =
(nt,(l ++ [tokens])):rst
where tokens = t' prod
t' "{- empty -}" = []
t' str = map T.unpack (T.words (T.pack str))
mangleline' st Nothing Nothing = st
removeTerminals :: [String] -> [String]
removeTerminals l = filter (\s -> (head s) /= '\'') l
type TypeMap = Map.Map String String
type ProdMap = Map.Map String [[String]]
getType :: ProdMap -> String -> State TypeMap String
getType m nt = do table <- get
t <- case nt `Map.lookup` table of
Just v -> return v
Nothing -> ty p''
put (Map.insert nt t table)
return t
where p = fromJust (nt `Map.lookup` m)
p' = (map removeTerminals p)
p'' = (sortBy (comparing length) p')
ty [] = return "()"
ty [[a]] = do t <- getType m a; return t
ty [[],[nt']] | nt == nt' = do return "Int"; {- self-circular -}
ty [[],[a]] = do t <- getType m a; return ("Maybe " ++ t)
ty [[],[nt',a]] | nt == nt' = do t <- getType m a; return ("[" ++ t ++ "]")
ty _ = return (mangle nt)
makeMap :: Ord a => [(a,b)] -> Map.Map a b
makeMap l = foldl (\m (k,v) -> Map.insert k v m) Map.empty l
main = do m <- f; print m
f = do file <- B.getContents
prods <- return (foldl mangleline [] (B.lines file))
m <- return (makeMap prods)
m' <- return (evalState (mapM (getType m) (Map.keys m)) Map.empty)
return m'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment