Skip to content

Instantly share code, notes, and snippets.

@heath
Created March 21, 2013 02:50
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save heath/5210339 to your computer and use it in GitHub Desktop.
Save heath/5210339 to your computer and use it in GitHub Desktop.
HAML Parsers in Haskell
module Main where
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Language
import Text.ParserCombinators.Parsec.Pos
import qualified Text.ParserCombinators.Parsec.Token as T
import Data.Char
import Data.List
import Data.Maybe
import System.IO.Unsafe
main = do s <- getContents
case (parse mainParser "stdin" s) of
Left err -> putStrLn "Error: " >> print err
Right hs -> putStrLn hs
-- Try to parse HAML, otherwise re-output raw lines
mainParser = do whiteSpace
ls <- many1 (hamlCode <|> tilEOL)
return $ unlines ls
--
-- * HAML lexer
--
hamlLexer = T.makeTokenParser emptyDef
whiteSpace= T.whiteSpace hamlLexer
lexeme = T.lexeme hamlLexer
symbol = T.symbol hamlLexer
natural = T.natural hamlLexer
parens = T.parens hamlLexer
semi = T.semi hamlLexer
squares = T.squares hamlLexer
stringLiteral= T.stringLiteral hamlLexer
identifier= T.identifier hamlLexer
reserved = T.reserved hamlLexer
reservedOp= T.reservedOp hamlLexer
commaSep1 = T.commaSep1 hamlLexer
--
-- * Main HAML parsers
--
-- hamlCode is just many identifiers followed by = followed by a hamlBlock
-- f a b c = %somehaml
hamlCode = try ( do is <- many1 identifier
symbol "="
currentPos <- getPosition
x <- manyTill1
(lexeme $ hamlBlock)
(notSameIndent currentPos)
return $ (concat $ intersperse " " is) ++
" = \n" ++
(concat $ (intersperse (indent currentPos ++ "+++\n") $ filter (not . null) $ x))
)
-- A Block may start with some whitespace, then has a valid bit of data
hamlBlock = do currentPos <- getPosition
bs <- manyTill1
(pTag <|> pText)
(notSameIndent currentPos)
return $ intercalate (indent currentPos ++ "+++\n") bs
pTag = do currentPos <- getPosition
try
(do t <- lexeme tagParser
ts <- (isInline currentPos >> char '/' >> return []) <|>
(hamlBlock)
return $ intercalate "\n" $ filter (not . null) $
[ (indent currentPos) ++ "((" ++ (if (null ts) then "i" else "") ++ t ++ ")"
, if null ts then [] else ts
, (indent currentPos) ++ ")\n"]
)
pText = lexeme stringParser
notSameIndent p = (eof >> return []) <|>
(do innerPos <- getPosition
case (sourceColumn p) == (sourceColumn innerPos) of
True -> pzero
False -> return []
)
--
-- * Various little parsers
--
tagParser :: CharParser () String
tagParser = do t <- optionMaybe tagParser'
i <- optionMaybe idParser
c <- optionMaybe (many1 classParser)
a <- optionMaybe attributesParser
if (isJust t || isJust i || isJust c || isJust a)
then
do return $ "tag \"" ++ (fromMaybe "div" t) ++ "\"" ++
(if not (isJust i || isJust c || isJust a) then "" else
concat $
[ "!["
, intercalate ", " $ filter (not . null)
[ (maybe "" (\i' -> "strAttr \"id\" \"" ++ i' ++ "\"") i)
, (maybe "" (\c' -> "strAttr \"class\" \"" ++ (intercalate " " c') ++ "\"") c)
, (maybe "" (\kv -> intercalate ", " $ map (\(k,v) -> "strAttr \"" ++ k ++ "\" (" ++ v ++ ")") kv) a)
]
, "]"]
)
else pzero
tagParser' :: CharParser () String
tagParser' = do char '%'
many1 termChar
idParser :: CharParser () String
idParser = do char '#'
many1 termChar
classParser :: CharParser () String
classParser = do char '.'
many1 termChar
attributesParser :: CharParser () [(String, String)]
attributesParser = squares (commaSep1 attributeParser)
attributeParser :: CharParser () (String, String)
attributeParser = do k <- identifier
symbol "="
cs <- many1 identifier
return (k, intercalate " " cs)
stringParser :: CharParser () String
stringParser = do currentPos <- getPosition
modifier <- optionMaybe (char '=' <|> char '-')
whiteSpace
c <- alphaNum
cs<- tilEOL
case modifier of
Just '-' -> return $ (indent currentPos) ++ "-" ++ c:cs
Just '=' -> return $ (indent currentPos) ++ "(stringToHtml " ++ c:cs ++ ")"
Nothing -> return $ (indent currentPos) ++ "(stringToHtml \"" ++ c:cs ++ "\")"
--
-- * Utility functions
--
isInline p = do p2 <- getPosition
case (sourceLine p ) == (sourceLine p2) of
True -> return []
False -> pzero
isSameIndent p1 p2 = (sourceColumn p1) == (sourceColumn p2)
tilEOL = manyTill1 (noneOf "\n") eol
eol = newline <|> (eof >> return '\n')
termChar = satisfy (\c -> (isAlphaNum c) || (c `elem` termPunctuation) )
termPunctuation = "-_"
indent p = take (sourceColumn (p) - 1) (repeat ' ')
manyTill1 p e = do ms <- manyTill p e
case (null ms) of
True -> pzero
False -> return ms
module Main where
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Expr
import Text.ParserCombinators.Parsec.Language
import Text.ParserCombinators.Parsec.Perm
import qualified Text.ParserCombinators.Parsec.Token as T
import Data.Monoid
import Control.Applicative hiding (many, (<|>))
type StatefulCharParser a = CharParser Int a
type TagContent = String
data TagIdentifier = Class String | Id String | TagName String deriving (Show)
data Tag = MultiLineTag [TagIdentifier] | SingleLineTag [TagIdentifier] TagContent deriving (Show)
data Tree = BranchingNode String [Tree]
| Node String
deriving (Show)
lexer = T.makeTokenParser haskellDef
ident = T.identifier lexer
identifier = T.identifier lexer
dot = T.dot lexer
whiteSpace = T.whiteSpace lexer
lexeme = T.lexeme lexer
symbol = T.symbol lexer
natural = T.natural lexer
parens = T.parens lexer
semi = T.semi lexer
reservedOp = T.reservedOp lexer
whitespace = T.whiteSpace lexer
eol = lexeme (string "\n")
word :: CharParser Int String
word = do
many1 letter
manyTill1 p e = do ms <- manyTill p e
case (null ms) of
True -> pzero
False -> return ms
parseFile p file =
do
content <- readFile file
run p content
run p input
= case runParser p 0 "" input of
Right n -> putStrLn (concat (map (spitTree "") n))
Left err -> do{ putStr "parse error at "
; print err }
total :: StatefulCharParser [Tree]
total = do many1 consumeTopLevelChunk
consumeTopLevelChunk = do { setState 0
; topLevelChunk }
topLevelChunk :: StatefulCharParser Tree
topLevelChunk = try (do { spaces <- consumeSpaces
; line <- (manyTill anyChar (string "\n"))
; branches <- many1 doSubChunk
; return (BranchingNode line branches)
} )
<|>
do {consumeSpaces
; line <- (manyTill anyChar (string "\n"))
; return (Node line)}
doSubChunk :: StatefulCharParser Tree
doSubChunk = do
spaces <- getState
-- Check the next indentation level down
setState $ spaces + 2
chunk <- topLevelChunk
-- Reset back to the old indentation level
setState $ spaces
return chunk
consumeSpaces = try (do { spaces <- getState
; count spaces (string " ")
; return spaces })
spitTree :: String -> Tree -> String
spitTree prefix (BranchingNode content branches) = prefix ++ content ++ "\n" ++ (concat (map (spitTree (prefix ++ " ")) branches))
spitTree prefix (Node content) = prefix ++ content ++ "\n"
-- line :: CharParser Int String
-- line = do{ indentCount <- (countParser (string " "))
-- ; updateState (+1)
-- ; many letter
-- }
-- myIdentifier = identifier lexer
-- simple :: Parser Char
-- simple = letter
-- openClose :: Parser Char
-- openClose = do { char '('
-- ; char ')'
-- }
-- parens :: Parser ()
-- parens = do
-- char '('
-- parens
-- char ')'
-- parens
-- <|>
-- return ()
-- nesting :: Parser Int
-- nesting = do
-- char '('
-- n <- nesting
-- char ')'
-- m <- nesting
-- return (max (n+1) m)
-- <|>
-- return 0
-- word :: Parser String
-- word = many1 (letter <?> "") <?> "word"
-- separator :: Parser ()
-- separator = skipMany1 (space <|> char ',' <?> "")
-- sentence :: Parser [String]
-- sentence = do{ words <- sepBy1 word separator
-- ; oneOf ".?!" <?> "end of sentence"
-- ; return words
-- }
-- -- sentences :: Parser [[String]]
-- -- sentences = do{ sentences <- sepBy1 sentence separator
-- -- ; return sentences
-- -- }
-- expr :: Parser Integer
-- expr = buildExpressionParser table factor <?> "expression"
-- table = [[op "*" (*) AssocLeft, op "/" div AssocLeft],
-- [op "+" (+) AssocLeft, op "-" (-) AssocLeft]]
-- where op s f assoc = Infix (do{ reservedOp s; return f}) assoc
-- factor = parens expr
-- <|> natural
-- <?> "simple expression"
-- number :: Parser Integer
-- number = do{ ds <- many1 digit
-- ; return (read ds)
-- }
-- <?> "number"
-- klasses :: Parser [String]
-- klasses = do { many1 klass } <?> "classes"
-- klass :: Parser String
-- klass = do { dot
-- ; identifier
-- }
-- <?> "class"
-- -- blocks consisting of blocks or lines
-- -- lines consist of tag names, and/or class names and/or ids
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment