heath/kemp.hs

## kemp.hs
module Main where

import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Language
import Text.ParserCombinators.Parsec.Pos
import qualified Text.ParserCombinators.Parsec.Token as T
import Data.Char
import Data.List
import Data.Maybe
import System.IO.Unsafe

main = do s <- getContents
          case (parse mainParser "stdin" s) of
            Left  err -> putStrLn "Error: " >> print err
            Right hs  -> putStrLn hs

-- Try to parse HAML, otherwise re-output raw lines

mainParser = do whiteSpace
                ls <- many1 (hamlCode <|> tilEOL)
                return $ unlines ls
--
-- * HAML lexer
--
hamlLexer = T.makeTokenParser emptyDef
whiteSpace= T.whiteSpace hamlLexer
lexeme    = T.lexeme hamlLexer
symbol    = T.symbol hamlLexer
natural   = T.natural hamlLexer
parens    = T.parens hamlLexer
semi      = T.semi hamlLexer
squares   = T.squares hamlLexer
stringLiteral= T.stringLiteral hamlLexer
identifier= T.identifier hamlLexer
reserved  = T.reserved hamlLexer
reservedOp= T.reservedOp hamlLexer
commaSep1 = T.commaSep1 hamlLexer
--
-- * Main HAML parsers
--

-- hamlCode is just many identifiers followed by = followed by a hamlBlock
-- f a b c = %somehaml
hamlCode = try ( do is <- many1 identifier
                    symbol "="
                    currentPos <- getPosition
                    x <- manyTill1
                          (lexeme $ hamlBlock)
                          (notSameIndent currentPos)
                    return $ (concat $ intersperse " " is) ++
                             " = \n" ++
                             (concat $ (intersperse (indent currentPos ++ "+++\n")  $ filter (not . null) $ x))
                  )

-- A Block may start with some whitespace, then has a valid bit of data
hamlBlock   = do currentPos <- getPosition
                 bs <- manyTill1
                      (pTag <|> pText)
                      (notSameIndent currentPos)
                 return $ intercalate (indent currentPos ++ "+++\n") bs

pTag    = do    currentPos <- getPosition
                try
                    (do t  <- lexeme tagParser
                        ts <- (isInline currentPos >> char '/' >> return []) <|>
                              (hamlBlock)
                        return $ intercalate "\n" $ filter (not . null) $
                          [ (indent currentPos) ++ "((" ++ (if (null ts) then "i" else "") ++ t  ++ ")"
                          , if null ts then [] else ts
                          , (indent currentPos) ++ ")\n"]
                    )

pText = lexeme stringParser

notSameIndent p = (eof >> return []) <|>
                  (do innerPos <- getPosition
                      case (sourceColumn p) == (sourceColumn innerPos) of
                                True  -> pzero
                                False -> return []
                  )

--
-- * Various little parsers
--

tagParser :: CharParser () String
tagParser = do     t <- optionMaybe tagParser'
                   i <- optionMaybe idParser
                   c <- optionMaybe (many1 classParser)
                   a <- optionMaybe attributesParser
                   if (isJust t || isJust i || isJust c || isJust a)
                     then
                       do return $ "tag \"" ++ (fromMaybe "div" t) ++ "\"" ++
                           (if not (isJust i || isJust c || isJust a) then "" else
                              concat $
                               [ "!["
                               , intercalate ", " $ filter (not . null)
                                   [ (maybe "" (\i' -> "strAttr \"id\" \"" ++ i' ++ "\"") i)
                                   , (maybe "" (\c' -> "strAttr \"class\" \"" ++ (intercalate " " c') ++ "\"") c)
                                   , (maybe "" (\kv -> intercalate ", " $ map (\(k,v) -> "strAttr \"" ++ k ++ "\" (" ++ v ++ ")") kv) a)
                                   ]
                               , "]"]
                           )
                     else pzero

tagParser' :: CharParser () String
tagParser' =  do char '%'
                 many1 termChar

idParser :: CharParser () String
idParser = do char '#'
              many1 termChar

classParser :: CharParser () String
classParser = do char '.'
                 many1 termChar

attributesParser :: CharParser () [(String, String)]
attributesParser = squares (commaSep1 attributeParser)

attributeParser :: CharParser () (String, String)
attributeParser = do k <- identifier
                     symbol "="
                     cs <- many1 identifier
                     return (k, intercalate " " cs)

stringParser :: CharParser () String
stringParser = do   currentPos <- getPosition
                    modifier <- optionMaybe (char '=' <|> char '-')
                    whiteSpace
                    c <- alphaNum
                    cs<- tilEOL
                    case modifier of
                      Just '-' -> return $ (indent currentPos) ++ "-" ++ c:cs
                      Just '=' -> return $ (indent currentPos) ++ "(stringToHtml " ++ c:cs ++ ")"
                      Nothing  -> return $ (indent currentPos) ++ "(stringToHtml \"" ++ c:cs ++ "\")"


--
-- * Utility functions
--

isInline     p = do p2 <- getPosition
                    case (sourceLine p  ) == (sourceLine p2) of
                      True -> return []
                      False -> pzero
isSameIndent p1 p2 = (sourceColumn p1) == (sourceColumn p2)

tilEOL = manyTill1 (noneOf "\n") eol
eol = newline <|> (eof >> return '\n')

termChar = satisfy (\c -> (isAlphaNum c) || (c `elem` termPunctuation) )
termPunctuation = "-_"
indent p = take (sourceColumn (p) - 1) (repeat ' ')

manyTill1 p e =  do ms <- manyTill p e
                    case (null ms) of
                      True  -> pzero
                      False -> return ms

## moreland.hs
module Main where
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Expr
import Text.ParserCombinators.Parsec.Language
import Text.ParserCombinators.Parsec.Perm
import qualified Text.ParserCombinators.Parsec.Token as T

import Data.Monoid
import Control.Applicative hiding (many, (<|>))

type StatefulCharParser a = CharParser Int a

type TagContent = String

data TagIdentifier = Class String | Id String | TagName String deriving (Show)

data Tag = MultiLineTag [TagIdentifier] | SingleLineTag [TagIdentifier] TagContent deriving (Show)

data Tree = BranchingNode String [Tree]
          | Node String
          deriving (Show)

lexer = T.makeTokenParser haskellDef
ident = T.identifier    lexer

identifier = T.identifier lexer
dot = T.dot lexer
whiteSpace = T.whiteSpace lexer
lexeme = T.lexeme lexer
symbol = T.symbol lexer
natural = T.natural lexer
parens = T.parens lexer
semi = T.semi lexer
reservedOp = T.reservedOp lexer
whitespace = T.whiteSpace lexer
eol = lexeme (string "\n")

word :: CharParser Int String
word = do
  many1 letter

manyTill1 p e =  do ms <- manyTill p e
                    case (null ms) of
                      True  -> pzero
                      False -> return ms

parseFile p file =
  do
    content <- readFile file
    run p content

run p input
  = case runParser p 0 "" input of
      Right n  -> putStrLn (concat (map (spitTree "") n))
      Left err -> do{ putStr "parse error at "
; print err }


total :: StatefulCharParser [Tree]
total = do many1 consumeTopLevelChunk

consumeTopLevelChunk = do { setState 0
                          ; topLevelChunk }

topLevelChunk :: StatefulCharParser Tree
topLevelChunk = try (do { spaces <- consumeSpaces
                        ; line <- (manyTill anyChar (string "\n"))
                        ; branches <- many1 doSubChunk
                        ; return (BranchingNode line branches)
                        } )
                <|>
                do {consumeSpaces
                     ; line <- (manyTill anyChar (string "\n"))
                     ; return (Node line)}

doSubChunk :: StatefulCharParser Tree
doSubChunk = do
  spaces <- getState
  -- Check the next indentation level down
  setState $ spaces + 2
  chunk <- topLevelChunk
  -- Reset back to the old indentation level
  setState $ spaces
  return chunk

consumeSpaces = try (do { spaces <- getState
                        ; count spaces (string " ")
                        ; return spaces })


spitTree :: String -> Tree -> String
spitTree prefix (BranchingNode content branches) = prefix ++ content ++ "\n" ++ (concat (map (spitTree (prefix ++ "  ")) branches))
spitTree prefix (Node content) = prefix ++ content ++ "\n"


-- line :: CharParser Int String
-- line = do{ indentCount <- (countParser (string " "))
--          ; updateState (+1)
--          ; many letter
--          }


-- myIdentifier = identifier lexer

-- simple :: Parser Char
-- simple = letter

-- openClose :: Parser Char
-- openClose = do { char '('
--                ; char ')'
--                }

-- parens :: Parser ()
-- parens = do
--   char '('
--   parens
--   char ')'
--   parens
--   <|>
--     return ()

-- nesting :: Parser Int
-- nesting = do
--   char '('
--   n <- nesting
--   char ')'
--   m <- nesting
--   return (max (n+1) m)
--   <|>
--     return 0

-- word :: Parser String
-- word = many1 (letter <?> "") <?> "word"

-- separator :: Parser ()
-- separator = skipMany1 (space <|> char ',' <?> "")

-- sentence :: Parser [String]
-- sentence = do{ words <- sepBy1 word separator
--              ; oneOf ".?!" <?> "end of sentence"
--              ; return words
--              }

-- -- sentences :: Parser [[String]]
-- -- sentences = do{ sentences <- sepBy1 sentence separator
-- --               ; return sentences
-- --               }

-- expr :: Parser Integer
-- expr = buildExpressionParser table factor <?> "expression"

-- table = [[op "*" (*) AssocLeft, op "/" div AssocLeft],
--          [op "+" (+) AssocLeft, op "-" (-) AssocLeft]]
--           where op s f assoc = Infix (do{ reservedOp s; return f}) assoc

-- factor = parens expr
--          <|> natural
--          <?> "simple expression"

-- number :: Parser Integer
-- number = do{ ds <- many1 digit
--            ; return (read ds)
--            }
--          <?> "number"


-- klasses :: Parser [String]
-- klasses = do { many1 klass } <?> "classes"

-- klass :: Parser String
-- klass = do { dot
--            ; identifier
--            }
--         <?> "class"

-- -- blocks consisting of blocks or lines
-- -- lines consist of tag names, and/or class names and/or ids
	module Main where

	import Text.ParserCombinators.Parsec
	import Text.ParserCombinators.Parsec.Language
	import Text.ParserCombinators.Parsec.Pos
	import qualified Text.ParserCombinators.Parsec.Token as T
	import Data.Char
	import Data.List
	import Data.Maybe
	import System.IO.Unsafe

	main = do s <- getContents
	case (parse mainParser "stdin" s) of
	Left err -> putStrLn "Error: " >> print err
	Right hs -> putStrLn hs

	-- Try to parse HAML, otherwise re-output raw lines

	mainParser = do whiteSpace
	ls <- many1 (hamlCode <\|> tilEOL)
	return $ unlines ls
	--
	-- * HAML lexer
	--
	hamlLexer = T.makeTokenParser emptyDef
	whiteSpace= T.whiteSpace hamlLexer
	lexeme = T.lexeme hamlLexer
	symbol = T.symbol hamlLexer
	natural = T.natural hamlLexer
	parens = T.parens hamlLexer
	semi = T.semi hamlLexer
	squares = T.squares hamlLexer
	stringLiteral= T.stringLiteral hamlLexer
	identifier= T.identifier hamlLexer
	reserved = T.reserved hamlLexer
	reservedOp= T.reservedOp hamlLexer
	commaSep1 = T.commaSep1 hamlLexer
	--
	-- * Main HAML parsers
	--

	-- hamlCode is just many identifiers followed by = followed by a hamlBlock
	-- f a b c = %somehaml
	hamlCode = try ( do is <- many1 identifier
	symbol "="
	currentPos <- getPosition
	x <- manyTill1
	(lexeme $ hamlBlock)
	(notSameIndent currentPos)
	return $ (concat $ intersperse " " is) ++
	" = \n" ++
	(concat $ (intersperse (indent currentPos ++ "+++\n") $ filter (not . null) $ x))
	)

	-- A Block may start with some whitespace, then has a valid bit of data
	hamlBlock = do currentPos <- getPosition
	bs <- manyTill1
	(pTag <\|> pText)
	(notSameIndent currentPos)
	return $ intercalate (indent currentPos ++ "+++\n") bs

	pTag = do currentPos <- getPosition
	try
	(do t <- lexeme tagParser
	ts <- (isInline currentPos >> char '/' >> return []) <\|>
	(hamlBlock)
	return $ intercalate "\n" $ filter (not . null) $
	[ (indent currentPos) ++ "((" ++ (if (null ts) then "i" else "") ++ t ++ ")"
	, if null ts then [] else ts
	, (indent currentPos) ++ ")\n"]
	)

	pText = lexeme stringParser

	notSameIndent p = (eof >> return []) <\|>
	(do innerPos <- getPosition
	case (sourceColumn p) == (sourceColumn innerPos) of
	True -> pzero
	False -> return []
	)

	--
	-- * Various little parsers
	--

	tagParser :: CharParser () String
	tagParser = do t <- optionMaybe tagParser'
	i <- optionMaybe idParser
	c <- optionMaybe (many1 classParser)
	a <- optionMaybe attributesParser
	if (isJust t \|\| isJust i \|\| isJust c \|\| isJust a)
	then
	do return $ "tag \"" ++ (fromMaybe "div" t) ++ "\"" ++
	(if not (isJust i \|\| isJust c \|\| isJust a) then "" else
	concat $
	[ "!["
	, intercalate ", " $ filter (not . null)
	[ (maybe "" (\i' -> "strAttr \"id\" \"" ++ i' ++ "\"") i)
	, (maybe "" (\c' -> "strAttr \"class\" \"" ++ (intercalate " " c') ++ "\"") c)
	, (maybe "" (\kv -> intercalate ", " $ map (\(k,v) -> "strAttr \"" ++ k ++ "\" (" ++ v ++ ")") kv) a)
	]
	, "]"]
	)
	else pzero

	tagParser' :: CharParser () String
	tagParser' = do char '%'
	many1 termChar

	idParser :: CharParser () String
	idParser = do char '#'
	many1 termChar

	classParser :: CharParser () String
	classParser = do char '.'
	many1 termChar

	attributesParser :: CharParser () [(String, String)]
	attributesParser = squares (commaSep1 attributeParser)

	attributeParser :: CharParser () (String, String)
	attributeParser = do k <- identifier
	symbol "="
	cs <- many1 identifier
	return (k, intercalate " " cs)

	stringParser :: CharParser () String
	stringParser = do currentPos <- getPosition
	modifier <- optionMaybe (char '=' <\|> char '-')
	whiteSpace
	c <- alphaNum
	cs<- tilEOL
	case modifier of
	Just '-' -> return $ (indent currentPos) ++ "-" ++ c:cs
	Just '=' -> return $ (indent currentPos) ++ "(stringToHtml " ++ c:cs ++ ")"
	Nothing -> return $ (indent currentPos) ++ "(stringToHtml \"" ++ c:cs ++ "\")"


	--
	-- * Utility functions
	--

	isInline p = do p2 <- getPosition
	case (sourceLine p ) == (sourceLine p2) of
	True -> return []
	False -> pzero
	isSameIndent p1 p2 = (sourceColumn p1) == (sourceColumn p2)

	tilEOL = manyTill1 (noneOf "\n") eol
	eol = newline <\|> (eof >> return '\n')

	termChar = satisfy (\c -> (isAlphaNum c) \|\| (c `elem` termPunctuation) )
	termPunctuation = "-_"
	indent p = take (sourceColumn (p) - 1) (repeat ' ')

	manyTill1 p e = do ms <- manyTill p e
	case (null ms) of
	True -> pzero
	False -> return ms
	module Main where
	import Text.ParserCombinators.Parsec
	import Text.ParserCombinators.Parsec.Expr
	import Text.ParserCombinators.Parsec.Language
	import Text.ParserCombinators.Parsec.Perm
	import qualified Text.ParserCombinators.Parsec.Token as T

	import Data.Monoid
	import Control.Applicative hiding (many, (<\|>))

	type StatefulCharParser a = CharParser Int a

	type TagContent = String

	data TagIdentifier = Class String \| Id String \| TagName String deriving (Show)

	data Tag = MultiLineTag [TagIdentifier] \| SingleLineTag [TagIdentifier] TagContent deriving (Show)

	data Tree = BranchingNode String [Tree]
	\| Node String
	deriving (Show)

	lexer = T.makeTokenParser haskellDef
	ident = T.identifier lexer

	identifier = T.identifier lexer
	dot = T.dot lexer
	whiteSpace = T.whiteSpace lexer
	lexeme = T.lexeme lexer
	symbol = T.symbol lexer
	natural = T.natural lexer
	parens = T.parens lexer
	semi = T.semi lexer
	reservedOp = T.reservedOp lexer
	whitespace = T.whiteSpace lexer
	eol = lexeme (string "\n")

	word :: CharParser Int String
	word = do
	many1 letter

	manyTill1 p e = do ms <- manyTill p e
	case (null ms) of
	True -> pzero
	False -> return ms

	parseFile p file =
	do
	content <- readFile file
	run p content

	run p input
	= case runParser p 0 "" input of
	Right n -> putStrLn (concat (map (spitTree "") n))
	Left err -> do{ putStr "parse error at "
	; print err }


	total :: StatefulCharParser [Tree]
	total = do many1 consumeTopLevelChunk

	consumeTopLevelChunk = do { setState 0
	; topLevelChunk }

	topLevelChunk :: StatefulCharParser Tree
	topLevelChunk = try (do { spaces <- consumeSpaces
	; line <- (manyTill anyChar (string "\n"))
	; branches <- many1 doSubChunk
	; return (BranchingNode line branches)
	} )
	<\|>
	do {consumeSpaces
	; line <- (manyTill anyChar (string "\n"))
	; return (Node line)}

	doSubChunk :: StatefulCharParser Tree
	doSubChunk = do
	spaces <- getState
	-- Check the next indentation level down
	setState $ spaces + 2
	chunk <- topLevelChunk
	-- Reset back to the old indentation level
	setState $ spaces
	return chunk

	consumeSpaces = try (do { spaces <- getState
	; count spaces (string " ")
	; return spaces })


	spitTree :: String -> Tree -> String
	spitTree prefix (BranchingNode content branches) = prefix ++ content ++ "\n" ++ (concat (map (spitTree (prefix ++ " ")) branches))
	spitTree prefix (Node content) = prefix ++ content ++ "\n"



	-- line :: CharParser Int String
	-- line = do{ indentCount <- (countParser (string " "))
	-- ; updateState (+1)
	-- ; many letter
	-- }



	-- myIdentifier = identifier lexer

	-- simple :: Parser Char
	-- simple = letter

	-- openClose :: Parser Char
	-- openClose = do { char '('
	-- ; char ')'
	-- }

	-- parens :: Parser ()
	-- parens = do
	-- char '('
	-- parens
	-- char ')'
	-- parens
	-- <\|>
	-- return ()

	-- nesting :: Parser Int
	-- nesting = do
	-- char '('
	-- n <- nesting
	-- char ')'
	-- m <- nesting
	-- return (max (n+1) m)
	-- <\|>
	-- return 0

	-- word :: Parser String
	-- word = many1 (letter <?> "") <?> "word"

	-- separator :: Parser ()
	-- separator = skipMany1 (space <\|> char ',' <?> "")

	-- sentence :: Parser [String]
	-- sentence = do{ words <- sepBy1 word separator
	-- ; oneOf ".?!" <?> "end of sentence"
	-- ; return words
	-- }

	-- -- sentences :: Parser [[String]]
	-- -- sentences = do{ sentences <- sepBy1 sentence separator
	-- -- ; return sentences
	-- -- }

	-- expr :: Parser Integer
	-- expr = buildExpressionParser table factor <?> "expression"

	-- table = [[op "" () AssocLeft, op "/" div AssocLeft],
	-- [op "+" (+) AssocLeft, op "-" (-) AssocLeft]]
	-- where op s f assoc = Infix (do{ reservedOp s; return f}) assoc

	-- factor = parens expr
	-- <\|> natural
	-- <?> "simple expression"

	-- number :: Parser Integer
	-- number = do{ ds <- many1 digit
	-- ; return (read ds)
	-- }
	-- <?> "number"


	-- klasses :: Parser [String]
	-- klasses = do { many1 klass } <?> "classes"

	-- klass :: Parser String
	-- klass = do { dot
	-- ; identifier
	-- }
	-- <?> "class"

	-- -- blocks consisting of blocks or lines
	-- -- lines consist of tag names, and/or class names and/or ids