Skip to content

Instantly share code, notes, and snippets.

@deflexor
Created November 14, 2015 10:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save deflexor/247f97f4a59de0de5109 to your computer and use it in GitHub Desktop.
Save deflexor/247f97f4a59de0de5109 to your computer and use it in GitHub Desktop.
stack runghc Main.hs
{-# LANGUAGE OverloadedStrings #-}
module Main where
import Debug.Trace (trace, traceM, traceStack, traceShowM)
import Control.Applicative
import Control.Monad (unless, forM_, forM, void, mzero)
import Data.Char (isAlpha, isAlphaNum)
import qualified Text.Parsec as P
import Text.Parsec.Text
import qualified Data.Text as T
import Data.Text.IO as TIO
data Elem = Tag { tagName :: T.Text
, tagAttrs :: [(T.Text, T.Text)]
, tagChildren :: [Elem] }
| Comment T.Text
| TagText T.Text
| TagHdr (Maybe T.Text)
deriving (Eq, Read, Show)
nonClosedTags = ["link", "meta", "input", "img", "br"]
nametoken :: Parser T.Text
nametoken = do
part1 <- P.letter
part2 <- P.many (P.alphaNum <|> P.char '-' <|> P.char '.')
return $ T.cons part1 (T.pack part2)
tagattr :: Parser (T.Text, T.Text)
tagattr = do
name <- nametoken
val <- tagattrval
return (name, val)
tagattrval :: Parser T.Text
tagattrval = do
P.spaces
P.char '='
P.choice [quotedval, nametoken]
quotedval :: Parser T.Text
quotedval = do
q <- P.satisfy (\c -> c == '"' || c == '\'')
val <- P.many $ P.satisfy (/= q)
P.skipMany (P.char q)
return $ T.pack val
closetag :: T.Text -> Parser ()
closetag name = do
P.char '<'
P.char '/' P.<?> "closing tag indicator"
P.string $ T.unpack name
void $ P.char '>'
textual :: Parser Elem
textual = do
text <- P.many $ P.satisfy (/= '<')
if null text then mzero else return $ TagText $ T.pack text
childtags :: T.Text -> Parser [Elem]
childtags name = do
ct <- P.sepBy (tagorcomment <|> textual) P.spaces
P.spaces
closetag name
return ct
comment :: Parser Elem
comment = do
P.spaces
P.string "<!--"
ctext <- T.pack <$> P.manyTill P.anyChar (P.try (P.string "-->"))
return $ Comment ctext
tagelement :: Parser Elem
tagelement = do
P.spaces
P.char '<'
name <- nametoken
traceShowM name
P.spaces
attrs <- P.sepBy tagattr P.spaces
P.spaces
selfClose <- P.option False ((const True) <$> P.char '/')
P.char '>'
childtags <- if selfClose || name `elem` nonClosedTags then
return []
else childtags name
return $ Tag name attrs childtags
tagorcomment :: Parser Elem
tagorcomment = (P.try tagelement) <|> (P.try comment)
markupParser :: Parser [Elem]
markupParser = do
tags <- P.many1 tagorcomment
P.spaces
return tags
parseMarkup :: T.Text -> Either P.ParseError [Elem]
parseMarkup = P.parse markupParser ""
testMarkup = "<html><head> \n\
\ <meta http-equiv=\"Content-Type\" content=\"text/html; charset=windows-1251\"> \n\
\ <link rel=\"shortcut icon\" href=\"/favicon.ico\"> \n\
\ <!--[if lte IE 8]> \n\
\ <link rel=\"stylesheet\" type=\"text/css\" href=\"/_all/top/ie8.css?nocache=20140901\"> \n\
\ <![endif]--> \n\
\ <script type=\"text/javascript\" src=\"/_/js/jquery.min.js\" ></script> \n\
\ </head> \n\
\ <body> \n\
\ </body></html>"
testParser = parseMarkup testMarkup
main = print testParser
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment