Skip to content

Instantly share code, notes, and snippets.

@edsko
Last active July 26, 2019 16:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save edsko/9146b6fea00eafa231a4309bde095ed0 to your computer and use it in GitHub Desktop.
Save edsko/9146b6fea00eafa231a4309bde095ed0 to your computer and use it in GitHub Desktop.
{-# LANGUAGE RecordWildCards #-}
module SoundSeries.Util.SoupParser (
SoupParser
, parseSoup
-- * Combinators
, satisfy
, anyToken
, skipUntil
, lookupAttr
, insideTag
-- * TagSoup auxiliary
, pairWithPosition
-- *** Re-exports
, module Text.Parsec
) where
import Control.Monad
import Text.Parsec hiding (satisfy, anyToken)
import Text.StringLike
import qualified Text.HTML.TagSoup as Soup
import qualified Text.Parsec.Pos as Parsec
{-------------------------------------------------------------------------------
Internal state
-------------------------------------------------------------------------------}
data ParserState str = ParserState {
-- | Just for error messages
psSource :: FilePath
-- | Restrict use of 'satisfy' within 'insideTag' to never match against
-- any of the corresponding closing tags
, psScope :: [str]
}
initParserState :: FilePath -> ParserState str
initParserState fp = ParserState fp []
{-------------------------------------------------------------------------------
Infrastructure for parsing the tag soup
-------------------------------------------------------------------------------}
type SoupParser str = Parsec [TagWithPos str] (ParserState str)
-- | Parse tag soup
parseSoup :: SoupParser str a
-> FilePath -- ^ Used for error messages only
-> [TagWithPos str]
-> Either ParseError a
parseSoup p fp = runParser p (initParserState fp) fp
satisfy :: forall str. StringLike str
=> (Soup.Tag str -> Bool)
-> SoupParser str (Soup.Tag str)
satisfy p = do
ParserState{..} <- getState
token
(show . fmap toString . fst)
(\(_tag, (line, col)) -> Parsec.newPos psSource line col)
(\(tag, _pos) -> if p' psScope tag then Just tag else Nothing)
where
p' :: [str] -> Soup.Tag str -> Bool
p' scope (Soup.TagClose tag) | tag `elem` scope = False
p' _ tag = p tag
anyToken :: StringLike str => SoupParser str (Soup.Tag str)
anyToken = satisfy (const True)
skipUntil :: StringLike str
=> String
-> (Soup.Tag str -> Bool)
-> SoupParser str (Soup.Tag str)
skipUntil err p = try $ do
skipMany $ satisfy (not . p)
satisfy p <?> err
lookupAttr :: StringLike str
=> str
-> [Soup.Attribute str]
-> SoupParser str str
lookupAttr attr attrs =
case lookup attr attrs of
Nothing -> parserFail $ "Attribute '" ++ toString attr ++ "' not found"
Just val -> return val
-- | Parse inside tag
--
-- E.g., given
--
-- > <ol>...</ol>
--
-- parse the stuff on the dots.
insideTag :: forall str a. StringLike str
=> str -> SoupParser str a -> SoupParser str a
insideTag tag = between openTag closeTag
where
openTag, closeTag :: SoupParser str ()
openTag = do
void $ skipUntil ("<" ++ toString tag ++ ">") (Soup.isTagOpenName tag)
modifyState $ \ps -> ps { psScope = tag : psScope ps }
closeTag = do
modifyState $ \ps -> ps { psScope = tail (psScope ps) }
void $ skipUntil ("</" ++ toString tag ++ ">") (Soup.isTagCloseName tag)
{-------------------------------------------------------------------------------
Additional TagSoup infrastructure
-------------------------------------------------------------------------------}
type TagWithPos a = (Soup.Tag a, (Int, Int))
pairWithPosition :: [Soup.Tag a] -> [TagWithPos a]
pairWithPosition = go
where
go :: [Soup.Tag a] -> [TagWithPos a]
go [] = []
go (Soup.TagPosition line col : tag : tags) = (tag, (line, col)) : go tags
go _otherwise = error "pairWithPosition: invalid"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment