Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
(WIP) parser
{-# LANGUAGE OverloadedStrings #-}
module TestScrapbox where
import Data.Maybe (isJust)
import Data.String (fromString)
import Data.Text (Text)
import qualified Data.Text as T
import Text.ParserCombinators.Parsec
data Segment =
CodeNotation !Text
| Link !(Maybe Text) !Url
| SimpleText !Text
| Hashtag !Text
deriving Show
newtype Url = Url Text
deriving Show
simpleText :: String -> Segment
simpleText = SimpleText . fromString
codeNotation :: String -> Segment
codeNotation = CodeNotation . fromString
link :: Maybe String -> String -> Segment
link mName url = Link (fromString <$> mName) (Url $ fromString url)
hashtag :: String -> Segment
hashtag = Hashtag . fromString
--------------------------------------------------------------------------------
--
--------------------------------------------------------------------------------
codeNotationParser :: Parser Segment
codeNotationParser = do
_ <- char '`'
content <- many1 (noneOf "`")
_ <- char '`'
return $ CodeNotation (fromString content)
hashTagParser :: Parser Segment
hashTagParser = do
_ <- char '#'
content <- many1 (noneOf " ")
return $ hashtag content
linkParser :: Parser Segment
linkParser = do
_ <- char '['
content <- many1 $ noneOf "]"
let eContent = parse (sepBy (many $ noneOf " ") (char ' ')) "link content parser" content
let (mName, someLink) = case eContent of
Left _ -> fail "Cannot parse link content"
Right contents ->
if length contents <= 1
then (Nothing, head contents)
else
( Just $ T.strip $ fromString $ foldr
(\some acc -> some <> " " <> acc)
mempty
(init contents)
, last contents)
_ <- char ']'
return $ Link mName (Url $ fromString someLink)
-- Want to refactor
simpleTextParser :: Parser Segment
simpleTextParser = do
content <- many1 $ noneOf "[`#"
someChar <- lookAheadMaybe [anyChar]
case someChar of
-- Nothing is behind
Nothing -> return $ simpleText content
-- Could be link so try to parse it
Just '[' -> do
mLink <- lookAheadMaybe [linkParser]
if isJust mLink
then return $ simpleText content
else do
rest <- many1 $ noneOf "`#"
return $ simpleText $ content <> rest
-- Could be code notation so try to parse it
Just '`' -> do
mCodeNotation <- lookAheadMaybe [codeNotationParser]
if isJust mCodeNotation
then return $ simpleText content
else do
rest <- many1 $ noneOf "[#"
return $ simpleText $ content <> rest
Just _ -> return $ simpleText content
where
lookAheadMaybe :: [Parser a] -> Parser (Maybe a)
lookAheadMaybe parsers = lookAhead . optionMaybe . choice $ map try parsers
segmentParser :: Parser Segment
segmentParser =
try codeNotationParser
<|> try linkParser
<|> try hashTagParser
<|> try simpleTextParser
<?> "something is wrong with section"
someParser :: Parser [Segment]
someParser = try (many1 segmentParser) <|> return []
textParser :: String -> Either ParseError [Segment]
textParser = parse someParser "Text parser"
-- > textParser "hello [hello yahoo link www.yahoo.co.jp] [hello] []"
-- Right
-- [ SimpleText "hello "
-- , Link ( Just "hello yahoo link" ) ( Url "www.yahoo.co.jp" )
-- , SimpleText " "
-- , Link Nothing ( Url "hello" )
-- , SimpleText " []"
-- ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment