Created
January 17, 2019 05:42
-
-
Save HirotoShioi/f75a547ff2730f7905a6b9a40d2d675d to your computer and use it in GitHub Desktop.
(WIP) parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE OverloadedStrings #-} | |
module TestScrapbox where | |
import Data.Maybe (isJust) | |
import Data.String (fromString) | |
import Data.Text (Text) | |
import qualified Data.Text as T | |
import Text.ParserCombinators.Parsec | |
data Segment = | |
CodeNotation !Text | |
| Link !(Maybe Text) !Url | |
| SimpleText !Text | |
| Hashtag !Text | |
deriving Show | |
newtype Url = Url Text | |
deriving Show | |
simpleText :: String -> Segment | |
simpleText = SimpleText . fromString | |
codeNotation :: String -> Segment | |
codeNotation = CodeNotation . fromString | |
link :: Maybe String -> String -> Segment | |
link mName url = Link (fromString <$> mName) (Url $ fromString url) | |
hashtag :: String -> Segment | |
hashtag = Hashtag . fromString | |
-------------------------------------------------------------------------------- | |
-- | |
-------------------------------------------------------------------------------- | |
codeNotationParser :: Parser Segment | |
codeNotationParser = do | |
_ <- char '`' | |
content <- many1 (noneOf "`") | |
_ <- char '`' | |
return $ CodeNotation (fromString content) | |
hashTagParser :: Parser Segment | |
hashTagParser = do | |
_ <- char '#' | |
content <- many1 (noneOf " ") | |
return $ hashtag content | |
linkParser :: Parser Segment | |
linkParser = do | |
_ <- char '[' | |
content <- many1 $ noneOf "]" | |
let eContent = parse (sepBy (many $ noneOf " ") (char ' ')) "link content parser" content | |
let (mName, someLink) = case eContent of | |
Left _ -> fail "Cannot parse link content" | |
Right contents -> | |
if length contents <= 1 | |
then (Nothing, head contents) | |
else | |
( Just $ T.strip $ fromString $ foldr | |
(\some acc -> some <> " " <> acc) | |
mempty | |
(init contents) | |
, last contents) | |
_ <- char ']' | |
return $ Link mName (Url $ fromString someLink) | |
-- Want to refactor | |
simpleTextParser :: Parser Segment | |
simpleTextParser = do | |
content <- many1 $ noneOf "[`#" | |
someChar <- lookAheadMaybe [anyChar] | |
case someChar of | |
-- Nothing is behind | |
Nothing -> return $ simpleText content | |
-- Could be link so try to parse it | |
Just '[' -> do | |
mLink <- lookAheadMaybe [linkParser] | |
if isJust mLink | |
then return $ simpleText content | |
else do | |
rest <- many1 $ noneOf "`#" | |
return $ simpleText $ content <> rest | |
-- Could be code notation so try to parse it | |
Just '`' -> do | |
mCodeNotation <- lookAheadMaybe [codeNotationParser] | |
if isJust mCodeNotation | |
then return $ simpleText content | |
else do | |
rest <- many1 $ noneOf "[#" | |
return $ simpleText $ content <> rest | |
Just _ -> return $ simpleText content | |
where | |
lookAheadMaybe :: [Parser a] -> Parser (Maybe a) | |
lookAheadMaybe parsers = lookAhead . optionMaybe . choice $ map try parsers | |
segmentParser :: Parser Segment | |
segmentParser = | |
try codeNotationParser | |
<|> try linkParser | |
<|> try hashTagParser | |
<|> try simpleTextParser | |
<?> "something is wrong with section" | |
someParser :: Parser [Segment] | |
someParser = try (many1 segmentParser) <|> return [] | |
textParser :: String -> Either ParseError [Segment] | |
textParser = parse someParser "Text parser" | |
-- > textParser "hello [hello yahoo link www.yahoo.co.jp] [hello] []" | |
-- Right | |
-- [ SimpleText "hello " | |
-- , Link ( Just "hello yahoo link" ) ( Url "www.yahoo.co.jp" ) | |
-- , SimpleText " " | |
-- , Link Nothing ( Url "hello" ) | |
-- , SimpleText " []" | |
-- ] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment