Last active
July 14, 2023 21:19
-
-
Save nicuveo/63cef24bef03ff76c299f8d1ced72306 to your computer and use it in GitHub Desktop.
Parsec with stacktraces
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE BlockArguments #-} | |
{-# LANGUAGE ImportQualifiedPost #-} | |
{-# LANGUAGE LambdaCase #-} | |
{-# LANGUAGE OverloadedStrings #-} | |
module Main where | |
import Data.Char | |
import Data.List (intercalate) | |
import Data.Map.Strict qualified as M | |
import Text.Printf | |
import Parser | |
-- json exmaples | |
data JValue = JObject (M.Map String JValue) | |
| JArray [JValue] | |
| JString String | |
| JNumber Double | |
| JBool Bool | |
| JNull | |
instance Show JValue where | |
show = \case | |
JNull -> "null" | |
JBool b -> toLower <$> show b | |
JNumber n -> printf "%g" n | |
JString s -> printf "\"%s\"" s | |
JArray a -> printf "[%s]" $ intercalate ", " $ show <$> a | |
JObject o -> printf "{%s}" $ intercalate ", " [show k <> ": " <> show v | (k,v) <- M.toList o] | |
json = spaces >> jvalue | |
jvalue = | |
label "JSON value" $ | |
lexeme $ | |
choice | |
[ JObject <$> jobject | |
, JArray <$> jarray | |
, JString <$> jstring | |
, JNumber <$> jnumber | |
, JBool <$> jbool | |
, JNull <$ string "null" | |
] <|> expected "JSON value" | |
jbool = | |
label "JSON boolean" $ | |
lexeme $ | |
choice | |
[ True <$ string "true" | |
, False <$ string "false" | |
] | |
jarray = | |
label "JSON array" $ | |
lexeme $ | |
between (symbol "[") (symbol "]") $ | |
jvalue `sepBy` symbol "," | |
jobject = | |
label "JSON object" $ | |
lexeme $ | |
between (symbol "{") (symbol "}") $ | |
fmap M.fromList $ | |
jmember `sepBy` symbol "," | |
where | |
jmember = lexeme do | |
k <- jstring | |
symbol ":" | |
v <- jvalue | |
pure (k,v) | |
jnumber = | |
label "JSON number" $ | |
lexeme $ | |
read <$> many1 digit | |
jstring = | |
label "JSON string" $ | |
lexeme $ | |
between (char '"') (char '"') $ fmap concat $ many jchar | |
jchar = | |
choice | |
[ try $ string "\\n" | |
, try $ string "\\t" | |
, try $ string "\\\"" | |
, try $ string "\\\\" | |
, pure <$> satisfy "string character" (/= '"') | |
] | |
main = do | |
either print print $ run json "{ \"foo\":\n [ { \"bar\": 42\n , \"baz\":\n }\n ]\n}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE BlockArguments #-} | |
{-# LANGUAGE GeneralizedNewtypeDeriving #-} | |
{-# LANGUAGE ImportQualifiedPost #-} | |
{-# LANGUAGE LambdaCase #-} | |
{-# LANGUAGE OverloadedStrings #-} | |
{-# LANGUAGE ScopedTypeVariables #-} | |
{-# LANGUAGE TemplateHaskell #-} | |
{-# LANGUAGE TupleSections #-} | |
module Parser where | |
import Control.Applicative (liftA2) | |
import Control.Lens | |
import Control.Monad.Except | |
import Control.Monad.State | |
import Data.Char | |
import Data.Text (Text) | |
import Data.Text qualified as T | |
import Prelude hiding (any) | |
import Text.Printf | |
-- data types | |
type Tag = Text | |
data ParserError = ParserError | |
{ _peExpected :: Text | |
, _peFound :: Text | |
, _peStack :: [(Tag, Int, Int)] | |
} | |
makeLenses ''ParserError | |
instance Show ParserError where | |
show (ParserError e f s) = unlines $ header : stacktrace | |
where | |
header = printf "expected %s but found %s" (T.unpack e) (T.unpack f) | |
stacktrace = s <&> \(tag, line, col) -> | |
printf " while parsing %s at (%d, %d)" (T.unpack tag) line col | |
data ParserState = ParserState | |
{ _psInput :: Text -- ^ all remaining input | |
, _psPos :: (Int, Int) -- ^ current position | |
, _psStack :: [(Tag, Int, Int)] -- ^ tag stack | |
} | |
makeLenses ''ParserState | |
newtype Parser a = Parser { runParser :: ExceptT ParserError (State ParserState) a } | |
deriving ( Functor | |
, Applicative | |
, Monad | |
, MonadState ParserState | |
, MonadError ParserError | |
) | |
-- run | |
run :: Parser a -> Text -> Either ParserError a | |
run p s = runParser p | |
& runExceptT | |
& flip evalState (ParserState s (1,0) []) | |
-- elementary parsers | |
peek :: Parser (Maybe (Char, Text)) | |
peek = uses psInput T.uncons | |
parseError :: Text -> Text -> Parser a | |
parseError e f = do | |
stack <- use psStack | |
throwError $ ParserError e f stack | |
expected :: Text -> Parser a | |
expected e = do | |
f <- peek <&> \case | |
Nothing -> "the end of the input" | |
Just (c, _) -> T.singleton c | |
parseError e f | |
eof :: Parser () | |
eof = peek >>= \case | |
Nothing -> pure () | |
Just (c, _) -> parseError "the end of the input" (T.singleton c) | |
any :: Parser Char | |
any = peek >>= \case | |
Nothing -> parseError "any character" "the end of the input" | |
Just (c, s) -> do | |
ParserState _ (line, col) tags <- get | |
put $ case c of | |
'\n' -> ParserState s (line+1, 0) tags | |
_ -> ParserState s (line, col+1) tags | |
pure c | |
satisfy :: Text -> (Char -> Bool) -> Parser Char | |
satisfy description predicate = try $ do | |
c <- any | |
if predicate c | |
then pure c | |
else parseError description (T.singleton c) | |
-- labelling | |
label :: Tag -> Parser a -> Parser a | |
label t p = do | |
(line, col) <- use psPos | |
psStack %= ((t, line, col):) | |
p `finally` (psStack %= tail) | |
where | |
finally a b = do | |
r <- a `catchError` | |
\e -> b >> throwError e | |
b | |
pure r | |
-- backtracking | |
try :: Parser a -> Parser a | |
try p = do | |
s <- get | |
p `catchError` \e -> do | |
put s | |
throwError e | |
(<|>) :: Parser a -> Parser a -> Parser a | |
p1 <|> p2 = do | |
preP1Pos <- use psPos | |
p1 `catchError` \e -> do | |
postP1Pos <- use psPos | |
if preP1Pos == postP1Pos | |
then p2 | |
else throwError e | |
choice :: [Parser a] -> Parser a | |
choice = foldl1 (<|>) | |
-- characters | |
char c = satisfy (T.singleton c) (== c) | |
space = satisfy "space" isSpace | |
digit = satisfy "digit" isDigit | |
lower = satisfy "lowercase" isLower | |
letter = satisfy "letter" isLetter | |
string :: String -> Parser String | |
string = traverse char | |
-- repetition | |
many, many1 :: Parser a -> Parser [a] | |
many p = many1 p <|> pure [] | |
many1 p = liftA2 (:) p $ many p | |
sepBy, sepBy1 :: Parser a -> Parser s -> Parser [a] | |
sepBy p s = sepBy1 p s <|> pure [] | |
sepBy1 p s = liftA2 (:) p $ many (s >> p) | |
-- syntax | |
spaces :: Parser String | |
spaces = many space | |
lexeme :: Parser a -> Parser a | |
lexeme p = p <* spaces | |
symbol :: String -> Parser String | |
symbol = lexeme . string | |
between :: Parser open -> Parser close -> Parser a -> Parser a | |
between o c p = o *> p <* c |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment