Skip to content

Instantly share code, notes, and snippets.

@nicuveo
Last active July 14, 2023 21:19
Show Gist options
  • Save nicuveo/63cef24bef03ff76c299f8d1ced72306 to your computer and use it in GitHub Desktop.
Save nicuveo/63cef24bef03ff76c299f8d1ced72306 to your computer and use it in GitHub Desktop.
Parsec with stacktraces
{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
module Main where
import Data.Char
import Data.List (intercalate)
import Data.Map.Strict qualified as M
import Text.Printf
import Parser
-- json exmaples
data JValue = JObject (M.Map String JValue)
| JArray [JValue]
| JString String
| JNumber Double
| JBool Bool
| JNull
instance Show JValue where
show = \case
JNull -> "null"
JBool b -> toLower <$> show b
JNumber n -> printf "%g" n
JString s -> printf "\"%s\"" s
JArray a -> printf "[%s]" $ intercalate ", " $ show <$> a
JObject o -> printf "{%s}" $ intercalate ", " [show k <> ": " <> show v | (k,v) <- M.toList o]
json = spaces >> jvalue
jvalue =
label "JSON value" $
lexeme $
choice
[ JObject <$> jobject
, JArray <$> jarray
, JString <$> jstring
, JNumber <$> jnumber
, JBool <$> jbool
, JNull <$ string "null"
] <|> expected "JSON value"
jbool =
label "JSON boolean" $
lexeme $
choice
[ True <$ string "true"
, False <$ string "false"
]
jarray =
label "JSON array" $
lexeme $
between (symbol "[") (symbol "]") $
jvalue `sepBy` symbol ","
jobject =
label "JSON object" $
lexeme $
between (symbol "{") (symbol "}") $
fmap M.fromList $
jmember `sepBy` symbol ","
where
jmember = lexeme do
k <- jstring
symbol ":"
v <- jvalue
pure (k,v)
jnumber =
label "JSON number" $
lexeme $
read <$> many1 digit
jstring =
label "JSON string" $
lexeme $
between (char '"') (char '"') $ fmap concat $ many jchar
jchar =
choice
[ try $ string "\\n"
, try $ string "\\t"
, try $ string "\\\""
, try $ string "\\\\"
, pure <$> satisfy "string character" (/= '"')
]
main = do
either print print $ run json "{ \"foo\":\n [ { \"bar\": 42\n , \"baz\":\n }\n ]\n}"
{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TupleSections #-}
module Parser where
import Control.Applicative (liftA2)
import Control.Lens
import Control.Monad.Except
import Control.Monad.State
import Data.Char
import Data.Text (Text)
import Data.Text qualified as T
import Prelude hiding (any)
import Text.Printf
-- data types
type Tag = Text
data ParserError = ParserError
{ _peExpected :: Text
, _peFound :: Text
, _peStack :: [(Tag, Int, Int)]
}
makeLenses ''ParserError
instance Show ParserError where
show (ParserError e f s) = unlines $ header : stacktrace
where
header = printf "expected %s but found %s" (T.unpack e) (T.unpack f)
stacktrace = s <&> \(tag, line, col) ->
printf " while parsing %s at (%d, %d)" (T.unpack tag) line col
data ParserState = ParserState
{ _psInput :: Text -- ^ all remaining input
, _psPos :: (Int, Int) -- ^ current position
, _psStack :: [(Tag, Int, Int)] -- ^ tag stack
}
makeLenses ''ParserState
newtype Parser a = Parser { runParser :: ExceptT ParserError (State ParserState) a }
deriving ( Functor
, Applicative
, Monad
, MonadState ParserState
, MonadError ParserError
)
-- run
run :: Parser a -> Text -> Either ParserError a
run p s = runParser p
& runExceptT
& flip evalState (ParserState s (1,0) [])
-- elementary parsers
peek :: Parser (Maybe (Char, Text))
peek = uses psInput T.uncons
parseError :: Text -> Text -> Parser a
parseError e f = do
stack <- use psStack
throwError $ ParserError e f stack
expected :: Text -> Parser a
expected e = do
f <- peek <&> \case
Nothing -> "the end of the input"
Just (c, _) -> T.singleton c
parseError e f
eof :: Parser ()
eof = peek >>= \case
Nothing -> pure ()
Just (c, _) -> parseError "the end of the input" (T.singleton c)
any :: Parser Char
any = peek >>= \case
Nothing -> parseError "any character" "the end of the input"
Just (c, s) -> do
ParserState _ (line, col) tags <- get
put $ case c of
'\n' -> ParserState s (line+1, 0) tags
_ -> ParserState s (line, col+1) tags
pure c
satisfy :: Text -> (Char -> Bool) -> Parser Char
satisfy description predicate = try $ do
c <- any
if predicate c
then pure c
else parseError description (T.singleton c)
-- labelling
label :: Tag -> Parser a -> Parser a
label t p = do
(line, col) <- use psPos
psStack %= ((t, line, col):)
p `finally` (psStack %= tail)
where
finally a b = do
r <- a `catchError`
\e -> b >> throwError e
b
pure r
-- backtracking
try :: Parser a -> Parser a
try p = do
s <- get
p `catchError` \e -> do
put s
throwError e
(<|>) :: Parser a -> Parser a -> Parser a
p1 <|> p2 = do
preP1Pos <- use psPos
p1 `catchError` \e -> do
postP1Pos <- use psPos
if preP1Pos == postP1Pos
then p2
else throwError e
choice :: [Parser a] -> Parser a
choice = foldl1 (<|>)
-- characters
char c = satisfy (T.singleton c) (== c)
space = satisfy "space" isSpace
digit = satisfy "digit" isDigit
lower = satisfy "lowercase" isLower
letter = satisfy "letter" isLetter
string :: String -> Parser String
string = traverse char
-- repetition
many, many1 :: Parser a -> Parser [a]
many p = many1 p <|> pure []
many1 p = liftA2 (:) p $ many p
sepBy, sepBy1 :: Parser a -> Parser s -> Parser [a]
sepBy p s = sepBy1 p s <|> pure []
sepBy1 p s = liftA2 (:) p $ many (s >> p)
-- syntax
spaces :: Parser String
spaces = many space
lexeme :: Parser a -> Parser a
lexeme p = p <* spaces
symbol :: String -> Parser String
symbol = lexeme . string
between :: Parser open -> Parser close -> Parser a -> Parser a
between o c p = o *> p <* c
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment