Last active
July 12, 2017 20:31
-
-
Save phadej/31bdb72c815766edde1eaf3efe8cf0ff to your computer and use it in GitHub Desktop.
Fancy encoding of JSON token stream
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE BangPatterns, OverloadedStrings, RankNTypes #-} | |
{-# LANGUAGE TypeFamilies, GADTs, DataKinds, PolyKinds, TypeOperators #-} | |
{-# LANGUAGE StandaloneDeriving, UndecidableInstances #-} | |
module Data.Aeson.Stream.Fancy ( | |
Token (..), | |
Path (..), | |
TokenStream, | |
tokenStream, | |
--decodeWith, | |
--eitherDecodeWith, | |
--decodeValue, | |
--parseValue, | |
) where | |
import Prelude () | |
import Prelude.Compat | |
import Control.DeepSeq (NFData (..)) | |
import Control.Monad (ap) | |
import Data.Aeson.Parser.Internal (jstring_, scientific) | |
import Data.Aeson.Types.Internal (IResult(..), JSONPath, Result(..), Value(..)) | |
import Data.Scientific (Scientific) | |
import Data.Text (Text) | |
import qualified Data.Aeson.Types as Aeson | |
import qualified Data.ByteString.Lazy as BSL | |
import qualified Data.ByteString.Lazy.Char8 as BSL8 | |
import qualified Data.Attoparsec.ByteString.Lazy as A | |
import qualified Data.Vector as V | |
-- import Debug.Trace | |
------------------------------------------------------------------------------- | |
-- Fancy stuff | |
------------------------------------------------------------------------------- | |
-- This is essentially free Category, isn't it? | |
data Path e a b where | |
Nil :: Path e a a | |
(:>) :: e a b -> Path e b c -> Path e a c | |
infixr 5 :> | |
instance (ShowP2 e) => Show (Path e a b) where | |
showsPrec _ Nil = showString "Nil" | |
showsPrec d (h :> t) = showParen (d > 5) | |
$ showsPrec2P 6 h | |
. showString " :> " | |
. showsPrec 5 t | |
class ShowP2 e where | |
showsPrec2P :: Int -> e a b -> ShowS | |
------------------------------------------------------------------------------- | |
-- Token | |
------------------------------------------------------------------------------- | |
data Tag = ArrTag | ObjTag | |
-- value ::= scalar | list | array | |
-- scalar ::= null | true | false | text | number | |
-- list ::= [ value* ] | |
-- object ::= { pair* } | |
-- pair ::= key value | |
-- value ::= scalar | list | array | |
-- scalar ::= null | true | false | text | number | |
-- list ::= [ list' | |
-- list' ::= ] | value list' | |
-- object ::= { object' | |
-- object' ::= } | key value object' | |
-- To explain this indices one need to draw a graph of | |
-- stack automaton recognizing above language | |
-- | |
-- shortly: there are | |
-- - two states: "start" and "expecting key" (Either) | |
-- - stack language consist of nested elements | |
-- | |
-- Another (and probably more correct) encoding of tags | |
-- should be (Bool, [Tag]) | |
data Token e a b where | |
TkNull :: Token e ('Right a) ('Right a) | |
TkTrue :: Token e ('Right a) ('Right a) | |
TkFalse :: Token e ('Right a) ('Right a) | |
TkArrayOpen :: Token e ('Right a) ('Right ('ArrTag ': a)) | |
TkArrayClose :: Token e ('Right ('ArrTag ': a)) ('Right a) | |
TkObjectOpen :: Token e ('Right a) ('Right ('ObjTag ': a)) | |
TkObjectClose :: Token e ('Right ('ObjTag ': a)) ('Right a) | |
TkKey :: !Text -> Token e ('Right ('ObjTag ': a)) ('Left ('ObjTag ': a)) | |
-- TODO: rename to TkString | |
TkText :: !Text -> Token e ('Right a) ('Right a) | |
TkNumber :: !Scientific -> Token e ('Right a) ('Right a) | |
TkError :: e -> Token e a b | |
deriving instance Show e => Show (Token e a b) | |
instance Show e => ShowP2 (Token e) where | |
showsPrec2P = showsPrec | |
instance NFData (Token e a b) where | |
rnf !_ = () -- should do, error message not forced | |
type TokenStream' = Path (Token String) | |
-- token stream can be contain multiple correct values | |
-- (also none) | |
-- | |
-- to require *exactly* one, the encoding of indices | |
-- becomes more complicated | |
-- and one might need type-families at that point | |
-- (two states make things easy) | |
type TokenStream = TokenStream' ('Right '[]) ('Right '[]) | |
------------------------------------------------------------------------------- | |
-- Token recognize | |
------------------------------------------------------------------------------- | |
tkError :: String -> TokenStream' a b | |
tkError err = TkError err :> Nil | |
newtype TokenParser a b = TokenParser { runTokenParser :: | |
forall c. Path TokenParser b c -> BSL.ByteString -> TokenStream' a c } | |
end :: TokenParser a a | |
end = TokenParser $ \ !ks !bs -> case ks of | |
Nil -> | |
if BSL.null bs | |
then Nil | |
else tkError "expecting end-of-input" | |
_ :> _ -> tkError "asking for end even we have continuations" | |
pop :: Path TokenParser a b -> BSL.ByteString -> TokenStream' a b | |
pop Nil bs = tkError "Nothing to pop" -- runTokenParser end [] (skipSpace bs) | |
pop (k :> ks) bs = runTokenParser k ks (skipSpace bs) | |
-- | Parse input into a token stream. | |
-- | |
-- >>> tokenStream "[1, 2, 3]" | |
-- TkArrayOpen :> TkNumber 1.0 :> TkNumber 2.0 :> TkNumber 3.0 :> TkArrayClose :> Nil | |
tokenStream :: BSL.ByteString -> TokenStream | |
tokenStream = runTokenParser value (end :> Nil) . skipSpace | |
where | |
value :: TokenParser ('Right ts) ('Right ts) | |
value = TokenParser $ \ !ks !bs -> case BSL.uncons bs of | |
Nothing -> tkError "Expecting JSON value, got end-of-input" | |
Just (!w, !bs') -> case w of | |
91 {- [ -} -> TkArrayOpen :> runTokenParser array0 (arrayK :> ks) (skipSpace bs') | |
_ | 48 <= w && w <= 75 || w == 45 -> case A.parse scientific bs of | |
A.Fail _ _ err -> tkError err | |
A.Done bs'' s -> TkNumber s :> pop ks bs'' | |
_ -> tkError $ "Expecting JSON value, got " ++ BSL8.unpack (BSL.take 30 bs) | |
-- this parser does look ahead, | |
-- and dispatches either to arrayK or goes to value | |
array0 :: TokenParser ('Right ('ArrTag ': ts)) ('Right ('ArrTag ': ts)) | |
array0 = TokenParser $ \ !ks !bs -> case BSL.uncons bs of | |
Nothing -> tkError "Expecting JSON value or ']', got end-of-input" | |
Just (!w, _) -> case w of | |
93 {- ] -} -> pop ks bs -- TkArrayClose : pop (tail ks) bs' | |
_ -> runTokenParser value ks bs | |
arrayK :: TokenParser ('Right ('ArrTag ': ts)) ('Right ts) | |
arrayK = TokenParser $ \ !ks !bs -> case BSL.uncons bs of | |
Nothing -> tkError "Expecting ',' or ']', got end-of-input" | |
Just (!w, !bs') -> case w of | |
44 {- , -} -> runTokenParser value (arrayK :> ks) (skipSpace bs') | |
93 {- ] -} -> TkArrayClose :> pop ks bs' | |
_ -> tkError $ "Expecting ',' or ']', got " ++ BSL8.unpack (BSL.take 30 bs) | |
skipSpace :: BSL.ByteString -> BSL.ByteString | |
skipSpace bs = case BSL.uncons bs of | |
Just (w, bs') | w == 0x20 || w == 0x0a || w == 0x0d || w == 0x09 | |
-> skipSpace bs' | |
_ -> bs | |
{-# INLINE skipSpace #-} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment