Skip to content

Instantly share code, notes, and snippets.

@phadej
Last active July 12, 2017 20:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save phadej/31bdb72c815766edde1eaf3efe8cf0ff to your computer and use it in GitHub Desktop.
Save phadej/31bdb72c815766edde1eaf3efe8cf0ff to your computer and use it in GitHub Desktop.
Fancy encoding of JSON token stream
{-# LANGUAGE BangPatterns, OverloadedStrings, RankNTypes #-}
{-# LANGUAGE TypeFamilies, GADTs, DataKinds, PolyKinds, TypeOperators #-}
{-# LANGUAGE StandaloneDeriving, UndecidableInstances #-}
module Data.Aeson.Stream.Fancy (
Token (..),
Path (..),
TokenStream,
tokenStream,
--decodeWith,
--eitherDecodeWith,
--decodeValue,
--parseValue,
) where
import Prelude ()
import Prelude.Compat
import Control.DeepSeq (NFData (..))
import Control.Monad (ap)
import Data.Aeson.Parser.Internal (jstring_, scientific)
import Data.Aeson.Types.Internal (IResult(..), JSONPath, Result(..), Value(..))
import Data.Scientific (Scientific)
import Data.Text (Text)
import qualified Data.Aeson.Types as Aeson
import qualified Data.ByteString.Lazy as BSL
import qualified Data.ByteString.Lazy.Char8 as BSL8
import qualified Data.Attoparsec.ByteString.Lazy as A
import qualified Data.Vector as V
-- import Debug.Trace
-------------------------------------------------------------------------------
-- Fancy stuff
-------------------------------------------------------------------------------
-- This is essentially free Category, isn't it?
data Path e a b where
Nil :: Path e a a
(:>) :: e a b -> Path e b c -> Path e a c
infixr 5 :>
instance (ShowP2 e) => Show (Path e a b) where
showsPrec _ Nil = showString "Nil"
showsPrec d (h :> t) = showParen (d > 5)
$ showsPrec2P 6 h
. showString " :> "
. showsPrec 5 t
class ShowP2 e where
showsPrec2P :: Int -> e a b -> ShowS
-------------------------------------------------------------------------------
-- Token
-------------------------------------------------------------------------------
data Tag = ArrTag | ObjTag
-- value ::= scalar | list | array
-- scalar ::= null | true | false | text | number
-- list ::= [ value* ]
-- object ::= { pair* }
-- pair ::= key value
-- value ::= scalar | list | array
-- scalar ::= null | true | false | text | number
-- list ::= [ list'
-- list' ::= ] | value list'
-- object ::= { object'
-- object' ::= } | key value object'
-- To explain this indices one need to draw a graph of
-- stack automaton recognizing above language
--
-- shortly: there are
-- - two states: "start" and "expecting key" (Either)
-- - stack language consist of nested elements
--
-- Another (and probably more correct) encoding of tags
-- should be (Bool, [Tag])
data Token e a b where
TkNull :: Token e ('Right a) ('Right a)
TkTrue :: Token e ('Right a) ('Right a)
TkFalse :: Token e ('Right a) ('Right a)
TkArrayOpen :: Token e ('Right a) ('Right ('ArrTag ': a))
TkArrayClose :: Token e ('Right ('ArrTag ': a)) ('Right a)
TkObjectOpen :: Token e ('Right a) ('Right ('ObjTag ': a))
TkObjectClose :: Token e ('Right ('ObjTag ': a)) ('Right a)
TkKey :: !Text -> Token e ('Right ('ObjTag ': a)) ('Left ('ObjTag ': a))
-- TODO: rename to TkString
TkText :: !Text -> Token e ('Right a) ('Right a)
TkNumber :: !Scientific -> Token e ('Right a) ('Right a)
TkError :: e -> Token e a b
deriving instance Show e => Show (Token e a b)
instance Show e => ShowP2 (Token e) where
showsPrec2P = showsPrec
instance NFData (Token e a b) where
rnf !_ = () -- should do, error message not forced
type TokenStream' = Path (Token String)
-- token stream can be contain multiple correct values
-- (also none)
--
-- to require *exactly* one, the encoding of indices
-- becomes more complicated
-- and one might need type-families at that point
-- (two states make things easy)
type TokenStream = TokenStream' ('Right '[]) ('Right '[])
-------------------------------------------------------------------------------
-- Token recognize
-------------------------------------------------------------------------------
tkError :: String -> TokenStream' a b
tkError err = TkError err :> Nil
newtype TokenParser a b = TokenParser { runTokenParser ::
forall c. Path TokenParser b c -> BSL.ByteString -> TokenStream' a c }
end :: TokenParser a a
end = TokenParser $ \ !ks !bs -> case ks of
Nil ->
if BSL.null bs
then Nil
else tkError "expecting end-of-input"
_ :> _ -> tkError "asking for end even we have continuations"
pop :: Path TokenParser a b -> BSL.ByteString -> TokenStream' a b
pop Nil bs = tkError "Nothing to pop" -- runTokenParser end [] (skipSpace bs)
pop (k :> ks) bs = runTokenParser k ks (skipSpace bs)
-- | Parse input into a token stream.
--
-- >>> tokenStream "[1, 2, 3]"
-- TkArrayOpen :> TkNumber 1.0 :> TkNumber 2.0 :> TkNumber 3.0 :> TkArrayClose :> Nil
tokenStream :: BSL.ByteString -> TokenStream
tokenStream = runTokenParser value (end :> Nil) . skipSpace
where
value :: TokenParser ('Right ts) ('Right ts)
value = TokenParser $ \ !ks !bs -> case BSL.uncons bs of
Nothing -> tkError "Expecting JSON value, got end-of-input"
Just (!w, !bs') -> case w of
91 {- [ -} -> TkArrayOpen :> runTokenParser array0 (arrayK :> ks) (skipSpace bs')
_ | 48 <= w && w <= 75 || w == 45 -> case A.parse scientific bs of
A.Fail _ _ err -> tkError err
A.Done bs'' s -> TkNumber s :> pop ks bs''
_ -> tkError $ "Expecting JSON value, got " ++ BSL8.unpack (BSL.take 30 bs)
-- this parser does look ahead,
-- and dispatches either to arrayK or goes to value
array0 :: TokenParser ('Right ('ArrTag ': ts)) ('Right ('ArrTag ': ts))
array0 = TokenParser $ \ !ks !bs -> case BSL.uncons bs of
Nothing -> tkError "Expecting JSON value or ']', got end-of-input"
Just (!w, _) -> case w of
93 {- ] -} -> pop ks bs -- TkArrayClose : pop (tail ks) bs'
_ -> runTokenParser value ks bs
arrayK :: TokenParser ('Right ('ArrTag ': ts)) ('Right ts)
arrayK = TokenParser $ \ !ks !bs -> case BSL.uncons bs of
Nothing -> tkError "Expecting ',' or ']', got end-of-input"
Just (!w, !bs') -> case w of
44 {- , -} -> runTokenParser value (arrayK :> ks) (skipSpace bs')
93 {- ] -} -> TkArrayClose :> pop ks bs'
_ -> tkError $ "Expecting ',' or ']', got " ++ BSL8.unpack (BSL.take 30 bs)
skipSpace :: BSL.ByteString -> BSL.ByteString
skipSpace bs = case BSL.uncons bs of
Just (w, bs') | w == 0x20 || w == 0x0a || w == 0x0d || w == 0x09
-> skipSpace bs'
_ -> bs
{-# INLINE skipSpace #-}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment