Created
September 19, 2018 14:14
-
-
Save mjepronk/81d2d673b80084fd1931448aa0c90371 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{- | |
Use like this: | |
runResourceT $ S.print $ (parsed myparser (decodeByteString decodeUtf8Pure (Q.readFile "my-utf8-encoded.txt"))) | |
-} | |
module Data.Attoparsec.Text.Streaming | |
( parsed | |
, decodeByteString | |
-- * Decoding functions | |
, DST.decodeUtf8Pure | |
, DST.decodeUtf16LE | |
, DST.decodeUtf16BE | |
, DST.decodeUtf32LE | |
, DST.decodeUtf32BE | |
) | |
where | |
import qualified Data.Attoparsec.Text as A | |
import qualified Data.ByteString as B | |
import Streaming (Stream, Of(..), lift) | |
import Streaming.Prelude (next, yield) | |
import Streaming.Internal (Stream(..)) | |
import Data.ByteString.Streaming.Internal (ByteString(..)) | |
import qualified Data.Streaming.Text as DST | |
import qualified Data.Text as T | |
decodeByteString :: (Monad m) | |
=> (B.ByteString -> DST.DecodeResult) -- ^ decoding function from 'Data.Streaming.Text' | |
-> ByteString m r -- ^ raw input | |
-> Stream (Of T.Text) m (Either DST.DecodeResult r) -- ^ Stream of 'T.Text' values | |
decodeByteString = go | |
where | |
go decoder p0 = | |
case p0 of | |
Go m -> lift m >>= go decoder | |
Empty r -> case decoder B.empty of -- end of stream, feed empty chunk to decoder | |
DST.DecodeResultSuccess _ _ -> Return (Right r) | |
err@(DST.DecodeResultFailure _ _) -> Return (Left err) | |
Chunk bs p1 | B.null bs -> go decoder p1 -- ignore emty chunks from input | |
| otherwise -> case decoder bs of -- decode chunk | |
DST.DecodeResultSuccess t cont -> Step (t :> go cont p1) | |
err@(DST.DecodeResultFailure _ _) -> Return (Left err) | |
type Errors = ([String], String) | |
parsed | |
:: Monad m | |
=> A.Parser a -- ^ Attoparsec Text parser | |
-> Stream (Of T.Text) m r -- ^ Raw input | |
-> Stream (Of a) m (Either (Errors, Stream (Of T.Text) m r) r) | |
parsed parser = begin | |
where | |
begin p0 = do | |
x <- lift $ next p0 | |
case x of | |
Left r -> Return (Right r) | |
Right (t, p1) | |
| T.null t -> begin p1 | |
| otherwise -> step (yield t >>) (A.parse parser t) p1 | |
step diffP res p0 = case res of | |
A.Fail _ c m -> Return (Left ((c, m), diffP p0)) | |
A.Done t a | T.null t -> Step (a :> begin p0) | |
| otherwise -> Step (a :> begin (yield t >> p0)) | |
A.Partial k -> do | |
x <- lift $ next p0 | |
case x of | |
Left r -> step diffP (k T.empty) (Return r) | |
Right (t, p1) | T.null t -> step diffP res p1 | |
| otherwise -> step (diffP . (yield t >>)) (k t) p1 | |
{-# INLINABLE parsed #-} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment