{-# LANGUAGE DeriveDataTypeable #-} | |
{-# LANGUAGE OverloadedStrings #-} | |
import Blaze.ByteString.Builder (Builder, fromByteString, toByteString) | |
import Control.Exception (Exception) | |
import Control.Monad.Trans.Class (lift) | |
import Data.ByteString (ByteString) | |
import qualified Data.ByteString as S | |
import qualified Data.ByteString.Lazy as L | |
import Data.Conduit | |
import qualified Data.Conduit.Binary as CB | |
import qualified Data.Conduit.Combinators as CC | |
import Data.Conduit.Extra (fuseLeftovers) | |
import qualified Data.Conduit.List as CL | |
import qualified Data.Conduit.Text as CT | |
import Data.Monoid | |
import Data.Text (Text) | |
import qualified Data.Text as T | |
import qualified Data.Text.Encoding as TEE | |
import qualified Data.Text.Lazy as TL | |
import qualified Data.Text.Lazy.Encoding as TLE | |
import Data.Typeable (Typeable) | |
input :: [File] | |
input = | |
[ File "utf8.txt" $ TEE.encodeUtf8 "This file is in UTF-8" | |
, File "utf16.txt" $ TEE.encodeUtf16LE "This file is in UTF-16" | |
, File "binary.dat" "we'll pretend to be binary" | |
] | |
data File = File | |
{ fileName :: !Text | |
, fileContents :: !ByteString | |
} | |
deriving Show | |
encodeFile :: File -> Builder | |
encodeFile (File name contents) = | |
tellLen (T.length name) <> | |
fromByteString (TEE.encodeUtf8 name) <> | |
tellLen (S.length contents) <> | |
fromByteString contents | |
where | |
tellLen i = fromByteString $ TEE.encodeUtf8 $ T.pack $ shows i ":" | |
encodeFiles :: [File] -> Builder | |
encodeFiles = mconcat . map encodeFile | |
withUtf8 :: MonadThrow m | |
=> ConduitM Text o m r | |
-> ConduitM ByteString o m r | |
withUtf8 = | |
fuseLeftovers toBS (CT.decode CT.utf8) | |
where | |
toBS = L.toChunks . TLE.encodeUtf8 . TL.fromChunks | |
conduitPharse :: MonadThrow m => Conduit ByteString m File | |
conduitPharse = do | |
mx <- CL.peek | |
case mx of | |
Nothing -> return () | |
Just _ -> do | |
(name, contentLen) <- withUtf8 $ do | |
t <- CL.peek | |
nameLen <- parseNumber | |
name <- CT.take nameLen =$= CL.consume | |
contentLen <- parseNumber | |
return (T.concat name, contentLen) | |
content <- CB.take contentLen | |
yield $ File name $ S.concat $ L.toChunks content | |
conduitPharse | |
parseNumber :: MonadThrow m => Consumer Text m Int | |
parseNumber = | |
loop 0 | |
where | |
loop x = CC.headE >>= maybe (monadThrow PrematureEnd) (go x) | |
go x ':' = return x | |
go x c | |
| '0' <= c && c <= '9' = | |
let x' = x * 10 + (fromEnum c - fromEnum '0') | |
in x' `seq` loop x' | |
| otherwise = monadThrow $ UnexpectedChar c | |
data ParseNumberException = PrematureEnd | UnexpectedChar Char | |
deriving (Show, Typeable) | |
instance Exception ParseNumberException | |
main :: IO () | |
main = do | |
let fp = "encoded.pharse" | |
S.writeFile fp $ toByteString $ encodeFiles input | |
runResourceT $ CB.sourceFile fp $$ conduitPharse =$ CL.mapM_ (lift . print) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment