Created
February 8, 2014 18:52
-
-
Save snoyberg/8888288 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE DeriveDataTypeable #-} | |
{-# LANGUAGE OverloadedStrings #-} | |
import Blaze.ByteString.Builder (Builder, fromByteString, toByteString) | |
import Control.Exception (Exception) | |
import Control.Monad.Trans.Class (lift) | |
import Data.ByteString (ByteString) | |
import qualified Data.ByteString as S | |
import qualified Data.ByteString.Lazy as L | |
import Data.Conduit | |
import qualified Data.Conduit.Binary as CB | |
import qualified Data.Conduit.Combinators as CC | |
import Data.Conduit.Extra (fuseLeftovers) | |
import qualified Data.Conduit.List as CL | |
import qualified Data.Conduit.Text as CT | |
import Data.Monoid | |
import Data.Text (Text) | |
import qualified Data.Text as T | |
import qualified Data.Text.Encoding as TEE | |
import qualified Data.Text.Lazy as TL | |
import qualified Data.Text.Lazy.Encoding as TLE | |
import Data.Typeable (Typeable) | |
input :: [File] | |
input = | |
[ File "utf8.txt" $ TEE.encodeUtf8 "This file is in UTF-8" | |
, File "utf16.txt" $ TEE.encodeUtf16LE "This file is in UTF-16" | |
, File "binary.dat" "we'll pretend to be binary" | |
] | |
data File = File | |
{ fileName :: !Text | |
, fileContents :: !ByteString | |
} | |
deriving Show | |
encodeFile :: File -> Builder | |
encodeFile (File name contents) = | |
tellLen (T.length name) <> | |
fromByteString (TEE.encodeUtf8 name) <> | |
tellLen (S.length contents) <> | |
fromByteString contents | |
where | |
tellLen i = fromByteString $ TEE.encodeUtf8 $ T.pack $ shows i ":" | |
encodeFiles :: [File] -> Builder | |
encodeFiles = mconcat . map encodeFile | |
withUtf8 :: MonadThrow m | |
=> ConduitM Text o m r | |
-> ConduitM ByteString o m r | |
withUtf8 = | |
fuseLeftovers toBS (CT.decode CT.utf8) | |
where | |
toBS = L.toChunks . TLE.encodeUtf8 . TL.fromChunks | |
conduitPharse :: MonadThrow m => Conduit ByteString m File | |
conduitPharse = do | |
mx <- CL.peek | |
case mx of | |
Nothing -> return () | |
Just _ -> do | |
(name, contentLen) <- withUtf8 $ do | |
t <- CL.peek | |
nameLen <- parseNumber | |
name <- CT.take nameLen =$= CL.consume | |
contentLen <- parseNumber | |
return (T.concat name, contentLen) | |
content <- CB.take contentLen | |
yield $ File name $ S.concat $ L.toChunks content | |
conduitPharse | |
parseNumber :: MonadThrow m => Consumer Text m Int | |
parseNumber = | |
loop 0 | |
where | |
loop x = CC.headE >>= maybe (monadThrow PrematureEnd) (go x) | |
go x ':' = return x | |
go x c | |
| '0' <= c && c <= '9' = | |
let x' = x * 10 + (fromEnum c - fromEnum '0') | |
in x' `seq` loop x' | |
| otherwise = monadThrow $ UnexpectedChar c | |
data ParseNumberException = PrematureEnd | UnexpectedChar Char | |
deriving (Show, Typeable) | |
instance Exception ParseNumberException | |
main :: IO () | |
main = do | |
let fp = "encoded.pharse" | |
S.writeFile fp $ toByteString $ encodeFiles input | |
runResourceT $ CB.sourceFile fp $$ conduitPharse =$ CL.mapM_ (lift . print) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment