Skip to content

Instantly share code, notes, and snippets.

@snoyberg
Created February 8, 2014 18:52
Show Gist options
  • Save snoyberg/8888288 to your computer and use it in GitHub Desktop.
Save snoyberg/8888288 to your computer and use it in GitHub Desktop.
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE OverloadedStrings #-}
import Blaze.ByteString.Builder (Builder, fromByteString, toByteString)
import Control.Exception (Exception)
import Control.Monad.Trans.Class (lift)
import Data.ByteString (ByteString)
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import Data.Conduit
import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.Combinators as CC
import Data.Conduit.Extra (fuseLeftovers)
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Text as CT
import Data.Monoid
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TEE
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE
import Data.Typeable (Typeable)
input :: [File]
input =
[ File "utf8.txt" $ TEE.encodeUtf8 "This file is in UTF-8"
, File "utf16.txt" $ TEE.encodeUtf16LE "This file is in UTF-16"
, File "binary.dat" "we'll pretend to be binary"
]
data File = File
{ fileName :: !Text
, fileContents :: !ByteString
}
deriving Show
encodeFile :: File -> Builder
encodeFile (File name contents) =
tellLen (T.length name) <>
fromByteString (TEE.encodeUtf8 name) <>
tellLen (S.length contents) <>
fromByteString contents
where
tellLen i = fromByteString $ TEE.encodeUtf8 $ T.pack $ shows i ":"
encodeFiles :: [File] -> Builder
encodeFiles = mconcat . map encodeFile
withUtf8 :: MonadThrow m
=> ConduitM Text o m r
-> ConduitM ByteString o m r
withUtf8 =
fuseLeftovers toBS (CT.decode CT.utf8)
where
toBS = L.toChunks . TLE.encodeUtf8 . TL.fromChunks
conduitPharse :: MonadThrow m => Conduit ByteString m File
conduitPharse = do
mx <- CL.peek
case mx of
Nothing -> return ()
Just _ -> do
(name, contentLen) <- withUtf8 $ do
t <- CL.peek
nameLen <- parseNumber
name <- CT.take nameLen =$= CL.consume
contentLen <- parseNumber
return (T.concat name, contentLen)
content <- CB.take contentLen
yield $ File name $ S.concat $ L.toChunks content
conduitPharse
parseNumber :: MonadThrow m => Consumer Text m Int
parseNumber =
loop 0
where
loop x = CC.headE >>= maybe (monadThrow PrematureEnd) (go x)
go x ':' = return x
go x c
| '0' <= c && c <= '9' =
let x' = x * 10 + (fromEnum c - fromEnum '0')
in x' `seq` loop x'
| otherwise = monadThrow $ UnexpectedChar c
data ParseNumberException = PrematureEnd | UnexpectedChar Char
deriving (Show, Typeable)
instance Exception ParseNumberException
main :: IO ()
main = do
let fp = "encoded.pharse"
S.writeFile fp $ toByteString $ encodeFiles input
runResourceT $ CB.sourceFile fp $$ conduitPharse =$ CL.mapM_ (lift . print)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment