Created

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist
View pharse.hs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE OverloadedStrings #-}
import Blaze.ByteString.Builder (Builder, fromByteString, toByteString)
import Control.Exception (Exception)
import Control.Monad.Trans.Class (lift)
import Data.ByteString (ByteString)
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import Data.Conduit
import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.Combinators as CC
import Data.Conduit.Extra (fuseLeftovers)
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Text as CT
import Data.Monoid
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TEE
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE
import Data.Typeable (Typeable)
 
input :: [File]
input =
[ File "utf8.txt" $ TEE.encodeUtf8 "This file is in UTF-8"
, File "utf16.txt" $ TEE.encodeUtf16LE "This file is in UTF-16"
, File "binary.dat" "we'll pretend to be binary"
]
 
data File = File
{ fileName :: !Text
, fileContents :: !ByteString
}
deriving Show
 
encodeFile :: File -> Builder
encodeFile (File name contents) =
tellLen (T.length name) <>
fromByteString (TEE.encodeUtf8 name) <>
tellLen (S.length contents) <>
fromByteString contents
where
tellLen i = fromByteString $ TEE.encodeUtf8 $ T.pack $ shows i ":"
 
encodeFiles :: [File] -> Builder
encodeFiles = mconcat . map encodeFile
 
withUtf8 :: MonadThrow m
=> ConduitM Text o m r
-> ConduitM ByteString o m r
withUtf8 =
fuseLeftovers toBS (CT.decode CT.utf8)
where
toBS = L.toChunks . TLE.encodeUtf8 . TL.fromChunks
 
conduitPharse :: MonadThrow m => Conduit ByteString m File
conduitPharse = do
mx <- CL.peek
case mx of
Nothing -> return ()
Just _ -> do
(name, contentLen) <- withUtf8 $ do
t <- CL.peek
nameLen <- parseNumber
name <- CT.take nameLen =$= CL.consume
contentLen <- parseNumber
return (T.concat name, contentLen)
content <- CB.take contentLen
yield $ File name $ S.concat $ L.toChunks content
conduitPharse
 
parseNumber :: MonadThrow m => Consumer Text m Int
parseNumber =
loop 0
where
loop x = CC.headE >>= maybe (monadThrow PrematureEnd) (go x)
 
go x ':' = return x
go x c
| '0' <= c && c <= '9' =
let x' = x * 10 + (fromEnum c - fromEnum '0')
in x' `seq` loop x'
| otherwise = monadThrow $ UnexpectedChar c
 
data ParseNumberException = PrematureEnd | UnexpectedChar Char
deriving (Show, Typeable)
instance Exception ParseNumberException
 
main :: IO ()
main = do
let fp = "encoded.pharse"
S.writeFile fp $ toByteString $ encodeFiles input
 
runResourceT $ CB.sourceFile fp $$ conduitPharse =$ CL.mapM_ (lift . print)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.