Skip to content

Instantly share code, notes, and snippets.

@michaelt
Created November 8, 2013 21:22
Show Gist options
  • Save michaelt/7377887 to your computer and use it in GitHub Desktop.
Save michaelt/7377887 to your computer and use it in GitHub Desktop.
{-#LANGUAGE LambdaCase#-}
import qualified Pipes.ByteString as BP
import Pipes
import Pipes.Parse
import qualified Data.ByteString as B
import qualified Data.Text as T
import Data.Text (Text(..))
import qualified Data.Text.IO as T
import Data.Text.Encoding
import Data.Text.Encoding.Error
-- | Entry point: streams every two-way split of @a'@ through the logging
-- decoder and writes each decoded 'Text' piece to stdout.
main :: IO ()
main = runEffect $ for (splits a' >-> decode) $ \piece -> lift (T.putStr piece)
-- | UTF-8 bytes of the four-character sample string.
a :: B.ByteString
a = encodeUtf8 (T.pack "你好世界")
-- | UTF-8 bytes of the two-character sample string; this is the input
-- that 'main' splits and decodes.
a' :: B.ByteString
a' = encodeUtf8 (T.pack "你好")
-- | Shorthand for building a one-byte 'B.ByteString'.
sing byte = B.singleton byte
-- | The logging UTF-8 decoder pipe, specialised to the 'lenient'
-- replacement handler.
decode :: Pipe B.ByteString Text IO r
decode = decodeWith replaceInvalid
  where
    replaceInvalid = lenient
-- | Decode-error handler that always supplies a replacement character.
-- The first argument is ignored. An offending byte of 228 becomes @\'X\'@,
-- 229 becomes @\'Y\'@, and every other case (including 'Nothing') becomes
-- @\'?\'@.
lenient _ offending =
  case offending of
    Just 228 -> Just 'X'
    Just 229 -> Just 'Y'
    _        -> Just '?'
-- | A pipe that incrementally decodes UTF-8 input and traces every step.
--
-- For each 'B.ByteString' received from upstream, the current streaming
-- decoder (initially @streamDecodeUtf8With onErr@) is applied to it. The
-- pipe then prints a blank line followed by the 1-based chunk number, the
-- input bytes, the decoded text re-encoded as UTF-8, and the leftover
-- bytes reported by the decoder. Finally it yields the decoded 'Text'
-- downstream and continues with the decoder's continuation.
--
-- The loop never returns by itself; it runs for as long as upstream keeps
-- supplying chunks.
decodeWith :: OnDecodeError
           -> Pipe B.ByteString Text IO r
decodeWith onErr = loop 1 (streamDecodeUtf8With onErr)
  where
    loop count step = do
      bytes <- await
      -- Keep the strict 'case' so the decoder is forced before any
      -- tracing output for this chunk is produced.
      case step bytes of
        Some decoded rest step' -> do
          lift $ do
            putChar '\n'
            labelled "Chunk " count
            labelled "Input Bytestring: " bytes
            labelled "Text output as ByteString: " (encodeUtf8 decoded)
            labelled "ByteString leftover: " rest
          yield decoded
          loop (count + 1) step'

    -- Print a label immediately followed by the shown value and a newline.
    labelled :: Show a => String -> a -> IO ()
    labelled label value = putStr label >> print value
-- | Yields @bs@ cut in two at every offset from 0 up to @length bs - 1@,
-- in increasing order: first the prefix, then the suffix, for each offset.
-- An empty input yields nothing. The cut at the full length (whole string
-- followed by an empty suffix) is not produced.
splits bs = mapM_ emit [0 .. B.length bs - 1]
  where
    emit cut = do
      let (front, back) = B.splitAt cut bs
      yield front
      yield back
Chunk 1
Input Bytestring: ""
Text output as ByteString: ""
ByteString leftover: ""
Chunk 2
Input Bytestring: "\228\189\160\229\165\189"
Text output as ByteString: "\228\189\160\229\165\189"
ByteString leftover: ""
你好
Chunk 3
Input Bytestring: "\228"
Text output as ByteString: ""
ByteString leftover: "\228"
Chunk 4
Input Bytestring: "\189\160\229\165\189"
Text output as ByteString: "\228\189\160\229\165\189"
ByteString leftover: ""
你好
Chunk 5
Input Bytestring: "\228\189"
Text output as ByteString: ""
ByteString leftover: "\228\189"
Chunk 6
Input Bytestring: "\160\229\165\189"
Text output as ByteString: "\228\189\160\229\165\189"
ByteString leftover: ""
你好
Chunk 7
Input Bytestring: "\228\189\160"
Text output as ByteString: "\228\189\160"
ByteString leftover: ""
Chunk 8
Input Bytestring: "\229\165\189"
Text output as ByteString: "\229\165\189"
ByteString leftover: ""
Chunk 9
Input Bytestring: "\228\189\160\229"
Text output as ByteString: "\228\189\160"
ByteString leftover: "\229"
Chunk 10
Input Bytestring: "\165\189"
Text output as ByteString: "\229\165\189"
ByteString leftover: ""
Chunk 11
Input Bytestring: "\228\189\160\229\165"
Text output as ByteString: "\228\189\160"
ByteString leftover: "\229\165"
Chunk 12
Input Bytestring: "\189"
Text output as ByteString: "\229\165\189"
ByteString leftover: ""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment