Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Reproduction of a bug in pipes(?)
#!/usr/bin/env stack
{- stack
--resolver lts-6.24
runghc
--package attoparsec
--package pipes-bytestring
--package containers
--package pipes-zlib -}
{-# LANGUAGE OverloadedStrings #-}
module Main where
import Data.Attoparsec.ByteString
import Data.Bits
import Data.List (foldl')
import Control.Arrow (first)
import Control.Monad.Trans.State.Strict
import Data.ByteString.Base16 (encode, decode)
import Data.ByteString.Lazy (toStrict, fromStrict)
import Data.Maybe (fromJust)
import Prelude hiding (take)
import qualified Codec.Compression.Zlib as Z
import qualified Data.ByteString as B
import qualified Data.Map.Strict as M
import qualified Pipes.Attoparsec as PA
import qualified Pipes.ByteString as PB
import qualified Pipes.Zlib as PZ
import qualified Prelude
import qualified System.IO as SI
import Pipes
import GHC.Word (Word8)
main = do
(entries, problemP) <- indexPackFile "pack-393b15f040d54876a0917a71ac6a060afbc5c0f7.pack"
(header, ref, compressedP, level) <- evalStateT getNextEntry problemP
-- This is the line that causes the ZlibException
(Right wrong) <- next compressedP
return ()
indexPackFile path = do
handle <- PB.fromHandle <$> SI.openFile path SI.ReadMode
((start, count), entries) <- runStateT parsePackFileStart handle
loopEntries' entries start count M.empty
where parsePackFileStart = do
(Right (len, count)) <- fromJust <$> PA.parseL parsePackFileHeader
return (len, count)
loopEntries'
:: Producer B.ByteString IO a -- remaining packfile input
-> Int -- number of bytes read so far
-> Int -- number of entries remaining
-> SeparatedEntries -- map of offsets to bytestrings
-> IO (SeparatedEntries, Producer B.ByteString IO a)
loopEntries' producer offset remaining indexedMap = case remaining of
663 -> return (indexedMap, producer)
_ -> do
(header, ref, decompressedP, level) <- evalStateT getNextEntry producer
step <- next decompressedP
let (decompressed, eitherP) = either ((,) "" . return) id step
(output, producer') <- advanceToCompletion decompressed eitherP
let content = B.concat [header, ref, compressToLevel level output]
let indexedMap' = M.insert offset content indexedMap
let offset' = offset + B.length content
let remaining' = remaining - 1
loopEntries' producer' offset' remaining' indexedMap'
getNextEntry = do
(Right tLen) <- fromJust <$> PA.parse parseTypeLen
baseRef <- case fst tLen of
OfsDeltaObject -> do
(Right offset) <- fromJust <$> PA.parse parseOffset
return $ encodeOffset offset
RefDeltaObject -> do
(Right ref) <- fromJust <$> PA.parse parseBinRef
return $ fst $ decode ref
_ -> return ""
decompressed <- PZ.decompress' PZ.defaultWindowBits <$> get
PB.drawByte
level <- getCompressionLevel . fromJust <$> PB.peekByte
return (uncurry encodeTypeLen tLen, baseRef, decompressed, level)
advanceToCompletion decompressed producer = next producer >>= \step ->
case step of
(Left (Left p)) -> return (decompressed, p)
(Right (d, p')) ->
first (B.append decompressed) <$> advanceToCompletion d p'
_ -> error "No idea how to handle Left (Right _)"
-- Copied verbatim from https://github.com/vaibhavsagar/duffer
type SeparatedEntries = M.Map Int B.ByteString
type Ref = B.ByteString
parseBinRef :: Parser Ref
parseBinRef = encode <$> take 20
data PackObjectType
= UnusedPackObjectType0
| CommitObject
| TreeObject
| BlobObject
| TagObject
| UnusedPackObjectType5
| OfsDeltaObject
| RefDeltaObject
deriving (Enum, Eq, Show)
encodeTypeLen :: PackObjectType -> Int -> B.ByteString
encodeTypeLen packObjType len = let
(last4, rest) = packEntryLenList len
firstByte = (fromEnum packObjType `shiftL` 4) .|. last4
firstByte' = if rest /= "" then setBit firstByte 7 else firstByte
in B.cons (fromIntegral firstByte') rest
parseTypeLen :: (Bits t, Integral t) => Parser (PackObjectType, t)
parseTypeLen = do
header <- anyWord8
let packType = packObjectType header
let initial = fromIntegral $ header .&. 15
size <- if testMSB header
then do
rest <- littleEndian <$> parseVarInt
return $ initial + (rest `shiftL` 4)
else
return initial
return (packType, size)
parseVarInt :: (Bits t, Integral t) => Parser [t]
parseVarInt = anyWord8 >>= \byte ->
let value = fromIntegral $ byte .&. 127
more = testMSB byte
in (value:) <$> if more then parseVarInt else return []
testMSB :: Bits t => t -> Bool
testMSB = flip testBit 7
parseOffset :: (Bits t, Integral t) => Parser t
parseOffset = parseVarInt >>= \values ->
let len = length values - 1
concatenated = bigEndian values
in return $ concatenated + if len > 0
-- I think the addition reinstates the MSBs that are otherwise
-- used to indicate whether there is more of the variable length
-- integer to parse.
then sum $ map (\i -> 2^(7*i)) [1..len]
else 0
littleEndian, bigEndian :: (Bits t, Integral t) => [t] -> t
littleEndian = foldr (\a b -> a + (b `shiftL` 7)) 0
bigEndian = foldl' (\a b -> (a `shiftL` 7) + b) 0
parsePackFileHeader :: Parser Int
parsePackFileHeader =
word8s (B.unpack "PACK") *> take 4 *> (fromBytes <$> take 4)
fromBytes :: (Bits t, Integral t) => B.ByteString -> t
fromBytes = B.foldl (\a b -> (a `shiftL` 8) + fromIntegral b) 0
word8s :: [Word8] -> Parser [Word8]
word8s = mapM word8
packEntryLenList :: Int -> (Int, B.ByteString)
packEntryLenList n = let
last4 = fromIntegral n .&. 15
rest = fromIntegral n `shiftR` 4 :: Int
last4' = if rest > 0
then setBit last4 7
else last4
restL = to7BitList rest
restL' = if restL /= [0]
then map fromIntegral $ head restL:map (`setBit` 7) (tail restL)
else []
in (last4', B.pack $ reverse restL')
encodeOffset :: Int -> B.ByteString
encodeOffset n = let
noTerms = floor $ logBase 128 (fromIntegral n * (128 - 1) + 128) - 1
remove = sum $ Prelude.take noTerms $ map (128^) [1..]
remainder = n - remove
varInt = to7BitList remainder
encodedInts = setMSBs $ leftPadZeros varInt (noTerms + 1)
in B.pack $ map fromIntegral encodedInts
leftPadZeros :: [Int] -> Int -> [Int]
leftPadZeros ints n
| length ints >= n = ints
| otherwise = leftPadZeros (0:ints) n
setMSBs :: [Int] -> [Int]
setMSBs ints = let
ints' = reverse ints
ints'' = head ints' : map (`setBit` 7) ( tail ints')
in reverse ints''
toByteList, to7BitList :: (Bits t, Integral t) => t -> [t]
toByteList = toSomeBitList 8
to7BitList = toSomeBitList 7
toSomeBitList :: (Bits t, Integral t) => Int -> t -> [t]
toSomeBitList some n = reverse $ toSomeBitList' some n
where toSomeBitList' some n = case divMod n (bit some) of
(0, i) -> [fromIntegral i]
(x, y) -> fromIntegral y : toSomeBitList' some x
compressToLevel :: Z.CompressionLevel -> B.ByteString -> B.ByteString
compressToLevel level content = toStrict $
Z.compressWith Z.defaultCompressParams
{ Z.compressLevel = level }
$ fromStrict content
getCompressionLevel :: Word8 -> Z.CompressionLevel
getCompressionLevel levelByte = case levelByte of
1 -> Z.bestSpeed
156 -> Z.defaultCompression
_ -> error "I can't make sense of this compression level"
packObjectType :: (Bits t, Integral t) => t -> PackObjectType
packObjectType header = toEnum . fromIntegral $ (header `shiftR` 4) .&. 7
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment