Skip to content

Instantly share code, notes, and snippets.

@yiding
Last active May 21, 2021 08:56
Show Gist options
  • Star 7 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yiding/0be1cabcdc2b1b51411f to your computer and use it in GitHub Desktop.
Save yiding/0be1cabcdc2b1b51411f to your computer and use it in GitHub Desktop.
Parsing xcode DGPH files. This one handles DGPH 1.04 (used in xcode 7.0, 7.1) Xcode DGPH files contains dependency graph information that lets Xcode do incremental builds.
{-# LANGUAGE OverloadedStrings #-}
module DGPHParser(parseDgph) where
import qualified Data.Attoparsec.ByteString as A
import qualified Data.ByteString as B
import Data.Bits
import Data.Foldable (foldl')
import qualified Data.ByteString as B
import qualified Data.Attoparsec.ByteString as A
import Control.Monad.ST ( runST, ST )
import Data.Array.Unsafe (castSTUArray)
import Data.Array.ST ( newArray, readArray, MArray, STUArray )
import Data.Bits
import Data.Word
--
-- Utility functions for parsing doubles and such.
--
parseWord64LE :: A.Parser Word64
parseWord64LE = do
B.foldr (\byte word -> fromIntegral byte .|. word `shiftL` 8 ) 0 <$> A.take 8
-- | reinterpret_cast
cast :: (MArray (STUArray s) a (ST s),
MArray (STUArray s) b (ST s)) =>
a -> ST s b
cast x = newArray (0 :: Int, 0) x >>= castSTUArray >>= flip readArray 0
parseDoubleLE :: A.Parser Double
parseDoubleLE = do
word <- parseWord64LE
return $ runST (cast word)
--
-- parsing particularly prevalent structures
--
-- | Parse a 7 bit little endian variable length encoded number.
--
-- The encoding takes 7 bit blocks of the number and encodes it in a byte,
-- and set the msb of that byte to 1 if there are additional bytes to follow.
--
-- For example, a hypothetical 4 byte number encodes as follows:
--
-- @
-- 0000 0000 0000z zzzz zzyy yyyy yxxx xxxx
--
-- 1xxxxxxx 1yyyyyyy 0zzzzzzz
-- ^msb ^lsb
-- @
parseVarLenWordLE :: A.Parser Int
parseVarLenWordLE = go []
where
go :: [Int] -> A.Parser Int
go acc =
A.anyWord8 >>= \x ->
let acc' = fromIntegral (x .&. 0x7f) : acc in
case x .&. 0x80 of
-- more to go
0x80 -> go acc'
-- done, summarize it into a number
-- now the numbers are [z, y, x]
_ -> return $ foldl' (\a new -> a `shiftL` 7 .|. new) 0 acc'
-- | Parse a single byte, and then parse that many number of bytes.
parseByteLengthPrefixedString = do
b <- A.anyWord8
A.take (fromIntegral b)
parseVarLenPrefixedString = do
len <- parseVarLenWordLE
A.take len
parseVarLenPrefixedList :: A.Parser a -> A.Parser [a]
parseVarLenPrefixedList itemParser = do
len <- parseVarLenWordLE
A.count len itemParser
--
-- parsing the DGPH file itself
--
parseDgph = do
A.string "DGPH"
_version <- A.take 4
_versionBuildDate <- parseByteLengthPrefixedString
_versionBuildTime <- parseByteLengthPrefixedString
-- Following, there comes a bunch of XCDependencyNode.
-- A count of nodes is followed by nodes, node refers to parents by their node number.
-- The first node in the list has number 1, 0 represents ... null
nodes <- parseVarLenPrefixedList $ do
isVirtual <- (/= 0) <$> A.anyWord8
parentNum <- if not isVirtual then Just <$> parseVarLenWordLE else return Nothing
name <- parseVarLenPrefixedString
return (parentNum, name)
-- filesystem root node.
rootNodeNum <- parseVarLenWordLE
projectNodeNum <- parseVarLenWordLE
-- Next, XCDepGraphNodeState
nodeStates <- parseVarLenPrefixedList $ do
nodeNum <- parseVarLenWordLE
options <- parseVarLenWordLE
errNum <- parseVarLenWordLE
if errNum == 0
then Right <$> do
mtime <- parseVarLenWordLE
size <- parseVarLenWordLE
mode <- parseVarLenWordLE
return (mtime, size, mode)
else return $ Left errNum
-- Next, a list of XCDependencyCommandInvocationRecord
invocations <- parseVarLenPrefixedList $ do
identifier <- parseVarLenPrefixedString
signature <- A.take 16
desc <- parseVarLenPrefixedString
args <- parseVarLenPrefixedList parseVarLenPrefixedString
env <- parseVarLenPrefixedList parseVarLenPrefixedString
workingDirNodeNum <- parseVarLenWordLE
startTime <- parseDoubleLE
endTime <- parseDoubleLE
exitStatus <- parseVarLenWordLE
builderId <- parseVarLenPrefixedString
activityLogData <- parseVarLenPrefixedString
inputNodeStateNums <- parseVarLenPrefixedList parseVarLenWordLE
outputNodeNums <- parseVarLenPrefixedList parseVarLenWordLE
return (identifier, signature, desc, args, env, workingDirNodeNum, startTime, endTime, exitStatus, builderId, activityLogData, inputNodeStateNums, outputNodeNums)
artifactNodeNums <- parseVarLenPrefixedList parseVarLenWordLE
return (nodes, rootNodeNum, projectNodeNum, nodeStates, invocations, artifactNodeNums)
@yiding
Copy link
Author

yiding commented Nov 19, 2015

See https://gist.github.com/yiding/7a22deff33160c84b04e for parsing the SLF0 encoded activityLogData field.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment