Skip to content

Instantly share code, notes, and snippets.

@facundominguez
Last active August 17, 2017 14:29
Show Gist options
  • Save facundominguez/b257a6b1d79533131917066716cdfb13 to your computer and use it in GitHub Desktop.
Save facundominguez/b257a6b1d79533131917066716cdfb13 to your computer and use it in GitHub Desktop.
Bounded memory sorting of events in an eventlog file
diff --git a/ghc-events.cabal b/ghc-events.cabal
index 7fc2c1c..ece7861 100644
--- a/ghc-events.cabal
+++ b/ghc-events.cabal
@@ -63,6 +63,8 @@ library
binary >= 0.7 && < 0.10,
bytestring >= 0.10.4,
array >= 0.2 && < 0.6,
+ filepath >= 1.4.1.1,
+ temporary >= 1.2.0.4,
text >= 0.11.2.3 && < 1.3,
vector >= 0.7 && < 0.13
exposed-modules: GHC.RTS.Events,
diff --git a/src/GHC/RTS/Events.hs b/src/GHC/RTS/Events.hs
index e5c3736..9495d06 100644
--- a/src/GHC/RTS/Events.hs
+++ b/src/GHC/RTS/Events.hs
@@ -44,7 +44,7 @@ module GHC.RTS.Events (
serialiseEventLog,
-- * Utilities
- CapEvent(..), sortEvents,
+ CapEvent(..), sortEvents, sortEventLog,
buildEventTypeMap,
-- * Printing
@@ -67,8 +67,10 @@ module GHC.RTS.Events (
{- Libraries. -}
import Control.Applicative
import Control.Concurrent hiding (ThreadId)
+import Control.Monad (forM, forM_)
import qualified Data.Binary.Put as P
import qualified Data.ByteString as B
+import qualified Data.ByteString.Builder as Builder
import qualified Data.ByteString.Lazy as BL
import Data.IntMap (IntMap)
import qualified Data.IntMap as IM
@@ -84,7 +86,9 @@ import qualified Data.Text.Lazy.Builder.Int as TB
import qualified Data.Text.Lazy.IO as TL
import qualified Data.Vector.Unboxed as VU
import Data.Word
+import System.FilePath ((</>))
import System.IO
+import System.IO.Temp (withSystemTempDirectory)
import Prelude hiding (gcd, rem, id)
import GHC.RTS.EventTypes
@@ -179,6 +183,64 @@ addBlockMarker cap evts =
-- -----------------------------------------------------------------------------
-- Utilities
+
+-- | @sortEventLog eventLog f@ sorts the events in @eventLog@
+-- and passes them to @f@. This function runs in bounded memory.
+--
+-- The events are sent to different temporary files, each file has the events
+-- corresponding to a given capability. Then @f@ is given the result of
+-- merging the files.
+--
+-- @f@ must not use the sorted events after it completes as the temporary
+-- files are removed and the list of events is produced lazily.
+sortEventLog :: EventLog -> ([Event] -> IO a) -> IO a
+sortEventLog eLog f =
+ withSystemTempDirectory "ghc-events" $ \dir -> do
+ m <- splitEvents dir IM.empty (events $ dat eLog)
+ forM_ m $ \h ->
+ BL.hPut h (P.runPut putEVENT_DATA_END) >> hClose h
+ mergeEvents dir (IM.size m) >>= f
+ where
+ capFile :: FilePath -> Int -> FilePath
+ capFile dir capId = dir </> (show capId ++ ".eventlog")
+
+ splitEvents :: FilePath -> IM.IntMap Handle -> [Event]
+ -> IO (IM.IntMap Handle)
+ splitEvents dir m [] = return m
+ splitEvents dir m evs@(e : _) =
+ let evCapId = maybe 0 (+1) . evCap
+ capId = evCapId e
+ in case IM.lookup capId m of
+ -- Create the temporary capability file.
+ Nothing -> do
+ h <- openBinaryFile (capFile dir capId) WriteMode
+ hSetBuffering h (BlockBuffering Nothing)
+ BL.hPut h $ P.runPut $
+ putHeader (header eLog) >> putEVENT_DATA_BEGIN
+ splitEvents dir (IM.insert capId h m) evs
+ -- Send the events to the capability file.
+ Just h ->
+ let (capEvents, rest) = span ((capId ==) . evCapId) evs
+ in do BL.hPut h $ mconcat $
+ map (P.runPut . putEvent) capEvents
+ splitEvents dir m rest
+
+ mergeEvents :: FilePath -> Int -> IO [Event]
+ mergeEvents dir n = do
+ ees <- forM [0..n - 1] $ readEventLogFromFile . capFile dir
+ evss <- mapM (either error (return . events . dat)) ees
+ let mergeAll [] = []
+ mergeAll [xs] = xs
+ mergeAll xs = mergeAll (mergePairs xs)
+ mergePairs (x : y : xss) = merge x y : mergePairs xss
+ mergePairs xs = xs
+ merge (x : xs) (y : ys) =
+ if evTime x <= evTime y then x : y : merge xs ys
+ else y : x : merge xs ys
+ merge [] ys = ys
+ merge xs [] = xs
+ return (mergeAll evss)
+
sortEvents :: [Event] -> [Event]
sortEvents = sortBy (compare `on` evTime)
diff --git a/src/GHC/RTS/Events/Binary.hs b/src/GHC/RTS/Events/Binary.hs
index 39f707a..7ce8b07 100644
--- a/src/GHC/RTS/Events/Binary.hs
+++ b/src/GHC/RTS/Events/Binary.hs
@@ -18,6 +18,8 @@ module GHC.RTS.Events.Binary
, putEventLog
, putHeader
, putEvent
+ , putEVENT_DATA_BEGIN
+ , putEVENT_DATA_END
-- * Perf events
, nEVENT_PERF_NAME
@@ -850,11 +852,19 @@ putHeader (Header ets) = do
putE (0 :: Word32)
putMarker EVENT_ET_END
+putEVENT_DATA_BEGIN :: PutM ()
+putEVENT_DATA_BEGIN =
+ putMarker EVENT_DATA_BEGIN -- Word32
+
+putEVENT_DATA_END :: PutM ()
+putEVENT_DATA_END =
+ putMarker EVENT_DATA_END -- Word16
+
putData :: Data -> PutM ()
putData (Data es) = do
- putMarker EVENT_DATA_BEGIN -- Word32
+ putEVENT_DATA_BEGIN -- Word32
mapM_ putEvent es
- putType EVENT_DATA_END -- Word16
+ putEVENT_DATA_END -- Word16
eventTypeNum :: EventInfo -> EventTypeNum
eventTypeNum e = case e of
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment