-
-
Save bitemyapp/17b440c0e3fdf4f0951154f5abd2022f to your computer and use it in GitHub Desktop.
Conduit solution to: http://www.reddit.com/r/haskell/comments/2ejzst/streaming_tabseparated_logfile_analysis_with/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module-template: ! 'module MODULE_NAME where | |
' | |
extensions: {} | |
environment: default | |
cabal-file: project.cabal | |
version: 1 | |
ghc-args: [] | |
excluded-modules: [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
server user timestamp event | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
foo 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
bar 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
bar 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
bar 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
bar 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Something else", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Something else", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Something else", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Login", "contents": {"foo": "bar"}} | |
baz 1 2014-10-10 {"tag": "Something else", "contents": {"foo": "bar"}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE NoImplicitPrelude, OverloadedStrings #-} | |
import ClassyPrelude.Conduit | |
import Data.Conduit.Attoparsec (sinkParser) | |
import Data.Aeson.Parser (json) | |
import Data.Aeson.Types (parseEither) | |
import Data.Aeson (withObject, (.:)) | |
main :: IO () | |
main = do | |
runResourceT | |
$ sourceFile "input.txt" | |
$$ (dropLine >> convertLines) | |
=$ countResults | |
-- Use lineAsciiC to stream in all the contents of the line. | |
-- Then we just ignore those contents. | |
dropLine = lineAsciiC $ return () | |
convertLines = do | |
-- peek to see if there are more contents. If so, convert the | |
-- next line and then loop | |
peekC >>= maybe (return ()) (const $ convertLine >> convertLines) | |
-- Stream the contents of a single line | |
convertLine = lineAsciiC $ do | |
-- Get the serverName: everything up until the first tab. | |
-- takeCE 80 is to prevent a memory attack | |
-- The CE nomenclature means "conduit elementwise", since | |
-- we're dealing with chunked data (ByteStrings) | |
serverName <- takeWhileCE (/= 9) =$= takeCE 80 =$= foldC | |
dropField -- user | |
dropField -- timestamp | |
dropCE 1 -- drop the tab after timestamp | |
val <- sinkParser json -- stream into aeson's parser | |
tag <- case parseEither getTag val of | |
Left e -> error e -- could do nicer error handling if we wanted | |
Right tag -> return tag | |
when ((tag :: Text) == "Login") $ yield $ decodeUtf8 serverName | |
dropField = do | |
dropCE 1 | |
dropWhileCE (/= 9) | |
getTag = withObject "" (.: "tag") | |
countResults = do | |
mname <- peekC | |
case mname of | |
Nothing -> return () | |
Just name -> do | |
cnt <- takeWhileC (== name) =$= lengthC | |
liftIO $ putStrLn $ name ++ ": " ++ tshow cnt | |
countResults |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment