Skip to content

Instantly share code, notes, and snippets.

@zsol
Created June 3, 2012 08:48
Show Gist options
  • Save zsol/2862617 to your computer and use it in GitHub Desktop.
Save zsol/2862617 to your computer and use it in GitHub Desktop.
parse logs
{-# LANGUAGE NoMonomorphismRestriction #-}
import Text.Parsec
import Data.List.Split (splitOn)
-- | One log field: every character up to the next whitespace character.
-- The terminating whitespace is consumed but is not part of the result.
field = anyChar `manyTill` space
-- | Recognise a @presentations_delete@ usage line.
--
-- Skips the first three space-terminated fields, then requires the literal
-- @"usage INFO "@, any text up to the next @\':\'@, and finally
-- @"presentations_delete "@.  Yields 'True' on a match.  Any failure is
-- rolled back by 'try' and turned into 'False', so this parser itself
-- never fails.
logline = matched <|> return False
  where
    matched = try $
      Text.Parsec.count 3 field
        >> string "usage INFO "
        >> manyTill anyChar (char ':')
        >> string "presentations_delete "
        >> return True
-- | Run 'logline' over a single input line.  The source name that Parsec
-- would show in error messages is left empty.
parseLogline line = parse logline sourceName line
  where
    sourceName = ""
-- | Parse every line of the input independently with 'parseLogline'.
parseLoglines logLines = [parseLogline line | line <- logLines]
-- | Collapse a parse result to a flag: a 'Right' yields the 'Bool' it
-- carries, a 'Left' always yields 'False'.
--
-- Note this is not the same as @Data.Either.isRight@: @Right False@ maps
-- to 'False' here.
isRight :: Either e Bool -> Bool
isRight = either (const False) id
-- | Zero-based list indexing; a plain alias for the built-in '!!'.
-- Like '!!', it is partial: an out-of-range index is a runtime error.
at :: [a] -> Int -> a
at xs i = xs !! i
-- | Decide whether a tokenised log line is a @usage@ entry for the given
-- function name.
--
-- The line matches when its fourth field (index 3) is exactly @"usage"@ and
-- the text between the first and second @\':\'@ of its sixth field (index 5)
-- equals @pattern@.  For example, a sixth field of
-- @"264:presentations_delete"@ matches the pattern
-- @"presentations_delete"@.
--
-- The function is total: lines with too few fields, or whose sixth field
-- contains no @\':\'@, yield 'False' instead of raising an index error.
match_usage_and_function :: String -> [String] -> Bool
match_usage_and_function pattern fields =
  case drop 3 fields of
    ("usage" : _ : tagged : _) -> functionName tagged == Just pattern
    _                          -> False
  where
    -- Second ':'-separated component of the field, if there is one.
    functionName tagged =
      case break (== ':') tagged of
        (_, ':' : rest) -> Just (takeWhile (/= ':') rest)
        _               -> Nothing
-- | Fold step for counting: bump the accumulator when the flag is 'True',
-- otherwise pass it through unchanged.
countTrue :: Num a => Bool -> a -> a
countTrue flag acc
  | flag      = acc + 1
  | otherwise = acc
-- | Number of list elements that satisfy the predicate.
count :: (a -> Bool) -> [a] -> Int
count p xs = length [x | x <- xs, p x]
-- | The "reduce" step: how many of the mapped flags are 'True'.
-- Uses this module's own 'Main.count', qualified because Parsec also
-- exports a @count@.
reducer :: [Bool] -> Int
reducer flags = Main.count id flags
-- | Break every line into its whitespace-separated words.
tokenized :: [String] -> [[String]]
tokenized logLines = [words line | line <- logLines]
-- | The "map" step: tokenise each line and flag the ones that are
-- @presentations_delete@ usage events.
m :: [String] -> [Bool]
m logLines =
  [ match_usage_and_function "presentations_delete" fields
  | fields <- tokenized logLines
  ]
-- | Full map/reduce pipeline: count the lines flagged by 'm'.
r :: [String] -> Int
r logLines = reducer (m logLines)
-- Sample log line used as benchmark input.  Its whitespace-separated fields
-- are: date, time, host, the literal "usage", the level "INFO", then
-- "264:presentations_delete" followed by free text.  The literal stops
-- mid-way through the "created:" value.
s = "2012-06-01 01:23:03,657 app6 usage INFO 264:presentations_delete User 10179708 deleted presentation 30334553 (owner: 10179708, title: Frank Macfarlane Burnet, public: 0, version: 14, created: 2012-05-"
-- | Build an input of @n@ identical copies of the sample line 's'.
inp :: Int -> [String]
inp n = replicate n s
-- | The benchmark input: 100000 lines produced by 'inp'.
a :: [String]
a = inp lineCount
  where
    lineCount = 100000
-- | Count the matching lines of 'a' twice and print both totals: first via
-- the Parsec parser, then via the @words@-based map/reduce pipeline.
main :: IO ()
main = do
  let parsecHits = length [() | result <- parseLoglines a, isRight result]
  print parsecHits
  print (r a)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment