{-# LANGUAGE GADTs #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE LiberalTypeSynonyms #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FunctionalDependencies #-}
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as CH
import qualified Data.ByteString.Lazy as L
import qualified Data.Csv as C
import qualified Data.Csv.Streaming as CS
import qualified Data.Vector as V
import Control.Applicative ( (<$>), (<*>) )
import Control.Lens ( defaultRules, lensField, makeLensesWith, (&), (.~), (^.), (??) )
import Control.Monad ( mzero, unless, void )
import Data.Char ( ord )
import Data.Conduit ( GInfConduit, Sink, awaitE, runResourceT, yield, ($=), (=$=), ($$) )
import Data.Conduit.Binary ( lines, sourceFile )
import Data.Conduit.Util ( SinkStateResult(StateProcessing), sinkState )
import Data.Csv ( DecodeOptions(..), defaultDecodeOptions, (.!) )
import Data.Data ( Data, Typeable )
import Data.DateTime ( DateTime )
import Data.Table ( Tabular(..), Candidate, Primary, Supplemental, Tab, Table
, autoIncrement, empty, insert, table )
import Prelude hiding ( lines )
import Debug.Trace
==========
手元にパケットのログデータをつめたTSVファイルがあったので、それを使用します。
2012-10-18 08:56:43.337266000000 -40 -92 66 3 00:26:B6:AA:AA:AA Askey Computer tp
ログの形式ほぼそのままに、先頭にIDだけを追加したデータ型を作ります。
data Packet = Packet
{ pId :: Int
, pDateTime :: DateTime
, pDbmSignal :: Int
, pDbmNoise :: Int
, pDbSignal :: Int
, pDbNoise :: Int
, pMacAddr :: String
, pVendor :: String
, pSource :: String
}
deriving (Eq,Data,Ord,Read,Show,Typeable)
cassava (Data.Csv) を使います。
instance C.FromRecord Packet where
parseRecord v
| V.length v == 8 = Packet 0
<$> v .! 0
<*> v .! 1
<*> v .! 2
<*> v .! 3
<*> v .! 4
<*> v .! 5
<*> v .! 6
<*> v .! 7
| otherwise = mzero
instance C.FromField DateTime where
parseField s = return (read $ CH.unpack s)
makeLensesWith ?? ''Packet $ defaultRules & lensField .~ \x -> Just (x ++ "_")
class Ord (PKT t) => Tabular t where
instance Tabular Packet where
-- The primary key type
type PKT Packet = Int
-- The type used internally for colmuns
data Key k Packet b where
PID :: Key Primary Packet Int
PDateTime :: Key Supplemental Packet DateTime
PDbmSignal :: Key Supplemental Packet Int
PMacAddr :: Key Supplemental Packet String
PVendor :: Key Supplemental Packet String
PSource :: Key Supplemental Packet String
-- Used to store indices
data Tab Packet i = PacketTab (i Primary Int)
(i Supplemental DateTime)
(i Supplemental Int)
(i Supplemental String)
(i Supplemental String)
(i Supplemental String)
-- Extract the value of a Key
-- fetch :: Key k t a -> t -> a
fetch PID = pId
fetch PDateTime = pDateTime
fetch PDbmSignal = pDbmSignal
fetch PMacAddr = pMacAddr
fetch PVendor = pVendor
fetch PSource = pSource
-- Every Table has one Primary Key
-- primary :: Key Primary t (PKT t)
primary = PID
-- ... and so if you find one, it had better be that one!
-- primarily :: Key Primary t a -> (a ~ PKT t => r) -> r
primarily PID r = r
-- Construct a Tab given a function from key to index
-- mkTab :: Applicative h => (forall k a. IsKeyType k a => Key k t a -> h (i k a)) -> h (Tab t i)
mkTab f = PacketTab <$> f PID
<*> f PDateTime
<*> f PDbmSignal
<*> f PMacAddr
<*> f PVendor
<*> f PSource
-- Lookup an index in a Tab
-- ixTab :: Tab t i -> Key k t a -> i k a
ixTab (PacketTab x _ _ _ _ _) PID = x
ixTab (PacketTab _ x _ _ _ _) PDateTime = x
ixTab (PacketTab _ _ x _ _ _) PDbmSignal = x
ixTab (PacketTab _ _ _ x _ _) PMacAddr = x
ixTab (PacketTab _ _ _ _ x _) PVendor = x
ixTab (PacketTab _ _ _ _ _ x) PSource = x
forTab (PacketTab p a b c d e) f = PacketTab <$> f PID p
<*> f PDateTime a
<*> f PDbmSignal b
<*> f PMacAddr c
<*> f PVendor d
<*> f PSource e
autoTab = autoIncrement pId_
sinkPacketIntoTable :: (Monad m) => Sink Packet m (Table Packet)
sinkPacketIntoTable = sinkState empty push finish
where
push table pac = return (StateProcessing $ insert pac table)
finish table = return table
decodeCsv :: (Monad m)
=> C.DecodeOptions
-> Bool
-> GInfConduit S.ByteString m Packet
decodeCsv decOpt header = loop header id
where
loop skip front = awaitE >>= either (finish skip front) (go skip front)
finish skip front r = do
unless (L.null final) (void $ push $ CS.decodeWith decOpt skip final)
return r
where
final = front L.empty
go skip sofar more = do
left <- push $ CS.decodeWith decOpt skip (sofar $ L.fromChunks [more])
loop False (L.append left)
push (CS.Cons (Right r) rs) = yield r >> push rs
push (CS.Cons (Left m) rs) = push rs
push (CS.Nil Nothing left) = return left
push (CS.Nil (Just m) left) = return left
tsvDecodeOption :: DecodeOptions
tsvDecodeOption = defaultDecodeOptions { decDelimiter = fromIntegral (ord '\t') }
- ghci
- :m + Control.Lens Data.Foldable Data.Table
- :set prompt "ghci> "
- :l Wifi
testList :: [Packet]
testList =
[ Packet 0 ((read "2012-10-17 23:56:42.8315900 UTC")::DateTime) (-42) (-91) 63 4 "00:26:B6:AA:AA:AA" "Askey Computer" "tp"
, Packet 0 ((read "2012-10-17 23:56:42.8322130 UTC")::DateTime) (-41) (-91) 65 4 "00:26:B6:AA:AA:AA" "Askey Computer" "tp"
, Packet 0 ((read "2012-10-17 23:56:52.6748680 UTC")::DateTime) (-92) (-92) 3 3 "44:6D:57:BB:BB:BB" "Liteon Technology Corporation" "tp"
, Packet 0 ((read "2012-10-17 23:56:52.7029370 UTC")::DateTime) (-92) (-92) 3 3 "44:6D:57:BB:BB:BB" "Liteon Technology Corporation" "tp"
]
test1 :: Table Packet
test1 = testList ^. table
- try some queries
- test1
- test1 ^. with PDbmSignal (>=) (-90)
- test1 ^. with PDbmSignal (<) (-90)
- test1 & with PDbmSignal (<) (-90) .~ empty
- test1 ^. with PDbmSignal (>=) (-90) ^@.. group PMacAddr
- for_ test1 $ putStrLn . show
- test1 ^. folded.act $ putStrLn.show
- toList $ test1
test2 :: FilePath -> IO (Table Packet)
test2 f = runResourceT $ sourceFile f $= lines =$= decodeCsv opt False $$ sinkPacketIntoTable
where
opt = tsvDecodeOption
- pt <- test2 "wifi_300.tsv"
- try some queries
- count test2
- pt ^?! ix 300
- take 2 $ pt ^. with PDbmSignal (>=) (-70) ^@.. group PMacAddr
- length $ pt ^. with PDbmSignal (>=) (-70) ^@.. group PMacAddr
- length $ filter ((2 <=) . (count.snd)) $ pt ^. with PDbmSignal (>=) (-70) ^@.. group PMacAddr
- map fst $ filter ((2 <=) . (count.snd)) $ pt ^. with PDbmSignal (>=) (-70) ^@.. group PMacAddr
- let cnt a t = if n > 1 then a + n else a where { n = (count.snd) t }
- Data.Foldable.foldl' cnt 0 $ pt ^. with PDbmSignal (>=) (-90) ^. with PSource (==) "tp" ^@.. group PMacAddr
- projection?
- いろんなこと